Browse Source

buffer: add Buffer.prototype.lastIndexOf()

* Remove unnecessary templating from SearchString

  SearchString used to have separate PatternChar and SubjectChar template type
  arguments, apparently to support things like searching for an 8-bit string
  inside a 16-bit string or vice versa. However, SearchString is only used from
  node_buffer.cc, where PatternChar and SubjectChar are always the same. Since
  this is extra complexity that's unused and untested (simplifying to a single
  Char template argument still compiles and didn't break any unit tests), I
  removed it.

* Use Boyer-Moore[-Horspool] for both indexOf and lastIndexOf

  Add test cases for lastIndexOf. Test the fallback from BMH to
  Boyer-Moore, which looks like it was totally untested before.

* Extra bounds checks in node_buffer.cc

* Extra asserts in string_search.h

* Buffer.lastIndexOf: clean up, enforce consistency w/ String.lastIndexOf

* Polyfill memrchr(3) for non-GNU systems

PR-URL: https://github.com/nodejs/node/pull/4846
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
v6.x
dcposch@dcpos.ch 9 years ago
committed by James M Snell
parent
commit
3350b4712b
  1. 40
      doc/api/buffer.md
  2. 72
      lib/buffer.js
  3. 128
      src/node_buffer.cc
  4. 364
      src/string_search.h
  5. 117
      test/parallel/test-buffer-indexof.js

40
doc/api/buffer.md

@ -988,6 +988,46 @@ for (var key of buf.keys()) {
// 5 // 5
``` ```
### buf.lastIndexOf(value[, byteOffset][, encoding])
* `value` {String|Buffer|Number}
* `byteOffset` {Number} Default: `buf.length`
* `encoding` {String} Default: `'utf8'`
* Return: {Number}
Identical to [`Buffer#indexOf()`][], but searches the Buffer from back to front
instead of front to back. Returns the starting index position of `value` in
Buffer or `-1` if the Buffer does not contain `value`. The `value` can be a
String, Buffer or Number. Strings are by default interpreted as UTF8. If
`byteOffset` is provided, will return the last match that begins at or before
`byteOffset`.
```js
const buf = new Buffer('this buffer is a buffer');
buf.lastIndexOf('this');
// returns 0
buf.lastIndexOf('buffer');
// returns 17
buf.lastIndexOf(new Buffer('buffer'));
// returns 17
buf.lastIndexOf(97); // ascii for 'a'
// returns 15
buf.lastIndexOf(new Buffer('yolo'));
// returns -1
buf.lastIndexOf('buffer', 5)
// returns 5
buf.lastIndexOf('buffer', 4)
// returns -1
const utf16Buffer = new Buffer('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2');
utf16Buffer.lastIndexOf('\u03a3', undefined, 'ucs2');
// returns 6
utf16Buffer.lastIndexOf('\u03a3', -5, 'ucs2');
// returns 4
```
### buf.length ### buf.length
* {Number} * {Number}

72
lib/buffer.js

@ -598,7 +598,48 @@ Buffer.prototype.compare = function compare(target,
return binding.compareOffset(this, target, start, thisStart, end, thisEnd); return binding.compareOffset(this, target, start, thisStart, end, thisEnd);
}; };
function slowIndexOf(buffer, val, byteOffset, encoding) {
// Shared implementation behind Buffer#indexOf and Buffer#lastIndexOf.
//
// Forward mode returns the first index of `val` at offset >= `byteOffset`;
// backward mode returns the last index of `val` at offset <= `byteOffset`.
//
// Arguments:
// - buffer - the Buffer to search
// - val - the needle: a string, Buffer, or number
// - byteOffset - starting index into `buffer`; clamped to the int32 range.
//   A string here is treated as the encoding instead.
// - encoding - optional encoding, only relevant if `val` is a string
// - dir - truthy for indexOf (forward), falsy for lastIndexOf (backward)
//
// Throws a TypeError when `val` is not a string, number or Buffer.
function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) {
  const INT32_MAX = 0x7fffffff;
  const INT32_MIN = -0x80000000;

  let offset = byteOffset;
  let enc = encoding;
  if (typeof offset === 'string') {
    // Called as (val, encoding): shift the arguments over.
    enc = offset;
    offset = undefined;
  } else if (offset > INT32_MAX) {
    offset = INT32_MAX;
  } else if (offset < INT32_MIN) {
    offset = INT32_MIN;
  }

  // Coerce to Number. Anything that becomes NaN (undefined, NaN, {}, ...)
  // means "search the whole buffer". Note that `null` coerces to 0, so it is
  // treated as an explicit offset of 0 rather than as a missing offset.
  offset = +offset;
  const forward = !!dir;
  if (isNaN(offset)) {
    offset = forward ? 0 : (buffer.length - 1);
  }

  switch (typeof val) {
    case 'string':
      // No explicit encoding: go straight to the binding with its default.
      return enc === undefined ?
        binding.indexOfString(buffer, val, offset, enc, forward) :
        slowIndexOf(buffer, val, offset, enc, forward);
    case 'number':
      return binding.indexOfNumber(buffer, val, offset, forward);
    default:
      if (val instanceof Buffer) {
        return binding.indexOfBuffer(buffer, val, offset, enc, forward);
      }
      throw new TypeError('"val" argument must be string, number or Buffer');
  }
}
function slowIndexOf(buffer, val, byteOffset, encoding, dir) {
var loweredCase = false; var loweredCase = false;
for (;;) { for (;;) {
switch (encoding) { switch (encoding) {
@ -609,13 +650,13 @@ function slowIndexOf(buffer, val, byteOffset, encoding) {
case 'utf16le': case 'utf16le':
case 'utf-16le': case 'utf-16le':
case 'binary': case 'binary':
return binding.indexOfString(buffer, val, byteOffset, encoding); return binding.indexOfString(buffer, val, byteOffset, encoding, dir);
case 'base64': case 'base64':
case 'ascii': case 'ascii':
case 'hex': case 'hex':
return binding.indexOfBuffer( return binding.indexOfBuffer(
buffer, Buffer.from(val, encoding), byteOffset, encoding); buffer, Buffer.from(val, encoding), byteOffset, encoding, dir);
default: default:
if (loweredCase) { if (loweredCase) {
@ -628,29 +669,14 @@ function slowIndexOf(buffer, val, byteOffset, encoding) {
} }
} }
Buffer.prototype.indexOf = function indexOf(val, byteOffset, encoding) { Buffer.prototype.indexOf = function indexOf(val, byteOffset, encoding) {
if (typeof byteOffset === 'string') { return bidirectionalIndexOf(this, val, byteOffset, encoding, true);
encoding = byteOffset; };
byteOffset = 0;
} else if (byteOffset > 0x7fffffff) {
byteOffset = 0x7fffffff;
} else if (byteOffset < -0x80000000) {
byteOffset = -0x80000000;
}
byteOffset >>= 0;
if (typeof val === 'string') {
if (encoding === undefined) {
return binding.indexOfString(this, val, byteOffset, encoding);
}
return slowIndexOf(this, val, byteOffset, encoding);
} else if (val instanceof Buffer) {
return binding.indexOfBuffer(this, val, byteOffset, encoding);
} else if (typeof val === 'number') {
return binding.indexOfNumber(this, val, byteOffset);
}
throw new TypeError('"val" argument must be string, number or Buffer'); Buffer.prototype.lastIndexOf = function lastIndexOf(val, byteOffset, encoding) {
return bidirectionalIndexOf(this, val, byteOffset, encoding, false);
}; };

128
src/node_buffer.cc

@ -943,9 +943,44 @@ void Compare(const FunctionCallbackInfo<Value> &args) {
} }
// Resolves the caller-supplied offset into the position where an indexOf
// (is_forward == true) or lastIndexOf (is_forward == false) search starts.
//
// Negative offsets are interpreted relative to the end of the buffer.
// Returns an index in [0, length - 1], or -1 when no match is possible:
// the buffer is empty, a forward search starts past the end, or a backward
// search starts before the beginning.
int64_t IndexOfOffset(size_t length, int64_t offset_i64, bool is_forward) {
  const int64_t len = static_cast<int64_t>(length);
  if (len == 0) {
    // Nothing to search in an empty buffer.
    return -1;
  }

  // Translate a negative offset into an absolute position.
  const int64_t resolved = offset_i64 < 0 ? offset_i64 + len : offset_i64;
  if (resolved >= 0 && resolved < len) {
    // The start position lies inside the buffer.
    return resolved;
  }

  if (resolved < 0) {
    // Start lies before the buffer: a forward search covers everything,
    // a backward search has nothing left to look at.
    return is_forward ? 0 : -1;
  }

  // Start lies past the end: a forward search has nothing left to look at,
  // a backward search covers everything.
  return is_forward ? -1 : len - 1;
}
void IndexOfString(const FunctionCallbackInfo<Value>& args) { void IndexOfString(const FunctionCallbackInfo<Value>& args) {
ASSERT(args[1]->IsString()); ASSERT(args[1]->IsString());
ASSERT(args[2]->IsNumber()); ASSERT(args[2]->IsNumber());
ASSERT(args[4]->IsBoolean());
enum encoding enc = ParseEncoding(args.GetIsolate(), enum encoding enc = ParseEncoding(args.GetIsolate(),
args[3], args[3],
@ -955,31 +990,26 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
SPREAD_ARG(args[0], ts_obj); SPREAD_ARG(args[0], ts_obj);
Local<String> needle = args[1].As<String>(); Local<String> needle = args[1].As<String>();
int64_t offset_i64 = args[2]->IntegerValue();
bool is_forward = args[4]->IsTrue();
const char* haystack = ts_obj_data; const char* haystack = ts_obj_data;
const size_t haystack_length = ts_obj_length; const size_t haystack_length = ts_obj_length;
// Extended latin-1 characters are 2 bytes in Utf8. // Extended latin-1 characters are 2 bytes in Utf8.
const size_t needle_length = const size_t needle_length =
enc == BINARY ? needle->Length() : needle->Utf8Length(); enc == BINARY ? needle->Length() : needle->Utf8Length();
if (needle_length == 0 || haystack_length == 0) { if (needle_length == 0 || haystack_length == 0) {
return args.GetReturnValue().Set(-1); return args.GetReturnValue().Set(-1);
} }
int64_t offset_i64 = args[2]->IntegerValue(); int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, is_forward);
size_t offset = 0; if (opt_offset <= -1) {
return args.GetReturnValue().Set(-1);
if (offset_i64 < 0) {
if (offset_i64 + static_cast<int64_t>(haystack_length) < 0) {
offset = 0;
} else {
offset = static_cast<size_t>(haystack_length + offset_i64);
}
} else {
offset = static_cast<size_t>(offset_i64);
} }
size_t offset = static_cast<size_t>(opt_offset);
if (haystack_length < offset || needle_length + offset > haystack_length) { CHECK_LT(offset, haystack_length);
if (is_forward && needle_length + offset > haystack_length) {
return args.GetReturnValue().Set(-1); return args.GetReturnValue().Set(-1);
} }
@ -1007,13 +1037,15 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
haystack_length / 2, haystack_length / 2,
decoded_string, decoded_string,
decoder.size() / 2, decoder.size() / 2,
offset / 2); offset / 2,
is_forward);
} else { } else {
result = SearchString(reinterpret_cast<const uint16_t*>(haystack), result = SearchString(reinterpret_cast<const uint16_t*>(haystack),
haystack_length / 2, haystack_length / 2,
reinterpret_cast<const uint16_t*>(*needle_value), reinterpret_cast<const uint16_t*>(*needle_value),
needle_value.length(), needle_value.length(),
offset / 2); offset / 2,
is_forward);
} }
result *= 2; result *= 2;
} else if (enc == UTF8) { } else if (enc == UTF8) {
@ -1025,7 +1057,8 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
haystack_length, haystack_length,
reinterpret_cast<const uint8_t*>(*needle_value), reinterpret_cast<const uint8_t*>(*needle_value),
needle_length, needle_length,
offset); offset,
is_forward);
} else if (enc == BINARY) { } else if (enc == BINARY) {
uint8_t* needle_data = static_cast<uint8_t*>(malloc(needle_length)); uint8_t* needle_data = static_cast<uint8_t*>(malloc(needle_length));
if (needle_data == nullptr) { if (needle_data == nullptr) {
@ -1038,7 +1071,8 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
haystack_length, haystack_length,
needle_data, needle_data,
needle_length, needle_length,
offset); offset,
is_forward);
free(needle_data); free(needle_data);
} }
@ -1049,17 +1083,18 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) { void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
ASSERT(args[1]->IsObject()); ASSERT(args[1]->IsObject());
ASSERT(args[2]->IsNumber()); ASSERT(args[2]->IsNumber());
ASSERT(args[4]->IsBoolean());
enum encoding enc = ParseEncoding(args.GetIsolate(), enum encoding enc = ParseEncoding(args.GetIsolate(),
args[3], args[3],
UTF8); UTF8);
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);
SPREAD_ARG(args[0], ts_obj); SPREAD_ARG(args[0], ts_obj);
SPREAD_ARG(args[1], buf); SPREAD_ARG(args[1], buf);
int64_t offset_i64 = args[2]->IntegerValue();
if (buf_length > 0) bool is_forward = args[4]->IsTrue();
CHECK_NE(buf_data, nullptr);
const char* haystack = ts_obj_data; const char* haystack = ts_obj_data;
const size_t haystack_length = ts_obj_length; const size_t haystack_length = ts_obj_length;
@ -1070,19 +1105,13 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
return args.GetReturnValue().Set(-1); return args.GetReturnValue().Set(-1);
} }
int64_t offset_i64 = args[2]->IntegerValue(); int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, is_forward);
size_t offset = 0; if (opt_offset <= -1) {
return args.GetReturnValue().Set(-1);
if (offset_i64 < 0) {
if (offset_i64 + static_cast<int64_t>(haystack_length) < 0)
offset = 0;
else
offset = static_cast<size_t>(haystack_length + offset_i64);
} else {
offset = static_cast<size_t>(offset_i64);
} }
size_t offset = static_cast<size_t>(opt_offset);
if (haystack_length < offset || needle_length + offset > haystack_length) { CHECK_LT(offset, haystack_length);
if (is_forward && needle_length + offset > haystack_length) {
return args.GetReturnValue().Set(-1); return args.GetReturnValue().Set(-1);
} }
@ -1097,7 +1126,8 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
haystack_length / 2, haystack_length / 2,
reinterpret_cast<const uint16_t*>(needle), reinterpret_cast<const uint16_t*>(needle),
needle_length / 2, needle_length / 2,
offset / 2); offset / 2,
is_forward);
result *= 2; result *= 2;
} else { } else {
result = SearchString( result = SearchString(
@ -1105,7 +1135,8 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
haystack_length, haystack_length,
reinterpret_cast<const uint8_t*>(needle), reinterpret_cast<const uint8_t*>(needle),
needle_length, needle_length,
offset); offset,
is_forward);
} }
args.GetReturnValue().Set( args.GetReturnValue().Set(
@ -1115,28 +1146,29 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
void IndexOfNumber(const FunctionCallbackInfo<Value>& args) { void IndexOfNumber(const FunctionCallbackInfo<Value>& args) {
ASSERT(args[1]->IsNumber()); ASSERT(args[1]->IsNumber());
ASSERT(args[2]->IsNumber()); ASSERT(args[2]->IsNumber());
ASSERT(args[3]->IsBoolean());
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
SPREAD_ARG(args[0], ts_obj); SPREAD_ARG(args[0], ts_obj);
uint32_t needle = args[1]->Uint32Value(); uint32_t needle = args[1]->Uint32Value();
int64_t offset_i64 = args[2]->IntegerValue(); int64_t offset_i64 = args[2]->IntegerValue();
size_t offset; bool is_forward = args[3]->IsTrue();
if (offset_i64 < 0) {
if (offset_i64 + static_cast<int64_t>(ts_obj_length) < 0)
offset = 0;
else
offset = static_cast<size_t>(ts_obj_length + offset_i64);
} else {
offset = static_cast<size_t>(offset_i64);
}
if (ts_obj_length == 0 || offset + 1 > ts_obj_length) int64_t opt_offset = IndexOfOffset(ts_obj_length, offset_i64, is_forward);
if (opt_offset <= -1) {
return args.GetReturnValue().Set(-1); return args.GetReturnValue().Set(-1);
}
size_t offset = static_cast<size_t>(opt_offset);
CHECK_LT(offset, ts_obj_length);
void* ptr = memchr(ts_obj_data + offset, needle, ts_obj_length - offset); const void* ptr;
char* ptr_char = static_cast<char*>(ptr); if (is_forward) {
ptr = memchr(ts_obj_data + offset, needle, ts_obj_length - offset);
} else {
ptr = node::stringsearch::MemrchrFill(ts_obj_data, needle, offset + 1);
}
const char* ptr_char = static_cast<const char*>(ptr);
args.GetReturnValue().Set(ptr ? static_cast<int>(ptr_char - ts_obj_data) args.GetReturnValue().Set(ptr ? static_cast<int>(ptr_char - ts_obj_data)
: -1); : -1);
} }

364
src/string_search.h

@ -21,60 +21,35 @@ T Max(T a, T b) {
static const uint32_t kMaxOneByteCharCodeU = 0xff; static const uint32_t kMaxOneByteCharCodeU = 0xff;
static inline size_t NonOneByteStart(const uint16_t* chars, size_t length) {
const uint16_t* limit = chars + length;
const uint16_t* start = chars;
while (chars < limit) {
if (*chars > kMaxOneByteCharCodeU)
return static_cast<size_t>(chars - start);
++chars;
}
return static_cast<size_t>(chars - start);
}
static inline bool IsOneByte(const uint16_t* chars, size_t length) {
return NonOneByteStart(chars, length) >= length;
}
template <typename T> template <typename T>
class Vector { class Vector {
public: public:
Vector(T* data, size_t length) : start_(data), length_(length) { Vector(T* data, size_t length, bool isForward)
: start_(data), length_(length), is_forward_(isForward) {
ASSERT(length > 0 && data != nullptr); ASSERT(length > 0 && data != nullptr);
} }
// Returns the length of the vector. // Returns the start of the memory range.
// For vector v this is NOT necessarily &v[0], see forward().
const T* start() const { return start_; }
// Returns the length of the vector, in characters.
size_t length() const { return length_; } size_t length() const { return length_; }
T* start() const { return start_; } // Returns true if the Vector is front-to-back, false if back-to-front.
// In the latter case, v[0] corresponds to the *end* of the memory range.
size_t forward() const { return is_forward_; }
// Access individual vector elements - checks bounds in debug mode. // Access individual vector elements - checks bounds in debug mode.
T& operator[](size_t index) const { T& operator[](size_t index) const {
ASSERT(0 <= index && index < length_); ASSERT(0 <= index && index < length_);
return start_[index]; return start_[is_forward_ ? index : (length_ - index - 1)];
}
const T& at(size_t index) const { return operator[](index); }
bool operator==(const Vector<T>& other) const {
if (length_ != other.length_)
return false;
if (start_ == other.start_)
return true;
for (size_t i = 0; i < length_; ++i) {
if (start_[i] != other.start_[i]) {
return false;
}
}
return true;
} }
private: private:
T* start_; T* start_;
size_t length_; size_t length_;
bool is_forward_;
}; };
@ -114,31 +89,17 @@ class StringSearchBase {
// Table used temporarily while building the BoyerMoore good suffix // Table used temporarily while building the BoyerMoore good suffix
// shift table. // shift table.
static int kSuffixTable[kBMMaxShift + 1]; static int kSuffixTable[kBMMaxShift + 1];
static inline bool IsOneByteString(Vector<const uint8_t> string) {
return true;
}
static inline bool IsOneByteString(Vector<const uint16_t> string) {
return IsOneByte(string.start(), string.length());
}
}; };
template <typename PatternChar, typename SubjectChar> template <typename Char>
class StringSearch : private StringSearchBase { class StringSearch : private StringSearchBase {
public: public:
explicit StringSearch(Vector<const PatternChar> pattern) explicit StringSearch(Vector<const Char> pattern)
: pattern_(pattern), start_(0) { : pattern_(pattern), start_(0) {
if (pattern.length() >= kBMMaxShift) { if (pattern.length() >= kBMMaxShift) {
start_ = pattern.length() - kBMMaxShift; start_ = pattern.length() - kBMMaxShift;
} }
if (sizeof(PatternChar) > sizeof(SubjectChar)) {
if (!IsOneByteString(pattern_)) {
strategy_ = &FailSearch;
return;
}
}
size_t pattern_length = pattern_.length(); size_t pattern_length = pattern_.length();
CHECK_GT(pattern_length, 0); CHECK_GT(pattern_length, 0);
if (pattern_length < kBMMinPatternLength) { if (pattern_length < kBMMinPatternLength) {
@ -152,12 +113,12 @@ class StringSearch : private StringSearchBase {
strategy_ = &InitialSearch; strategy_ = &InitialSearch;
} }
size_t Search(Vector<const SubjectChar> subject, size_t index) { size_t Search(Vector<const Char> subject, size_t index) {
return strategy_(this, subject, index); return strategy_(this, subject, index);
} }
static inline int AlphabetSize() { static inline int AlphabetSize() {
if (sizeof(PatternChar) == 1) { if (sizeof(Char) == 1) {
// Latin1 needle. // Latin1 needle.
return kLatin1AlphabetSize; return kLatin1AlphabetSize;
} else { } else {
@ -165,42 +126,42 @@ class StringSearch : private StringSearchBase {
return kUC16AlphabetSize; return kUC16AlphabetSize;
} }
static_assert(sizeof(PatternChar) == sizeof(uint8_t) || static_assert(sizeof(Char) == sizeof(uint8_t) ||
sizeof(PatternChar) == sizeof(uint16_t), sizeof(Char) == sizeof(uint16_t),
"sizeof(PatternChar) == sizeof(uint16_t) || sizeof(uint8_t)"); "sizeof(Char) == sizeof(uint16_t) || sizeof(uint8_t)");
} }
private: private:
typedef size_t (*SearchFunction)( // NOLINT - it's not a cast! typedef size_t (*SearchFunction)( // NOLINT - it's not a cast!
StringSearch<PatternChar, SubjectChar>*, StringSearch<Char>*,
Vector<const SubjectChar>, Vector<const Char>,
size_t); size_t);
static size_t FailSearch(StringSearch<PatternChar, SubjectChar>*, static size_t FailSearch(StringSearch<Char>*,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t) { size_t) {
return subject.length(); return subject.length();
} }
static size_t SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search, static size_t SingleCharSearch(StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t start_index); size_t start_index);
static size_t LinearSearch(StringSearch<PatternChar, SubjectChar>* search, static size_t LinearSearch(StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t start_index); size_t start_index);
static size_t InitialSearch(StringSearch<PatternChar, SubjectChar>* search, static size_t InitialSearch(StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t start_index); size_t start_index);
static size_t BoyerMooreHorspoolSearch( static size_t BoyerMooreHorspoolSearch(
StringSearch<PatternChar, SubjectChar>* search, StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t start_index); size_t start_index);
static size_t BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search, static size_t BoyerMooreSearch(StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t start_index); size_t start_index);
void PopulateBoyerMooreHorspoolTable(); void PopulateBoyerMooreHorspoolTable();
@ -214,16 +175,10 @@ class StringSearch : private StringSearchBase {
} }
static inline int CharOccurrence(int* bad_char_occurrence, static inline int CharOccurrence(int* bad_char_occurrence,
SubjectChar char_code) { Char char_code) {
if (sizeof(SubjectChar) == 1) { if (sizeof(Char) == 1) {
return bad_char_occurrence[static_cast<int>(char_code)]; return bad_char_occurrence[static_cast<int>(char_code)];
} }
if (sizeof(PatternChar) == 1) {
if (exceedsOneByte(char_code)) {
return -1;
}
return bad_char_occurrence[static_cast<unsigned int>(char_code)];
}
// Both pattern and subject are UC16. Reduce character to equivalence class. // Both pattern and subject are UC16. Reduce character to equivalence class.
int equiv_class = char_code % kUC16AlphabetSize; int equiv_class = char_code % kUC16AlphabetSize;
return bad_char_occurrence[equiv_class]; return bad_char_occurrence[equiv_class];
@ -250,7 +205,7 @@ class StringSearch : private StringSearchBase {
} }
// The pattern to search for. // The pattern to search for.
Vector<const PatternChar> pattern_; Vector<const Char> pattern_;
// Pointer to implementation of the search. // Pointer to implementation of the search.
SearchFunction strategy_; SearchFunction strategy_;
// Cache value of Max(0, pattern_length() - kBMMaxShift) // Cache value of Max(0, pattern_length() - kBMMaxShift)
@ -274,111 +229,138 @@ inline uint8_t GetHighestValueByte(uint16_t character) {
inline uint8_t GetHighestValueByte(uint8_t character) { return character; } inline uint8_t GetHighestValueByte(uint8_t character) { return character; }
template <typename PatternChar, typename SubjectChar> // Searches for a byte value in a memory buffer, back to front.
inline size_t FindFirstCharacter(Vector<const PatternChar> pattern, // Uses memrchr(3) on systems which support it, for speed.
Vector<const SubjectChar> subject, size_t index) { // Falls back to a vanilla for loop on non-GNU systems such as Windows.
const PatternChar pattern_first_char = pattern[0]; inline const void* MemrchrFill(const void* haystack, uint8_t needle,
size_t haystack_len) {
#ifdef _GNU_SOURCE
return memrchr(haystack, needle, haystack_len);
#else
const uint8_t* haystack8 = static_cast<const uint8_t*>(haystack);
for (size_t i = haystack_len - 1; i != static_cast<size_t>(-1); i--) {
if (haystack8[i] == needle) {
return haystack8 + i;
}
}
return nullptr;
#endif
}
// Finds the first occurrence of *two-byte* character pattern[0] in the string
// `subject`. Does not check that the whole pattern matches.
template <typename Char>
inline size_t FindFirstCharacter(Vector<const Char> pattern,
Vector<const Char> subject, size_t index) {
const Char pattern_first_char = pattern[0];
const size_t max_n = (subject.length() - pattern.length() + 1); const size_t max_n = (subject.length() - pattern.length() + 1);
// For speed, search for the more `rare` of the two bytes in pattern[0]
// using memchr / memrchr (which are much faster than a simple for loop).
const uint8_t search_byte = GetHighestValueByte(pattern_first_char); const uint8_t search_byte = GetHighestValueByte(pattern_first_char);
const SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
size_t pos = index; size_t pos = index;
do { do {
const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>( size_t bytes_to_search;
memchr(subject.start() + pos, search_byte, const void* void_pos;
(max_n - pos) * sizeof(SubjectChar))); if (subject.forward()) {
// Assert that bytes_to_search won't overflow
CHECK_LE(pos, max_n);
CHECK_LE(max_n - pos, SIZE_MAX / sizeof(Char));
bytes_to_search = (max_n - pos) * sizeof(Char);
void_pos = memchr(subject.start() + pos, search_byte, bytes_to_search);
} else {
CHECK_LE(pos, subject.length());
CHECK_LE(subject.length() - pos, SIZE_MAX / sizeof(Char));
bytes_to_search = (subject.length() - pos) * sizeof(Char);
void_pos = MemrchrFill(subject.start(), search_byte, bytes_to_search);
}
const Char* char_pos = static_cast<const Char*>(void_pos);
if (char_pos == nullptr) if (char_pos == nullptr)
return subject.length(); return subject.length();
char_pos = AlignDown(char_pos, sizeof(SubjectChar));
pos = static_cast<size_t>(char_pos - subject.start()); // Then, for each match, verify that the full two bytes match pattern[0].
if (subject[pos] == search_char) char_pos = AlignDown(char_pos, sizeof(Char));
size_t raw_pos = static_cast<size_t>(char_pos - subject.start());
pos = subject.forward() ? raw_pos : (subject.length() - raw_pos - 1);
if (subject[pos] == pattern_first_char) {
// Match found, hooray.
return pos; return pos;
}
// Search byte matched, but the other byte of pattern[0] didn't. Keep going.
} while (++pos < max_n); } while (++pos < max_n);
return subject.length(); return subject.length();
} }
// Finds the first occurrence of the byte pattern[0] in string `subject`.
// Does not verify that the whole pattern matches.
template <> template <>
inline size_t FindFirstCharacter(Vector<const uint8_t> pattern, inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,
Vector<const uint8_t> subject, Vector<const uint8_t> subject,
size_t index) { size_t index) {
const uint8_t pattern_first_char = pattern[0]; const uint8_t pattern_first_char = pattern[0];
const size_t subj_len = subject.length();
const size_t max_n = (subject.length() - pattern.length() + 1); const size_t max_n = (subject.length() - pattern.length() + 1);
const uint8_t* char_pos = reinterpret_cast<const uint8_t*>( const void* pos;
memchr(subject.start() + index, pattern_first_char, max_n - index)); if (subject.forward()) {
if (char_pos == nullptr) pos = memchr(subject.start() + index, pattern_first_char, max_n - index);
return subject.length(); } else {
return static_cast<size_t>(char_pos - subject.start()); pos = MemrchrFill(subject.start(), pattern_first_char, subj_len - index);
}
const uint8_t* char_pos = static_cast<const uint8_t*>(pos);
if (char_pos == nullptr) {
return subj_len;
}
size_t raw_pos = static_cast<size_t>(char_pos - subject.start());
return subject.forward() ? raw_pos : (subj_len - raw_pos - 1);
} }
//--------------------------------------------------------------------- //---------------------------------------------------------------------
// Single Character Pattern Search Strategy // Single Character Pattern Search Strategy
//--------------------------------------------------------------------- //---------------------------------------------------------------------
template <typename PatternChar, typename SubjectChar> template <typename Char>
size_t StringSearch<PatternChar, SubjectChar>::SingleCharSearch( size_t StringSearch<Char>::SingleCharSearch(
StringSearch<PatternChar, SubjectChar>* search, StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t index) { size_t index) {
CHECK_EQ(1, search->pattern_.length()); CHECK_EQ(1, search->pattern_.length());
PatternChar pattern_first_char = search->pattern_[0];
if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
return FindFirstCharacter(search->pattern_, subject, index); return FindFirstCharacter(search->pattern_, subject, index);
} else {
if (sizeof(PatternChar) > sizeof(SubjectChar)) {
if (exceedsOneByte(pattern_first_char)) {
return -1;
}
}
return FindFirstCharacter(search->pattern_, subject, index);
}
} }
//--------------------------------------------------------------------- //---------------------------------------------------------------------
// Linear Search Strategy // Linear Search Strategy
//--------------------------------------------------------------------- //---------------------------------------------------------------------
template <typename PatternChar, typename SubjectChar>
inline bool CharCompare(const PatternChar* pattern,
const SubjectChar* subject,
size_t length) {
ASSERT_GT(length, 0);
size_t pos = 0;
do {
if (pattern[pos] != subject[pos]) {
return false;
}
pos++;
} while (pos < length);
return true;
}
// Simple linear search for short patterns. Never bails out. // Simple linear search for short patterns. Never bails out.
template <typename PatternChar, typename SubjectChar> template <typename Char>
size_t StringSearch<PatternChar, SubjectChar>::LinearSearch( size_t StringSearch<Char>::LinearSearch(
StringSearch<PatternChar, SubjectChar>* search, StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t index) { size_t index) {
Vector<const PatternChar> pattern = search->pattern_; Vector<const Char> pattern = search->pattern_;
CHECK_GT(pattern.length(), 1); CHECK_GT(pattern.length(), 1);
const size_t pattern_length = pattern.length(); const size_t pattern_length = pattern.length();
size_t i = index;
const size_t n = subject.length() - pattern_length; const size_t n = subject.length() - pattern_length;
while (i <= n) { for (size_t i = index; i <= n; i++) {
i = FindFirstCharacter(pattern, subject, i); i = FindFirstCharacter(pattern, subject, i);
if (i == subject.length()) if (i == subject.length())
return subject.length(); return subject.length();
ASSERT_LE(i, n); ASSERT_LE(i, n);
i++;
// Loop extracted to separate function to allow using return to do bool matches = true;
// a deeper break. for (size_t j = 1; j < pattern_length; j++) {
if (CharCompare(pattern.start() + 1, subject.start() + i, if (pattern[j] != subject[i + j]) {
pattern_length - 1)) { matches = false;
return i - 1; break;
}
}
if (matches) {
return i;
} }
} }
return subject.length(); return subject.length();
@ -388,12 +370,12 @@ size_t StringSearch<PatternChar, SubjectChar>::LinearSearch(
// Boyer-Moore string search // Boyer-Moore string search
//--------------------------------------------------------------------- //---------------------------------------------------------------------
template <typename PatternChar, typename SubjectChar> template <typename Char>
size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch( size_t StringSearch<Char>::BoyerMooreSearch(
StringSearch<PatternChar, SubjectChar>* search, StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t start_index) { size_t start_index) {
Vector<const PatternChar> pattern = search->pattern_; Vector<const Char> pattern = search->pattern_;
const size_t subject_length = subject.length(); const size_t subject_length = subject.length();
const size_t pattern_length = pattern.length(); const size_t pattern_length = pattern.length();
// Only preprocess at most kBMMaxShift last characters of pattern. // Only preprocess at most kBMMaxShift last characters of pattern.
@ -402,7 +384,7 @@ size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
int* bad_char_occurence = search->bad_char_table(); int* bad_char_occurence = search->bad_char_table();
int* good_suffix_shift = search->good_suffix_shift_table(); int* good_suffix_shift = search->good_suffix_shift_table();
PatternChar last_char = pattern[pattern_length - 1]; Char last_char = pattern[pattern_length - 1];
size_t index = start_index; size_t index = start_index;
// Continue search from i. // Continue search from i.
while (index <= subject_length - pattern_length) { while (index <= subject_length - pattern_length) {
@ -426,7 +408,7 @@ size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
// Fall back on BMH shift. // Fall back on BMH shift.
index += pattern_length - 1 - index += pattern_length - 1 -
CharOccurrence(bad_char_occurence, CharOccurrence(bad_char_occurence,
static_cast<SubjectChar>(last_char)); static_cast<Char>(last_char));
} else { } else {
int gs_shift = good_suffix_shift[j + 1]; int gs_shift = good_suffix_shift[j + 1];
int bc_occ = CharOccurrence(bad_char_occurence, c); int bc_occ = CharOccurrence(bad_char_occurence, c);
@ -441,10 +423,10 @@ size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
return subject.length(); return subject.length();
} }
template <typename PatternChar, typename SubjectChar> template <typename Char>
void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() { void StringSearch<Char>::PopulateBoyerMooreTable() {
const size_t pattern_length = pattern_.length(); const size_t pattern_length = pattern_.length();
const PatternChar* pattern = pattern_.start(); Vector<const Char> pattern = pattern_;
// Only look at the last kBMMaxShift characters of pattern (from start_ // Only look at the last kBMMaxShift characters of pattern (from start_
// to pattern_length). // to pattern_length).
const size_t start = start_; const size_t start = start_;
@ -467,12 +449,12 @@ void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
} }
// Find suffixes. // Find suffixes.
PatternChar last_char = pattern[pattern_length - 1]; Char last_char = pattern_[pattern_length - 1];
size_t suffix = pattern_length + 1; size_t suffix = pattern_length + 1;
{ {
size_t i = pattern_length; size_t i = pattern_length;
while (i > start) { while (i > start) {
PatternChar c = pattern[i - 1]; Char c = pattern[i - 1];
while (suffix <= pattern_length && c != pattern[suffix - 1]) { while (suffix <= pattern_length && c != pattern[suffix - 1]) {
if (static_cast<size_t>(shift_table[suffix]) == length) { if (static_cast<size_t>(shift_table[suffix]) == length) {
shift_table[suffix] = suffix - i; shift_table[suffix] = suffix - i;
@ -511,22 +493,22 @@ void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
// Boyer-Moore-Horspool string search. // Boyer-Moore-Horspool string search.
//--------------------------------------------------------------------- //---------------------------------------------------------------------
template <typename PatternChar, typename SubjectChar> template <typename Char>
size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch( size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
StringSearch<PatternChar, SubjectChar>* search, StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t start_index) { size_t start_index) {
Vector<const PatternChar> pattern = search->pattern_; Vector<const Char> pattern = search->pattern_;
const size_t subject_length = subject.length(); const size_t subject_length = subject.length();
const size_t pattern_length = pattern.length(); const size_t pattern_length = pattern.length();
int* char_occurrences = search->bad_char_table(); int* char_occurrences = search->bad_char_table();
int64_t badness = -pattern_length; int64_t badness = -pattern_length;
// How bad we are doing without a good-suffix table. // How bad we are doing without a good-suffix table.
PatternChar last_char = pattern[pattern_length - 1]; Char last_char = pattern[pattern_length - 1];
int last_char_shift = int last_char_shift =
pattern_length - 1 - pattern_length - 1 -
CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char)); CharOccurrence(char_occurrences, static_cast<Char>(last_char));
// Perform search // Perform search
size_t index = start_index; // No matches found prior to this index. size_t index = start_index; // No matches found prior to this index.
@ -564,8 +546,8 @@ size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
return subject.length(); return subject.length();
} }
template <typename PatternChar, typename SubjectChar> template <typename Char>
void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() { void StringSearch<Char>::PopulateBoyerMooreHorspoolTable() {
const size_t pattern_length = pattern_.length(); const size_t pattern_length = pattern_.length();
int* bad_char_occurrence = bad_char_table(); int* bad_char_occurrence = bad_char_table();
@ -585,8 +567,8 @@ void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
} }
} }
for (size_t i = start; i < pattern_length - 1; i++) { for (size_t i = start; i < pattern_length - 1; i++) {
PatternChar c = pattern_[i]; Char c = pattern_[i];
int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize(); int bucket = (sizeof(Char) == 1) ? c : c % AlphabetSize();
bad_char_occurrence[bucket] = i; bad_char_occurrence[bucket] = i;
} }
} }
@ -597,12 +579,12 @@ void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
// Simple linear search for short patterns, which bails out if the string // Simple linear search for short patterns, which bails out if the string
// isn't found very early in the subject. Upgrades to BoyerMooreHorspool. // isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
template <typename PatternChar, typename SubjectChar> template <typename Char>
size_t StringSearch<PatternChar, SubjectChar>::InitialSearch( size_t StringSearch<Char>::InitialSearch(
StringSearch<PatternChar, SubjectChar>* search, StringSearch<Char>* search,
Vector<const SubjectChar> subject, Vector<const Char> subject,
size_t index) { size_t index) {
Vector<const PatternChar> pattern = search->pattern_; Vector<const Char> pattern = search->pattern_;
const size_t pattern_length = pattern.length(); const size_t pattern_length = pattern.length();
// Badness is a count of how much work we have done. When we have // Badness is a count of how much work we have done. When we have
// done enough work we decide it's probably worth switching to a better // done enough work we decide it's probably worth switching to a better
@ -642,11 +624,11 @@ size_t StringSearch<PatternChar, SubjectChar>::InitialSearch(
// If searching multiple times for the same pattern, a search // If searching multiple times for the same pattern, a search
// object should be constructed once and the Search function then called // object should be constructed once and the Search function then called
// for each search. // for each search.
template <typename SubjectChar, typename PatternChar> template <typename Char>
size_t SearchString(Vector<const SubjectChar> subject, size_t SearchString(Vector<const Char> subject,
Vector<const PatternChar> pattern, Vector<const Char> pattern,
size_t start_index) { size_t start_index) {
StringSearch<PatternChar, SubjectChar> search(pattern); StringSearch<Char> search(pattern);
return search.Search(subject, start_index); return search.Search(subject, start_index);
} }
} }
@ -655,16 +637,38 @@ size_t SearchString(Vector<const SubjectChar> subject,
namespace node { namespace node {
using node::stringsearch::Vector; using node::stringsearch::Vector;
template <typename SubjectChar, typename PatternChar> template <typename Char>
size_t SearchString(const SubjectChar* haystack, size_t SearchString(const Char* haystack,
size_t haystack_length, size_t haystack_length,
const PatternChar* needle, const Char* needle,
size_t needle_length, size_t needle_length,
size_t start_index) { size_t start_index,
return node::stringsearch::SearchString( bool is_forward) {
Vector<const SubjectChar>(haystack, haystack_length), // To do a reverse search (lastIndexOf instead of indexOf) without redundant
Vector<const PatternChar>(needle, needle_length), // code, create two vectors that are reversed views into the input strings.
start_index); // For example, v_needle[0] would return the *last* character of the needle.
// So we're searching for the first instance of rev(needle) in rev(haystack)
Vector<const Char> v_needle = Vector<const Char>(
needle, needle_length, is_forward);
Vector<const Char> v_haystack = Vector<const Char>(
haystack, haystack_length, is_forward);
ASSERT(haystack_length >= needle_length);
size_t diff = haystack_length - needle_length;
size_t relative_start_index;
if (is_forward) {
relative_start_index = start_index;
} else if (diff < start_index) {
relative_start_index = 0;
} else {
relative_start_index = diff - start_index;
}
size_t pos = node::stringsearch::SearchString(
v_haystack, v_needle, relative_start_index);
if (pos == haystack_length) {
// not found
return pos;
}
return is_forward ? pos : (haystack_length - needle_length - pos);
} }
} // namespace node } // namespace node

117
test/parallel/test-buffer-indexof.js

@ -282,3 +282,120 @@ assert.throws(function() {
assert.throws(function() { assert.throws(function() {
b.indexOf([]); b.indexOf([]);
}); });
// All code for handling encodings is shared between Buffer.indexOf and
// Buffer.lastIndexOf, so only testing the separate lastIndexOf semantics.
// Test lastIndexOf basic functionality; Buffer b contains 'abcdef'.
// (b is declared earlier in this test file.)
// lastIndexOf string:
// Non-negative offsets: only matches starting at or before the offset count,
// so 'c' (index 2) is not found with offset 1.
assert.equal(b.lastIndexOf('a'), 0);
assert.equal(b.lastIndexOf('a', 1), 0);
assert.equal(b.lastIndexOf('b', 1), 1);
assert.equal(b.lastIndexOf('c', 1), -1);
// Negative offsets are expected to count back from the end of the buffer:
// -b.length still reaches index 0, one step further finds nothing.
assert.equal(b.lastIndexOf('a', -1), 0);
assert.equal(b.lastIndexOf('a', -4), 0);
assert.equal(b.lastIndexOf('a', -b.length), 0);
assert.equal(b.lastIndexOf('a', -b.length - 1), -1);
// Non-finite offsets: NaN and +Infinity still find the match at index 0,
// -Infinity finds nothing.
assert.equal(b.lastIndexOf('a', NaN), 0);
assert.equal(b.lastIndexOf('a', -Infinity), -1);
assert.equal(b.lastIndexOf('a', Infinity), 0);
// lastIndexOf Buffer:
// buf_a, buf_bc, buf_f, buf_z and buf_empty are fixtures declared earlier in
// this file; presumably they hold 'a', 'bc', 'f', 'z' and zero bytes -- confirm
// against their definitions.
// Single-byte needle: same offset expectations as the string form.
assert.equal(b.lastIndexOf(buf_a), 0);
assert.equal(b.lastIndexOf(buf_a, 1), 0);
assert.equal(b.lastIndexOf(buf_a, -1), 0);
assert.equal(b.lastIndexOf(buf_a, -4), 0);
assert.equal(b.lastIndexOf(buf_a, -b.length), 0);
assert.equal(b.lastIndexOf(buf_a, -b.length - 1), -1);
assert.equal(b.lastIndexOf(buf_a, NaN), 0);
assert.equal(b.lastIndexOf(buf_a, -Infinity), -1);
assert.equal(b.lastIndexOf(buf_a, Infinity), 0);
// Multi-byte needle expected at index 1. Offset -5 still reaches index 1;
// offset -6 only reaches index 0, so the match is missed. A NaN offset must
// not act like 0 here, otherwise the match at index 1 would be missed.
assert.equal(b.lastIndexOf(buf_bc), 1);
assert.equal(b.lastIndexOf(buf_bc, 2), 1);
assert.equal(b.lastIndexOf(buf_bc, -1), 1);
assert.equal(b.lastIndexOf(buf_bc, -3), 1);
assert.equal(b.lastIndexOf(buf_bc, -5), 1);
assert.equal(b.lastIndexOf(buf_bc, -6), -1);
assert.equal(b.lastIndexOf(buf_bc, NaN), 1);
assert.equal(b.lastIndexOf(buf_bc, -Infinity), -1);
assert.equal(b.lastIndexOf(buf_bc, Infinity), 1);
// Match at the very last index, and a needle that is absent.
assert.equal(b.lastIndexOf(buf_f), b.length - 1);
assert.equal(b.lastIndexOf(buf_z), -1);
// An empty needle is expected to report -1 regardless of the offset.
assert.equal(b.lastIndexOf(buf_empty), -1);
assert.equal(b.lastIndexOf(buf_empty, 1), -1);
assert.equal(b.lastIndexOf(buf_empty, b.length + 1), -1);
assert.equal(b.lastIndexOf(buf_empty, Infinity), -1);
// lastIndexOf number:
// A numeric needle is a single byte value; 0x61 is ASCII 'a', which sits at
// index 0 of b. Offset expectations mirror the string and Buffer cases above.
assert.equal(b.lastIndexOf(0x61), 0);
assert.equal(b.lastIndexOf(0x61, 1), 0);
assert.equal(b.lastIndexOf(0x61, -1), 0);
assert.equal(b.lastIndexOf(0x61, -4), 0);
assert.equal(b.lastIndexOf(0x61, -b.length), 0);
assert.equal(b.lastIndexOf(0x61, -b.length - 1), -1);
assert.equal(b.lastIndexOf(0x61, NaN), 0);
assert.equal(b.lastIndexOf(0x61, -Infinity), -1);
assert.equal(b.lastIndexOf(0x61, Infinity), 0);
// The NUL byte does not occur in 'abcdef'.
assert.equal(b.lastIndexOf(0x0), -1);
// Test weird offset arguments.
// Behaviour should match String.lastIndexOf:
// The expectations are consistent with Number() coercion of the offset:
// null and [] become 0 (so 'b' at index 1 is missed), [2] becomes 2, while
// undefined and {} become NaN and the whole buffer is searched.
assert.equal(b.lastIndexOf('b', 0), -1);
assert.equal(b.lastIndexOf('b', undefined), 1);
assert.equal(b.lastIndexOf('b', null), -1);
assert.equal(b.lastIndexOf('b', {}), 1);
assert.equal(b.lastIndexOf('b', []), -1);
assert.equal(b.lastIndexOf('b', [2]), 1);
// Test lastIndexOf on a longer buffer:
// The text is 27 bytes long; 'a ' occurs at indexes 0, 6 and 13.
// Note: these calls pass the expected value first, the actual value second.
var bufferString = new Buffer('a man a plan a canal panama');
assert.equal(15, bufferString.lastIndexOf('canal'));
assert.equal(21, bufferString.lastIndexOf('panama'));
// A needle as long as the buffer can only match at index 0.
assert.equal(0, bufferString.lastIndexOf('a man a plan a canal panama'));
assert.equal(-1, bufferString.lastIndexOf('a man a plan a canal mexico'));
// The offset is inclusive: offset 13 still finds the match that starts at 13,
// offset 12 falls back to the previous match at 6.
assert.equal(13, bufferString.lastIndexOf('a '));
assert.equal(13, bufferString.lastIndexOf('a ', 13));
assert.equal(6, bufferString.lastIndexOf('a ', 12));
assert.equal(0, bufferString.lastIndexOf('a ', 5));
// Negative offsets: -27 is -length (index 0); -28 is before the start.
assert.equal(13, bufferString.lastIndexOf('a ', -1));
assert.equal(0, bufferString.lastIndexOf('a ', -27));
assert.equal(-1, bufferString.lastIndexOf('a ', -28));
// The above tests test the LINEAR and SINGLE-CHAR strategies.
// Now, we test the BOYER-MOORE-HORSPOOL strategy.
// Test lastIndexOf on a long buffer with multiple matches:
// (pattern and longBufferString are declared earlier in this test file.)
pattern = 'JABACABADABACABA';
// The last match starts at 1535. An offset of exactly 1535 still finds it;
// an offset of 1534 must fall back to the earlier match at 511.
assert.equal(1535, longBufferString.lastIndexOf(pattern));
assert.equal(1535, longBufferString.lastIndexOf(pattern, 1535));
assert.equal(511, longBufferString.lastIndexOf(pattern, 1534));
// Finally, give it a really long input to trigger fallback from BMH to
// regular BOYER-MOORE (which has better worst-case complexity).
// Generate a really long Thue-Morse sequence of 'yolo' and 'swag',
// "yolo swag swag yolo swag yolo yolo swag" ..., goes on for about 5MB.
// This is hard to search because it all looks similar, but never repeats.
// countBits returns the number of set (1) bits in the binary representation
// of n. Intended for integers in the range [0, 2^32); values <= 0 yield 0.
function countBits(n) {
  for (var count = 0; n > 0; count++) {
    // n & (n - 1) clears the lowest set bit. The bitwise AND yields a signed
    // 32-bit result, so `>>> 0` converts it back to unsigned; otherwise an
    // input with bit 31 set would turn negative and end the loop too early.
    n = (n & (n - 1)) >>> 0;
  }
  return count;
}
// Build the word list: entry i is 'yolo' when countBits(i) is even and 'swag'
// when it is odd.
var parts = [];
for (var i = 0; i < 1000000; i++) {
parts.push((countBits(i) % 2 === 0) ? 'yolo' : 'swag');
}
// 1,000,000 four-letter words joined by single spaces: 4,999,999 bytes.
var reallyLong = new Buffer(parts.join(' '));
// Sanity-check the first four words of the sequence.
assert.equal('yolo swag swag yolo', reallyLong.slice(0, 19).toString());
// Expensive reverse searches. Stress test lastIndexOf:
// Each needle is a slice of reallyLong itself, so at least one match exists.
pattern = reallyLong.slice(0, 100000); // First 1/50th of the buffer.
assert.equal(4751360, reallyLong.lastIndexOf(pattern));
// With an offset, the last match starting at or before it is expected.
assert.equal(3932160, reallyLong.lastIndexOf(pattern, 4000000));
assert.equal(2949120, reallyLong.lastIndexOf(pattern, 3000000));
pattern = reallyLong.slice(100000, 200000); // Second 1/50th.
assert.equal(4728480, reallyLong.lastIndexOf(pattern));
pattern = reallyLong.slice(0, 1000000); // First 1/5th.
assert.equal(3932160, reallyLong.lastIndexOf(pattern));
// The first 2/5ths is expected to match only where it was sliced from.
pattern = reallyLong.slice(0, 2000000); // First 2/5ths.
assert.equal(0, reallyLong.lastIndexOf(pattern));

Loading…
Cancel
Save