From a550ddbf3c6887d77530ec3faaf51fde3c18b94a Mon Sep 17 00:00:00 2001
From: Anna Henningsen
Date: Mon, 2 May 2016 05:31:48 +0200
Subject: [PATCH] buffer: fix needle length misestimation for UCS2

Use `StringBytes::Size` to determine the needle string length
instead of assuming latin-1 or UTF-8.

Previously, `Buffer.indexOf` could fail with an assertion failure
when the needle's byte length, but not its character count, exceeded
the haystack's byte length.

PR-URL: https://github.com/nodejs/node/pull/6511
Reviewed-By: James M Snell
Reviewed-By: Trevor Norris
---
 src/node_buffer.cc                   | 4 ++--
 test/parallel/test-buffer-indexof.js | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index 51ee511f34..586bb6a3a6 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -995,9 +995,9 @@ void IndexOfString(const FunctionCallbackInfo& args) {
 
   const char* haystack = ts_obj_data;
   const size_t haystack_length = ts_obj_length;
-  // Extended latin-1 characters are 2 bytes in Utf8.
+
   const size_t needle_length =
-      enc == BINARY ? needle->Length() : needle->Utf8Length();
+      StringBytes::Size(args.GetIsolate(), needle, enc);
 
   if (needle_length == 0 || haystack_length == 0) {
     return args.GetReturnValue().Set(-1);
diff --git a/test/parallel/test-buffer-indexof.js b/test/parallel/test-buffer-indexof.js
index 4cd46c0460..aa50d8f72a 100644
--- a/test/parallel/test-buffer-indexof.js
+++ b/test/parallel/test-buffer-indexof.js
@@ -222,6 +222,12 @@ var allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2');
 assert.equal(-1, allCharsBufferUtf8.indexOf('notfound'));
 assert.equal(-1, allCharsBufferUcs2.indexOf('notfound'));
 
+// Needle is longer than haystack, but only because it's encoded as UTF-16
+assert.strictEqual(Buffer.from('aaaa').indexOf('a'.repeat(4), 'ucs2'), -1);
+
+assert.strictEqual(Buffer.from('aaaa').indexOf('a'.repeat(4), 'utf8'), 0);
+assert.strictEqual(Buffer.from('aaaa').indexOf('你好', 'ucs2'), -1);
+
 {
   // Find substrings in Utf8.
   const lengths = [1, 3, 15]; // Single char, simple and complex.