Browse Source

deps: revert breaking UTF-8 decoder changes in V8

Refs: 7c462455b0
Refs: aadb1c83fc

PR-URL: https://github.com/nodejs/node/pull/11029
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Myles Borins <myles.borins@gmail.com>
v7.x
Michaël Zasso 8 years ago
committed by Italo A. Casas
parent
commit
028bb632b2
No known key found for this signature in database GPG Key ID: 23EFEFE93C4CFFFE
  1. 3
      deps/v8/src/unicode-decoder.h
  2. 123
      deps/v8/src/unicode.cc
  3. 445
      deps/v8/test/cctest/test-parsing.cc
  4. 1
      deps/v8/test/unittests/BUILD.gn
  5. 39
      deps/v8/test/unittests/unicode-unittest.cc
  6. 1
      deps/v8/test/unittests/unittests.gyp

3
deps/v8/src/unicode-decoder.h

@@ -7,11 +7,10 @@
#include <sys/types.h> #include <sys/types.h>
#include "src/globals.h" #include "src/globals.h"
#include "src/utils.h"
namespace unibrow { namespace unibrow {
class V8_EXPORT_PRIVATE Utf8DecoderBase { class Utf8DecoderBase {
public: public:
// Initialization done in subclass. // Initialization done in subclass.
inline Utf8DecoderBase(); inline Utf8DecoderBase();

123
deps/v8/src/unicode.cc

@@ -228,52 +228,80 @@ static inline bool IsContinuationCharacter(byte chr) {
// This method decodes an UTF-8 value according to RFC 3629. // This method decodes an UTF-8 value according to RFC 3629.
uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) { uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
size_t length = NonASCIISequenceLength(str[0]); size_t length = NonASCIISequenceLength(str[0]);
if (length == 0 || max_length < length) {
// Check continuation characters. *cursor += 1;
size_t max_count = std::min(length, max_length); return kBadChar;
size_t count = 1;
while (count < max_count && IsContinuationCharacter(str[count])) {
count++;
} }
*cursor += count; if (length == 2) {
if (!IsContinuationCharacter(str[1])) {
// There must be enough continuation characters. *cursor += 1;
if (count != length) return kBadChar;
// Check overly long sequences & other conditions.
if (length == 3) {
if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
// Overlong three-byte sequence?
return kBadChar; return kBadChar;
} else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) { }
// High and low surrogate halves? *cursor += 2;
return ((str[0] << 6) + str[1]) - 0x00003080;
}
if (length == 3) {
switch (str[0]) {
case 0xE0:
// Overlong three-byte sequence.
if (str[1] < 0xA0 || str[1] > 0xBF) {
*cursor += 1;
return kBadChar;
}
break;
case 0xED:
// High and low surrogate halves.
if (str[1] < 0x80 || str[1] > 0x9F) {
*cursor += 1;
return kBadChar;
}
break;
default:
if (!IsContinuationCharacter(str[1])) {
*cursor += 1;
return kBadChar;
}
}
if (!IsContinuationCharacter(str[2])) {
*cursor += 1;
return kBadChar; return kBadChar;
} }
} else if (length == 4) { *cursor += 3;
if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) { return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080;
}
DCHECK(length == 4);
switch (str[0]) {
case 0xF0:
// Overlong four-byte sequence. // Overlong four-byte sequence.
return kBadChar; if (str[1] < 0x90 || str[1] > 0xBF) {
} else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) { *cursor += 1;
return kBadChar;
}
break;
case 0xF4:
// Code points outside of the unicode range. // Code points outside of the unicode range.
return kBadChar; if (str[1] < 0x80 || str[1] > 0x8F) {
} *cursor += 1;
return kBadChar;
}
break;
default:
if (!IsContinuationCharacter(str[1])) {
*cursor += 1;
return kBadChar;
}
} }
if (!IsContinuationCharacter(str[2])) {
// All errors have been handled, so we only have to assemble the result. *cursor += 1;
switch (length) { return kBadChar;
case 1:
return str[0];
case 2:
return ((str[0] << 6) + str[1]) - 0x00003080;
case 3:
return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080;
case 4:
return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) -
0x03C82080;
} }
if (!IsContinuationCharacter(str[3])) {
UNREACHABLE(); *cursor += 1;
return kBadChar; return kBadChar;
}
*cursor += 4;
return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) -
0x03C82080;
} }
uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) { uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) {
@@ -295,10 +323,9 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) {
// with one shift. // with one shift.
uint8_t mask = 0x7f >> kind; uint8_t mask = 0x7f >> kind;
// Store the kind in the top nibble, and kind - 1 (i.e., remaining bytes) // Store the kind - 1 (i.e., remaining bytes) in the top byte, value
// in 2nd nibble, and the value in the bottom three. The 2nd nibble is // in the bottom three.
// intended as a counter about how many bytes are still needed. *buffer = (kind - 1) << 24 | (next & mask);
*buffer = kind << 28 | (kind - 1) << 24 | (next & mask);
return kIncomplete; return kIncomplete;
} else { } else {
// No buffer, and not the start of a 1-byte char (handled at the // No buffer, and not the start of a 1-byte char (handled at the
@@ -327,19 +354,15 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) {
// We're inside of a character, as described by buffer. // We're inside of a character, as described by buffer.
// How many bytes (excluding this one) do we still expect? // How many bytes (excluding this one) do we still expect?
uint8_t bytes_expected = *buffer >> 28; uint8_t count = (*buffer >> 24) - 1;
uint8_t bytes_left = (*buffer >> 24) & 0x0f;
bytes_left--;
// Update the value. // Update the value.
uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F); uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F);
if (bytes_left) { if (count) {
*buffer = (bytes_expected << 28 | bytes_left << 24 | value); *buffer = count << 24 | value;
return kIncomplete; return kIncomplete;
} else { } else {
*buffer = 0; *buffer = 0;
bool sequence_was_too_long = (bytes_expected == 2 && value < 0x80) || return value;
(bytes_expected == 3 && value < 0x800);
return sequence_was_too_long ? kBadChar : value;
} }
} else { } else {
// Within a character, but not a continuation character? Then the // Within a character, but not a continuation character? Then the

445
deps/v8/test/cctest/test-parsing.cc

@@ -684,26 +684,74 @@ TEST(RegExpScanning) {
TestScanRegExp("/=?/", "=?"); TestScanRegExp("/=?/", "=?");
} }
static int Ucs2CharLength(unibrow::uchar c) {
if (c == unibrow::Utf8::kIncomplete || c == unibrow::Utf8::kBufferEmpty) {
return 0;
} else if (c < 0xffff) {
return 1;
} else {
return 2;
}
}
static int Utf8LengthHelper(const char* s) { static int Utf8LengthHelper(const char* s) {
unibrow::Utf8::Utf8IncrementalBuffer buffer(unibrow::Utf8::kBufferEmpty); int len = i::StrLength(s);
int length = 0; int character_length = len;
for (; *s != '\0'; s++) { for (int i = 0; i < len; i++) {
unibrow::uchar tmp = unibrow::Utf8::ValueOfIncremental(*s, &buffer); unsigned char c = s[i];
length += Ucs2CharLength(tmp); int input_offset = 0;
int output_adjust = 0;
if (c > 0x7f) {
if (c < 0xc0) continue;
if (c >= 0xf0) {
if (c >= 0xf8) {
// 5 and 6 byte UTF-8 sequences turn into a kBadChar for each UTF-8
// byte.
continue; // Handle first UTF-8 byte.
}
if ((c & 7) == 0 && ((s[i + 1] & 0x30) == 0)) {
// This 4 byte sequence could have been coded as a 3 byte sequence.
// Record a single kBadChar for the first byte and continue.
continue;
}
input_offset = 3;
// 4 bytes of UTF-8 turn into 2 UTF-16 code units.
character_length -= 2;
} else if (c >= 0xe0) {
if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) {
// This 3 byte sequence could have been coded as a 2 byte sequence.
// Record a single kBadChar for the first byte and continue.
continue;
}
if (c == 0xed) {
unsigned char d = s[i + 1];
if ((d < 0x80) || (d > 0x9f)) {
// This 3 byte sequence is part of a surrogate pair which is not
// supported by UTF-8. Record a single kBadChar for the first byte
// and continue.
continue;
}
}
input_offset = 2;
// 3 bytes of UTF-8 turn into 1 UTF-16 code unit.
output_adjust = 2;
} else {
if ((c & 0x1e) == 0) {
// This 2 byte sequence could have been coded as a 1 byte sequence.
// Record a single kBadChar for the first byte and continue.
continue;
}
input_offset = 1;
// 2 bytes of UTF-8 turn into 1 UTF-16 code unit.
output_adjust = 1;
}
bool bad = false;
for (int j = 1; j <= input_offset; j++) {
if ((s[i + j] & 0xc0) != 0x80) {
// Bad UTF-8 sequence turns the first in the sequence into kBadChar,
// which is a single UTF-16 code unit.
bad = true;
break;
}
}
if (!bad) {
i += input_offset;
character_length -= output_adjust;
}
}
} }
unibrow::uchar tmp = unibrow::Utf8::ValueOfIncrementalFinish(&buffer); return character_length;
length += Ucs2CharLength(tmp);
return length;
} }
@@ -933,206 +981,169 @@ TEST(ScopePositions) {
}; };
const SourceData source_data[] = { const SourceData source_data[] = {
{" with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::SLOPPY}, { " with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::SLOPPY },
{" with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::SLOPPY}, { " with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::SLOPPY },
{" with ({}) ", { " with ({}) ", "{\n"
"{\n" " block;\n"
" block;\n" " }", "\n"
" }", " more;", i::WITH_SCOPE, i::SLOPPY },
"\n" { " with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::SLOPPY },
" more;", { " with ({}) ", "statement", "\n"
i::WITH_SCOPE, i::SLOPPY}, " more;", i::WITH_SCOPE, i::SLOPPY },
{" with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::SLOPPY}, { " with ({})\n"
{" with ({}) ", "statement", " ", "statement;", "\n"
"\n" " more;", i::WITH_SCOPE, i::SLOPPY },
" more;", { " try {} catch ", "(e) { block; }", " more;",
i::WITH_SCOPE, i::SLOPPY}, i::CATCH_SCOPE, i::SLOPPY },
{" with ({})\n" { " try {} catch ", "(e) { block; }", "; more;",
" ", i::CATCH_SCOPE, i::SLOPPY },
"statement;", { " try {} catch ", "(e) {\n"
"\n" " block;\n"
" more;", " }", "\n"
i::WITH_SCOPE, i::SLOPPY}, " more;", i::CATCH_SCOPE, i::SLOPPY },
{" try {} catch ", "(e) { block; }", " more;", i::CATCH_SCOPE, { " try {} catch ", "(e) { block; }", " finally { block; } more;",
i::SLOPPY}, i::CATCH_SCOPE, i::SLOPPY },
{" try {} catch ", "(e) { block; }", "; more;", i::CATCH_SCOPE, { " start;\n"
i::SLOPPY}, " ", "{ let block; }", " more;", i::BLOCK_SCOPE, i::STRICT },
{" try {} catch ", { " start;\n"
"(e) {\n" " ", "{ let block; }", "; more;", i::BLOCK_SCOPE, i::STRICT },
" block;\n" { " start;\n"
" }", " ", "{\n"
"\n" " let block;\n"
" more;", " }", "\n"
i::CATCH_SCOPE, i::SLOPPY}, " more;", i::BLOCK_SCOPE, i::STRICT },
{" try {} catch ", "(e) { block; }", " finally { block; } more;", { " start;\n"
i::CATCH_SCOPE, i::SLOPPY}, " function fun", "(a,b) { infunction; }", " more;",
{" start;\n" i::FUNCTION_SCOPE, i::SLOPPY },
" ", { " start;\n"
"{ let block; }", " more;", i::BLOCK_SCOPE, i::STRICT}, " function fun", "(a,b) {\n"
{" start;\n" " infunction;\n"
" ", " }", "\n"
"{ let block; }", "; more;", i::BLOCK_SCOPE, i::STRICT}, " more;", i::FUNCTION_SCOPE, i::SLOPPY },
{" start;\n" { " start;\n", "(a,b) => a + b", "; more;",
" ", i::FUNCTION_SCOPE, i::SLOPPY },
"{\n" { " start;\n", "(a,b) => { return a+b; }", "\nmore;",
" let block;\n" i::FUNCTION_SCOPE, i::SLOPPY },
" }", { " start;\n"
"\n" " (function fun", "(a,b) { infunction; }", ")();",
" more;", i::FUNCTION_SCOPE, i::SLOPPY },
i::BLOCK_SCOPE, i::STRICT}, { " for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;",
{" start;\n" i::BLOCK_SCOPE, i::STRICT },
" function fun", { " for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;",
"(a,b) { infunction; }", " more;", i::FUNCTION_SCOPE, i::SLOPPY}, i::BLOCK_SCOPE, i::STRICT },
{" start;\n" { " for ", "(let x = 1 ; x < 10; ++ x) {\n"
" function fun", " block;\n"
"(a,b) {\n" " }", "\n"
" infunction;\n" " more;", i::BLOCK_SCOPE, i::STRICT },
" }", { " for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;",
"\n" i::BLOCK_SCOPE, i::STRICT },
" more;", { " for ", "(let x = 1 ; x < 10; ++ x) statement", "\n"
i::FUNCTION_SCOPE, i::SLOPPY}, " more;", i::BLOCK_SCOPE, i::STRICT },
{" start;\n", "(a,b) => a + b", "; more;", i::FUNCTION_SCOPE, i::SLOPPY}, { " for ", "(let x = 1 ; x < 10; ++ x)\n"
{" start;\n", "(a,b) => { return a+b; }", "\nmore;", i::FUNCTION_SCOPE, " statement;", "\n"
i::SLOPPY}, " more;", i::BLOCK_SCOPE, i::STRICT },
{" start;\n" { " for ", "(let x in {}) { block; }", " more;",
" (function fun", i::BLOCK_SCOPE, i::STRICT },
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, { " for ", "(let x in {}) { block; }", "; more;",
{" for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;", i::BLOCK_SCOPE, i::STRICT },
i::BLOCK_SCOPE, i::STRICT}, { " for ", "(let x in {}) {\n"
{" for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;", " block;\n"
i::BLOCK_SCOPE, i::STRICT}, " }", "\n"
{" for ", " more;", i::BLOCK_SCOPE, i::STRICT },
"(let x = 1 ; x < 10; ++ x) {\n" { " for ", "(let x in {}) statement;", " more;",
" block;\n" i::BLOCK_SCOPE, i::STRICT },
" }", { " for ", "(let x in {}) statement", "\n"
"\n" " more;", i::BLOCK_SCOPE, i::STRICT },
" more;", { " for ", "(let x in {})\n"
i::BLOCK_SCOPE, i::STRICT}, " statement;", "\n"
{" for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;", " more;", i::BLOCK_SCOPE, i::STRICT },
i::BLOCK_SCOPE, i::STRICT}, // Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw
{" for ", "(let x = 1 ; x < 10; ++ x) statement", // the preparser off in terms of byte offsets.
"\n" // 6 byte encoding.
" more;", { " 'foo\355\240\201\355\260\211';\n"
i::BLOCK_SCOPE, i::STRICT}, " (function fun", "(a,b) { infunction; }", ")();",
{" for ", i::FUNCTION_SCOPE, i::SLOPPY },
"(let x = 1 ; x < 10; ++ x)\n" // 4 byte encoding.
" statement;", { " 'foo\360\220\220\212';\n"
"\n" " (function fun", "(a,b) { infunction; }", ")();",
" more;", i::FUNCTION_SCOPE, i::SLOPPY },
i::BLOCK_SCOPE, i::STRICT}, // 3 byte encoding of \u0fff.
{" for ", "(let x in {}) { block; }", " more;", i::BLOCK_SCOPE, { " 'foo\340\277\277';\n"
i::STRICT}, " (function fun", "(a,b) { infunction; }", ")();",
{" for ", "(let x in {}) { block; }", "; more;", i::BLOCK_SCOPE, i::FUNCTION_SCOPE, i::SLOPPY },
i::STRICT}, // Broken 6 byte encoding with missing last byte.
{" for ", { " 'foo\355\240\201\355\211';\n"
"(let x in {}) {\n" " (function fun", "(a,b) { infunction; }", ")();",
" block;\n" i::FUNCTION_SCOPE, i::SLOPPY },
" }", // Broken 3 byte encoding of \u0fff with missing last byte.
"\n" { " 'foo\340\277';\n"
" more;", " (function fun", "(a,b) { infunction; }", ")();",
i::BLOCK_SCOPE, i::STRICT}, i::FUNCTION_SCOPE, i::SLOPPY },
{" for ", "(let x in {}) statement;", " more;", i::BLOCK_SCOPE, // Broken 3 byte encoding of \u0fff with missing 2 last bytes.
i::STRICT}, { " 'foo\340';\n"
{" for ", "(let x in {}) statement", " (function fun", "(a,b) { infunction; }", ")();",
"\n" i::FUNCTION_SCOPE, i::SLOPPY },
" more;", // Broken 3 byte encoding of \u00ff should be a 2 byte encoding.
i::BLOCK_SCOPE, i::STRICT}, { " 'foo\340\203\277';\n"
{" for ", " (function fun", "(a,b) { infunction; }", ")();",
"(let x in {})\n" i::FUNCTION_SCOPE, i::SLOPPY },
" statement;", // Broken 3 byte encoding of \u007f should be a 2 byte encoding.
"\n" { " 'foo\340\201\277';\n"
" more;", " (function fun", "(a,b) { infunction; }", ")();",
i::BLOCK_SCOPE, i::STRICT}, i::FUNCTION_SCOPE, i::SLOPPY },
// Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw // Unpaired lead surrogate.
// the preparser off in terms of byte offsets. { " 'foo\355\240\201';\n"
// 2 surrogates, encode a character that doesn't need a surrogate. " (function fun", "(a,b) { infunction; }", ")();",
{" 'foo\355\240\201\355\260\211';\n" i::FUNCTION_SCOPE, i::SLOPPY },
" (function fun", // Unpaired lead surrogate where following code point is a 3 byte sequence.
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, { " 'foo\355\240\201\340\277\277';\n"
// 4 byte encoding. " (function fun", "(a,b) { infunction; }", ")();",
{" 'foo\360\220\220\212';\n" i::FUNCTION_SCOPE, i::SLOPPY },
" (function fun", // Unpaired lead surrogate where following code point is a 4 byte encoding
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, // of a trail surrogate.
// 3 byte encoding of \u0fff. { " 'foo\355\240\201\360\215\260\211';\n"
{" 'foo\340\277\277';\n" " (function fun", "(a,b) { infunction; }", ")();",
" (function fun", i::FUNCTION_SCOPE, i::SLOPPY },
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, // Unpaired trail surrogate.
// 3 byte surrogate, followed by broken 2-byte surrogate w/ impossible 2nd { " 'foo\355\260\211';\n"
// byte and last byte missing. " (function fun", "(a,b) { infunction; }", ")();",
{" 'foo\355\240\201\355\211';\n" i::FUNCTION_SCOPE, i::SLOPPY },
" (function fun", // 2 byte encoding of \u00ff.
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, { " 'foo\303\277';\n"
// Broken 3 byte encoding of \u0fff with missing last byte. " (function fun", "(a,b) { infunction; }", ")();",
{" 'foo\340\277';\n" i::FUNCTION_SCOPE, i::SLOPPY },
" (function fun", // Broken 2 byte encoding of \u00ff with missing last byte.
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, { " 'foo\303';\n"
// Broken 3 byte encoding of \u0fff with missing 2 last bytes. " (function fun", "(a,b) { infunction; }", ")();",
{" 'foo\340';\n" i::FUNCTION_SCOPE, i::SLOPPY },
" (function fun", // Broken 2 byte encoding of \u007f should be a 1 byte encoding.
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, { " 'foo\301\277';\n"
// Broken 3 byte encoding of \u00ff should be a 2 byte encoding. " (function fun", "(a,b) { infunction; }", ")();",
{" 'foo\340\203\277';\n" i::FUNCTION_SCOPE, i::SLOPPY },
" (function fun", // Illegal 5 byte encoding.
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, { " 'foo\370\277\277\277\277';\n"
// Broken 3 byte encoding of \u007f should be a 2 byte encoding. " (function fun", "(a,b) { infunction; }", ")();",
{" 'foo\340\201\277';\n" i::FUNCTION_SCOPE, i::SLOPPY },
" (function fun", // Illegal 6 byte encoding.
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, { " 'foo\374\277\277\277\277\277';\n"
// Unpaired lead surrogate. " (function fun", "(a,b) { infunction; }", ")();",
{" 'foo\355\240\201';\n" i::FUNCTION_SCOPE, i::SLOPPY },
" (function fun", // Illegal 0xfe byte
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, { " 'foo\376\277\277\277\277\277\277';\n"
// Unpaired lead surrogate where following code point is a 3 byte " (function fun", "(a,b) { infunction; }", ")();",
// sequence. i::FUNCTION_SCOPE, i::SLOPPY },
{" 'foo\355\240\201\340\277\277';\n" // Illegal 0xff byte
" (function fun", { " 'foo\377\277\277\277\277\277\277\277';\n"
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, " (function fun", "(a,b) { infunction; }", ")();",
// Unpaired lead surrogate where following code point is a 4 byte encoding i::FUNCTION_SCOPE, i::SLOPPY },
// of a trail surrogate. { " 'foo';\n"
{" 'foo\355\240\201\360\215\260\211';\n" " (function fun", "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();",
" (function fun", i::FUNCTION_SCOPE, i::SLOPPY },
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, { " 'foo';\n"
// Unpaired trail surrogate. " (function fun", "(a,b) { 'bar\360\220\220\214'; }", ")();",
{" 'foo\355\260\211';\n" i::FUNCTION_SCOPE, i::SLOPPY },
" (function fun", { NULL, NULL, NULL, i::EVAL_SCOPE, i::SLOPPY }
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, };
// 2 byte encoding of \u00ff.
{" 'foo\303\277';\n"
" (function fun",
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
// Broken 2 byte encoding of \u00ff with missing last byte.
{" 'foo\303';\n"
" (function fun",
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
// Broken 2 byte encoding of \u007f should be a 1 byte encoding.
{" 'foo\301\277';\n"
" (function fun",
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
// Illegal 5 byte encoding.
{" 'foo\370\277\277\277\277';\n"
" (function fun",
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
// Illegal 6 byte encoding.
{" 'foo\374\277\277\277\277\277';\n"
" (function fun",
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
// Illegal 0xfe byte
{" 'foo\376\277\277\277\277\277\277';\n"
" (function fun",
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
// Illegal 0xff byte
{" 'foo\377\277\277\277\277\277\277\277';\n"
" (function fun",
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
{" 'foo';\n"
" (function fun",
"(a,b) { 'bar\355\240\201\355\260\213'; }", ")();", i::FUNCTION_SCOPE,
i::SLOPPY},
{" 'foo';\n"
" (function fun",
"(a,b) { 'bar\360\220\220\214'; }", ")();", i::FUNCTION_SCOPE,
i::SLOPPY},
{NULL, NULL, NULL, i::EVAL_SCOPE, i::SLOPPY}};
i::Isolate* isolate = CcTest::i_isolate(); i::Isolate* isolate = CcTest::i_isolate();
i::Factory* factory = isolate->factory(); i::Factory* factory = isolate->factory();

1
deps/v8/test/unittests/BUILD.gn

@@ -117,7 +117,6 @@ v8_executable("unittests") {
"source-position-table-unittest.cc", "source-position-table-unittest.cc",
"test-utils.cc", "test-utils.cc",
"test-utils.h", "test-utils.h",
"unicode-unittest.cc",
"value-serializer-unittest.cc", "value-serializer-unittest.cc",
"wasm/asm-types-unittest.cc", "wasm/asm-types-unittest.cc",
"wasm/ast-decoder-unittest.cc", "wasm/ast-decoder-unittest.cc",

39
deps/v8/test/unittests/unicode-unittest.cc

@@ -1,39 +0,0 @@
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include <string>
#include "src/unicode-decoder.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace v8 {
namespace internal {
namespace {
using Utf8Decoder = unibrow::Utf8Decoder<512>;
void Decode(Utf8Decoder* decoder, const std::string& str) {
// Put the string in its own buffer on the heap to make sure that
// AddressSanitizer's heap-buffer-overflow logic can see what's going on.
std::unique_ptr<char[]> buffer(new char[str.length()]);
memcpy(buffer.get(), str.data(), str.length());
decoder->Reset(buffer.get(), str.length());
}
} // namespace
TEST(UnicodeTest, ReadOffEndOfUtf8String) {
Utf8Decoder decoder;
// Not enough continuation bytes before string ends.
Decode(&decoder, "\xE0");
Decode(&decoder, "\xED");
Decode(&decoder, "\xF0");
Decode(&decoder, "\xF4");
}
} // namespace internal
} // namespace v8

1
deps/v8/test/unittests/unittests.gyp

@@ -115,7 +115,6 @@
'source-position-table-unittest.cc', 'source-position-table-unittest.cc',
'test-utils.h', 'test-utils.h',
'test-utils.cc', 'test-utils.cc',
'unicode-unittest.cc',
'value-serializer-unittest.cc', 'value-serializer-unittest.cc',
'wasm/asm-types-unittest.cc', 'wasm/asm-types-unittest.cc',
'wasm/ast-decoder-unittest.cc', 'wasm/ast-decoder-unittest.cc',

Loading…
Cancel
Save