node/test/parallel/test-string-decoder.js

'use strict';
require('../common');
var assert = require('assert');
var inspect = require('util').inspect;
var StringDecoder = require('string_decoder').StringDecoder;

// A decoder constructed without an explicit encoding must report 'utf8'.
var decoder = new StringDecoder();
assert.strictEqual(decoder.encoding, 'utf8');

process.stdout.write('scanning ');

// Exhaustive cases: every entry is [encoding, input, expected] and is handed
// to test(), which tries every possible way of chunking the input buffer.
[
  // UTF-8: one-, two-, three- and four-byte characters
  ['utf-8', Buffer.from('$', 'utf-8'), '$'],
  ['utf-8', Buffer.from('¢', 'utf-8'), '¢'],
  ['utf-8', Buffer.from('€', 'utf-8'), '€'],
  ['utf-8', Buffer.from('𤭢', 'utf-8'), '𤭢'],
  // A mixed ascii and non-ascii string
  // Test stolen from deps/v8/test/cctest/test-strings.cc
  // U+02E4 -> CB A4
  // U+0064 -> 64
  // U+12E4 -> E1 8B A4
  // U+0030 -> 30
  // U+3045 -> E3 81 85
  [
    'utf-8',
    Buffer.from([0xCB, 0xA4, 0x64, 0xE1, 0x8B, 0xA4, 0x30, 0xE3, 0x81, 0x85]),
    '\u02e4\u0064\u12e4\u0030\u3045'
  ],
  // UCS-2
  ['ucs2', Buffer.from('ababc', 'ucs2'), 'ababc'],
  // UTF-16LE
  ['utf16le', Buffer.from('3DD84DDC', 'hex'), '\ud83d\udc4d'] // thumbs up
].forEach(function(args) {
  test(args[0], args[1], args[2]);
});

console.log(' crayon!');

// Hand-written scenarios: a fresh decoder is created for each entry, every
// [chunk, expectedOutput] pair in `writes` is written in order and checked,
// and finally the string returned by end() is compared against `end`.
[
  // Additional UTF-8 tests
  {
    encoding: 'utf8',
    writes: [[Buffer.from('E1', 'hex'), '']],
    end: '\ufffd'
  },
  {
    encoding: 'utf8',
    writes: [[Buffer.from('E18B', 'hex'), '']],
    end: '\ufffd\ufffd'
  },
  {
    encoding: 'utf8',
    writes: [[Buffer.from('\ufffd'), '\ufffd']],
    end: ''
  },
  {
    encoding: 'utf8',
    writes: [[Buffer.from('\ufffd\ufffd\ufffd'), '\ufffd\ufffd\ufffd']],
    end: ''
  },
  {
    encoding: 'utf8',
    writes: [[Buffer.from('efbfbde2', 'hex'), '\ufffd']],
    end: '\ufffd'
  },
  // Additional UTF-16LE surrogate pair tests
  {
    encoding: 'utf16le',
    writes: [
      [Buffer.from('3DD8', 'hex'), ''],
      [Buffer.from('4D', 'hex'), ''],
      [Buffer.from('DC', 'hex'), '\ud83d\udc4d']
    ],
    end: ''
  },
  {
    encoding: 'utf16le',
    writes: [[Buffer.from('3DD8', 'hex'), '']],
    end: '\ud83d'
  },
  {
    encoding: 'utf16le',
    writes: [
      [Buffer.from('3DD8', 'hex'), ''],
      [Buffer.from('4D', 'hex'), '']
    ],
    end: '\ud83d'
  }
].forEach(function(scenario) {
  decoder = new StringDecoder(scenario.encoding);
  scenario.writes.forEach(function(step) {
    assert.strictEqual(decoder.write(step[0]), step[1]);
  });
  assert.strictEqual(decoder.end(), scenario.end);
});

// test verifies that StringDecoder decodes the given input buffer with the
// given encoding to the expected output.
//
// Parameters:
//   encoding       - encoding name passed to the StringDecoder constructor.
//   input          - Buffer holding the bytes to decode.
//   expected       - string the concatenated decoder output must equal.
//   singleSequence - optional array of [start, end] pairs. When given, only
//                    that one write sequence is run, which makes it easy to
//                    reproduce a specific failure. When omitted, every
//                    sequence produced by writeSequences(input.length) is run.
//
// For each sequence a fresh decoder is created, the slices are written in
// order and the decoder is then ended, so bytes it was still holding back are
// part of the compared output. Throws an AssertionError describing the input,
// the write sequence and the decoder state on the first mismatch.
function test(encoding, input, expected, singleSequence) {
  var sequences;
  if (!singleSequence) {
    sequences = writeSequences(input.length);
  } else {
    sequences = [singleSequence];
  }
  sequences.forEach(function(sequence) {
    var decoder = new StringDecoder(encoding);
    var output = '';
    sequence.forEach(function(write) {
      output += decoder.write(input.slice(write[0], write[1]));
    });
    // Flush the decoder: without this, an incomplete trailing character that
    // the decoder is still buffering would silently be left out of `output`.
    output += decoder.end();
    process.stdout.write('.');
    // The message is only built on failure because inspecting the decoder for
    // every sequence would be wasted work on the passing path.
    if (output !== expected) {
      var message =
        'Expected "' + unicodeEscape(expected) + '", ' +
        'but got "' + unicodeEscape(output) + '"\n' +
        'input: ' + input.toString('hex').match(/.{2}/g) + '\n' +
        'Write sequence: ' + JSON.stringify(sequence) + '\n' +
        'Full Decoder State: ' + inspect(decoder);
      // strictEqual (rather than the deprecated multi-argument assert.fail)
      // keeps `actual` and `expected` on the thrown AssertionError.
      assert.strictEqual(output, expected, message);
    }
  });
}

// unicodeEscape returns the contents of str as a sequence of \uXXXX escapes,
// one per UTF-16 code unit. Each code unit is zero-padded to four hex digits
// so that the result is made of well-formed escapes (e.g. '$' -> '\u0024').
// An empty string yields an empty string.
function unicodeEscape(str) {
  var r = '';
  for (var i = 0; i < str.length; i++) {
    var hex = str.charCodeAt(i).toString(16);
    // charCodeAt() never exceeds 0xffff, so four digits are always enough.
    r += '\\u' + ('0000' + hex).slice(-4);
  }
  return r;
}

// writeSequences returns an array of arrays that describes all possible ways a
// buffer of the given length could be split up and passed to sequential write
// calls. Each inner array is a list of [start, end] slice bounds.
//
// e.g. writeSequences(3) will return: [
//   [ [ 0, 3 ] ],
//   [ [ 0, 2 ], [ 2, 3 ] ],
//   [ [ 0, 1 ], [ 1, 3 ] ],
//   [ [ 0, 1 ], [ 1, 2 ], [ 2, 3 ] ]
// ]
//
// A length of 0 yields [ [] ]: the single sequence that performs no writes.
//
// The start and sequence parameters are used by the recursion only; callers
// pass just the length.
function writeSequences(length, start, sequence) {
  var from = start;
  var prefix = sequence;
  if (from === undefined) {
    from = 0;
    prefix = [];
  }
  // The whole buffer is covered, so the slices collected so far form exactly
  // one complete sequence. Checked on the initial call too, so that an empty
  // buffer produces one (empty) sequence instead of none.
  if (from === length) {
    return [prefix];
  }
  var sequences = [];
  // Longest first slice first, which gives the ordering shown above.
  for (var end = length; end > from; end--) {
    var extended = prefix.concat([[from, end]]);
    Array.prototype.push.apply(
      sequences,
      writeSequences(length, end, extended)
    );
  }
  return sequences;
}
test: enable linting for tests Enable linting for the test directory. A number of changes was made so all tests conform the current rules used by lib and src directories. The only exception for tests is that unreachable (dead) code is allowed. test-fs-non-number-arguments-throw had to be excluded from the changes because of a weird issue on Windows CI. PR-URL: https://github.com/nodejs/io.js/pull/1721 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 10 years ago			`'use strict';`
test: remove unnecessary assignments common.js needs to be loaded in all tests so that there is checking for variable leaks and possibly other things. However, it does not need to be assigned to a variable if nothing in common.js is referred to elsewhere in the test. PR-URL: https://github.com/nodejs/node/pull/4408 Reviewed-By: James M Snell <jasnell@gmail.com> 9 years ago			`require('../common');`
Fix global leaks 14 years ago			`var assert = require('assert');`
string_decoder: rewrite implementation This commit provides a rewrite of StringDecoder that both improves performance (for non-single-byte encodings) and understandability. Additionally, StringDecoder instantiation performance has increased considerably due to inlinability and more efficient encoding name checking. PR-URL: https://github.com/nodejs/node/pull/6777 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 9 years ago			`var inspect = require('util').inspect;`
Fix global leaks 14 years ago			`var StringDecoder = require('string_decoder').StringDecoder;`
string_decoder: added support for UTF-16LE Fixes #3223. 13 years ago
string_decoder: rewrite implementation This commit provides a rewrite of StringDecoder that both improves performance (for non-single-byte encodings) and understandability. Additionally, StringDecoder instantiation performance has increased considerably due to inlinability and more efficient encoding name checking. PR-URL: https://github.com/nodejs/node/pull/6777 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 9 years ago			`// Test default encoding`
			`var decoder = new StringDecoder();`
			`assert.strictEqual(decoder.encoding, 'utf8');`

string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`process.stdout.write('scanning ');`
string_decoder: added support for UTF-16LE Fixes #3223. 13 years ago
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`// UTF-8`
buffer: add .from(), .alloc() and .allocUnsafe() Several changes: * Soft-Deprecate Buffer() constructors * Add `Buffer.from()`, `Buffer.alloc()`, and `Buffer.allocUnsafe()` * Add `--zero-fill-buffers` command line option * Add byteOffset and length to `new Buffer(arrayBuffer)` constructor * buffer.fill('') previously had no effect, now zero-fills * Update the docs PR-URL: https://github.com/nodejs/node/pull/4682 Reviewed-By: Сковорода Никита Андреевич <chalkerx@gmail.com> Reviewed-By: Stephen Belanger <admin@stephenbelanger.com> 9 years ago			`test('utf-8', Buffer.from('$', 'utf-8'), '$');`
			`test('utf-8', Buffer.from('¢', 'utf-8'), '¢');`
			`test('utf-8', Buffer.from('€', 'utf-8'), '€');`
			`test('utf-8', Buffer.from('𤭢', 'utf-8'), '𤭢');`
Implemented Utf8Decoder module Allows to safely decode a utf8 stream into strings without breaking on multibyte characters. 15 years ago			`// A mixed ascii and non-ascii string`
			`// Test stolen from deps/v8/test/cctest/test-strings.cc`
			`// U+02E4 -> CB A4`
			`// U+0064 -> 64`
			`// U+12E4 -> E1 8B A4`
			`// U+0030 -> 30`
			`// U+3045 -> E3 81 85`
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`test(`
			`'utf-8',`
buffer: add .from(), .alloc() and .allocUnsafe() Several changes: * Soft-Deprecate Buffer() constructors * Add `Buffer.from()`, `Buffer.alloc()`, and `Buffer.allocUnsafe()` * Add `--zero-fill-buffers` command line option * Add byteOffset and length to `new Buffer(arrayBuffer)` constructor * buffer.fill('') previously had no effect, now zero-fills * Update the docs PR-URL: https://github.com/nodejs/node/pull/4682 Reviewed-By: Сковорода Никита Андреевич <chalkerx@gmail.com> Reviewed-By: Stephen Belanger <admin@stephenbelanger.com> 9 years ago			`Buffer.from([0xCB, 0xA4, 0x64, 0xE1, 0x8B, 0xA4, 0x30, 0xE3, 0x81, 0x85]),`
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`'\u02e4\u0064\u12e4\u0030\u3045'`
			`);`

			`// UCS-2`
buffer: add .from(), .alloc() and .allocUnsafe() Several changes: * Soft-Deprecate Buffer() constructors * Add `Buffer.from()`, `Buffer.alloc()`, and `Buffer.allocUnsafe()` * Add `--zero-fill-buffers` command line option * Add byteOffset and length to `new Buffer(arrayBuffer)` constructor * buffer.fill('') previously had no effect, now zero-fills * Update the docs PR-URL: https://github.com/nodejs/node/pull/4682 Reviewed-By: Сковорода Никита Андреевич <chalkerx@gmail.com> Reviewed-By: Stephen Belanger <admin@stephenbelanger.com> 9 years ago			`test('ucs2', Buffer.from('ababc', 'ucs2'), 'ababc');`
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago
			`// UTF-16LE`
string_decoder: rewrite implementation This commit provides a rewrite of StringDecoder that both improves performance (for non-single-byte encodings) and understandability. Additionally, StringDecoder instantiation performance has increased considerably due to inlinability and more efficient encoding name checking. PR-URL: https://github.com/nodejs/node/pull/6777 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 9 years ago			`test('utf16le', Buffer.from('3DD84DDC', 'hex'), '\ud83d\udc4d'); // thumbs up`
Implemented Utf8Decoder module Allows to safely decode a utf8 stream into strings without breaking on multibyte characters. 15 years ago
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`console.log(' crayon!');`
Implemented Utf8Decoder module Allows to safely decode a utf8 stream into strings without breaking on multibyte characters. 15 years ago
string_decoder: rewrite implementation This commit provides a rewrite of StringDecoder that both improves performance (for non-single-byte encodings) and understandability. Additionally, StringDecoder instantiation performance has increased considerably due to inlinability and more efficient encoding name checking. PR-URL: https://github.com/nodejs/node/pull/6777 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 9 years ago			`// Additional UTF-8 tests`
			`decoder = new StringDecoder('utf8');`
			`assert.strictEqual(decoder.write(Buffer.from('E1', 'hex')), '');`
			`assert.strictEqual(decoder.end(), '\ufffd');`

			`decoder = new StringDecoder('utf8');`
			`assert.strictEqual(decoder.write(Buffer.from('E18B', 'hex')), '');`
			`assert.strictEqual(decoder.end(), '\ufffd\ufffd');`

			`decoder = new StringDecoder('utf8');`
			`assert.strictEqual(decoder.write(Buffer.from('\ufffd')), '\ufffd');`
			`assert.strictEqual(decoder.end(), '');`

			`decoder = new StringDecoder('utf8');`
			`assert.strictEqual(decoder.write(Buffer.from('\ufffd\ufffd\ufffd')),`
			`'\ufffd\ufffd\ufffd');`
			`assert.strictEqual(decoder.end(), '');`

			`decoder = new StringDecoder('utf8');`
			`assert.strictEqual(decoder.write(Buffer.from('efbfbde2', 'hex')), '\ufffd');`
			`assert.strictEqual(decoder.end(), '\ufffd');`


			`// Additional UTF-16LE surrogate pair tests`
			`decoder = new StringDecoder('utf16le');`
			`assert.strictEqual(decoder.write(Buffer.from('3DD8', 'hex')), '');`
			`assert.strictEqual(decoder.write(Buffer.from('4D', 'hex')), '');`
			`assert.strictEqual(decoder.write(Buffer.from('DC', 'hex')), '\ud83d\udc4d');`
			`assert.strictEqual(decoder.end(), '');`

			`decoder = new StringDecoder('utf16le');`
			`assert.strictEqual(decoder.write(Buffer.from('3DD8', 'hex')), '');`
			`assert.strictEqual(decoder.end(), '\ud83d');`

			`decoder = new StringDecoder('utf16le');`
			`assert.strictEqual(decoder.write(Buffer.from('3DD8', 'hex')), '');`
			`assert.strictEqual(decoder.write(Buffer.from('4D', 'hex')), '');`
			`assert.strictEqual(decoder.end(), '\ud83d');`

string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`// test verifies that StringDecoder will correctly decode the given input`
			`// buffer with the given encoding to the expected output. It will attempt all`
			`// possible ways to write() the input buffer, see writeSequences(). The`
			`// singleSequence allows for easy debugging of a specific sequence which is`
			`// useful in case of test failures.`
			`function test(encoding, input, expected, singleSequence) {`
			`var sequences;`
			`if (!singleSequence) {`
			`sequences = writeSequences(input.length);`
			`} else {`
			`sequences = [singleSequence];`
			`}`
			`sequences.forEach(function(sequence) {`
			`var decoder = new StringDecoder(encoding);`
			`var output = '';`
			`sequence.forEach(function(write) {`
			`output += decoder.write(input.slice(write[0], write[1]));`
			`});`
			`process.stdout.write('.');`
			`if (output !== expected) {`
			`var message =`
test: enable linting for tests Enable linting for the test directory. A number of changes was made so all tests conform the current rules used by lib and src directories. The only exception for tests is that unreachable (dead) code is allowed. test-fs-non-number-arguments-throw had to be excluded from the changes because of a weird issue on Windows CI. PR-URL: https://github.com/nodejs/io.js/pull/1721 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 10 years ago			`'Expected "' + unicodeEscape(expected) + '", ' +`
			`'but got "' + unicodeEscape(output) + '"\n' +`
string_decoder: rewrite implementation This commit provides a rewrite of StringDecoder that both improves performance (for non-single-byte encodings) and understandability. Additionally, StringDecoder instantiation performance has increased considerably due to inlinability and more efficient encoding name checking. PR-URL: https://github.com/nodejs/node/pull/6777 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 9 years ago			`'input: ' + input.toString('hex').match(/.{2}/g) + '\n' +`
test: enable linting for tests Enable linting for the test directory. A number of changes was made so all tests conform the current rules used by lib and src directories. The only exception for tests is that unreachable (dead) code is allowed. test-fs-non-number-arguments-throw had to be excluded from the changes because of a weird issue on Windows CI. PR-URL: https://github.com/nodejs/io.js/pull/1721 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 10 years ago			`'Write sequence: ' + JSON.stringify(sequence) + '\n' +`
string_decoder: rewrite implementation This commit provides a rewrite of StringDecoder that both improves performance (for non-single-byte encodings) and understandability. Additionally, StringDecoder instantiation performance has increased considerably due to inlinability and more efficient encoding name checking. PR-URL: https://github.com/nodejs/node/pull/6777 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 9 years ago			`'Full Decoder State: ' + inspect(decoder);`
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`assert.fail(output, expected, message);`
			`}`
			`});`
			`}`
Implemented Utf8Decoder module Allows to safely decode a utf8 stream into strings without breaking on multibyte characters. 15 years ago
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`// unicodeEscape prints the str contents as unicode escape codes.`
			`function unicodeEscape(str) {`
			`var r = '';`
			`for (var i = 0; i < str.length; i++) {`
test: enable linting for tests Enable linting for the test directory. A number of changes was made so all tests conform the current rules used by lib and src directories. The only exception for tests is that unreachable (dead) code is allowed. test-fs-non-number-arguments-throw had to be excluded from the changes because of a weird issue on Windows CI. PR-URL: https://github.com/nodejs/io.js/pull/1721 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 10 years ago			`r += '\\u' + str.charCodeAt(i).toString(16);`
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`}`
			`return r;`
			`}`
Implemented Utf8Decoder module Allows to safely decode a utf8 stream into strings without breaking on multibyte characters. 15 years ago
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`// writeSequences returns an array of arrays that describes all possible ways a`
			`// buffer of the given length could be split up and passed to sequential write`
			`// calls.`
			`//`
			`// e.G. writeSequences(3) will return: [`
			`// [ [ 0, 3 ] ],`
			`// [ [ 0, 2 ], [ 2, 3 ] ],`
			`// [ [ 0, 1 ], [ 1, 3 ] ],`
			`// [ [ 0, 1 ], [ 1, 2 ], [ 2, 3 ] ]`
			`// ]`
			`function writeSequences(length, start, sequence) {`
			`if (start === undefined) {`
			`start = 0;`
test: enable linting for tests Enable linting for the test directory. A number of changes was made so all tests conform the current rules used by lib and src directories. The only exception for tests is that unreachable (dead) code is allowed. test-fs-non-number-arguments-throw had to be excluded from the changes because of a weird issue on Windows CI. PR-URL: https://github.com/nodejs/io.js/pull/1721 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> 10 years ago			`sequence = [];`
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`} else if (start === length) {`
			`return [sequence];`
Implemented Utf8Decoder module Allows to safely decode a utf8 stream into strings without breaking on multibyte characters. 15 years ago			`}`
string_decoder: Improve test coverage The test cases are still essentially the same, but now all possible ways of writing a buffer into the decoder are tested, which has exposed a few failing scenarios that had not been discovered so far! 11 years ago			`var sequences = [];`
			`for (var end = length; end > start; end--) {`
			`var subSequence = sequence.concat([[start, end]]);`
			`var subSequences = writeSequences(length, end, subSequence, sequences);`
			`sequences = sequences.concat(subSequences);`
			`}`
			`return sequences;`
Implemented Utf8Decoder module Allows to safely decode a utf8 stream into strings without breaking on multibyte characters. 15 years ago			`}`