mirror of https://github.com/lukechilds/node.git
Browse Source
Provide an (initially experimental) implementation of the WHATWG Encoding Standard API (`TextDecoder` and `TextEncoder`). This is the same API implemented on the browser side. By default, with small-icu, only the UTF-8, UTF-16le and UTF-16be decoders are supported. With full-icu enabled, every encoding other than iso-8859-16 is supported. This provides a basic test, but does not include the full web platform tests. Note: many of the web platform tests for this would fail by default because we ship with small-icu by default. A process warning will be emitted on first use to indicate that the API is still experimental. No runtime flag is required to use the feature. Refs: https://encoding.spec.whatwg.org/ PR-URL: https://github.com/nodejs/node/pull/13644 Reviewed-By: Timothy Gu <timothygu99@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com>v6
James M Snell
8 years ago
12 changed files with 1189 additions and 16 deletions
@ -0,0 +1,458 @@ |
|||
'use strict'; |
|||
|
|||
// An implementation of the WHATWG Encoding Standard
|
|||
// https://encoding.spec.whatwg.org
|
|||
|
|||
const errors = require('internal/errors'); |
|||
// One-shot guard for the ExperimentalWarning: flipped to true by the first
// TextDecoder or TextEncoder that gets constructed.
let warned = false;
const experimental =
  'The WHATWG Encoding Standard implementation is an experimental API. It ' +
  'should not yet be used in production applications.';

// Private per-instance slots of TextDecoder.
const kHandle = Symbol('handle');
const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');

// Brand markers installed on the prototypes and checked by every accessor
// and method before it touches `this`.
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');
|||
|
|||
const { |
|||
getConstructorOf, |
|||
customInspectSymbol: inspect |
|||
} = require('internal/util'); |
|||
|
|||
const { |
|||
isArrayBuffer |
|||
} = process.binding('util'); |
|||
|
|||
const { |
|||
encodeUtf8String |
|||
} = process.binding('buffer'); |
|||
|
|||
const { |
|||
decode: _decode, |
|||
getConverter, |
|||
hasConverter |
|||
} = process.binding('icu'); |
|||
|
|||
// Bit flags combined into the `flags` integer that is handed to the icu
// binding (`getConverter()` and `decode()`).
const CONVERTER_FLAGS_FLUSH = 1 << 0;
const CONVERTER_FLAGS_FATAL = 1 << 1;
const CONVERTER_FLAGS_IGNORE_BOM = 1 << 2;

// Shared zero-length input used as the default argument of decode().
const empty = new Uint8Array(0);
|||
|
|||
// Lookup table from encoding label to canonical encoding name. It is written
// as one row per canonical name and expanded into a flat Map below; rows and
// labels are inserted in the order they are listed.
const encodings = new Map();
for (const [name, labels] of [
  ['utf-8', ['unicode-1-1-utf-8', 'utf8', 'utf-8']],
  ['ibm866', ['866', 'cp866', 'csibm866', 'ibm866']],
  ['iso-8859-2', [
    'csisolatin2', 'iso-8859-2', 'iso-ir-101', 'iso8859-2', 'iso88592',
    'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'
  ]],
  ['iso-8859-3', [
    'csisolatin3', 'iso-8859-3', 'iso-ir-109', 'iso8859-3', 'iso88593',
    'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3'
  ]],
  ['iso-8859-4', [
    'csisolatin4', 'iso-8859-4', 'iso-ir-110', 'iso8859-4', 'iso88594',
    'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4'
  ]],
  ['iso-8859-5', [
    'csisolatincyrillic', 'cyrillic', 'iso-8859-5', 'iso-ir-144',
    'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988'
  ]],
  ['iso-8859-6', [
    'arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic',
    'ecma-114', 'iso-8859-6', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127',
    'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'
  ]],
  ['iso-8859-7', [
    'csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8',
    'iso-8859-7', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7',
    'iso_8859-7:1987', 'sun_eu_greek'
  ]],
  ['iso-8859-8', [
    'csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8',
    'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8',
    'iso_8859-8:1988', 'visual'
  ]],
  ['iso-8859-8-i', ['csiso88598i', 'iso-8859-8-i', 'logical']],
  ['iso-8859-10', [
    'csisolatin6', 'iso-8859-10', 'iso-ir-157', 'iso8859-10', 'iso885910',
    'l6', 'latin6'
  ]],
  ['iso-8859-13', ['iso-8859-13', 'iso8859-13', 'iso885913']],
  ['iso-8859-14', ['iso-8859-14', 'iso8859-14', 'iso885914']],
  ['iso-8859-15', [
    'csisolatin9', 'iso-8859-15', 'iso8859-15', 'iso885915', 'iso_8859-15',
    'l9'
  ]],
  ['koi8-r', ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r']],
  ['koi8-u', ['koi8-ru', 'koi8-u']],
  ['macintosh', ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman']],
  ['windows-874', [
    'dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620',
    'windows-874'
  ]],
  ['windows-1250', ['cp1250', 'windows-1250', 'x-cp1250']],
  ['windows-1251', ['cp1251', 'windows-1251', 'x-cp1251']],
  ['windows-1252', [
    'ansi_x3.4-1968', 'ascii', 'cp1252', 'cp819', 'csisolatin1', 'ibm819',
    'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1',
    'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii', 'windows-1252', 'x-cp1252'
  ]],
  ['windows-1253', ['cp1253', 'windows-1253', 'x-cp1253']],
  ['windows-1254', [
    'cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9',
    'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5',
    'windows-1254', 'x-cp1254'
  ]],
  ['windows-1255', ['cp1255', 'windows-1255', 'x-cp1255']],
  ['windows-1256', ['cp1256', 'windows-1256', 'x-cp1256']],
  ['windows-1257', ['cp1257', 'windows-1257', 'x-cp1257']],
  ['windows-1258', ['cp1258', 'windows-1258', 'x-cp1258']],
  ['x-mac-cyrillic', ['x-mac-cyrillic', 'x-mac-ukrainian']],
  ['gbk', [
    'chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312',
    'gb_2312-80', 'gbk', 'iso-ir-58', 'x-gbk'
  ]],
  ['gb18030', ['gb18030']],
  ['big5', ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5']],
  ['euc-jp', ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp']],
  ['iso-2022-jp', ['csiso2022jp', 'iso-2022-jp']],
  ['shift_jis', [
    'csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'shift_jis', 'sjis',
    'windows-31j', 'x-sjis'
  ]],
  ['euc-kr', [
    'cseuckr', 'csksc56011987', 'euc-kr', 'iso-ir-149', 'korean',
    'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'
  ]],
  ['utf-16be', ['utf-16be']],
  ['utf-16le', ['utf-16le', 'utf-16']]
]) {
  for (const label of labels)
    encodings.set(label, name);
}
|||
|
|||
// String.prototype.trim() cannot be used here because it also strips
// non-ASCII whitespace, so only the five code points tested below (TAB, LF,
// FF, CR and SPACE) are removed from both ends of the label.
function trimAsciiWhitespace(label) {
  const isAsciiWhitespace = (ch) =>
    ch === '\u0009' ||
    ch === '\u000a' ||
    ch === '\u000c' ||
    ch === '\u000d' ||
    ch === '\u0020';

  let start = 0;
  let end = label.length;
  // Advance past leading whitespace, then back up over trailing whitespace;
  // the two cursors never cross.
  while (start < end && isAsciiWhitespace(label[start]))
    start++;
  while (end > start && isAsciiWhitespace(label[end - 1]))
    end--;
  return label.slice(start, end);
}
|||
|
|||
// Resolves an encoding label to its canonical name, or undefined when the
// label is not in the `encodings` table. The label is first tried verbatim;
// only on a miss is it lower-cased and trimmed of ASCII whitespace.
function getEncodingFromLabel(label) {
  const exact = encodings.get(label);
  return exact !== undefined ?
    exact :
    encodings.get(trimAsciiWhitespace(label.toLowerCase()));
}
|||
|
|||
// Reports whether a converter is available for the given encoding label.
//
// encoding: label to look up, defaults to 'utf-8'.
// Returns false when the label does not resolve to a known encoding,
// otherwise whatever the icu binding's hasConverter() reports.
// Throws a TypeError (ERR_INVALID_ARG_TYPE) when `encoding` is not a string,
// the same error class TextDecoder.prototype.decode() uses for this code.
function hasTextDecoder(encoding = 'utf-8') {
  if (typeof encoding !== 'string')
    throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'encoding', 'string');
  const enc = getEncodingFromLabel(encoding);
  // An unknown label has no canonical name; answer here instead of handing
  // `undefined` to the native binding.
  if (enc === undefined)
    return false;
  return hasConverter(enc);
}
|||
|
|||
var Buffer; |
|||
function lazyBuffer() { |
|||
if (Buffer === undefined) |
|||
Buffer = require('buffer').Buffer; |
|||
return Buffer; |
|||
} |
|||
|
|||
/**
 * Implementation of the WHATWG Encoding Standard `TextDecoder`, backed by a
 * converter handle obtained from the icu binding.
 *
 * Every accessor and method checks the `kDecoder` brand on `this` and throws
 * ERR_INVALID_THIS when it is missing.
 */
class TextDecoder {
  /**
   * @param {string} [encoding='utf-8'] Encoding label. It is coerced to a
   *   string and resolved with getEncodingFromLabel().
   * @param {object|null} [options] `fatal` and `ignoreBOM` are read for
   *   truthiness; `null` is accepted and means no flags.
   * @throws {TypeError} ERR_INVALID_ARG_TYPE if `options` is not an object.
   * @throws {RangeError} ERR_ENCODING_NOT_SUPPORTED if the label is unknown
   *   or the binding returns no converter for it.
   */
  constructor(encoding = 'utf-8', options = {}) {
    // The experimental warning is emitted once per process, shared with
    // TextEncoder through the module-level `warned` flag.
    if (!warned) {
      warned = true;
      process.emitWarning(experimental, 'ExperimentalWarning');
    }

    encoding = `${encoding}`;
    if (typeof options !== 'object')
      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object');

    const enc = getEncodingFromLabel(encoding);
    if (enc === undefined)
      throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding);

    let flags = 0;
    if (options !== null) {
      flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
      flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
    }

    // A known label can still lack a converter (the binding returns
    // undefined); report it with the same error class as an unknown label.
    const handle = getConverter(enc, flags);
    if (handle === undefined)
      throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding);

    this[kHandle] = handle;
    this[kFlags] = flags;
    this[kEncoding] = enc;
  }

  /** @returns {string} Canonical name of the encoding being decoded. */
  get encoding() {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    return this[kEncoding];
  }

  /** @returns {boolean} Whether the decoder was created with `fatal`. */
  get fatal() {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    return (this[kFlags] & CONVERTER_FLAGS_FATAL) === CONVERTER_FLAGS_FATAL;
  }

  /** @returns {boolean} Whether the decoder was created with `ignoreBOM`. */
  get ignoreBOM() {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    return (this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM) ===
           CONVERTER_FLAGS_IGNORE_BOM;
  }

  /**
   * Decodes `input` and returns the resulting string.
   *
   * @param {ArrayBuffer|ArrayBufferView} [input] Bytes to decode; defaults
   *   to an empty Uint8Array.
   * @param {object|null} [options] When `options.stream` is truthy the
   *   converter is not flushed, so a following call can continue the same
   *   input. `null` and `{}` both mean "not streaming".
   * @returns {string}
   * @throws {TypeError} ERR_INVALID_THIS, ERR_INVALID_ARG_TYPE, or
   *   ERR_ENCODING_INVALID_ENCODED_DATA (with `errno` set to the numeric
   *   code returned by the binding).
   */
  decode(input = empty, options = {}) {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    if (isArrayBuffer(input)) {
      input = lazyBuffer().from(input);
    } else if (!ArrayBuffer.isView(input)) {
      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'input',
                                 ['ArrayBuffer', 'ArrayBufferView']);
    }
    if (typeof options !== 'object') {
      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object');
    }

    // A null options bag carries no `stream` member, so it must flush just
    // like the default `{}` does.
    const streaming = options !== null && options.stream;
    const flags = streaming ? 0 : CONVERTER_FLAGS_FLUSH;

    // A numeric return value from the binding is treated as a failure code.
    const ret = _decode(this[kHandle], input, flags);
    if (typeof ret === 'number') {
      const err = new errors.TypeError('ERR_ENCODING_INVALID_ENCODED_DATA',
                                       this.encoding);
      err.errno = ret;
      throw err;
    }
    // NOTE(review): presumably `ret` is a Buffer of UTF-16 code units here;
    // confirm against the icu binding.
    return ret.toString('ucs2');
  }

  /**
   * Custom util.inspect() hook: shows `encoding`, `fatal` and `ignoreBOM`,
   * plus the internal flags and handle when `opts.showHidden` is set.
   */
  [inspect](depth, opts) {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    if (typeof depth === 'number' && depth < 0)
      return opts.stylize('[Object]', 'special');
    const ctor = getConstructorOf(this);
    // The stand-in object only inherits `constructor`, so the fields below
    // are printed as plain own properties.
    const obj = Object.create({
      constructor: ctor === null ? TextDecoder : ctor
    });
    obj.encoding = this.encoding;
    obj.fatal = this.fatal;
    obj.ignoreBOM = this.ignoreBOM;
    if (opts.showHidden) {
      obj[kFlags] = this[kFlags];
      obj[kHandle] = this[kHandle];
    }
    // Lazy to avoid circular dependency
    return require('util').inspect(obj, opts);
  }
}
|||
|
|||
/**
 * Implementation of the WHATWG Encoding Standard `TextEncoder`. It always
 * reports 'utf-8' as its encoding.
 *
 * Every accessor and method checks the `kEncoder` brand on `this` and throws
 * ERR_INVALID_THIS when it is missing.
 */
class TextEncoder {
  constructor() {
    // The experimental warning is emitted once per process, shared with
    // TextDecoder through the module-level `warned` flag.
    if (warned)
      return;
    warned = true;
    process.emitWarning(experimental, 'ExperimentalWarning');
  }

  /** @returns {string} Always 'utf-8'. */
  get encoding() {
    const branded = this != null && this[kEncoder] === true;
    if (!branded)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
    return 'utf-8';
  }

  /**
   * Coerces `input` to a string and passes it to the buffer binding's
   * encodeUtf8String(), returning that result.
   *
   * @param {string} [input=''] Text to encode.
   */
  encode(input = '') {
    const branded = this != null && this[kEncoder] === true;
    if (!branded)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
    const text = `${input}`;
    return encodeUtf8String(text);
  }

  /** Custom util.inspect() hook: shows only the `encoding` property. */
  [inspect](depth, opts) {
    const branded = this != null && this[kEncoder] === true;
    if (!branded)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
    if (typeof depth === 'number' && depth < 0)
      return opts.stylize('[Object]', 'special');

    const ctor = getConstructorOf(this);
    // The stand-in object only inherits `constructor`, so `encoding` is
    // printed as a plain own property.
    const proto = { constructor: ctor === null ? TextEncoder : ctor };
    const summary = Object.create(proto);
    summary.encoding = this.encoding;
    // Lazy to avoid circular dependency
    return require('util').inspect(summary, opts);
  }
}
|||
|
|||
// Finish both prototypes: install the non-enumerable, non-configurable brand
// that the receiver checks rely on, make the public members enumerable, and
// set the Symbol.toStringTag value.
Object.defineProperties(TextDecoder.prototype, {
  [kDecoder]: { configurable: false, enumerable: false, value: true },
  decode: { enumerable: true },
  encoding: { enumerable: true },
  fatal: { enumerable: true },
  ignoreBOM: { enumerable: true },
  [Symbol.toStringTag]: { value: 'TextDecoder', configurable: true }
});

Object.defineProperties(TextEncoder.prototype, {
  [kEncoder]: { configurable: false, enumerable: false, value: true },
  encode: { enumerable: true },
  encoding: { enumerable: true },
  [Symbol.toStringTag]: { value: 'TextEncoder', configurable: true }
});
|||
|
|||
module.exports = { |
|||
getEncodingFromLabel, |
|||
hasTextDecoder, |
|||
TextDecoder, |
|||
TextEncoder |
|||
}; |
@ -0,0 +1,385 @@ |
|||
// Flags: --expose-internals
|
|||
'use strict'; |
|||
|
|||
const common = require('../common'); |
|||
const assert = require('assert'); |
|||
const { TextEncoder, TextDecoder } = require('util'); |
|||
const { customInspectSymbol: inspect } = require('internal/util'); |
|||
const { getEncodingFromLabel } = require('internal/encoding'); |
|||
|
|||
// Expected UTF-8 bytes for the string '\ufefftest€' that is encoded below.
const encoded = Buffer.from([
  0xef, 0xbb, 0xbf,
  0x74, 0x65, 0x73, 0x74,
  0xe2, 0x82, 0xac
]);

if (!common.hasIntl)
  common.skip('WHATWG Encoding tests because ICU is not present.');

// Both constructors must be exposed by util.
assert(TextDecoder);
assert(TextEncoder);

// TextEncoder: the encoded bytes must match the fixture exactly.
const enc = new TextEncoder();
assert(enc);
const buf = enc.encode('\ufefftest€');

assert.strictEqual(Buffer.compare(buf, encoded), 0);
|||
|
|||
|
|||
// UTF-8 decoder with default options: the leading BOM is dropped from the
// output, both for one-shot and for streamed decoding.
{
  const labels = ['unicode-1-1-utf-8', 'utf8', 'utf-8'];

  for (const label of labels) {
    const decoder = new TextDecoder(label);
    assert.strictEqual(decoder.decode(buf), 'test€');
  }

  for (const label of labels) {
    const decoder = new TextDecoder(label);
    // Split inside the three-byte '€' sequence to exercise streaming.
    const head = decoder.decode(buf.slice(0, 8), { stream: true });
    const tail = decoder.decode(buf.slice(8));
    assert.strictEqual(`${head}${tail}`, 'test€');
  }
}
|||
|
|||
// UTF-8 decoder with ignoreBOM: true: the BOM is kept in the output, both
// for one-shot and for streamed decoding.
{
  const labels = ['unicode-1-1-utf-8', 'utf8', 'utf-8'];

  for (const label of labels) {
    const decoder = new TextDecoder(label, { ignoreBOM: true });
    assert.strictEqual(decoder.decode(buf), '\ufefftest€');
  }

  for (const label of labels) {
    const decoder = new TextDecoder(label, { ignoreBOM: true });
    // Split inside the three-byte '€' sequence to exercise streaming.
    const head = decoder.decode(buf.slice(0, 8), { stream: true });
    const tail = decoder.decode(buf.slice(8));
    assert.strictEqual(`${head}${tail}`, '\ufefftest€');
  }
}
|||
|
|||
// UTF-8 decoder with fatal: true: a truncated sequence throws when decoded
// in one shot, but not when the same bytes are fed as a stream.
{
  const labels = ['unicode-1-1-utf-8', 'utf8', 'utf-8'];

  for (const label of labels) {
    const decoder = new TextDecoder(label, { fatal: true });
    const decodeTruncated = () => decoder.decode(buf.slice(0, 8));
    assert.throws(decodeTruncated, common.expectsError({
      code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
      type: TypeError,
      message: /^The encoded data was not valid for encoding utf-8$/
    }));
  }

  for (const label of labels) {
    const decoder = new TextDecoder(label, { fatal: true });
    assert.doesNotThrow(
      () => decoder.decode(buf.slice(0, 8), { stream: true }));
    assert.doesNotThrow(() => decoder.decode(buf.slice(8)));
  }
}
|||
|
|||
// UTF-16LE: text encoded by Buffer must decode back to the same string.
{
  const decoder = new TextDecoder('utf-16le');
  const input = Buffer.from('test€', 'utf-16le');
  assert.strictEqual(decoder.decode(input), 'test€');
}
|||
|
|||
// UTF-16BE: hand-written big-endian code units for 'test€'.
{
  const decoder = new TextDecoder('utf-16be');
  const input = Buffer.from([
    0x00, 0x74,
    0x00, 0x65,
    0x00, 0x73,
    0x00, 0x74,
    0x20, 0xac
  ]);
  assert.strictEqual(decoder.decode(input), 'test€');
}
|||
|
|||
// The TextDecoder inspect hook works on a real decoder and rejects every
// other receiver, including a TextEncoder.
{
  const inspectDecoder = TextDecoder.prototype[inspect];
  inspectDecoder.call(new TextDecoder(), Infinity, {});

  const wrongReceivers = [{}, [], true, 1, '', new TextEncoder()];
  for (const receiver of wrongReceivers) {
    assert.throws(
      () => inspectDecoder.call(receiver, Infinity, {}),
      common.expectsError({
        code: 'ERR_INVALID_THIS',
        message: 'Value of "this" must be of type TextDecoder'
      }));
  }
}
|||
|
|||
// The TextEncoder inspect hook works on a real encoder and rejects every
// other receiver, including a TextDecoder.
{
  const inspectEncoder = TextEncoder.prototype[inspect];
  inspectEncoder.call(new TextEncoder(), Infinity, {});

  const wrongReceivers = [{}, [], true, 1, '', new TextDecoder()];
  for (const receiver of wrongReceivers) {
    assert.throws(
      () => inspectEncoder.call(receiver, Infinity, {}),
      common.expectsError({
        code: 'ERR_INVALID_THIS',
        message: 'Value of "this" must be of type TextEncoder'
      }));
  }
}
|||
|
|||
// Label resolution: every canonical name must resolve to itself and every
// listed alias must resolve to its canonical name. Two of the euc-kr labels
// carry surrounding whitespace and mixed case on purpose.
{
  const mappings = {
    'utf-8': ['unicode-1-1-utf-8', 'utf8'],
    'utf-16be': [],
    'utf-16le': ['utf-16'],
    'ibm866': ['866', 'cp866', 'csibm866'],
    'iso-8859-2': [
      'csisolatin2', 'iso-ir-101', 'iso8859-2', 'iso88592', 'iso_8859-2',
      'iso_8859-2:1987', 'l2', 'latin2'
    ],
    'iso-8859-3': [
      'csisolatin3', 'iso-ir-109', 'iso8859-3', 'iso88593', 'iso_8859-3',
      'iso_8859-3:1988', 'l3', 'latin3'
    ],
    'iso-8859-4': [
      'csisolatin4', 'iso-ir-110', 'iso8859-4', 'iso88594', 'iso_8859-4',
      'iso_8859-4:1988', 'l4', 'latin4'
    ],
    'iso-8859-5': [
      'csisolatincyrillic', 'cyrillic', 'iso-ir-144', 'iso8859-5',
      'iso88595', 'iso_8859-5', 'iso_8859-5:1988'
    ],
    'iso-8859-6': [
      'arabic', 'asmo-708', 'csiso88596e', 'csiso88596i',
      'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i',
      'iso-ir-127', 'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'
    ],
    'iso-8859-7': [
      'csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8',
      'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7',
      'iso_8859-7:1987', 'sun_eu_greek'
    ],
    'iso-8859-8': [
      'csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e',
      'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8',
      'iso_8859-8:1988', 'visual'
    ],
    'iso-8859-8-i': ['csiso88598i', 'logical'],
    'iso-8859-10': [
      'csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'
    ],
    'iso-8859-13': ['iso8859-13', 'iso885913'],
    'iso-8859-14': ['iso8859-14', 'iso885914'],
    'iso-8859-15': [
      'csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'
    ],
    'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
    'koi8-u': ['koi8-ru'],
    'macintosh': ['csmacintosh', 'mac', 'x-mac-roman'],
    'windows-874': [
      'dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'
    ],
    'windows-1250': ['cp1250', 'x-cp1250'],
    'windows-1251': ['cp1251', 'x-cp1251'],
    'windows-1252': [
      'ansi_x3.4-1968', 'ascii', 'cp1252', 'cp819', 'csisolatin1', 'ibm819',
      'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1',
      'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii', 'x-cp1252'
    ],
    'windows-1253': ['cp1253', 'x-cp1253'],
    'windows-1254': [
      'cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9',
      'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5', 'x-cp1254'
    ],
    'windows-1255': ['cp1255', 'x-cp1255'],
    'windows-1256': ['cp1256', 'x-cp1256'],
    'windows-1257': ['cp1257', 'x-cp1257'],
    'windows-1258': ['cp1258', 'x-cp1258'],
    'x-mac-cyrillic': ['x-mac-ukrainian'],
    'gbk': [
      'chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312',
      'gb_2312-80', 'iso-ir-58', 'x-gbk'
    ],
    'gb18030': [],
    'big5': ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
    'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
    'iso-2022-jp': ['csiso2022jp'],
    'shift_jis': [
      'csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j',
      'x-sjis'
    ],
    'euc-kr': [
      ' euc-kr \t', 'EUC-kr \n', 'cseuckr', 'csksc56011987', 'iso-ir-149',
      'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601',
      'windows-949'
    ]
  };

  for (const [canonical, labels] of Object.entries(mappings)) {
    assert.strictEqual(getEncodingFromLabel(canonical), canonical);
    for (const label of labels)
      assert.strictEqual(getEncodingFromLabel(label), canonical);
  }

  // Unknown labels resolve to undefined rather than throwing.
  assert.strictEqual(getEncodingFromLabel('made-up'), undefined);
}
Loading…
Reference in new issue