mirror of https://github.com/lukechilds/node.git
Browse Source
Provide an (initially experimental) implementation of the WHATWG Encoding Standard API (`TextDecoder` and `TextEncoder`). This is the same API implemented on the browser side. By default, with small-icu, only the UTF-8, UTF-16le and UTF-16be decoders are supported. With full-icu enabled, every encoding other than iso-8859-16 is supported. This provides a basic test, but does not include the full web platform tests. Note: many of the web platform tests for this would fail by default because we ship with small-icu by default. A process warning will be emitted on first use to indicate that the API is still experimental. No runtime flag is required to use the feature. Refs: https://encoding.spec.whatwg.org/ PR-URL: https://github.com/nodejs/node/pull/13644 Reviewed-By: Timothy Gu <timothygu99@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com>v6
James M Snell
8 years ago
12 changed files with 1189 additions and 16 deletions
@ -0,0 +1,458 @@ |
|||||
|
'use strict';

// An implementation of the WHATWG Encoding Standard
// https://encoding.spec.whatwg.org

const errors = require('internal/errors');

// Symbol keys under which TextDecoder stores its per-instance state.
const kHandle = Symbol('handle');
const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');

// Brand markers installed on the prototypes; every method checks for its
// marker on `this` before doing any work.
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');

// Flipped to true the first time either constructor runs, so that the
// experimental warning below is emitted only once per process.
let warned = false;
const experimental =
  'The WHATWG Encoding Standard implementation is an experimental API. It ' +
  'should not yet be used in production applications.';

const {
  getConstructorOf,
  customInspectSymbol: inspect
} = require('internal/util');

const {
  isArrayBuffer
} = process.binding('util');

const {
  encodeUtf8String
} = process.binding('buffer');

const {
  decode: _decode,
  getConverter,
  hasConverter
} = process.binding('icu');

// Bit flags handed to the native converter (getConverter / _decode).
const CONVERTER_FLAGS_FLUSH = 0x1;
const CONVERTER_FLAGS_FATAL = 0x2;
const CONVERTER_FLAGS_IGNORE_BOM = 0x4;

// Default `input` for TextDecoder.prototype.decode().
const empty = new Uint8Array(0);
||||
|
|
||||
|
// Maps every supported encoding label to its canonical encoding name.
// The table below is grouped by canonical name; it is flattened into
// `encodings` in listing order, so lookups stay a single Map#get.
const encodings = new Map();
[
  ['utf-8', ['unicode-1-1-utf-8', 'utf8', 'utf-8']],
  ['ibm866', ['866', 'cp866', 'csibm866', 'ibm866']],
  ['iso-8859-2', [
    'csisolatin2', 'iso-8859-2', 'iso-ir-101', 'iso8859-2', 'iso88592',
    'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'
  ]],
  ['iso-8859-3', [
    'csisolatin3', 'iso-8859-3', 'iso-ir-109', 'iso8859-3', 'iso88593',
    'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3'
  ]],
  ['iso-8859-4', [
    'csisolatin4', 'iso-8859-4', 'iso-ir-110', 'iso8859-4', 'iso88594',
    'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4'
  ]],
  ['iso-8859-5', [
    'csisolatincyrillic', 'cyrillic', 'iso-8859-5', 'iso-ir-144',
    'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988'
  ]],
  ['iso-8859-6', [
    'arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic',
    'ecma-114', 'iso-8859-6', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127',
    'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'
  ]],
  ['iso-8859-7', [
    'csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8',
    'iso-8859-7', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7',
    'iso_8859-7:1987', 'sun_eu_greek'
  ]],
  ['iso-8859-8', [
    'csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8',
    'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8',
    'iso_8859-8:1988', 'visual'
  ]],
  ['iso-8859-8-i', ['csiso88598i', 'iso-8859-8-i', 'logical']],
  ['iso-8859-10', [
    'csisolatin6', 'iso-8859-10', 'iso-ir-157', 'iso8859-10', 'iso885910',
    'l6', 'latin6'
  ]],
  ['iso-8859-13', ['iso-8859-13', 'iso8859-13', 'iso885913']],
  ['iso-8859-14', ['iso-8859-14', 'iso8859-14', 'iso885914']],
  ['iso-8859-15', [
    'csisolatin9', 'iso-8859-15', 'iso8859-15', 'iso885915', 'iso_8859-15',
    'l9'
  ]],
  ['koi8-r', ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r']],
  ['koi8-u', ['koi8-ru', 'koi8-u']],
  ['macintosh', ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman']],
  ['windows-874', [
    'dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620',
    'windows-874'
  ]],
  ['windows-1250', ['cp1250', 'windows-1250', 'x-cp1250']],
  ['windows-1251', ['cp1251', 'windows-1251', 'x-cp1251']],
  ['windows-1252', [
    'ansi_x3.4-1968', 'ascii', 'cp1252', 'cp819', 'csisolatin1', 'ibm819',
    'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1',
    'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii', 'windows-1252',
    'x-cp1252'
  ]],
  ['windows-1253', ['cp1253', 'windows-1253', 'x-cp1253']],
  ['windows-1254', [
    'cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9',
    'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5',
    'windows-1254', 'x-cp1254'
  ]],
  ['windows-1255', ['cp1255', 'windows-1255', 'x-cp1255']],
  ['windows-1256', ['cp1256', 'windows-1256', 'x-cp1256']],
  ['windows-1257', ['cp1257', 'windows-1257', 'x-cp1257']],
  ['windows-1258', ['cp1258', 'windows-1258', 'x-cp1258']],
  ['x-mac-cyrillic', ['x-mac-cyrillic', 'x-mac-ukrainian']],
  ['gbk', [
    'chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312',
    'gb_2312-80', 'gbk', 'iso-ir-58', 'x-gbk'
  ]],
  ['gb18030', ['gb18030']],
  ['big5', ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5']],
  ['euc-jp', ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp']],
  ['iso-2022-jp', ['csiso2022jp', 'iso-2022-jp']],
  ['shift_jis', [
    'csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'shift_jis', 'sjis',
    'windows-31j', 'x-sjis'
  ]],
  ['euc-kr', [
    'cseuckr', 'csksc56011987', 'euc-kr', 'iso-ir-149', 'korean',
    'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'
  ]],
  ['utf-16be', ['utf-16be']],
  ['utf-16le', ['utf-16le', 'utf-16']]
].forEach(([name, labels]) => {
  labels.forEach((label) => encodings.set(label, name));
});
||||
|
|
||||
|
// Unfortunately, String.prototype.trim also removes non-ascii whitespace,
// so we have to do this manually

/**
 * Removes leading and trailing ASCII whitespace from a label.
 *
 * Only TAB (U+0009), LF (U+000A), FF (U+000C), CR (U+000D) and SPACE
 * (U+0020) are stripped; every other character is left untouched.
 *
 * @param {string} label
 * @returns {string} `label` without its leading/trailing ASCII whitespace.
 */
function trimAsciiWhitespace(label) {
  const isAsciiWhitespace = (ch) =>
    ch === '\u0009' ||
    ch === '\u000a' ||
    ch === '\u000c' ||
    ch === '\u000d' ||
    ch === '\u0020';

  let start = 0;
  let end = label.length;
  while (start < end && isAsciiWhitespace(label[start]))
    start++;
  while (end > start && isAsciiWhitespace(label[end - 1]))
    end--;
  return label.slice(start, end);
}
||||
|
|
||||
|
/**
 * Resolves an encoding label to its canonical encoding name.
 *
 * The label is first looked up verbatim; if that misses, it is lower-cased
 * (ASCII letters only), stripped of leading/trailing ASCII whitespace and
 * looked up again.
 *
 * @param {string} label
 * @returns {string|undefined} The canonical name, or `undefined` when the
 *   label is not in the `encodings` table.
 */
function getEncodingFromLabel(label) {
  const enc = encodings.get(label);
  if (enc !== undefined) return enc;
  // Label matching is ASCII case-insensitive. String.prototype.toLowerCase
  // applies Unicode case mappings (e.g. U+212A KELVIN SIGN becomes 'k'),
  // which would let non-ASCII look-alikes match, so lower only A-Z.
  const lowered = label.replace(/[A-Z]/g, (ch) => ch.toLowerCase());
  return encodings.get(trimAsciiWhitespace(lowered));
}
||||
|
|
||||
|
/**
 * Reports whether a converter is available for the given encoding label.
 *
 * @param {string} [encoding='utf-8'] Encoding label.
 * @returns {boolean} `false` for labels that do not resolve to a known
 *   encoding; otherwise whatever the native `hasConverter` reports.
 * @throws {TypeError} ERR_INVALID_ARG_TYPE when `encoding` is not a string.
 */
function hasTextDecoder(encoding = 'utf-8') {
  if (typeof encoding !== 'string')
    throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'encoding', 'string');
  const enc = getEncodingFromLabel(encoding);
  // An unknown label can have no converter; do not hand `undefined` to the
  // native binding.
  return enc !== undefined && hasConverter(enc);
}
||||
|
|
||||
|
var Buffer; |
||||
|
function lazyBuffer() { |
||||
|
if (Buffer === undefined) |
||||
|
Buffer = require('buffer').Buffer; |
||||
|
return Buffer; |
||||
|
} |
||||
|
|
||||
|
/**
 * WHATWG `TextDecoder`: decodes bytes into a string using a converter
 * obtained from the native `icu` binding.
 *
 * Every accessor and method first checks that `this` carries the kDecoder
 * brand and throws ERR_INVALID_THIS otherwise.
 */
class TextDecoder {
  /**
   * @param {string} [encoding='utf-8'] Encoding label; coerced to a string.
   * @param {Object} [options] `fatal` and `ignoreBOM` are read for
   *   truthiness. `null` is accepted and means "no options".
   * @throws {TypeError} ERR_INVALID_ARG_TYPE when `options` is not an object.
   * @throws {RangeError} ERR_ENCODING_NOT_SUPPORTED when the label is
   *   unknown or no converter could be obtained for it.
   */
  constructor(encoding = 'utf-8', options = {}) {
    // Emit the experimental warning once per process.
    if (!warned) {
      warned = true;
      process.emitWarning(experimental, 'ExperimentalWarning');
    }

    encoding = `${encoding}`;
    if (typeof options !== 'object')
      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object');

    const enc = getEncodingFromLabel(encoding);
    if (enc === undefined)
      throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding);

    let flags = 0;
    // typeof null === 'object', so null reaches this point.
    if (options !== null) {
      flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
      flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
    }

    const handle = getConverter(enc, flags);
    // Same failure as an unknown label from the caller's point of view, so
    // it is reported with the same error class.
    if (handle === undefined)
      throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding);

    this[kHandle] = handle;
    this[kFlags] = flags;
    this[kEncoding] = enc;
  }

  /** @returns {string} Canonical name of the encoding in use. */
  get encoding() {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    return this[kEncoding];
  }

  /** @returns {boolean} Whether the `fatal` option was set. */
  get fatal() {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    return (this[kFlags] & CONVERTER_FLAGS_FATAL) === CONVERTER_FLAGS_FATAL;
  }

  /** @returns {boolean} Whether the `ignoreBOM` option was set. */
  get ignoreBOM() {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    return (this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM) ===
           CONVERTER_FLAGS_IGNORE_BOM;
  }

  /**
   * Decodes `input` and returns the resulting string.
   *
   * @param {ArrayBuffer|ArrayBufferView} [input] Bytes to decode; defaults
   *   to an empty Uint8Array.
   * @param {Object} [options] When `options.stream` is truthy the converter
   *   is not flushed after this call. `null` is treated as "no options"
   *   and passes no flags at all.
   * @returns {string}
   * @throws {TypeError} ERR_INVALID_THIS, ERR_INVALID_ARG_TYPE, or
   *   ERR_ENCODING_INVALID_ENCODED_DATA (with `errno` set to the number
   *   returned by the native decoder).
   */
  decode(input = empty, options = {}) {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    if (isArrayBuffer(input)) {
      input = lazyBuffer().from(input);
    } else if (!ArrayBuffer.isView(input)) {
      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'input',
                                 ['ArrayBuffer', 'ArrayBufferView']);
    }
    if (typeof options !== 'object') {
      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object');
    }

    let flags = 0;
    if (options !== null)
      flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;

    const ret = _decode(this[kHandle], input, flags);
    // A numeric result is treated as an error code from the native side.
    if (typeof ret === 'number') {
      const err = new errors.TypeError('ERR_ENCODING_INVALID_ENCODED_DATA',
                                       this.encoding);
      err.errno = ret;
      throw err;
    }
    return ret.toString('ucs2');
  }

  /**
   * Custom inspection hook: renders `encoding`, `fatal` and `ignoreBOM`,
   * plus the internal flags and handle when `opts.showHidden` is set.
   *
   * @param {number} depth
   * @param {Object} opts Inspect options; `stylize` is used when depth < 0.
   * @returns {string}
   */
  [inspect](depth, opts) {
    if (this == null || this[kDecoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
    if (typeof depth === 'number' && depth < 0)
      return opts.stylize('[Object]', 'special');
    const ctor = getConstructorOf(this);
    // Plain object whose prototype only carries the constructor to report.
    const obj = Object.create({
      constructor: ctor === null ? TextDecoder : ctor
    });
    obj.encoding = this.encoding;
    obj.fatal = this.fatal;
    obj.ignoreBOM = this.ignoreBOM;
    if (opts.showHidden) {
      obj[kFlags] = this[kFlags];
      obj[kHandle] = this[kHandle];
    }
    // Lazy to avoid circular dependency
    return require('util').inspect(obj, opts);
  }
}
||||
|
|
||||
|
/**
 * WHATWG `TextEncoder`: encodes strings as UTF-8 bytes.
 *
 * Every accessor and method first checks that `this` carries the kEncoder
 * brand and throws ERR_INVALID_THIS otherwise.
 */
class TextEncoder {
  constructor() {
    // Emit the experimental warning once per process.
    if (!warned) {
      warned = true;
      process.emitWarning(experimental, 'ExperimentalWarning');
    }
  }

  /** @returns {string} Always 'utf-8'. */
  get encoding() {
    if (this == null || this[kEncoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
    return 'utf-8';
  }

  /**
   * Encodes `input` (coerced to a string) via the native encodeUtf8String.
   *
   * @param {string} [input='']
   * @returns {*} The value returned by `encodeUtf8String`.
   */
  encode(input = '') {
    if (this == null || this[kEncoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
    return encodeUtf8String(`${input}`);
  }

  /**
   * Custom inspection hook: renders an object exposing only `encoding`.
   *
   * @param {number} depth
   * @param {Object} opts Inspect options; `stylize` is used when depth < 0.
   * @returns {string}
   */
  [inspect](depth, opts) {
    if (this == null || this[kEncoder] !== true)
      throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
    if (typeof depth === 'number' && depth < 0)
      return opts.stylize('[Object]', 'special');
    const ctor = getConstructorOf(this);
    // Plain object whose prototype only carries the constructor to report.
    const obj = Object.create({
      constructor: ctor === null ? TextEncoder : ctor
    });
    obj.encoding = this.encoding;
    // Lazy to avoid circular dependency
    return require('util').inspect(obj, opts);
  }
}
||||
|
|
||||
|
// Install the kDecoder brand checked by every TextDecoder member, make the
// public members enumerable (class members are non-enumerable by default),
// and set the Symbol.toStringTag value.
Object.defineProperties(
  TextDecoder.prototype, {
    [kDecoder]: { enumerable: false, value: true, configurable: false },
    'decode': { enumerable: true },
    'encoding': { enumerable: true },
    'fatal': { enumerable: true },
    'ignoreBOM': { enumerable: true },
    [Symbol.toStringTag]: {
      configurable: true,
      value: 'TextDecoder'
    } });

// Same treatment for TextEncoder, using the kEncoder brand.
Object.defineProperties(
  TextEncoder.prototype, {
    [kEncoder]: { enumerable: false, value: true, configurable: false },
    'encode': { enumerable: true },
    'encoding': { enumerable: true },
    [Symbol.toStringTag]: {
      configurable: true,
      value: 'TextEncoder'
    } });
||||
|
|
||||
|
module.exports = { |
||||
|
getEncodingFromLabel, |
||||
|
hasTextDecoder, |
||||
|
TextDecoder, |
||||
|
TextEncoder |
||||
|
}; |
@ -0,0 +1,385 @@ |
|||||
|
// Flags: --expose-internals
'use strict';

const common = require('../common');
const assert = require('assert');
const { TextEncoder, TextDecoder } = require('util');
const { customInspectSymbol: inspect } = require('internal/util');
const { getEncodingFromLabel } = require('internal/encoding');

// UTF-8 bytes for '\ufefftest€': a BOM, 'test', then the euro sign.
const encoded = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x65,
                             0x73, 0x74, 0xe2, 0x82, 0xac]);

// NOTE(review): nothing here stops execution after common.skip(); this
// assumes common.skip() itself ends the test - confirm against ../common.
if (!common.hasIntl) {
  common.skip('WHATWG Encoding tests because ICU is not present.');
}

// Make Sure TextDecoder and TextEncoder exist
assert(TextDecoder);
assert(TextEncoder);

// Test TextEncoder
const enc = new TextEncoder();
assert(enc);
const buf = enc.encode('\ufefftest€');

assert.strictEqual(Buffer.compare(buf, encoded), 0);
||||
|
|
||||
|
|
||||
|
// Labels exercised by each of the UTF-8 decoder tests below.
const utf8Labels = ['unicode-1-1-utf-8', 'utf8', 'utf-8'];

// Test TextDecoder, UTF-8, fatal: false, ignoreBOM: false
{
  // One-shot decode: the leading BOM is dropped from the output.
  utf8Labels.forEach((i) => {
    const dec = new TextDecoder(i);
    const res = dec.decode(buf);
    assert.strictEqual(res, 'test€');
  });

  // Streaming decode: the split at byte 8 falls inside the 3-byte euro sign.
  utf8Labels.forEach((i) => {
    const dec = new TextDecoder(i);
    let res = '';
    res += dec.decode(buf.slice(0, 8), { stream: true });
    res += dec.decode(buf.slice(8));
    assert.strictEqual(res, 'test€');
  });
}

// Test TextDecoder, UTF-8, fatal: false, ignoreBOM: true
{
  // With ignoreBOM the BOM is kept in the output.
  utf8Labels.forEach((i) => {
    const dec = new TextDecoder(i, { ignoreBOM: true });
    const res = dec.decode(buf);
    assert.strictEqual(res, '\ufefftest€');
  });

  utf8Labels.forEach((i) => {
    const dec = new TextDecoder(i, { ignoreBOM: true });
    let res = '';
    res += dec.decode(buf.slice(0, 8), { stream: true });
    res += dec.decode(buf.slice(8));
    assert.strictEqual(res, '\ufefftest€');
  });
}

// Test TextDecoder, UTF-8, fatal: true, ignoreBOM: false
{
  // A truncated sequence decoded without { stream: true } must throw.
  utf8Labels.forEach((i) => {
    const dec = new TextDecoder(i, { fatal: true });
    assert.throws(() => dec.decode(buf.slice(0, 8)),
                  common.expectsError({
                    code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
                    type: TypeError,
                    message:
                      /^The encoded data was not valid for encoding utf-8$/
                  }));
  });

  // The same split is fine when the first chunk is marked as streaming.
  utf8Labels.forEach((i) => {
    const dec = new TextDecoder(i, { fatal: true });
    assert.doesNotThrow(() => dec.decode(buf.slice(0, 8), { stream: true }));
    assert.doesNotThrow(() => dec.decode(buf.slice(8)));
  });
}

// Test TextDecoder, UTF-16le
{
  const dec = new TextDecoder('utf-16le');
  const res = dec.decode(Buffer.from('test€', 'utf-16le'));
  assert.strictEqual(res, 'test€');
}

// Test TextDecoder, UTF-16be
{
  const dec = new TextDecoder('utf-16be');
  const res = dec.decode(Buffer.from([0x00, 0x74, 0x00, 0x65, 0x00,
                                      0x73, 0x00, 0x74, 0x20, 0xac]));
  assert.strictEqual(res, 'test€');
}
||||
|
|
||||
|
// TextDecoder's custom inspect must reject any `this` that is not a
// TextDecoder.
{
  const fn = TextDecoder.prototype[inspect];
  fn.call(new TextDecoder(), Infinity, {});

  [{}, [], true, 1, '', new TextEncoder()].forEach((i) => {
    assert.throws(() => fn.call(i, Infinity, {}),
                  common.expectsError({
                    code: 'ERR_INVALID_THIS',
                    message: 'Value of "this" must be of type TextDecoder'
                  }));
  });
}

// TextEncoder's custom inspect must reject any `this` that is not a
// TextEncoder.
{
  const fn = TextEncoder.prototype[inspect];
  fn.call(new TextEncoder(), Infinity, {});

  [{}, [], true, 1, '', new TextDecoder()].forEach((i) => {
    assert.throws(() => fn.call(i, Infinity, {}),
                  common.expectsError({
                    code: 'ERR_INVALID_THIS',
                    message: 'Value of "this" must be of type TextEncoder'
                  }));
  });
}
||||
|
|
||||
|
// Test Encoding Mappings
{
  // Canonical encoding name -> additional labels that must resolve to it.
  // The canonical name itself is checked separately below.
  const mappings = {
    'utf-8': ['unicode-1-1-utf-8', 'utf8'],
    'utf-16be': [],
    'utf-16le': ['utf-16'],
    'ibm866': ['866', 'cp866', 'csibm866'],
    'iso-8859-2': [
      'csisolatin2', 'iso-ir-101', 'iso8859-2', 'iso88592', 'iso_8859-2',
      'iso_8859-2:1987', 'l2', 'latin2'
    ],
    'iso-8859-3': [
      'csisolatin3', 'iso-ir-109', 'iso8859-3', 'iso88593', 'iso_8859-3',
      'iso_8859-3:1988', 'l3', 'latin3'
    ],
    'iso-8859-4': [
      'csisolatin4', 'iso-ir-110', 'iso8859-4', 'iso88594', 'iso_8859-4',
      'iso_8859-4:1988', 'l4', 'latin4'
    ],
    'iso-8859-5': [
      'csisolatincyrillic', 'cyrillic', 'iso-ir-144', 'iso8859-5',
      'iso88595', 'iso_8859-5', 'iso_8859-5:1988'
    ],
    'iso-8859-6': [
      'arabic', 'asmo-708', 'csiso88596e', 'csiso88596i',
      'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i',
      'iso-ir-127', 'iso8859-6', 'iso88596', 'iso_8859-6',
      'iso_8859-6:1987'
    ],
    'iso-8859-7': [
      'csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8',
      'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7',
      'iso_8859-7:1987', 'sun_eu_greek'
    ],
    'iso-8859-8': [
      'csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e',
      'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8',
      'iso_8859-8:1988', 'visual'
    ],
    'iso-8859-8-i': ['csiso88598i', 'logical'],
    'iso-8859-10': [
      'csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'
    ],
    'iso-8859-13': ['iso8859-13', 'iso885913'],
    'iso-8859-14': ['iso8859-14', 'iso885914'],
    'iso-8859-15': [
      'csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'
    ],
    'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
    'koi8-u': ['koi8-ru'],
    'macintosh': ['csmacintosh', 'mac', 'x-mac-roman'],
    'windows-874': [
      'dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'
    ],
    'windows-1250': ['cp1250', 'x-cp1250'],
    'windows-1251': ['cp1251', 'x-cp1251'],
    'windows-1252': [
      'ansi_x3.4-1968', 'ascii', 'cp1252', 'cp819', 'csisolatin1', 'ibm819',
      'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1',
      'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii', 'x-cp1252'
    ],
    'windows-1253': ['cp1253', 'x-cp1253'],
    'windows-1254': [
      'cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9',
      'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5',
      'x-cp1254'
    ],
    'windows-1255': ['cp1255', 'x-cp1255'],
    'windows-1256': ['cp1256', 'x-cp1256'],
    'windows-1257': ['cp1257', 'x-cp1257'],
    'windows-1258': ['cp1258', 'x-cp1258'],
    'x-mac-cyrillic': ['x-mac-ukrainian'],
    'gbk': [
      'chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312',
      'gb_2312-80', 'iso-ir-58', 'x-gbk'
    ],
    'gb18030': [],
    'big5': ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
    'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
    'iso-2022-jp': ['csiso2022jp'],
    'shift_jis': [
      'csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j',
      'x-sjis'
    ],
    'euc-kr': [
      // The first two also exercise whitespace trimming and case folding.
      ' euc-kr \t', 'EUC-kr \n', 'cseuckr', 'csksc56011987', 'iso-ir-149',
      'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601',
      'windows-949'
    ]
  };

  Object.entries(mappings).forEach(([name, labels]) => {
    assert.strictEqual(getEncodingFromLabel(name), name);
    labels.forEach((l) => assert.strictEqual(getEncodingFromLabel(l), name));
  });

  assert.strictEqual(getEncodingFromLabel('made-up'), undefined);
}
Loading…
Reference in new issue