@ -22,109 +22,14 @@
var common = require ( '../common' ) ;
var assert = require ( 'assert' ) ;
var StringDecoder = require ( 'string_decoder' ) . StringDecoder ;
// UTF-8: all buffers in this section are written to the same decoder, mostly
// one byte per write. A write that stops in the middle of a character is
// expected to return '', and the character is expected back from the write
// that supplies its last byte.
var decoder = new StringDecoder ( 'utf8' ) ;
// '$' is written in one piece and must come straight back.
var buffer = new Buffer ( '$' ) ;
assert . deepEqual ( '$' , decoder . write ( buffer ) ) ;
// '¢' is split over two one-byte writes.
buffer = new Buffer ( '¢' ) ;
assert . deepEqual ( '' , decoder . write ( buffer . slice ( 0 , 1 ) ) ) ;
assert . deepEqual ( '¢' , decoder . write ( buffer . slice ( 1 , 2 ) ) ) ;
// '€' is split over three one-byte writes.
buffer = new Buffer ( '€' ) ;
assert . deepEqual ( '' , decoder . write ( buffer . slice ( 0 , 1 ) ) ) ;
assert . deepEqual ( '' , decoder . write ( buffer . slice ( 1 , 2 ) ) ) ;
assert . deepEqual ( '€' , decoder . write ( buffer . slice ( 2 , 3 ) ) ) ;
// A four-byte sequence written one byte at a time. Only the presence of some
// output is checked here, not its exact value.
buffer = new Buffer ( [ 0xF0 , 0xA4 , 0xAD , 0xA2 ] ) ;
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 1 ) ) ;
s += decoder . write ( buffer . slice ( 1 , 2 ) ) ;
s += decoder . write ( buffer . slice ( 2 , 3 ) ) ;
s += decoder . write ( buffer . slice ( 3 , 4 ) ) ;
assert . ok ( s . length > 0 ) ;
// CESU-8
// The six bytes below are replayed several times with the split points moved
// on each pass. In every pass nothing may be returned until the bytes of the
// trail surrogate are complete, at which point both surrogates come back
// together.
// NOTE(review): `decoder` is not re-created in this section, so the writes go
// to whichever decoder the variable currently holds - presumably the 'utf8'
// decoder created earlier in the file; confirm.
buffer = new Buffer ( 'EDA0BDEDB18D' , 'hex' ) ; // THUMBS UP SIGN (in CESU-8)
// Pass 1: one byte per write.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 1 ) ) ;
s += decoder . write ( buffer . slice ( 1 , 2 ) ) ;
s += decoder . write ( buffer . slice ( 2 , 3 ) ) ; // complete lead surrogate
assert . equal ( s , '' ) ;
s += decoder . write ( buffer . slice ( 3 , 4 ) ) ;
s += decoder . write ( buffer . slice ( 4 , 5 ) ) ;
s += decoder . write ( buffer . slice ( 5 , 6 ) ) ; // complete trail surrogate
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// Pass 2: two bytes per write.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 2 ) ) ;
s += decoder . write ( buffer . slice ( 2 , 4 ) ) ; // complete lead surrogate
assert . equal ( s , '' ) ;
s += decoder . write ( buffer . slice ( 4 , 6 ) ) ; // complete trail surrogate
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// Pass 3: three bytes per write, i.e. one surrogate per write.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 3 ) ) ; // complete lead surrogate
assert . equal ( s , '' ) ;
s += decoder . write ( buffer . slice ( 3 , 6 ) ) ; // complete trail surrogate
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// Pass 4: first four bytes together, then one byte per write.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 4 ) ) ; // complete lead surrogate
assert . equal ( s , '' ) ;
s += decoder . write ( buffer . slice ( 4 , 5 ) ) ;
s += decoder . write ( buffer . slice ( 5 , 6 ) ) ; // complete trail surrogate
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// Pass 5: everything but the last byte, then the last byte.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 5 ) ) ; // complete lead surrogate
assert . equal ( s , '' ) ;
s += decoder . write ( buffer . slice ( 5 , 6 ) ) ; // complete trail surrogate
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// Pass 6: the whole buffer in a single write.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 6 ) ) ;
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// UCS-2
// From here on `decoder` is a 'ucs2' decoder; it is shared by the UCS-2 and
// UTF-16LE checks below.
decoder = new StringDecoder ( 'ucs2' ) ;
buffer = new Buffer ( 'ab' , 'ucs2' ) ;
assert . equal ( decoder . write ( buffer ) , 'ab' ) ; // 2 complete chars
// Split on an odd byte offset so that 'b' straddles the two writes.
buffer = new Buffer ( 'abc' , 'ucs2' ) ;
assert . equal ( decoder . write ( buffer . slice ( 0 , 3 ) ) , 'a' ) ; // 'a' and first of 'b'
assert . equal ( decoder . write ( buffer . slice ( 3 , 6 ) ) , 'bc' ) ; // second of 'b' and 'c'
// UTF-16LE
// The four bytes below are replayed with the split point moved on each pass.
// Nothing may be returned until the trail surrogate is complete.
buffer = new Buffer ( '3DD84DDC' , 'hex' ) ; // THUMBS UP SIGN (in UTF-16LE)
// Pass 1: one byte per write.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 1 ) ) ;
s += decoder . write ( buffer . slice ( 1 , 2 ) ) ; // complete lead surrogate
assert . equal ( s , '' ) ;
s += decoder . write ( buffer . slice ( 2 , 3 ) ) ;
s += decoder . write ( buffer . slice ( 3 , 4 ) ) ; // complete trail surrogate
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// Pass 2: two bytes per write, i.e. one surrogate per write.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 2 ) ) ; // complete lead surrogate
assert . equal ( s , '' ) ;
s += decoder . write ( buffer . slice ( 2 , 4 ) ) ; // complete trail surrogate
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// Pass 3: the first three bytes together, then the final byte.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 3 ) ) ; // complete lead surrogate
assert . equal ( s , '' ) ;
s += decoder . write ( buffer . slice ( 3 , 4 ) ) ; // complete trail surrogate
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// Pass 4: the whole buffer in a single write.
var s = '' ;
s += decoder . write ( buffer . slice ( 0 , 4 ) ) ;
assert . equal ( s , '\uD83D\uDC4D' ) ; // THUMBS UP SIGN (in UTF-16)
// Progress prefix, written without a trailing newline; test() writes one '.'
// after it for every write sequence it tries.
process . stdout . write ( 'scanning ' ) ;
// UTF-8
// One case per line; each one is decoded through the test() helper and must
// come back as the same string.
test ( 'utf-8' , new Buffer ( '$' , 'utf-8' ) , '$' ) ;
test ( 'utf-8' , new Buffer ( '¢' , 'utf-8' ) , '¢' ) ;
test ( 'utf-8' , new Buffer ( '€' , 'utf-8' ) , '€' ) ;
test ( 'utf-8' , new Buffer ( '𤭢' , 'utf-8' ) , '𤭢' ) ;
// A mixed ascii and non-ascii string
// Test stolen from deps/v8/test/cctest/test-strings.cc
// U+02E4 -> CB A4
@ -132,32 +37,86 @@ assert.equal(s, '\uD83D\uDC4D'); // THUMBS UP SIGN (in UTF-16)
// U+12E4 -> E1 8B A4
// U+0030 -> 30
// U+3045 -> E3 81 85
// Fixture for the mixed ascii / non-ascii string: the expected text, its
// bytes, and a lookup table used by the hand-written scan loop that follows.
var expected = '\u02e4\u0064\u12e4\u0030\u3045' ;
var buffer = new Buffer ( [ 0xCB , 0xA4 , 0x64 , 0xE1 , 0x8B , 0xA4 ,
0x30 , 0xE3 , 0x81 , 0x85 ] ) ;
// charLengths[i] is the string length expected back after writing only the
// first i bytes of `buffer` (the scan loop compares it with the length of the
// first write's result).
var charLengths = [ 0 , 0 , 1 , 2 , 2 , 2 , 3 , 4 , 4 , 4 , 5 , 5 ] ;
// The same bytes and expected string, passed inline to the test() helper.
// NOTE(review): this duplicates `buffer` / `expected` above; it looks like a
// newer form of the same check - confirm which one is meant to stay.
test (
'utf-8' ,
new Buffer ( [ 0xCB , 0xA4 , 0x64 , 0xE1 , 0x8B , 0xA4 , 0x30 , 0xE3 , 0x81 , 0x85 ] ) ,
'\u02e4\u0064\u12e4\u0030\u3045'
) ;
// NOTE(review): the statements in this section appear to interleave two
// revisions of the test - a hand-written three-segment scan over `buffer` and
// standalone test() helper calls for CESU-8 / UCS-2 / UTF-16LE. The two `for`
// loops opened below are not closed anywhere in this section; confirm the
// intended structure against the original file before relying on it.
//
// Split the buffer into 3 segments
// |----|------|-------|
// 0 i j buffer.length
// Scan through every possible 3 segment combination
// and make sure that the string is always parsed.
common . print ( 'scanning ' ) ;
for ( var j = 2 ; j < buffer . length ; j ++ ) {
for ( var i = 1 ; i < j ; i ++ ) {
// A fresh decoder for every (i, j) split.
var decoder = new StringDecoder ( 'utf8' ) ;
// CESU-8
test ( 'utf-8' , new Buffer ( 'EDA0BDEDB18D' , 'hex' ) , '\ud83d\udc4d' ) ; // thumbs up
// First segment: bytes [0, i).
var sum = decoder . write ( buffer . slice ( 0 , i ) ) ;
// UCS-2
test ( 'ucs2' , new Buffer ( 'ababc' , 'ucs2' ) , 'ababc' ) ;
// just check that we've received the right amount
// after the first write
assert . equal ( charLengths [ i ] , sum . length ) ;
// UTF-16LE
test ( 'ucs2' , new Buffer ( '3DD84DDC' , 'hex' ) , '\ud83d\udc4d' ) ; // thumbs up
console . log ( ' crayon!' ) ;
// Second and third segments: bytes [i, j) and [j, buffer.length). Together
// with the first segment they must decode to the full expected string.
sum += decoder . write ( buffer . slice ( i , j ) ) ;
sum += decoder . write ( buffer . slice ( j , buffer . length ) ) ;
assert . equal ( expected , sum ) ;
common . print ( '.' ) ;
// test checks that a StringDecoder created for `encoding` turns `input` into
// `expected` regardless of how the buffer is chopped up across write() calls.
// By default every split produced by writeSequences() is tried. Pass
// singleSequence (an array of [start, end] pairs) to replay exactly one split,
// which is handy when tracking down a failure. One '.' is written to stdout
// per sequence tried; a mismatch fails the assertion with a message that
// describes the offending sequence and the decoder state.
function test(encoding, input, expected, singleSequence) {
  var plans = singleSequence ? [singleSequence] : writeSequences(input.length);

  plans.forEach(function(plan) {
    var decoder = new StringDecoder(encoding);

    // Feed each [start, end) slice to the decoder in order and glue the
    // returned fragments together.
    var decoded = plan.reduce(function(text, range) {
      return text + decoder.write(input.slice(range[0], range[1]));
    }, '');
    process.stdout.write('.');

    if (decoded === expected) {
      return;
    }

    // Build the diagnostic one line at a time.
    var report = 'Expected "' + unicodeEscape(expected) + '", ';
    report += 'but got "' + unicodeEscape(decoded) + '"\n';
    report += 'Write sequence: ' + JSON.stringify(plan) + '\n';
    report += 'Decoder charBuffer: 0x' + decoder.charBuffer.toString('hex') + '\n';
    report += 'Full Decoder State: ' + JSON.stringify(decoder, null, 2);
    assert.fail(decoded, expected, report);
  });
}
// unicodeEscape renders str as a run of \uXXXX escapes, one per UTF-16 code
// unit, so that invisible or look-alike characters are unambiguous in failure
// messages. Every code unit is zero-padded to four hex digits so the result is
// valid JavaScript escape syntax (e.g. '$' becomes \u0024, not \u24).
// Returns '' for an empty string.
function unicodeEscape(str) {
  var escaped = '';
  for (var i = 0; i < str.length; i++) {
    var hex = str.charCodeAt(i).toString(16);
    // charCodeAt() never exceeds 0xffff, so four digits always suffice.
    escaped += '\\u' + ('0000' + hex).slice(-4);
  }
  return escaped;
}
// writeSequences enumerates every way a buffer of `length` bytes can be cut
// into consecutive, non-empty [start, end] ranges that are handed to write()
// one after another.
//
// e.g. writeSequences(3) will return: [
//   [ [ 0, 3 ] ],
//   [ [ 0, 2 ], [ 2, 3 ] ],
//   [ [ 0, 1 ], [ 1, 3 ] ],
//   [ [ 0, 1 ], [ 1, 2 ], [ 2, 3 ] ]
// ]
//
// `start` and `sequence` are only used by the recursion: they carry the
// offset reached so far and the ranges chosen up to that offset. External
// callers pass `length` only.
function writeSequences(length, start, sequence) {
  var isTopLevelCall = start === undefined;
  var from = isTopLevelCall ? 0 : start;
  var prefix = isTopLevelCall ? [] : sequence;

  // A recursive call that has consumed the whole buffer completes exactly one
  // sequence.
  if (!isTopLevelCall && from === length) {
    return [prefix];
  }

  // Try the longest next range first, then progressively shorter ones, and
  // let the recursion fill in the remainder of the buffer.
  var collected = [];
  for (var end = length; end > from; end--) {
    var extended = prefix.concat([[from, end]]);
    collected = collected.concat(writeSequences(length, end, extended));
  }
  return collected;
}
// Finish the progress line: the 'scanning ' prefix and the '.' marks are
// written without a newline, and console.log supplies the closing text.
console . log ( ' crayon!' ) ;