@ -25,8 +25,10 @@ function StringDecoder(encoding) {
case 'utf16le' :
case 'utf16le' :
this . text = utf16Text ;
this . text = utf16Text ;
this . end = utf16End ;
this . end = utf16End ;
// fall through
nb = 4 ;
break ;
case 'utf8' :
case 'utf8' :
this . fillLast = utf8FillLast ;
nb = 4 ;
nb = 4 ;
break ;
break ;
case 'base64' :
case 'base64' :
@ -68,7 +70,7 @@ StringDecoder.prototype.end = utf8End;
// Returns only complete characters in a Buffer
// Returns only complete characters in a Buffer
StringDecoder . prototype . text = utf8Text ;
StringDecoder . prototype . text = utf8Text ;
// Attempts to complete a partial character using bytes from a Buffer
// Attempts to complete a partial non-UTF-8 character using bytes from a Buffer
StringDecoder . prototype . fillLast = function ( buf ) {
StringDecoder . prototype . fillLast = function ( buf ) {
if ( this . lastNeed <= buf . length ) {
if ( this . lastNeed <= buf . length ) {
buf . copy ( this . lastChar , this . lastTotal - this . lastNeed , 0 , this . lastNeed ) ;
buf . copy ( this . lastChar , this . lastTotal - this . lastNeed , 0 , this . lastNeed ) ;
@ -92,38 +94,83 @@ function utf8CheckByte(byte) {
return - 1 ;
return - 1 ;
}
}
// Checks at most the last 3 bytes of a Buffer for an incomplete UTF-8
// Checks at most 3 bytes at the end of a Buffer in order to detect an
// character, returning the total number of bytes needed to complete the partial
// incomplete multi-byte UTF-8 character. The total number of bytes (2, 3, or 4)
// character (if applicable).
// needed to complete the UTF-8 character (if applicable) is returned.
// NOTE(review): this span appears to be a flattened diff hunk: the lines of two
// variants of utf8CheckIncomplete are interleaved, so it does not parse as one
// function as written. A line that occurs twice in a row is common to both
// variants; where neighbouring lines differ, the first looks like the
// pre-change form and the second like the post-change form -- TODO confirm
// against the actual file before editing any code here.
//
// Both variants inspect buf from its last byte backwards, at most three bytes
// and never below index i, passing each byte to utf8CheckByte. The first byte
// for which utf8CheckByte yields a non-negative value ends the scan and that
// value is returned; a positive value may also update self.lastNeed (details
// at each step below). 0 is returned when the scan would go below index i or
// when none of the three bytes yields a non-negative value.
function utf8CheckIncomplete ( self , buf , i ) {
function utf8CheckIncomplete ( self , buf , i ) {
// Start at the last byte; nothing to inspect if it lies before index i.
var j = buf . length - 1 ;
var j = buf . length - 1 ;
if ( j < i )
if ( j < i )
return 0 ;
return 0 ;
// Step 1 (last byte). The first variant post-decrements j while reading; the
// second reads buf[j] and moves j in the following bounds check instead.
var nb = utf8CheckByte ( buf [ j -- ] ) ;
var nb = utf8CheckByte ( buf [ j ] ) ;
if ( nb >= 0 ) {
if ( nb >= 0 ) {
if ( nb > 0 )
if ( nb > 0 )
// With j already decremented once, buf.length - j is 2 here, so the first
// expression evaluates to nb - 1, the same value the second line spells out.
self . lastNeed = nb + 1 - ( buf . length - j ) ;
self . lastNeed = nb - 1 ;
return nb ;
return nb ;
}
}
// Step 2 (second-to-last byte). Same bounds check in both variants; only the
// place where j is decremented differs.
if ( j < i )
if ( -- j < i )
return 0 ;
return 0 ;
nb = utf8CheckByte ( buf [ j -- ] ) ;
nb = utf8CheckByte ( buf [ j ] ) ;
if ( nb >= 0 ) {
if ( nb >= 0 ) {
if ( nb > 0 )
if ( nb > 0 )
// buf.length - j is 3 at this point in the first variant, i.e. nb - 2.
self . lastNeed = nb + 1 - ( buf . length - j ) ;
self . lastNeed = nb - 2 ;
return nb ;
return nb ;
}
}
// Step 3 (third-to-last byte).
if ( j < i )
if ( -- j < i )
return 0 ;
return 0 ;
nb = utf8CheckByte ( buf [ j -- ] ) ;
nb = utf8CheckByte ( buf [ j ] ) ;
if ( nb >= 0 ) {
if ( nb >= 0 ) {
if ( nb > 0 )
if ( nb > 0 ) {
// First variant: buf.length - j is 4 here, i.e. nb - 3, applied for every
// positive nb.
self . lastNeed = nb + 1 - ( buf . length - j ) ;
// Second variant only: a value of 2 found at this position is reported as 0
// and self.lastNeed is left untouched; any other positive value stores
// nb - 3. This is the one place where the two variants differ in result.
if ( nb === 2 )
nb = 0 ;
else
self . lastNeed = nb - 3 ;
}
return nb ;
return nb ;
}
}
// None of the inspected bytes produced a non-negative utf8CheckByte result.
return 0 ;
return 0 ;
}
}
// Validates as many continuation bytes for a pending multi-byte UTF-8 character
// as are needed (self.lastNeed) and available at the start of buf.
//
// A continuation byte has the bit pattern 10xxxxxx, i.e. (byte & 0xC0) === 0x80.
// If a non-continuation byte is found where a continuation byte is expected,
// the bytes accepted so far are "replaced" with UTF-8 replacement characters
// ('\ufffd') to match v8's UTF-8 decoding behavior: the returned string holds
// one '\ufffd' per byte already accepted (p buffered bytes plus the valid ones
// seen in buf), and self.lastNeed is set to the number of bytes of buf that
// were accepted before the bad one.
//
// Parameters:
//   self - decoder state; lastNeed is read, and written only on failure.
//   buf  - Buffer (or any indexable byte sequence with a length) to validate.
//   p    - number of bytes of the pending character that are already buffered.
//
// Returns the replacement string on failure, or undefined when every byte that
// could be checked is a valid continuation byte (including when buf is empty,
// in which case nothing is checked and self is left untouched).
//
// The check is written out three times rather than as a loop, both to cover the
// case where all continuation bytes arrive in the same buffer and as a slight
// performance gain.
function utf8CheckExtraBytes(self, buf, p) {
  // An empty buffer holds no byte to judge. Without this guard buf[0] is
  // undefined, (undefined & 0xC0) is 0, and the pending character would be
  // thrown away as if an invalid byte had been seen.
  if (buf.length === 0)
    return;
  if ((buf[0] & 0xC0) !== 0x80) {
    self.lastNeed = 0;
    return '\ufffd'.repeat(p);
  }
  if (self.lastNeed > 1 && buf.length > 1) {
    if ((buf[1] & 0xC0) !== 0x80) {
      self.lastNeed = 1;
      return '\ufffd'.repeat(p + 1);
    }
    if (self.lastNeed > 2 && buf.length > 2) {
      if ((buf[2] & 0xC0) !== 0x80) {
        self.lastNeed = 2;
        return '\ufffd'.repeat(p + 2);
      }
    }
  }
}
// Tries to finish the pending multi-byte UTF-8 character with bytes taken from
// the start of buf. Invoked with the decoder as `this`.
//
// Returns:
//   - the string produced by utf8CheckExtraBytes when it reports a problem,
//   - the decoded character once all missing bytes have been copied in,
//   - undefined when buf is too short; its bytes are stored in this.lastChar
//     and this.lastNeed is reduced by buf.length.
function utf8FillLast(buf) {
  // Offset in lastChar where the next byte of the pending character belongs.
  const offset = this.lastTotal - this.lastNeed;

  const replacement = utf8CheckExtraBytes(this, buf, offset);
  if (replacement !== undefined) {
    return replacement;
  }

  if (buf.length < this.lastNeed) {
    // Not enough bytes yet: stash what we got and wait for more.
    buf.copy(this.lastChar, offset, 0, buf.length);
    this.lastNeed -= buf.length;
    return undefined;
  }

  // Enough bytes are present: take exactly the missing ones and decode.
  buf.copy(this.lastChar, offset, 0, this.lastNeed);
  return this.lastChar.toString(this.encoding, 0, this.lastTotal);
}
// Returns all complete UTF-8 characters in a Buffer. If the Buffer ended on a
// Returns all complete UTF-8 characters in a Buffer. If the Buffer ended on a
// partial character, the character's bytes are buffered until the required
// partial character, the character's bytes are buffered until the required
// number of bytes are available.
// number of bytes are available.