node/lib/string_decoder.js


								'use strict';


								const Buffer = require('buffer').Buffer;

								const internalUtil = require('internal/util');

								const isEncoding = Buffer[internalUtil.kIsEncodingSymbol];


								// Do not cache `Buffer.isEncoding` when checking encoding names as some

								// modules monkey-patch it to support additional encodings

								function normalizeEncoding(enc) {

								  const nenc = internalUtil.normalizeEncoding(enc);

								  if (typeof nenc !== 'string' &&

								      (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))

								    throw new Error(`Unknown encoding: ${enc}`);

								  return nenc || enc;

								}


								// StringDecoder provides an interface for efficiently splitting a series of

								// buffers into a series of JS strings without breaking apart multi-byte

								// characters.

								exports.StringDecoder = StringDecoder;

								function StringDecoder(encoding) {

								  this.encoding = normalizeEncoding(encoding);

								  var nb;

								  switch (this.encoding) {

								    case 'utf16le':

								      this.text = utf16Text;

								      this.end = utf16End;

								      nb = 4;

								      break;

								    case 'utf8':

								      this.fillLast = utf8FillLast;

								      nb = 4;

								      break;

								    case 'base64':

								      this.text = base64Text;

								      this.end = base64End;

								      nb = 3;

								      break;

								    default:

								      this.write = simpleWrite;

								      this.end = simpleEnd;

								      return;

								  }

								  this.lastNeed = 0;

								  this.lastTotal = 0;

								  this.lastChar = Buffer.allocUnsafe(nb);

								}


								StringDecoder.prototype.write = function(buf) {

								  if (buf.length === 0)

								    return '';

								  var r;

								  var i;

								  if (this.lastNeed) {

								    r = this.fillLast(buf);

								    if (r === undefined)

								      return '';

								    i = this.lastNeed;

								    this.lastNeed = 0;

								  } else {

								    i = 0;

								  }

								  if (i < buf.length)

								    return (r ? r + this.text(buf, i) : this.text(buf, i));

								  return r || '';

								};


								StringDecoder.prototype.end = utf8End;


								// Returns only complete characters in a Buffer

								StringDecoder.prototype.text = utf8Text;


								// Attempts to complete a partial non-UTF-8 character using bytes from a Buffer

								StringDecoder.prototype.fillLast = function(buf) {

								  if (this.lastNeed <= buf.length) {

								    buf.copy(this.lastChar, this.lastTotal - this.lastNeed, 0, this.lastNeed);

								    return this.lastChar.toString(this.encoding, 0, this.lastTotal);

								  }

								  buf.copy(this.lastChar, this.lastTotal - this.lastNeed, 0, buf.length);

								  this.lastNeed -= buf.length;

								};


								// Checks the type of a UTF-8 byte, whether it's ASCII, a leading byte, or a

								// continuation byte. If an invalid byte is detected, -2 is returned.

								function utf8CheckByte(byte) {

								  if (byte <= 0x7F)

								    return 0;

								  else if (byte >> 5 === 0x06)

								    return 2;

								  else if (byte >> 4 === 0x0E)

								    return 3;

								  else if (byte >> 3 === 0x1E)

								    return 4;

								  return (byte >> 6 === 0x02 ? -1 : -2);

								}


								// Checks at most 3 bytes at the end of a Buffer in order to detect an

								// incomplete multi-byte UTF-8 character. The total number of bytes (2, 3, or 4)

								// needed to complete the UTF-8 character (if applicable) are returned.

								function utf8CheckIncomplete(self, buf, i) {

								  var j = buf.length - 1;

								  if (j < i)

								    return 0;

								  var nb = utf8CheckByte(buf[j]);

								  if (nb >= 0) {

								    if (nb > 0)

								      self.lastNeed = nb - 1;

								    return nb;

								  }

								  if (--j < i || nb === -2)

								    return 0;

								  nb = utf8CheckByte(buf[j]);

								  if (nb >= 0) {

								    if (nb > 0)

								      self.lastNeed = nb - 2;

								    return nb;

								  }

								  if (--j < i || nb === -2)

								    return 0;

								  nb = utf8CheckByte(buf[j]);

								  if (nb >= 0) {

								    if (nb > 0) {

								      if (nb === 2)

								        nb = 0;

								      else

								        self.lastNeed = nb - 3;

								    }

								    return nb;

								  }

								  return 0;

								}


								// Validates as many continuation bytes for a multi-byte UTF-8 character as

								// needed or are available. If we see a non-continuation byte where we expect

								// one, we "replace" the validated continuation bytes we've seen so far with

								// a single UTF-8 replacement character ('\ufffd'), to match v8's UTF-8 decoding

								// behavior. The continuation byte check is included three times in the case

								// where all of the continuation bytes for a character exist in the same buffer.

								// It is also done this way as a slight performance increase instead of using a

								// loop.

								function utf8CheckExtraBytes(self, buf, p) {

								  if ((buf[0] & 0xC0) !== 0x80) {

								    self.lastNeed = 0;

								    return '\ufffd';

								  }

								  if (self.lastNeed > 1 && buf.length > 1) {

								    if ((buf[1] & 0xC0) !== 0x80) {

								      self.lastNeed = 1;

								      return '\ufffd';

								    }

								    if (self.lastNeed > 2 && buf.length > 2) {

								      if ((buf[2] & 0xC0) !== 0x80) {

								        self.lastNeed = 2;

								        return '\ufffd';

								      }

								    }

								  }

								}


								// Attempts to complete a multi-byte UTF-8 character using bytes from a Buffer.

								function utf8FillLast(buf) {

								  const p = this.lastTotal - this.lastNeed;

								  var r = utf8CheckExtraBytes(this, buf, p);

								  if (r !== undefined)

								    return r;

								  if (this.lastNeed <= buf.length) {

								    buf.copy(this.lastChar, p, 0, this.lastNeed);

								    return this.lastChar.toString(this.encoding, 0, this.lastTotal);

								  }

								  buf.copy(this.lastChar, p, 0, buf.length);

								  this.lastNeed -= buf.length;

								}


								// Returns all complete UTF-8 characters in a Buffer. If the Buffer ended on a

								// partial character, the character's bytes are buffered until the required

								// number of bytes are available.

								function utf8Text(buf, i) {

								  const total = utf8CheckIncomplete(this, buf, i);

								  if (!this.lastNeed)

								    return buf.toString('utf8', i);

								  this.lastTotal = total;

								  const end = buf.length - (total - this.lastNeed);

								  buf.copy(this.lastChar, 0, end);

								  return buf.toString('utf8', i, end);

								}


								// For UTF-8, a replacement character is added when ending on a partial

								// character.

								function utf8End(buf) {

								  const r = (buf && buf.length ? this.write(buf) : '');

								  if (this.lastNeed)

								    return r + '\ufffd';

								  return r;

								}


								// UTF-16LE typically needs two bytes per character, but even if we have an even

								// number of bytes available, we need to check if we end on a leading/high

								// surrogate. In that case, we need to wait for the next two bytes in order to

								// decode the last character properly.

								function utf16Text(buf, i) {

								  if ((buf.length - i) % 2 === 0) {

								    const r = buf.toString('utf16le', i);

								    if (r) {

								      const c = r.charCodeAt(r.length - 1);

								      if (c >= 0xD800 && c <= 0xDBFF) {

								        this.lastNeed = 2;

								        this.lastTotal = 4;

								        this.lastChar[0] = buf[buf.length - 2];

								        this.lastChar[1] = buf[buf.length - 1];

								        return r.slice(0, -1);

								      }

								    }

								    return r;

								  }

								  this.lastNeed = 1;

								  this.lastTotal = 2;

								  this.lastChar[0] = buf[buf.length - 1];

								  return buf.toString('utf16le', i, buf.length - 1);

								}


								// For UTF-16LE we do not explicitly append special replacement characters if we

								// end on a partial character, we simply let v8 handle that.

								function utf16End(buf) {

								  const r = (buf && buf.length ? this.write(buf) : '');

								  if (this.lastNeed) {

								    const end = this.lastTotal - this.lastNeed;

								    return r + this.lastChar.toString('utf16le', 0, end);

								  }

								  return r;

								}


								function base64Text(buf, i) {

								  const n = (buf.length - i) % 3;

								  if (n === 0)

								    return buf.toString('base64', i);

								  this.lastNeed = 3 - n;

								  this.lastTotal = 3;

								  if (n === 1) {

								    this.lastChar[0] = buf[buf.length - 1];

								  } else {

								    this.lastChar[0] = buf[buf.length - 2];

								    this.lastChar[1] = buf[buf.length - 1];

								  }

								  return buf.toString('base64', i, buf.length - n);

								}


								function base64End(buf) {

								  const r = (buf && buf.length ? this.write(buf) : '');

								  if (this.lastNeed)

								    return r + this.lastChar.toString('base64', 0, 3 - this.lastNeed);

								  return r;

								}


								// Pass bytes on through for single-byte encodings (e.g. ascii, latin1, hex)

								function simpleWrite(buf) {

								  return buf.toString(this.encoding);

								}


								function simpleEnd(buf) {

								  return (buf && buf.length ? this.write(buf) : '');

								}