node/tools/eslint/node_modules/jschardet/src/universaldetector.js


								/*

								 * The Original Code is Mozilla Universal charset detector code.

								 *

								 * The Initial Developer of the Original Code is

								 * Netscape Communications Corporation.

								 * Portions created by the Initial Developer are Copyright (C) 2001

								 * the Initial Developer. All Rights Reserved.

								 *

								 * Contributor(s):

								 *   António Afonso (antonio.afonso gmail.com) - port to JavaScript

								 *   Mark Pilgrim - port to Python

								 *   Shy Shalom - original C code

								 *

								 * This library is free software; you can redistribute it and/or

								 * modify it under the terms of the GNU Lesser General Public

								 * License as published by the Free Software Foundation; either

								 * version 2.1 of the License, or (at your option) any later version.

								 *

								 * This library is distributed in the hope that it will be useful,

								 * but WITHOUT ANY WARRANTY; without even the implied warranty of

								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

								 * Lesser General Public License for more details.

								 *

								 * You should have received a copy of the GNU Lesser General Public

								 * License along with this library; if not, write to the Free Software

								 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA

								 * 02110-1301  USA

								 */


								/**

								 * This is a port of the Python port, version "2.0.1".

								 */


								!function(jschardet) {


								/**
								 * Incremental character-encoding detector.
								 *
								 * Typical use: create an instance, call feed() one or more times with
								 * consecutive chunks of input, then call close(). The verdict is kept in
								 * `result` as {"encoding": <name or null>, "confidence": <number>}.
								 * `done` is set as soon as a verdict is reached (or once close() has
								 * processed received data); feed() ignores input while `done` is set.
								 * Call reset() to reuse the instance for new input.
								 */
								jschardet.UniversalDetector = function() {
								    // Confidence a prober must exceed for close() to accept its guess.
								    var MINIMUM_THRESHOLD = jschardet.Constants.MINIMUM_THRESHOLD;
								    // Classification of the input seen so far.
								    var _state = {
								        pureAscii   : 0,    // neither a high-bit character nor an escape marker yet
								        escAscii    : 1,    // ESC or "~{" seen while still pure ASCII
								        highbyte    : 2     // a character in \x80-\xFF seen
								    };
								    var self = this;

								    /**
								     * One-time setup: builds the detection regexes, starts with no probers
								     * (they are created lazily by feed()) and resets the per-run state.
								     */
								    function init() {
								        self._highBitDetector = /[\x80-\xFF]/;
								        self._escDetector = /(\x1B|~\{)/;
								        self._mEscCharsetProber = null;
								        self._mCharsetProbers = [];
								        self.reset();
								    }

								    /**
								     * Return the detector to its initial state so it can analyse new input.
								     * Probers that already exist are kept and reset rather than recreated.
								     */
								    this.reset = function() {
								        this.result = {"encoding": null, "confidence": 0.0};
								        this.done = false;
								        this._mStart = true;
								        this._mGotData = false;
								        this._mInputState = _state.pureAscii;
								        this._mLastChar = "";
								        this._mBOM = "";
								        if( this._mEscCharsetProber ) {
								            this._mEscCharsetProber.reset();
								        }
								        // The loop ends at the first missing/falsy entry.
								        for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
								            prober.reset();
								        }
								    };

								    /**
								     * Feed the next chunk of input to the detector.
								     *
								     * Does nothing when a verdict has already been reached (`done`) or when
								     * the chunk is empty. Sets `result` and `done` as soon as a BOM is
								     * recognised or a prober reports a definite match.
								     *
								     * @param {string} aBuf Next chunk of input. NOTE(review): the BOM and
								     *     high-bit checks compare against \x00-\xFF, so this presumably
								     *     expects a binary string (one character per byte) - confirm
								     *     against callers.
								     */
								    this.feed = function(aBuf) {
								        if( this.done ) return;

								        if( !aBuf.length ) return;

								        if( !this._mGotData ) {
								            // Accumulate across calls so a BOM split over small chunks is
								            // still recognised.
								            this._mBOM += aBuf;
								            // If the data starts with BOM, we know it is UTF.
								            // The 4-character signatures are tested before the
								            // 2-character ones because they share prefixes.
								            if( this._mBOM.slice(0,3) === "\xEF\xBB\xBF" ) {
								                // EF BB BF  UTF-8 with BOM
								                this.result = {"encoding": "UTF-8", "confidence": 1.0};
								            } else if( this._mBOM.slice(0,4) === "\xFF\xFE\x00\x00" ) {
								                // FF FE 00 00  UTF-32, little-endian BOM
								                this.result = {"encoding": "UTF-32LE", "confidence": 1.0};
								            } else if( this._mBOM.slice(0,4) === "\x00\x00\xFE\xFF" ) {
								                // 00 00 FE FF  UTF-32, big-endian BOM
								                this.result = {"encoding": "UTF-32BE", "confidence": 1.0};
								            } else if( this._mBOM.slice(0,4) === "\xFE\xFF\x00\x00" ) {
								                // FE FF 00 00  UCS-4, unusual octet order BOM (3412)
								                this.result = {"encoding": "X-ISO-10646-UCS-4-3412", "confidence": 1.0};
								            } else if( this._mBOM.slice(0,4) === "\x00\x00\xFF\xFE" ) {
								                // 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
								                this.result = {"encoding": "X-ISO-10646-UCS-4-2143", "confidence": 1.0};
								            } else if( this._mBOM.slice(0,2) === "\xFF\xFE" ) {
								                // FF FE  UTF-16, little endian BOM
								                this.result = {"encoding": "UTF-16LE", "confidence": 1.0};
								            } else if( this._mBOM.slice(0,2) === "\xFE\xFF" ) {
								                // FE FF  UTF-16, big endian BOM
								                this.result = {"encoding": "UTF-16BE", "confidence": 1.0};
								            }

								            // If we got to 4 chars without being able to detect a BOM we
								            // stop trying.
								            if( this._mBOM.length > 3 ) {
								                this._mGotData = true;
								            }
								        }

								        // A BOM verdict is final; no probing is needed.
								        if( this.result.encoding && (this.result.confidence > 0.0) ) {
								            this.done = true;
								            return;
								        }

								        // The input state only ever leaves pureAscii; a high-bit character
								        // takes precedence over an escape marker in the same chunk.
								        if( this._mInputState === _state.pureAscii ) {
								            if( this._highBitDetector.test(aBuf) ) {
								                this._mInputState = _state.highbyte;
								            } else if( this._escDetector.test(this._mLastChar + aBuf) ) {
								                // The previous chunk's last character is prepended so a
								                // "~{" split across two chunks is still found.
								                this._mInputState = _state.escAscii;
								            }
								        }

								        this._mLastChar = aBuf.slice(-1);

								        if( this._mInputState === _state.escAscii ) {
								            if( !this._mEscCharsetProber ) {
								                this._mEscCharsetProber = new jschardet.EscCharSetProber();
								            }
								            // Loose == kept on purpose: the prober's return value is
								            // produced by code outside this file.
								            if( this._mEscCharsetProber.feed(aBuf) == jschardet.Constants.foundIt ) {
								                this.result = {
								                    "encoding": this._mEscCharsetProber.getCharsetName(),
								                    "confidence": this._mEscCharsetProber.getConfidence()
								                };
								                this.done = true;
								            }
								        } else if( this._mInputState === _state.highbyte ) {
								            if( this._mCharsetProbers.length === 0 ) {
								                this._mCharsetProbers = [
								                    new jschardet.MBCSGroupProber(),
								                    new jschardet.SBCSGroupProber(),
								                    new jschardet.Latin1Prober()
								                ];
								            }
								            for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
								                // Loose == kept on purpose (see the escape prober above).
								                if( prober.feed(aBuf) == jschardet.Constants.foundIt ) {
								                    this.result = {
								                        "encoding": prober.getCharsetName(),
								                        "confidence": prober.getConfidence()
								                    };
								                    this.done = true;
								                    break;
								                }
								            }
								        }
								    };

								    /**
								     * Finish detection and settle on a verdict.
								     *
								     * - Already done, or nothing was ever fed: `result` is left untouched
								     *   (and `done` is not set when no data was received).
								     * - Pure ASCII input: `result` becomes "ascii" with confidence 1.0.
								     * - High-bit input: the prober with the highest confidence wins, if
								     *   that confidence exceeds MINIMUM_THRESHOLD.
								     * - Otherwise `result` keeps whatever it held before the call.
								     *
								     * @returns {{encoding: ?string, confidence: number}} `this.result`, on
								     *     every path.
								     */
								    this.close = function() {
								        if( this.done ) return this.result;
								        if( this._mBOM.length === 0 ) {
								            if( jschardet.Constants._debug ) {
								                jschardet.log("no data received!\n");
								            }
								            return this.result;
								        }
								        this.done = true;

								        if( this._mInputState === _state.pureAscii ) {
								            if( jschardet.Constants._debug ) {
								                jschardet.log("pure ascii");
								            }
								            this.result = {"encoding": "ascii", "confidence": 1.0};
								            return this.result;
								        }

								        if( this._mInputState === _state.highbyte ) {
								            var proberConfidence = null;
								            var maxProberConfidence = 0.0;
								            var maxProber = null;
								            // Pick the prober with the strictly highest confidence; on a
								            // tie the earlier prober in the list is kept.
								            for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
								                proberConfidence = prober.getConfidence();
								                if( proberConfidence > maxProberConfidence ) {
								                    maxProberConfidence = proberConfidence;
								                    maxProber = prober;
								                }
								                if( jschardet.Constants._debug ) {
								                    jschardet.log(prober.getCharsetName() + " confidence " + prober.getConfidence());
								                }
								            }
								            if( maxProber && maxProberConfidence > MINIMUM_THRESHOLD ) {
								                this.result = {
								                    "encoding": maxProber.getCharsetName(),
								                    "confidence": maxProber.getConfidence()
								                };
								                return this.result;
								            }
								        }

								        // Reached when no verdict could be settled on.
								        if( jschardet.Constants._debug ) {
								            jschardet.log("no probers hit minimum threshhold\n");
								            for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
								                jschardet.log(prober.getCharsetName() + " confidence = " +
								                    prober.getConfidence() + "\n");
								            }
								        }
								        return this.result;
								    };

								    init();
								};


								}(require('./init'));