// node/deps/v8/src/unicode-inl.h


								// Copyright 2007-2010 the V8 project authors. All rights reserved.

								// Redistribution and use in source and binary forms, with or without

								// modification, are permitted provided that the following conditions are

								// met:

								//

								//     * Redistributions of source code must retain the above copyright

								//       notice, this list of conditions and the following disclaimer.

								//     * Redistributions in binary form must reproduce the above

								//       copyright notice, this list of conditions and the following

								//       disclaimer in the documentation and/or other materials provided

								//       with the distribution.

								//     * Neither the name of Google Inc. nor the names of its

								//       contributors may be used to endorse or promote products derived

								//       from this software without specific prior written permission.

								//

								// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

								// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

								// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

								// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

								// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

								// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

								// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

								// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

								// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

								// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

								// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


								#ifndef V8_UNICODE_INL_H_

								#define V8_UNICODE_INL_H_


								#include "unicode.h"


								namespace unibrow {


								// Looks up |code_point| in the cache slot indexed by `code_point & kMask`.
								// On a hit (the slot holds exactly this code point) the stored value is
								// returned; on a miss the answer comes from CalculateValue().
								template <class T, int s> bool Predicate<T, s>::get(uchar code_point) {
								  const CacheEntry cached = entries_[code_point & kMask];
								  if (cached.code_point_ != code_point) {
								    return CalculateValue(code_point);
								  }
								  return cached.value_;
								}


								// Evaluates T::Is() for |code_point|, records the outcome in the cache slot
								// indexed by `code_point & kMask` (overwriting whatever was there), and
								// returns it.
								template <class T, int s> bool Predicate<T, s>::CalculateValue(
								    uchar code_point) {
								  const bool is_member = T::Is(code_point);
								  entries_[code_point & kMask] = CacheEntry(code_point, is_member);
								  return is_member;
								}


								// Maps |c| using the cache slot indexed by `c & kMask`.
								//
								// Cache hit with a zero offset: returns 0 and leaves |result| untouched.
								// Cache hit with a non-zero offset: writes `c + offset` to result[0] and
								// returns 1.
								// Cache miss: defers to CalculateValue(), forwarding |n| and |result|.
								template <class T, int s> int Mapping<T, s>::get(uchar c, uchar n,
								    uchar* result) {
								  const CacheEntry cached = entries_[c & kMask];
								  if (cached.code_point_ != c) {
								    return CalculateValue(c, n, result);
								  }
								  if (cached.offset_ == 0) return 0;
								  result[0] = c + cached.offset_;
								  return 1;
								}


								// Runs T::Convert() for |c| and, when the converter leaves caching enabled,
								// stores the outcome in the cache slot indexed by `c & kMask`.
								//
								// If T::Convert() clears the caching flag, its length is returned as-is and
								// nothing is cached. Otherwise a length of exactly 1 is cached as the
								// offset `result[0] - c` and 1 is returned; any other length is cached as
								// offset 0 and 0 is returned.
								template <class T, int s> int Mapping<T, s>::CalculateValue(uchar c, uchar n,
								    uchar* result) {
								  bool cacheable = true;
								  const int length = T::Convert(c, n, result, &cacheable);
								  if (!cacheable) return length;

								  if (length != 1) {
								    entries_[c & kMask] = CacheEntry(c, 0);
								    return 0;
								  }
								  entries_[c & kMask] = CacheEntry(c, result[0] - c);
								  return 1;
								}


								unsigned Utf8::Encode(char* str, uchar c, int previous) {

								  static const int kMask = ~(1 << 6);

								  if (c <= kMaxOneByteChar) {

								    str[0] = c;

								    return 1;

								  } else if (c <= kMaxTwoByteChar) {

								    str[0] = 0xC0 | (c >> 6);

								    str[1] = 0x80 | (c & kMask);

								    return 2;

								  } else if (c <= kMaxThreeByteChar) {

								    if (Utf16::IsTrailSurrogate(c) &&

								        Utf16::IsLeadSurrogate(previous)) {

								      const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;

								      return Encode(str - kUnmatchedSize,

								                    Utf16::CombineSurrogatePair(previous, c),

								                    Utf16::kNoPreviousCharacter) - kUnmatchedSize;

								    }

								    str[0] = 0xE0 | (c >> 12);

								    str[1] = 0x80 | ((c >> 6) & kMask);

								    str[2] = 0x80 | (c & kMask);

								    return 3;

								  } else {

								    str[0] = 0xF0 | (c >> 18);

								    str[1] = 0x80 | ((c >> 12) & kMask);

								    str[2] = 0x80 | ((c >> 6) & kMask);

								    str[3] = 0x80 | (c & kMask);

								    return 4;

								  }

								}


								// Decodes the character that starts at |bytes|.
								//
								// Returns kBadChar without touching |cursor| when |length| is zero. A lead
								// byte no greater than kMaxOneByteChar is returned directly and |cursor| is
								// advanced by one; every other lead byte is handed to CalculateValue().
								uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) {
								  if (length == 0) return kBadChar;

								  const byte lead = bytes[0];
								  if (lead > kMaxOneByteChar) {
								    return CalculateValue(bytes, length, cursor);
								  }

								  // Characters in the range 0000-007F are encoded as a single byte.
								  *cursor += 1;
								  return lead;
								}


								// Returns the number of bytes the output grows by when |c| is encoded
								// after |previous|: 1, 2, 3 or 4 depending on the range |c| falls in,
								// except that a trail surrogate following a lead surrogate yields
								// `kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates`.
								unsigned Utf8::Length(uchar c, int previous) {
								  if (c <= kMaxOneByteChar) return 1;
								  if (c <= kMaxTwoByteChar) return 2;
								  if (c > kMaxThreeByteChar) return 4;

								  const bool completes_pair =
								      Utf16::IsTrailSurrogate(c) && Utf16::IsLeadSurrogate(previous);
								  if (completes_pair) {
								    return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates;
								  }
								  return 3;
								}


								// Decodes and returns the character at the current cursor position.
								// If it was the last one remaining (remaining_ == 1) the cursor is reset to
								// zero and FillBuffer() is called; otherwise remaining_ is decremented.
								uchar CharacterStream::GetNext() {
								  const uchar decoded = DecodeCharacter(buffer_, &cursor_);
								  if (remaining_ != 1) {
								    remaining_--;
								    return decoded;
								  }
								  cursor_ = 0;
								  FillBuffer();
								  return decoded;
								}


								// IF_LITTLE(expr) / IF_BIG(expr): expand to |expr| on targets of the
								// matching byte order and to a no-op otherwise. |expr| is deliberately not
								// parenthesized because callers pass whole statements such as `return ...`.
								//
								// The byte order is taken from, in order of preference:
								//   1. __BYTE_ORDER / __LITTLE_ENDIAN / __BIG_ENDIAN (glibc <endian.h>),
								//      but only when they are actually defined. An undefined macro
								//      evaluates to 0 inside #if, so an unguarded comparison would turn
								//      into `0 == 0` and silently select little-endian.
								//   2. The compiler-predefined __BYTE_ORDER__ / __ORDER_*_ENDIAN__ macros.
								//   3. Little-endian, when neither family is available. This matches what
								//      the unguarded comparison used to select on such toolchains.
								#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
								    (__BYTE_ORDER == __LITTLE_ENDIAN)
								#define IF_LITTLE(expr) expr
								#define IF_BIG(expr)    ((void) 0)
								#elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
								    (__BYTE_ORDER == __BIG_ENDIAN)
								#define IF_LITTLE(expr) ((void) 0)
								#define IF_BIG(expr)    expr
								#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
								    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
								#define IF_LITTLE(expr) expr
								#define IF_BIG(expr)    ((void) 0)
								#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
								    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
								#define IF_LITTLE(expr) ((void) 0)
								#define IF_BIG(expr)    expr
								#elif !defined(__BYTE_ORDER) && !defined(__BYTE_ORDER__)
								#define IF_LITTLE(expr) expr
								#define IF_BIG(expr)    ((void) 0)
								#else
								#warning Unknown byte ordering
								#endif


								// Stores |c| as a single byte at buffer[offset] and advances |offset| by
								// one. Returns false, leaving |buffer| and |offset| untouched, when
								// |offset| is not below |capacity|.
								bool CharacterStream::EncodeAsciiCharacter(uchar c, byte* buffer,
								    unsigned capacity, unsigned& offset) {
								  if (offset < capacity) {
								    buffer[offset++] = c;
								    return true;
								  }
								  return false;
								}


								// Stores |c| as a whole uchar word at the next multiple-of-four offset at or
								// after |offset|, and leaves a byte with the high bit set at the original
								// |offset| so DecodeCharacter() can tell the entry apart from a one-byte
								// character.
								//
								// When |offset| is already aligned the marker is folded into the word
								// itself (low byte 0x80 on little-endian, top bit on big-endian); otherwise
								// a separate 0x80 byte is written at |offset| and the bytes between it and
								// the aligned word are left as they were.
								//
								// Returns false without writing anything if the aligned word would not fit
								// in |capacity|; on success |offset| is moved just past the word.
								// NOTE(review): the word is written through a reinterpret_cast, so |buffer|
								// itself is presumably expected to be suitably aligned -- confirm.
								bool CharacterStream::EncodeNonAsciiCharacter(uchar c, byte* buffer,
								    unsigned capacity, unsigned& offset) {
								  const unsigned aligned = (offset + 0x3) & ~0x3;
								  if ((aligned + sizeof(uchar)) > capacity) return false;

								  uchar* const word = reinterpret_cast<uchar*>(buffer + aligned);
								  if (offset != aligned) {
								    // Marker byte goes in front of the padding; the word holds only |c|.
								    buffer[offset] = 0x80;
								    IF_LITTLE(*word = c << 8);
								    IF_BIG(*word = c);
								  } else {
								    // No room for a separate marker: embed it in the word.
								    IF_LITTLE(*word = (c << 8) | 0x80);
								    IF_BIG(*word = c | (1 << 31));
								  }

								  offset = aligned + sizeof(uchar);
								  return true;
								}


								// Appends |c| to |buffer| at |offset|, choosing the one-byte form for
								// characters up to Utf8::kMaxOneByteChar and the aligned-word form for
								// everything else. Returns whatever the selected encoder returns.
								bool CharacterStream::EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
								    unsigned& offset) {
								  if (c > Utf8::kMaxOneByteChar) {
								    return EncodeNonAsciiCharacter(c, buffer, capacity, offset);
								  }
								  return EncodeAsciiCharacter(c, buffer, capacity, offset);
								}


								// Reads one character from |buffer| at |*offset| and advances |*offset|
								// past it.
								//
								// A byte no greater than Utf8::kMaxOneByteChar is the character itself.
								// Any other byte marks a word-encoded character: the value is read from the
								// uchar word at the next multiple-of-four offset at or after |*offset|,
								// with the marker bits stripped (shifted out on little-endian, top bit
								// masked off on big-endian).
								uchar CharacterStream::DecodeCharacter(const byte* buffer, unsigned* offset) {
								  const byte marker = buffer[*offset];
								  if (marker <= Utf8::kMaxOneByteChar) {
								    (*offset)++;
								    return marker;
								  }

								  const unsigned aligned = (*offset + 0x3) & ~0x3;
								  const uchar* const word = reinterpret_cast<const uchar*>(buffer + aligned);
								  *offset = aligned + sizeof(uchar);
								  IF_LITTLE(return *word >> 8);
								  IF_BIG(return *word & ~(1 << 31));
								}


								// The byte-order helpers are only needed by the CharacterStream
								// encode/decode routines in this header; drop them so they are not
								// visible to code that includes it.
								#undef IF_LITTLE

								#undef IF_BIG


								// Asks the reader R for the next block of input. R::ReadBlock() receives
								// the scratch buffer and its size |s| and may update remaining_ and
								// offset_ through the pointers passed to it; the pointer it returns becomes
								// the current buffer_.
								template <class R, class I, unsigned s>
								void InputBuffer<R, I, s>::FillBuffer() {
								  buffer_ = R::ReadBlock(input_,
								                         util_buffer_,
								                         s,
								                         &remaining_,
								                         &offset_);
								}


								// Restarts reading by resetting the buffer with the input it already holds.
								template <class R, class I, unsigned s>
								void InputBuffer<R, I, s>::Rewind() {
								  this->Reset(input_);
								}


								// Re-targets the buffer at |input|, starting from |position|: clears the
								// cursor and the remaining count, records the new input and offset, and
								// then reads the first block through R::ReadBlock().
								template <class R, class I, unsigned s>
								void InputBuffer<R, I, s>::Reset(unsigned position, I input) {
								  cursor_ = 0;
								  remaining_ = 0;
								  offset_ = position;
								  input_ = input;
								  buffer_ = R::ReadBlock(input_,
								                         util_buffer_,
								                         s,
								                         &remaining_,
								                         &offset_);
								}


								// Convenience overload: resets the buffer to read |input| from position 0.
								template <class R, class I, unsigned s>
								void InputBuffer<R, I, s>::Reset(I input) {
								  const unsigned start_position = 0;
								  Reset(start_position, input);
								}


								// Moves the read offset to |position| and immediately reads a block from
								// there through R::ReadBlock(). Only offset_, remaining_ and buffer_ are
								// written here; cursor_ is not modified by this function.
								template <class R, class I, unsigned s>
								void InputBuffer<R, I, s>::Seek(unsigned position) {
								  offset_ = position;
								  buffer_ = R::ReadBlock(input_,
								                         util_buffer_,
								                         s,
								                         &remaining_,
								                         &offset_);
								}


								// Builds an input buffer over the |length| chars starting at |data| by
								// wrapping them in a Buffer<const char*> and passing that to the
								// InputBuffer<Utf8, ...> base constructor. The constructor body is empty.
								template <unsigned s>
								Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length)
								    : InputBuffer<Utf8, Buffer<const char*>, s>(
								          Buffer<const char*>(data, length)) {}


								}  // namespace unibrow


								#endif  // V8_UNICODE_INL_H_