node/deps/icu-small/source/common/dictionarydata.cpp


								/*

								*******************************************************************************

								* Copyright (C) 2014-2016, International Business Machines

								* Corporation and others.  All Rights Reserved.

								*******************************************************************************

								* dictionarydata.cpp

								*

								* created on: 2012may31

								* created by: Markus W. Scherer & Maxime Serrano

								*/


								#include "dictionarydata.h"

								#include "unicode/ucharstrie.h"

								#include "unicode/bytestrie.h"

								#include "unicode/udata.h"

								#include "cmemory.h"


								#if !UCONFIG_NO_BREAK_ITERATION


								U_NAMESPACE_BEGIN


								// Out-of-class definitions of the DictionaryData static constants.

								// Trie type values. getType() of BytesDictionaryMatcher and
								// UCharsDictionaryMatcher return these, and udict_swap() compares the
								// type stored in the data against them.
								const int32_t  DictionaryData::TRIE_TYPE_BYTES = 0;

								const int32_t  DictionaryData::TRIE_TYPE_UCHARS = 1;

								// Mask applied to indexes[IX_TRIE_TYPE] in udict_swap() to isolate the trie type.
								const int32_t  DictionaryData::TRIE_TYPE_MASK = 7;

								// Not referenced in this file; presumably a flag bit stored alongside the
								// trie type in the same index word -- TODO confirm against dictionarydata.h.
								const int32_t  DictionaryData::TRIE_HAS_VALUES = 8;


								// Not referenced in this file; presumably the "no transform" type value
								// -- TODO confirm against dictionarydata.h.
								const int32_t  DictionaryData::TRANSFORM_NONE = 0;

								// Transform type for which BytesDictionaryMatcher::transform() maps code
								// points to byte values by subtracting a base offset.
								const int32_t  DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000;

								// Selects the transform type bits of a transform constant.
								const int32_t  DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000;

								// Selects the base offset bits of a transform constant
								// (the value subtracted from a code point in transform()).
								const int32_t  DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff;


								// Out-of-line destructor definition; the body is intentionally empty.
								DictionaryMatcher::~DictionaryMatcher() {}


								// Closes the data handle held in `file` when the matcher is destroyed.
								UCharsDictionaryMatcher::~UCharsDictionaryMatcher()
								{
								    udata_close(this->file);
								}


								// Identifies this matcher as backed by a UChars trie.
								// Always returns DictionaryData::TRIE_TYPE_UCHARS.
								int32_t UCharsDictionaryMatcher::getType() const
								{
								    return DictionaryData::TRIE_TYPE_UCHARS;
								}


								/*
								 * Finds dictionary words that start at the current position of `text`.
								 *
								 * Code points are read from `text` one at a time and fed to a UCharsTrie
								 * built over `characters`. Every time the trie reports a value, one result
								 * is recorded (as long as fewer than `limit` results have been stored).
								 *
								 * text       input text; its iteration position is advanced by the reads.
								 * maxLength  scanning stops once the matched length, measured as the
								 *            difference of native indexes, reaches this value.
								 * limit      maximum number of results written to the output arrays.
								 * lengths    optional output: matched length (native index units) per result.
								 * cpLengths  optional output: matched length in code points per result.
								 * values     optional output: trie value per result.
								 * prefix     optional output: number of code points read from `text`,
								 *            including a last one that did not match.
								 *
								 * Returns the number of results recorded.
								 */
								int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
								                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
								                            int32_t *prefix) const {
								    UCharsTrie trie(characters);
								    const int32_t startIndex = static_cast<int32_t>(utext_getNativeIndex(text));
								    int32_t found = 0;
								    int32_t cpCount = 0;

								    for (;;) {
								        const UChar32 c = utext_next32(text);
								        if (c < 0) {
								            // No more input.
								            break;
								        }

								        // The very first code point restarts the trie; later ones continue the walk.
								        const UStringTrieResult step = (cpCount == 0) ? trie.first(c) : trie.next(c);
								        const int32_t nativeLength =
								            static_cast<int32_t>(utext_getNativeIndex(text)) - startIndex;
								        ++cpCount;

								        if (step == USTRINGTRIE_NO_MATCH) {
								            break;
								        }

								        if (USTRINGTRIE_HAS_VALUE(step)) {
								            if (found < limit) {
								                if (values != NULL) {
								                    values[found] = trie.getValue();
								                }
								                if (lengths != NULL) {
								                    lengths[found] = nativeLength;
								                }
								                if (cpLengths != NULL) {
								                    cpLengths[found] = cpCount;
								                }
								                ++found;
								            }
								            if (step == USTRINGTRIE_FINAL_VALUE) {
								                // No longer word can continue from here.
								                break;
								            }
								        }

								        if (nativeLength >= maxLength) {
								            break;
								        }
								    }

								    if (prefix != NULL) {
								        *prefix = cpCount;
								    }
								    return found;
								}


								// Closes the data handle held in `file` when the matcher is destroyed.
								BytesDictionaryMatcher::~BytesDictionaryMatcher()
								{
								    udata_close(this->file);
								}


								/*
								 * Maps a code point to the value that is fed to the bytes trie.
								 *
								 * When the transform type stored in `transformConstant` is
								 * TRANSFORM_TYPE_OFFSET:
								 *   - U+200D maps to 0xFF and U+200C maps to 0xFE;
								 *   - any other code point maps to (c - base offset) when that difference
								 *     lies in 0..0xFD, and to U_SENTINEL otherwise.
								 * For every other transform type the code point is returned unchanged.
								 */
								UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
								    if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) != DictionaryData::TRANSFORM_TYPE_OFFSET) {
								        return c;
								    }

								    // Two code points have fixed byte values at the top of the range.
								    switch (c) {
								    case 0x200D:
								        return 0xFF;
								    case 0x200C:
								        return 0xFE;
								    default:
								        break;
								    }

								    const int32_t base = transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK;
								    const int32_t delta = c - base;
								    if (0 <= delta && delta <= 0xFD) {
								        return static_cast<UChar32>(delta);
								    }
								    return U_SENTINEL;
								}


								// Identifies this matcher as backed by a bytes trie.
								// Always returns DictionaryData::TRIE_TYPE_BYTES.
								int32_t BytesDictionaryMatcher::getType() const
								{
								    return DictionaryData::TRIE_TYPE_BYTES;
								}


								/*
								 * Finds dictionary words that start at the current position of `text`.
								 *
								 * Code points are read from `text` one at a time, converted with
								 * transform(), and fed to a BytesTrie built over `characters`. Every time
								 * the trie reports a value, one result is recorded (as long as fewer than
								 * `limit` results have been stored).
								 *
								 * text       input text; its iteration position is advanced by the reads.
								 * maxLength  scanning stops once the matched length, measured as the
								 *            difference of native indexes, reaches this value.
								 * limit      maximum number of results written to the output arrays.
								 * lengths    optional output: matched length (native index units) per result.
								 * cpLengths  optional output: matched length in code points per result.
								 * values     optional output: trie value per result.
								 * prefix     optional output: number of code points read from `text`,
								 *            including a last one that did not match.
								 *
								 * Returns the number of results recorded.
								 */
								int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
								                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
								                            int32_t *prefix) const {
								    BytesTrie trie(characters);
								    const int32_t startIndex = static_cast<int32_t>(utext_getNativeIndex(text));
								    int32_t found = 0;
								    int32_t cpCount = 0;

								    for (;;) {
								        const UChar32 c = utext_next32(text);
								        if (c < 0) {
								            // No more input.
								            break;
								        }

								        // The trie consumes the transformed value, not the raw code point.
								        const UChar32 mapped = transform(c);
								        const UStringTrieResult step = (cpCount == 0) ? trie.first(mapped) : trie.next(mapped);
								        const int32_t nativeLength =
								            static_cast<int32_t>(utext_getNativeIndex(text)) - startIndex;
								        ++cpCount;

								        if (step == USTRINGTRIE_NO_MATCH) {
								            break;
								        }

								        if (USTRINGTRIE_HAS_VALUE(step)) {
								            if (found < limit) {
								                if (values != NULL) {
								                    values[found] = trie.getValue();
								                }
								                if (lengths != NULL) {
								                    lengths[found] = nativeLength;
								                }
								                if (cpLengths != NULL) {
								                    cpLengths[found] = cpCount;
								                }
								                ++found;
								            }
								            if (step == USTRINGTRIE_FINAL_VALUE) {
								                // No longer word can continue from here.
								                break;
								            }
								        }

								        if (nativeLength >= maxLength) {
								            break;
								        }
								    }

								    if (prefix != NULL) {
								        *prefix = cpCount;
								    }
								    return found;
								}


								U_NAMESPACE_END


								U_NAMESPACE_USE


								/*
								 * Swaps dictionary data between byte orders, in the style of the other
								 * ICU data swapper functions.
								 *
								 * ds          swapper that provides the read/swap/print callbacks.
								 * inData      start of the data, including its data header.
								 * length      number of bytes at inData, or a negative value to only compute
								 *             the size (nothing is written to outData in that case).
								 * outData     destination buffer; may be the same memory as inData.
								 * pErrorCode  in/out ICU error code; nothing is done if it is NULL or
								 *             already indicates a failure.
								 *
								 * Returns the header size plus the total size recorded in the data's
								 * indexes, or 0 on failure.
								 *
								 * Errors set here: U_UNSUPPORTED_ERROR for an unrecognized data format or
								 * trie type; U_INDEX_OUTOFBOUNDS_ERROR when the input is too short or when
								 * the sizes/offsets stored in the indexes are inconsistent.
								 */
								U_CAPI int32_t U_EXPORT2
								udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
								           void *outData, UErrorCode *pErrorCode) {
								    const UDataInfo *pInfo;
								    int32_t headerSize;
								    const uint8_t *inBytes;
								    uint8_t *outBytes;
								    const int32_t *inIndexes;
								    int32_t indexes[DictionaryData::IX_COUNT];
								    int32_t i, offset, size;

								    headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
								    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;

								    // The UDataInfo structure is read at byte offset 4 of the data header.
								    pInfo = (const UDataInfo *)((const char *)inData + 4);
								    // Accept only dataFormat "Dict" (0x44 0x69 0x63 0x74) with format version 1.
								    if (!(pInfo->dataFormat[0] == 0x44 &&
								          pInfo->dataFormat[1] == 0x69 &&
								          pInfo->dataFormat[2] == 0x63 &&
								          pInfo->dataFormat[3] == 0x74 &&
								          pInfo->formatVersion[0] == 1)) {
								        udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
								                         pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
								        *pErrorCode = U_UNSUPPORTED_ERROR;
								        return 0;
								    }

								    inBytes = (const uint8_t *)inData + headerSize;
								    outBytes = (uint8_t *)outData + headerSize;

								    inIndexes = (const int32_t *)inBytes;
								    if (length >= 0) {
								        // From here on, length counts only the bytes after the header.
								        length -= headerSize;
								        if (length < (int32_t)(sizeof(indexes))) {
								            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
								            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
								            return 0;
								        }
								    }

								    // Read the indexes in the byte order of the input data.
								    for (i = 0; i < DictionaryData::IX_COUNT; i++) {
								        indexes[i] = udata_readInt32(ds, inIndexes[i]);
								    }

								    size = indexes[DictionaryData::IX_TOTAL_SIZE];

								    // The total size comes from the data itself. It must at least cover the
								    // index array; a negative or tiny value would otherwise slip past the
								    // "length < size" check below and be used as a copy size.
								    if (size < (int32_t)(sizeof(indexes))) {
								        udata_printError(ds, "udict_swap(): invalid total size %d in dictionary data indexes\n", size);
								        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
								        return 0;
								    }

								    if (length >= 0) {
								        if (length < size) {
								            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
								            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
								            return 0;
								        }

								        // The trie occupies [offset, nextOffset): it starts right after the
								        // indexes and ends where the first reserved section begins.
								        offset = (int32_t)sizeof(indexes);
								        int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];
								        // Reject a trie end offset that lies before the trie start or
								        // beyond the data, so the 16-bit swap below stays within bounds.
								        if (nextOffset < offset || size < nextOffset) {
								            udata_printError(ds, "udict_swap(): trie end offset %d is outside of the dictionary data (%d bytes)\n", nextOffset, size);
								            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
								            return 0;
								        }

								        // Copy everything first so that sections which need no swapping
								        // are still present in the output.
								        if (inBytes != outBytes) {
								            uprv_memcpy(outBytes, inBytes, size);
								        }

								        // Swap the index array itself.
								        ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);

								        int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
								        if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
								            // A UChars trie consists of 16-bit units.
								            ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
								        } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
								            // nothing to do
								        } else {
								            udata_printError(ds, "udict_swap(): unknown trie type!\n");
								            *pErrorCode = U_UNSUPPORTED_ERROR;
								            return 0;
								        }

								        // The two sections that follow the trie (up to IX_RESERVED2_OFFSET and
								        // up to IX_TOTAL_SIZE) are empty in the current format, but may be
								        // used later; there is nothing to swap for them.
								    }
								    return headerSize + size;
								}

								#endif