node/deps/icu-small/source/common/filterednormalizer2.cpp


								// Copyright (C) 2016 and later: Unicode, Inc. and others.

								// License & terms of use: http://www.unicode.org/copyright.html

								/*

								*******************************************************************************

								*

								*   Copyright (C) 2009-2012, International Business Machines

								*   Corporation and others.  All Rights Reserved.

								*

								*******************************************************************************

								*   file name:  filterednormalizer2.cpp

								*   encoding:   US-ASCII

								*   tab size:   8 (not used)

								*   indentation:4

								*

								*   created on: 2009dec10

								*   created by: Markus W. Scherer

								*/


								#include "unicode/utypes.h"


								#if !UCONFIG_NO_NORMALIZATION


								#include "unicode/normalizer2.h"

								#include "unicode/uniset.h"

								#include "unicode/unistr.h"

								#include "unicode/unorm.h"

								#include "cpputils.h"


								U_NAMESPACE_BEGIN


								// Destructor, defined out of line. The body is intentionally empty:
								// nothing is released explicitly here.
								FilteredNormalizer2::~FilteredNormalizer2() {
								}


								// Public entry point: writes the filtered normalization of src into dest.
								//
								// - If errorCode indicates failure after the buffer check on src,
								//   dest is set to bogus.
								// - If src and dest are the same object, errorCode is set to
								//   U_ILLEGAL_ARGUMENT_ERROR and dest is left untouched.
								// - Otherwise dest is emptied and filled by the internal, appending
								//   overload, starting with USET_SPAN_SIMPLE.
								//
								// Always returns dest.
								UnicodeString &
								FilteredNormalizer2::normalize(const UnicodeString &src,
								                               UnicodeString &dest,
								                               UErrorCode &errorCode) const {
								    uprv_checkCanGetBuffer(src, errorCode);
								    if(U_FAILURE(errorCode)) {
								        dest.setToBogus();
								    } else if(&src==&dest) {
								        // In-place normalization is not supported.
								        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
								    } else {
								        dest.remove();
								        // The internal overload appends to dest and returns dest itself.
								        normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
								    }
								    return dest;
								}


								// Internal worker: performs no argument checking and appends to dest.
								//
								// src is walked as alternating spans, switching spanCondition between
								// USET_SPAN_SIMPLE and USET_SPAN_NOT_CONTAINED after every span.
								// Spans taken with USET_SPAN_NOT_CONTAINED are copied to dest verbatim;
								// all other spans are normalized with norm2 and the result is appended.
								//
								// The caller passes as spanCondition the one that is likely to yield a
								// non-zero span length at the start of src. For set=[:age=3.2:], since
								// almost all common characters were in Unicode 3.2, USET_SPAN_SIMPLE
								// should be passed for the start of src, and USET_SPAN_NOT_CONTAINED
								// when continuing after an in-filter prefix.
								//
								// Stops early if norm2 reports a failure in errorCode. Returns dest.
								UnicodeString &
								FilteredNormalizer2::normalize(const UnicodeString &src,
								                               UnicodeString &dest,
								                               USetSpanCondition spanCondition,
								                               UErrorCode &errorCode) const {
								    // Scratch output for norm2, kept across iterations so that its
								    // buffer is not thrown away each time.
								    UnicodeString scratch;
								    int32_t start=0;
								    while(start<src.length()) {
								        const int32_t limit=set.span(src, start, spanCondition);
								        const int32_t length=limit-start;
								        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
								            if(length!=0) {
								                // Deliberately not norm2.normalizeSecondAndAppend(): the
								                // non-filter part already in dest must not be modified.
								                dest.append(norm2.normalize(src.tempSubStringBetween(start, limit),
								                                            scratch, errorCode));
								                if(U_FAILURE(errorCode)) {
								                    break;
								                }
								            }
								            spanCondition=USET_SPAN_NOT_CONTAINED;
								        } else {
								            // Span taken with USET_SPAN_NOT_CONTAINED: copy as is.
								            if(length!=0) {
								                dest.append(src, start, length);
								            }
								            spanCondition=USET_SPAN_SIMPLE;
								        }
								        start=limit;
								    }
								    return dest;
								}


								// Appends second to first, normalizing second on the way.
								// Thin wrapper over the four-argument overload with doNormalize=TRUE.
								// Returns first.
								UnicodeString &
								FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
								                                              const UnicodeString &second,
								                                              UErrorCode &errorCode) const {
								    const UBool doNormalize=TRUE;
								    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
								}


								// Appends second to first without normalizing second as a whole.
								// Thin wrapper over the four-argument normalizeSecondAndAppend()
								// with doNormalize=FALSE. Returns first.
								UnicodeString &
								FilteredNormalizer2::append(UnicodeString &first,
								                            const UnicodeString &second,
								                            UErrorCode &errorCode) const {
								    const UBool doNormalize=FALSE;
								    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
								}


								// Shared implementation of normalizeSecondAndAppend() and append().
								//
								// first        in/out string that second is appended to
								// second       string to append; must not be the same object as first
								// doNormalize  TRUE: use the normalizing operations;
								//              FALSE: use the plain append operations
								// errorCode    in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
								//              when first and second are the same object
								//
								// Returns first. On a pre-existing or detected error first is returned
								// without being modified by this function.
								UnicodeString &
								FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
								                                              const UnicodeString &second,
								                                              UBool doNormalize,
								                                              UErrorCode &errorCode) const {
								    uprv_checkCanGetBuffer(first, errorCode);
								    uprv_checkCanGetBuffer(second, errorCode);
								    if(U_FAILURE(errorCode)) {
								        return first;
								    }
								    if(&second==&first) {
								        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
								        return first;
								    }
								    // Nothing to merge with: the result is just (normalized) second.
								    if(first.isEmpty()) {
								        if(!doNormalize) {
								            first=second;
								            return first;
								        }
								        return normalize(second, first, errorCode);
								    }
								    // Merge the in-filter suffix of first with the in-filter prefix of second.
								    const int32_t headLimit=set.span(second, 0, USET_SPAN_SIMPLE);
								    if(headLimit!=0) {
								        UnicodeString head(second.tempSubString(0, headLimit));
								        const int32_t tailStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
								        if(tailStart!=0) {
								            // Only the trailing in-filter part of first may be touched:
								            // work on a copy of it and splice the result back in.
								            UnicodeString tail(first, tailStart, INT32_MAX);
								            if(doNormalize) {
								                norm2.normalizeSecondAndAppend(tail, head, errorCode);
								            } else {
								                norm2.append(tail, head, errorCode);
								            }
								            first.replace(tailStart, INT32_MAX, tail);
								        } else if(doNormalize) {
								            // All of first is in the filter: operate on it directly.
								            norm2.normalizeSecondAndAppend(first, head, errorCode);
								        } else {
								            norm2.append(first, head, errorCode);
								        }
								    }
								    // Handle whatever follows the in-filter prefix of second.
								    if(headLimit<second.length()) {
								        UnicodeString remainder(second.tempSubString(headLimit, INT32_MAX));
								        if(doNormalize) {
								            // The remainder begins right after an in-filter span, so
								            // the first span is taken with USET_SPAN_NOT_CONTAINED.
								            normalize(remainder, first, USET_SPAN_NOT_CONTAINED, errorCode);
								        } else {
								            first.append(remainder);
								        }
								    }
								    return first;
								}


								// Returns FALSE without consulting norm2 when c is not in the filter
								// set; otherwise reports whether norm2.getDecomposition() succeeded,
								// which may write to decomposition.
								UBool
								FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
								    if(!set.contains(c)) {
								        return FALSE;
								    }
								    return norm2.getDecomposition(c, decomposition) ? TRUE : FALSE;
								}


								// Returns FALSE without consulting norm2 when c is not in the filter
								// set; otherwise reports whether norm2.getRawDecomposition() succeeded,
								// which may write to decomposition.
								UBool
								FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
								    if(!set.contains(c)) {
								        return FALSE;
								    }
								    return norm2.getRawDecomposition(c, decomposition) ? TRUE : FALSE;
								}


								// Delegates to norm2.composePair(a, b) only when both a and b are in
								// the filter set; otherwise returns U_SENTINEL.
								UChar32
								FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
								    if(!set.contains(a) || !set.contains(b)) {
								        return U_SENTINEL;
								    }
								    return norm2.composePair(a, b);
								}


								// Returns norm2's combining class for c when c is in the filter set,
								// and 0 for any code point outside the set.
								uint8_t
								FilteredNormalizer2::getCombiningClass(UChar32 c) const {
								    if(!set.contains(c)) {
								        return 0;
								    }
								    return norm2.getCombiningClass(c);
								}


								// Tests whether s is normalized under this filtered normalizer.
								//
								// s is walked as alternating spans, starting with USET_SPAN_SIMPLE.
								// Each span taken with USET_SPAN_SIMPLE is checked with
								// norm2.isNormalized(); spans taken with USET_SPAN_NOT_CONTAINED are
								// skipped.
								//
								// Returns FALSE if errorCode indicates failure (on entry, after the
								// buffer check, or after a norm2 call) or if norm2 reports a span as
								// not normalized; TRUE otherwise.
								UBool
								FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
								    uprv_checkCanGetBuffer(s, errorCode);
								    if(U_FAILURE(errorCode)) {
								        return FALSE;
								    }
								    USetSpanCondition cond=USET_SPAN_SIMPLE;
								    int32_t start=0;
								    while(start<s.length()) {
								        const int32_t limit=set.span(s, start, cond);
								        if(cond!=USET_SPAN_NOT_CONTAINED) {
								            const UBool spanOk=
								                norm2.isNormalized(s.tempSubStringBetween(start, limit), errorCode);
								            if(!spanOk || U_FAILURE(errorCode)) {
								                return FALSE;
								            }
								            cond=USET_SPAN_NOT_CONTAINED;
								        } else {
								            // Nothing to verify for this span; just flip the condition.
								            cond=USET_SPAN_SIMPLE;
								        }
								        start=limit;
								    }
								    return TRUE;
								}


								// Quick check of s under this filtered normalizer.
								//
								// Returns UNORM_MAYBE if errorCode indicates failure on entry or after
								// the buffer check. Otherwise s is walked as alternating spans,
								// starting with USET_SPAN_SIMPLE, and norm2.quickCheck() is run on
								// each span taken with USET_SPAN_SIMPLE:
								// - a norm2 failure or a UNORM_NO result returns that span's result
								//   immediately;
								// - a UNORM_MAYBE result is remembered and returned at the end;
								// - if every span yields UNORM_YES the result is UNORM_YES.
								UNormalizationCheckResult
								FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
								    uprv_checkCanGetBuffer(s, errorCode);
								    if(U_FAILURE(errorCode)) {
								        return UNORM_MAYBE;
								    }
								    UNormalizationCheckResult overall=UNORM_YES;
								    USetSpanCondition cond=USET_SPAN_SIMPLE;
								    int32_t start=0;
								    while(start<s.length()) {
								        const int32_t limit=set.span(s, start, cond);
								        if(cond!=USET_SPAN_NOT_CONTAINED) {
								            const UNormalizationCheckResult partial=
								                norm2.quickCheck(s.tempSubStringBetween(start, limit), errorCode);
								            if(U_FAILURE(errorCode) || partial==UNORM_NO) {
								                return partial;
								            }
								            if(partial==UNORM_MAYBE) {
								                overall=UNORM_MAYBE;
								            }
								            cond=USET_SPAN_NOT_CONTAINED;
								        } else {
								            // This span is not checked; just flip the condition.
								            cond=USET_SPAN_SIMPLE;
								        }
								        start=limit;
								    }
								    return overall;
								}


								// Returns the end index of the leading part of s that passes the
								// quick check.
								//
								// Returns 0 if errorCode indicates failure on entry or after the
								// buffer check. Otherwise s is walked as alternating spans, starting
								// with USET_SPAN_SIMPLE; each span taken with USET_SPAN_SIMPLE is
								// handed to norm2.spanQuickCheckYes(). If norm2 fails or stops before
								// the end of a span, the corresponding index into s is returned.
								// If the whole string is consumed, s.length() is returned.
								int32_t
								FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
								    uprv_checkCanGetBuffer(s, errorCode);
								    if(U_FAILURE(errorCode)) {
								        return 0;
								    }
								    USetSpanCondition cond=USET_SPAN_SIMPLE;
								    int32_t start=0;
								    while(start<s.length()) {
								        const int32_t limit=set.span(s, start, cond);
								        if(cond!=USET_SPAN_NOT_CONTAINED) {
								            // norm2 reports a length relative to the substring;
								            // translate it back into an index into s.
								            const int32_t yesLength=
								                norm2.spanQuickCheckYes(s.tempSubStringBetween(start, limit), errorCode);
								            const int32_t yesLimit=start+yesLength;
								            if(U_FAILURE(errorCode) || yesLimit<limit) {
								                return yesLimit;
								            }
								            cond=USET_SPAN_NOT_CONTAINED;
								        } else {
								            // This span is not checked; just flip the condition.
								            cond=USET_SPAN_SIMPLE;
								        }
								        start=limit;
								    }
								    return s.length();
								}


								// Returns TRUE for every code point outside the filter set; for code
								// points inside the set, reports norm2.hasBoundaryBefore(c).
								UBool
								FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
								    if(!set.contains(c)) {
								        return TRUE;
								    }
								    return norm2.hasBoundaryBefore(c) ? TRUE : FALSE;
								}


								// Returns TRUE for every code point outside the filter set; for code
								// points inside the set, reports norm2.hasBoundaryAfter(c).
								UBool
								FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
								    if(!set.contains(c)) {
								        return TRUE;
								    }
								    return norm2.hasBoundaryAfter(c) ? TRUE : FALSE;
								}


								// Returns TRUE for every code point outside the filter set; for code
								// points inside the set, reports norm2.isInert(c).
								UBool
								FilteredNormalizer2::isInert(UChar32 c) const {
								    if(!set.contains(c)) {
								        return TRUE;
								    }
								    return norm2.isInert(c) ? TRUE : FALSE;
								}


								U_NAMESPACE_END


								// C API ------------------------------------------------------------------- ***


								U_NAMESPACE_USE


								// C API: creates a FilteredNormalizer2 that wraps norm2 and filterSet
								// and returns it as an opaque UNormalizer2 pointer.
								//
								// norm2       underlying normalizer; must not be NULL
								// filterSet   filter set; must not be NULL
								// pErrorCode  in/out ICU error code; if it already indicates failure
								//             the function does nothing and returns NULL
								//
								// Returns the new object, or NULL on error:
								// - U_ILLEGAL_ARGUMENT_ERROR if norm2 or filterSet is NULL
								// - U_MEMORY_ALLOCATION_ERROR if the allocation yields NULL
								U_CAPI UNormalizer2 * U_EXPORT2
								unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
								    if(U_FAILURE(*pErrorCode)) {
								        return NULL;
								    }
								    // Both pointers are dereferenced below to form references, so a
								    // NULL value for either one must be rejected up front.
								    if(norm2==NULL || filterSet==NULL) {
								        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
								        return NULL;
								    }
								    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
								                                             *UnicodeSet::fromUSet(filterSet));
								    if(fn2==NULL) {
								        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
								    }
								    return (UNormalizer2 *)fn2;
								}


								#endif  // !UCONFIG_NO_NORMALIZATION