node/deps/icu-small/source/i18n/strmatch.cpp


								/*

								**********************************************************************

								*   Copyright (c) 2001-2012, International Business Machines Corporation

								*   and others.  All Rights Reserved.

								**********************************************************************

								*   Date        Name        Description

								*   07/23/01    aliu        Creation.

								**********************************************************************

								*/


								#include "unicode/utypes.h"


								#if !UCONFIG_NO_TRANSLITERATION


								#include "strmatch.h"

								#include "rbt_data.h"

								#include "util.h"

								#include "unicode/uniset.h"

								#include "unicode/utf16.h"


								U_NAMESPACE_BEGIN


								UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringMatcher)


								/**
								 * Construct a matcher from a slice of a rule string.
								 *
								 * The text of theString between start and limit is extracted into
								 * this object's own pattern string. The match range starts out as
								 * -1/-1, the same state resetMatch() establishes.
								 *
								 * @param theString  source string the pattern is extracted from
								 * @param start      first index passed to extractBetween()
								 * @param limit      limit index passed to extractBetween()
								 * @param segmentNum stored as segmentNumber; values > 0 make
								 *                   toPattern() wrap its output in parentheses
								 * @param theData    rule data used to look up sub-matchers; only its
								 *                   address is stored, it is not copied
								 */
								StringMatcher::StringMatcher(const UnicodeString& theString,
								                             int32_t start,
								                             int32_t limit,
								                             int32_t segmentNum,
								                             const TransliterationRuleData& theData)
								    : data(&theData),
								      segmentNumber(segmentNum),
								      matchStart(-1),
								      matchLimit(-1)
								{
								    // pattern is default-constructed; fill it with the requested slice.
								    theString.extractBetween(start, limit, pattern);
								}


								/**
								 * Copy constructor.
								 *
								 * Copies the three base sub-objects, the pattern string, the segment
								 * number and the currently recorded match range. The data pointer is
								 * copied as a pointer, so the new object refers to the same
								 * TransliterationRuleData as the original.
								 *
								 * @param o the matcher to copy
								 */
								StringMatcher::StringMatcher(const StringMatcher& o)
								    : UnicodeFunctor(o),
								      UnicodeMatcher(o),
								      UnicodeReplacer(o),
								      pattern(o.pattern),
								      data(o.data),
								      segmentNumber(o.segmentNumber),
								      matchStart(o.matchStart),
								      matchLimit(o.matchLimit)
								{
								    // Nothing else to do: every member is set in the initializer list.
								}


								/**
								 * Destructor. The body is empty; in particular the object pointed to
								 * by data is not deleted here.
								 */
								StringMatcher::~StringMatcher() {}


								/**
								 * Implement UnicodeFunctor.
								 *
								 * @return a heap-allocated copy of this object produced by the copy
								 *         constructor. NOTE(review): the caller presumably takes
								 *         ownership of the result -- confirm against the
								 *         UnicodeFunctor::clone() contract.
								 */
								UnicodeFunctor* StringMatcher::clone() const {
								    StringMatcher* duplicate = new StringMatcher(*this);
								    return duplicate;
								}


								/**
								 * UnicodeFunctor API.  Expose this object through its UnicodeMatcher
								 * base.
								 *
								 * The method is const but hands out a non-const pointer, so constness
								 * is removed from 'this' before the upcast.
								 *
								 * @return 'this', viewed as a UnicodeMatcher*
								 */
								UnicodeMatcher* StringMatcher::toMatcher() const {
								  return static_cast<UnicodeMatcher *>(const_cast<StringMatcher *>(this));
								}


								/**
								 * UnicodeFunctor API.  Expose this object through its UnicodeReplacer
								 * base.
								 *
								 * The method is const but hands out a non-const pointer, so constness
								 * is removed from 'this' before the upcast.
								 *
								 * @return 'this', viewed as a UnicodeReplacer*
								 */
								UnicodeReplacer* StringMatcher::toReplacer() const {
								  return static_cast<UnicodeReplacer *>(const_cast<StringMatcher *>(this));
								}


								/**
								 * Implement UnicodeMatcher.
								 *
								 * Tries to match the whole pattern against text, starting at offset.
								 * The direction is chosen from the arguments: when limit < offset the
								 * pattern is walked from its last code unit to its first and the
								 * cursor moves downwards; otherwise the pattern is walked forwards and
								 * the cursor moves upwards.
								 *
								 * Each pattern code unit is looked up in data. If a sub-matcher is
								 * registered for it, matching of that element is delegated to the
								 * sub-matcher (which advances the cursor itself) and any result other
								 * than U_MATCH is returned unchanged. Otherwise the code unit is a
								 * literal and must equal the text code unit at the cursor, with the
								 * cursor still strictly inside limit.
								 *
								 * @param text        the text being matched
								 * @param offset      in: position to start at; out: on U_MATCH, the
								 *                    cursor position after the match. Left untouched
								 *                    for every other return value.
								 * @param limit       boundary the cursor may not reach when consuming
								 *                    a literal
								 * @param incremental forwarded to sub-matchers; in the forward
								 *                    direction it also makes the method return
								 *                    U_PARTIAL_MATCH when the cursor reaches limit
								 *                    before the pattern is exhausted
								 * @return U_MATCH, U_MISMATCH, U_PARTIAL_MATCH, or whatever non-match
								 *         value a sub-matcher returned
								 */
								UMatchDegree StringMatcher::matches(const Replaceable& text,
								                                    int32_t& offset,
								                                    int32_t limit,
								                                    UBool incremental) {
								    // Work on a local copy so that offset is only modified on success.
								    int32_t pos = offset;

								    if (limit < pos) {
								        // Reverse direction: consume the pattern back to front.
								        int32_t idx = pattern.length();
								        while (--idx >= 0) {
								            const UChar unit = pattern.charAt(idx);
								            UnicodeMatcher* sub = data->lookupMatcher(unit);
								            if (sub != NULL) {
								                const UMatchDegree degree =
								                    sub->matches(text, pos, limit, incremental);
								                if (degree != U_MATCH) {
								                    return degree;
								                }
								                continue;
								            }
								            // Literal code unit: the cursor must still be above limit
								            // and the text must agree.
								            if (pos <= limit || unit != text.charAt(pos)) {
								                return U_MISMATCH;
								            }
								            --pos;
								        }
								        // Store the range in forward (start, limit) form. An existing
								        // range is kept, because the rightmost match is the one wanted.
								        if (matchStart < 0) {
								            matchStart = pos + 1;
								            matchLimit = offset + 1;
								        }
								    } else {
								        // Forward direction.
								        for (int32_t idx = 0; idx < pattern.length(); ++idx) {
								            if (incremental && pos == limit) {
								                // Ran out of context with no mismatch so far, but the
								                // pattern is not finished yet.
								                return U_PARTIAL_MATCH;
								            }
								            const UChar unit = pattern.charAt(idx);
								            UnicodeMatcher* sub = data->lookupMatcher(unit);
								            if (sub != NULL) {
								                const UMatchDegree degree =
								                    sub->matches(text, pos, limit, incremental);
								                if (degree != U_MATCH) {
								                    return degree;
								                }
								                continue;
								            }
								            // The pos < limit test is redundant when incremental is
								            // TRUE (checked at the top of the loop) but is required
								            // when it is FALSE.
								            if (pos >= limit || unit != text.charAt(pos)) {
								                return U_MISMATCH;
								            }
								            ++pos;
								        }
								        // A forward match always overwrites the recorded range.
								        matchStart = offset;
								        matchLimit = pos;
								    }

								    offset = pos;
								    return U_MATCH;
								}


								/**
								 * Implement UnicodeMatcher.
								 *
								 * Rebuilds a rule-syntax representation of this matcher in result.
								 * Any previous content of result is discarded. When segmentNumber is
								 * greater than zero the output is wrapped in '(' ... ')'. Pattern code
								 * units that have a sub-matcher registered in data are rendered via
								 * that sub-matcher's own toPattern(); all others are appended as
								 * individual characters.
								 *
								 * @param result            receives the pattern text
								 * @param escapeUnprintable forwarded to ICU_Utility::appendToRule()
								 *                          and to the sub-matchers
								 * @return result
								 */
								UnicodeString& StringMatcher::toPattern(UnicodeString& result,
								                                        UBool escapeUnprintable) const
								{
								    const UBool isSegment = (segmentNumber > 0);
								    const int32_t patternLength = pattern.length();
								    UnicodeString subPattern;  // scratch buffer handed to sub-matchers
								    UnicodeString quoteBuf;    // pending-quote state for appendToRule()

								    result.truncate(0);
								    if (isSegment) {
								        result.append(static_cast<UChar>(0x0028)); /*(*/
								    }

								    for (int32_t idx = 0; idx < patternLength; ++idx) {
								        const UChar unit = pattern.charAt(idx);
								        const UnicodeMatcher* sub = data->lookupMatcher(unit);
								        if (sub != NULL) {
								            const UnicodeString& rendered =
								                sub->toPattern(subPattern, escapeUnprintable);
								            ICU_Utility::appendToRule(result, rendered,
								                                      TRUE, escapeUnprintable, quoteBuf);
								        } else {
								            ICU_Utility::appendToRule(result, unit,
								                                      FALSE, escapeUnprintable, quoteBuf);
								        }
								    }

								    if (isSegment) {
								        result.append(static_cast<UChar>(0x0029)); /*)*/
								    }

								    // Passing -1 flushes whatever is still held in quoteBuf.
								    ICU_Utility::appendToRule(result, -1,
								                              TRUE, escapeUnprintable, quoteBuf);
								    return result;
								}


								/**
								 * Implement UnicodeMatcher.
								 *
								 * Only the first code point of the pattern is examined.
								 *
								 * @param v index value to test against
								 * @return TRUE if the pattern is empty; otherwise the answer of the
								 *         sub-matcher registered for the first code point, or, when
								 *         there is none, whether the low 8 bits of that code point
								 *         equal v
								 */
								UBool StringMatcher::matchesIndexValue(uint8_t v) const {
								    if (pattern.length() == 0) {
								        return TRUE;
								    }

								    const UChar32 first = pattern.char32At(0);
								    const UnicodeMatcher* sub = data->lookupMatcher(first);
								    if (sub != NULL) {
								        return sub->matchesIndexValue(v);
								    }
								    return (first & 0xFF) == v;
								}


								/**
								 * Implement UnicodeMatcher.
								 *
								 * Walks the pattern one code point at a time. A code point that has
								 * a sub-matcher registered in data contributes that sub-matcher's
								 * match set; any other code point is added to the set directly.
								 *
								 * @param toUnionTo the set that receives the characters
								 */
								void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
								    int32_t pos = 0;
								    while (pos < pattern.length()) {
								        const UChar32 cp = pattern.char32At(pos);
								        const UnicodeMatcher* sub = data->lookupMatcher(cp);
								        if (sub != NULL) {
								            sub->addMatchSetTo(toUnionTo);
								        } else {
								            toUnionTo.add(cp);
								        }
								        // Step over one or two code units, depending on the code point.
								        pos += U16_LENGTH(cp);
								    }
								}


								/**
								 * UnicodeReplacer API.
								 *
								 * Replaces text[start, limit) with the text of the most recently
								 * recorded match range. The range is first copied to position limit
								 * with Replaceable::copy() (used so that out-of-band data is carried
								 * along), and then the original [start, limit) span is deleted.
								 *
								 * @param text  the text to modify
								 * @param start start of the span to replace
								 * @param limit limit of the span to replace; also the position the
								 *              matched segment is copied to
								 * @param cursor unused
								 * @return the length of the copied segment, or 0 when no match is
								 *         recorded or the recorded match is empty
								 */
								int32_t StringMatcher::replace(Replaceable& text,
								                               int32_t start,
								                               int32_t limit,
								                               int32_t& /*cursor*/) {
								    int32_t copiedLength = 0;

								    // A negative matchStart means nothing was matched, e.g. a quantifier
								    // that matched zero times: x (a)* y against "xy". An empty range
								    // needs no copy either.
								    if (matchStart >= 0 && matchStart != matchLimit) {
								        text.copy(matchStart, matchLimit, limit);
								        copiedLength = matchLimit - matchStart;
								    }

								    // Remove the text that is being replaced.
								    text.handleReplaceBetween(start, limit, UnicodeString());

								    return copiedLength;
								}


								/**
								 * UnicodeReplacer API.
								 *
								 * Writes a segment reference -- a '$' followed by segmentNumber in
								 * radix 10 -- into rule, discarding its previous content.
								 *
								 * @param rule receives the reference text
								 * @return rule
								 */
								UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,
								                                                UBool /*escapeUnprintable*/) const {
								    // assert(segmentNumber > 0);
								    const UChar dollarSign = 0x0024; /*$*/

								    rule.truncate(0);
								    rule.append(dollarSign);
								    ICU_Utility::appendNumber(rule, segmentNumber, 10, 1);
								    return rule;
								}


								/**
								 * Forget any recorded match by setting both ends of the match range
								 * back to -1.  This must be called before performing a set of matches
								 * with this segment.
								 */
								 void StringMatcher::resetMatch() {
								    matchLimit = -1;
								    matchStart = -1;
								}


								/**
								 * Union the set of all characters that may be output by this object
								 * into the given set.
								 *
								 * This implementation intentionally adds nothing.
								 *
								 * @param toUnionTo the set into which to union the output characters
								 *                  (unused)
								 */
								void StringMatcher::addReplacementSetTo(UnicodeSet& /*toUnionTo*/) const {
								    // What this replacer emits is the source text between matchStart
								    // and matchLimit, which depends on the input and so cannot be
								    // computed here.  The choices are to add nothing or to add every
								    // character; adding nothing is probably the more useful of the two.
								}


								/**

								 * Implement UnicodeFunctor

								 */

								void StringMatcher::setData(const TransliterationRuleData* d) {

								    data = d;

								    int32_t i = 0;

								    while (i<pattern.length()) {

								        UChar32 c = pattern.char32At(i);

								        UnicodeFunctor* f = data->lookup(c);

								        if (f != NULL) {

								            f->setData(data);

								        }

								        i += U16_LENGTH(c);

								    }

								}


								U_NAMESPACE_END


								#endif /* #if !UCONFIG_NO_TRANSLITERATION */


								//eof