node/deps/icu-small/source/i18n/strrepl.cpp


								/*

								**********************************************************************

								*   Copyright (c) 2002-2012, International Business Machines Corporation

								*   and others.  All Rights Reserved.

								**********************************************************************

								*   Date        Name        Description

								*   01/21/2002  aliu        Creation.

								**********************************************************************

								*/


								#include "unicode/utypes.h"


								#if !UCONFIG_NO_TRANSLITERATION


								#include "unicode/uniset.h"

								#include "unicode/utf16.h"

								#include "strrepl.h"

								#include "rbt_data.h"

								#include "util.h"


								U_NAMESPACE_BEGIN


								UnicodeReplacer::~UnicodeReplacer() {}

								UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringReplacer)


								/**

								 * Construct a StringReplacer that sets the emits the given output

								 * text and sets the cursor to the given position.

								 * @param theOutput text that will replace input text when the

								 * replace() method is called.  May contain stand-in characters

								 * that represent nested replacers.

								 * @param theCursorPos cursor position that will be returned by

								 * the replace() method

								 * @param theData transliterator context object that translates

								 * stand-in characters to UnicodeReplacer objects

								 */

								StringReplacer::StringReplacer(const UnicodeString& theOutput,

								                               int32_t theCursorPos,

								                               const TransliterationRuleData* theData) {

								    output = theOutput;

								    cursorPos = theCursorPos;

								    hasCursor = TRUE;

								    data = theData;

								    isComplex = TRUE;

								}


								/**

								 * Construct a StringReplacer that sets the emits the given output

								 * text and does not modify the cursor.

								 * @param theOutput text that will replace input text when the

								 * replace() method is called.  May contain stand-in characters

								 * that represent nested replacers.

								 * @param theData transliterator context object that translates

								 * stand-in characters to UnicodeReplacer objects

								 */

								StringReplacer::StringReplacer(const UnicodeString& theOutput,

								                               const TransliterationRuleData* theData) {

								    output = theOutput;

								    cursorPos = 0;

								    hasCursor = FALSE;

								    data = theData;

								    isComplex = TRUE;

								}


								/**

								 * Copy constructor.

								 */

								StringReplacer::StringReplacer(const StringReplacer& other) :

								    UnicodeFunctor(other),

								    UnicodeReplacer(other)

								{

								    output = other.output;

								    cursorPos = other.cursorPos;

								    hasCursor = other.hasCursor;

								    data = other.data;

								    isComplex = other.isComplex;

								}


								/**

								 * Destructor

								 */

								StringReplacer::~StringReplacer() {

								}


								/**

								 * Implement UnicodeFunctor

								 */

								UnicodeFunctor* StringReplacer::clone() const {

								    return new StringReplacer(*this);

								}


								/**

								 * Implement UnicodeFunctor

								 */

								UnicodeReplacer* StringReplacer::toReplacer() const {

								  return const_cast<StringReplacer *>(this);

								}


								/**

								 * UnicodeReplacer API

								 */

								int32_t StringReplacer::replace(Replaceable& text,

								                                int32_t start,

								                                int32_t limit,

								                                int32_t& cursor) {

								    int32_t outLen;

								    int32_t newStart = 0;


								    // NOTE: It should be possible to _always_ run the complex

								    // processing code; just slower.  If not, then there is a bug

								    // in the complex processing code.


								    // Simple (no nested replacers) Processing Code :

								    if (!isComplex) {

								        text.handleReplaceBetween(start, limit, output);

								        outLen = output.length();


								        // Setup default cursor position (for cursorPos within output)

								        newStart = cursorPos;

								    }


								    // Complex (nested replacers) Processing Code :

								    else {

								        /* When there are segments to be copied, use the Replaceable.copy()

								         * API in order to retain out-of-band data.  Copy everything to the

								         * end of the string, then copy them back over the key.  This preserves

								         * the integrity of indices into the key and surrounding context while

								         * generating the output text.

								         */

								        UnicodeString buf;

								        int32_t oOutput; // offset into 'output'

								        isComplex = FALSE;


								        // The temporary buffer starts at tempStart, and extends

								        // to destLimit.  The start of the buffer has a single

								        // character from before the key.  This provides style

								        // data when addition characters are filled into the

								        // temporary buffer.  If there is nothing to the left, use

								        // the non-character U+FFFF, which Replaceable subclasses

								        // should treat specially as a "no-style character."

								        // destStart points to the point after the style context

								        // character, so it is tempStart+1 or tempStart+2.

								        int32_t tempStart = text.length(); // start of temp buffer

								        int32_t destStart = tempStart; // copy new text to here

								        if (start > 0) {

								            int32_t len = U16_LENGTH(text.char32At(start-1));

								            text.copy(start-len, start, tempStart);

								            destStart += len;

								        } else {

								            UnicodeString str((UChar) 0xFFFF);

								            text.handleReplaceBetween(tempStart, tempStart, str);

								            destStart++;

								        }

								        int32_t destLimit = destStart;


								        for (oOutput=0; oOutput<output.length(); ) {

								            if (oOutput == cursorPos) {

								                // Record the position of the cursor

								                newStart = destLimit - destStart; // relative to start

								            }

								            UChar32 c = output.char32At(oOutput);

								            UnicodeReplacer* r = data->lookupReplacer(c);

								            if (r == NULL) {

								                // Accumulate straight (non-segment) text.

								                buf.append(c);

								            } else {

								                isComplex = TRUE;


								                // Insert any accumulated straight text.

								                if (buf.length() > 0) {

								                    text.handleReplaceBetween(destLimit, destLimit, buf);

								                    destLimit += buf.length();

								                    buf.truncate(0);

								                }


								                // Delegate output generation to replacer object

								                int32_t len = r->replace(text, destLimit, destLimit, cursor);

								                destLimit += len;

								            }

								            oOutput += U16_LENGTH(c);

								        }

								        // Insert any accumulated straight text.

								        if (buf.length() > 0) {

								            text.handleReplaceBetween(destLimit, destLimit, buf);

								            destLimit += buf.length();

								        }

								        if (oOutput == cursorPos) {

								            // Record the position of the cursor

								            newStart = destLimit - destStart; // relative to start

								        }


								        outLen = destLimit - destStart;


								        // Copy new text to start, and delete it

								        text.copy(destStart, destLimit, start);

								        text.handleReplaceBetween(tempStart + outLen, destLimit + outLen, UnicodeString());


								        // Delete the old text (the key)

								        text.handleReplaceBetween(start + outLen, limit + outLen, UnicodeString());

								    }


								    if (hasCursor) {

								        // Adjust the cursor for positions outside the key.  These

								        // refer to code points rather than code units.  If cursorPos

								        // is within the output string, then use newStart, which has

								        // already been set above.

								        if (cursorPos < 0) {

								            newStart = start;

								            int32_t n = cursorPos;

								            // Outside the output string, cursorPos counts code points

								            while (n < 0 && newStart > 0) {

								                newStart -= U16_LENGTH(text.char32At(newStart-1));

								                ++n;

								            }

								            newStart += n;

								        } else if (cursorPos > output.length()) {

								            newStart = start + outLen;

								            int32_t n = cursorPos - output.length();

								            // Outside the output string, cursorPos counts code points

								            while (n > 0 && newStart < text.length()) {

								                newStart += U16_LENGTH(text.char32At(newStart));

								                --n;

								            }

								            newStart += n;

								        } else {

								            // Cursor is within output string.  It has been set up above

								            // to be relative to start.

								            newStart += start;

								        }


								        cursor = newStart;

								    }


								    return outLen;

								}


								/**

								 * UnicodeReplacer API

								 */

								UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,

								                                                 UBool escapeUnprintable) const {

								    rule.truncate(0);

								    UnicodeString quoteBuf;


								    int32_t cursor = cursorPos;


								    // Handle a cursor preceding the output

								    if (hasCursor && cursor < 0) {

								        while (cursor++ < 0) {

								            ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf);

								        }

								        // Fall through and append '|' below

								    }


								    for (int32_t i=0; i<output.length(); ++i) {

								        if (hasCursor && i == cursor) {

								            ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);

								        }

								        UChar c = output.charAt(i); // Ok to use 16-bits here


								        UnicodeReplacer* r = data->lookupReplacer(c);

								        if (r == NULL) {

								            ICU_Utility::appendToRule(rule, c, FALSE, escapeUnprintable, quoteBuf);

								        } else {

								            UnicodeString buf;

								            r->toReplacerPattern(buf, escapeUnprintable);

								            buf.insert(0, (UChar)0x20);

								            buf.append((UChar)0x20);

								            ICU_Utility::appendToRule(rule, buf,

								                                      TRUE, escapeUnprintable, quoteBuf);

								        }

								    }


								    // Handle a cursor after the output.  Use > rather than >= because

								    // if cursor == output.length() it is at the end of the output,

								    // which is the default position, so we need not emit it.

								    if (hasCursor && cursor > output.length()) {

								        cursor -= output.length();

								        while (cursor-- > 0) {

								            ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf);

								        }

								        ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);

								    }

								    // Flush quoteBuf out to result

								    ICU_Utility::appendToRule(rule, -1,

								                              TRUE, escapeUnprintable, quoteBuf);


								    return rule;

								}


								/**

								 * Implement UnicodeReplacer

								 */

								void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {

								    UChar32 ch;

								    for (int32_t i=0; i<output.length(); i+=U16_LENGTH(ch)) {

								    ch = output.char32At(i);

								    UnicodeReplacer* r = data->lookupReplacer(ch);

								    if (r == NULL) {

								        toUnionTo.add(ch);

								    } else {

								        r->addReplacementSetTo(toUnionTo);

								    }

								    }

								}


								/**

								 * UnicodeFunctor API

								 */

								void StringReplacer::setData(const TransliterationRuleData* d) {

								    data = d;

								    int32_t i = 0;

								    while (i<output.length()) {

								        UChar32 c = output.char32At(i);

								        UnicodeFunctor* f = data->lookup(c);

								        if (f != NULL) {

								            f->setData(data);

								        }

								        i += U16_LENGTH(c);

								    }

								}


								U_NAMESPACE_END


								#endif /* #if !UCONFIG_NO_TRANSLITERATION */


								//eof