diff --git a/deps/v8/ChangeLog b/deps/v8/ChangeLog index 0de330847b..a15cce80d0 100644 --- a/deps/v8/ChangeLog +++ b/deps/v8/ChangeLog @@ -1,3 +1,10 @@ +2010-03-29: Version 2.2.0 + + Fixed a few minor bugs. + + Performance improvements for string operations. + + 2010-03-26: Version 2.1.10 Fixed scons build issues. diff --git a/deps/v8/include/v8.h b/deps/v8/include/v8.h index f64b386736..90c43831f4 100644 --- a/deps/v8/include/v8.h +++ b/deps/v8/include/v8.h @@ -855,12 +855,15 @@ class V8EXPORT String : public Primitive { * \param start The starting position within the string at which * copying begins. * \param length The number of bytes to copy from the string. - * \return The number of characters copied to the buffer + * \param nchars The number of characters written. + * \return The number of bytes copied to the buffer * excluding the NULL terminator. */ int Write(uint16_t* buffer, int start = 0, int length = -1) const; // UTF-16 int WriteAscii(char* buffer, int start = 0, int length = -1) const; // ASCII - int WriteUtf8(char* buffer, int length = -1) const; // UTF-8 + int WriteUtf8(char* buffer, + int length = -1, + int* nchars = NULL) const; // UTF-8 /** * A zero length string. diff --git a/deps/v8/src/SConscript b/deps/v8/src/SConscript index a1d4796b7f..1f1c1c1833 100755 --- a/deps/v8/src/SConscript +++ b/deps/v8/src/SConscript @@ -102,6 +102,7 @@ SOURCES = { stub-cache.cc token.cc top.cc + type-info.cc unicode.cc utils.cc v8-counters.cc diff --git a/deps/v8/src/api.cc b/deps/v8/src/api.cc index 2100480e85..3ba22d63ea 100644 --- a/deps/v8/src/api.cc +++ b/deps/v8/src/api.cc @@ -2639,7 +2639,7 @@ int String::Utf8Length() const { } -int String::WriteUtf8(char* buffer, int capacity) const { +int String::WriteUtf8(char* buffer, int capacity, int *ncharsRef) const { if (IsDeadCheck("v8::String::WriteUtf8()")) return 0; LOG_API("String::WriteUtf8"); ENTER_V8; @@ -2653,10 +2653,12 @@ int String::WriteUtf8(char* buffer, int capacity) const { int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1); int i; int pos = 0; + int nchars = 0; for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) { i::uc32 c = write_input_buffer.GetNext(); int written = unibrow::Utf8::Encode(buffer + pos, c); pos += written; + nchars++; } if (i < len) { // For the last characters we need to check the length for each one @@ -2670,12 +2672,14 @@ int String::WriteUtf8(char* buffer, int capacity) const { for (int j = 0; j < written; j++) buffer[pos + j] = intermediate[j]; pos += written; + nchars++; } else { // We've reached the end of the buffer break; } } } + if (ncharsRef) *ncharsRef = nchars; if (i == len && (capacity == -1 || pos < capacity)) buffer[pos++] = '\0'; return pos; diff --git a/deps/v8/src/builtins.cc b/deps/v8/src/builtins.cc index 122fbba2c4..feb912f414 100644 --- a/deps/v8/src/builtins.cc +++ b/deps/v8/src/builtins.cc @@ -495,7 +495,9 @@ BUILTIN(ArrayShift) { } if (Heap::new_space()->Contains(elms)) { - array->set_elements(LeftTrimFixedArray(elms)); + // As elms still in the same space they used to be (new space), + // there is no need to update remembered set. + array->set_elements(LeftTrimFixedArray(elms), SKIP_WRITE_BARRIER); } else { // Shift the elements. AssertNoAllocation no_gc; diff --git a/deps/v8/src/date.js b/deps/v8/src/date.js index c7c39406fa..46769c6bdf 100644 --- a/deps/v8/src/date.js +++ b/deps/v8/src/date.js @@ -121,9 +121,16 @@ function EquivalentTime(t) { } -// Because computing the DST offset is a pretty expensive operation -// we keep a cache of last computed offset along with a time interval +// local_time_offset is initialized when the DST_offset_cache is missed. +// It must not be used until after a call to DaylightSavingsOffset(). +// In this way, only one check, for a DST cache miss, is needed. +var local_time_offset; + + +// Because computing the DST offset is an expensive operation, +// we keep a cache of the last computed DST offset along with a time interval // where we know the cache is valid. +// When the cache is valid, local_time_offset is also valid. var DST_offset_cache = { // Cached DST offset. offset: 0, @@ -149,6 +156,11 @@ function DaylightSavingsOffset(t) { // If the time fits in the cached interval, return the cached offset. if (t <= end) return cache.offset; + // If the cache misses, the local_time_offset may not be initialized. + if (IS_UNDEFINED(local_time_offset)) { + local_time_offset = %DateLocalTimeOffset(); + } + // Compute a possible new interval end. var new_end = end + cache.increment; @@ -185,6 +197,10 @@ function DaylightSavingsOffset(t) { } } + // If the cache misses, the local_time_offset may not be initialized. + if (IS_UNDEFINED(local_time_offset)) { + local_time_offset = %DateLocalTimeOffset(); + } // Compute the DST offset for the time and shrink the cache interval // to only contain the time. This allows fast repeated DST offset // computations for the same time. @@ -215,11 +231,11 @@ function WeekDay(time) { return Modulo(DAY(time) + 4, 7); } -var local_time_offset = %DateLocalTimeOffset(); function LocalTime(time) { if (NUMBER_IS_NAN(time)) return time; - return time + local_time_offset + DaylightSavingsOffset(time); + // DaylightSavingsOffset called before local_time_offset used. + return time + DaylightSavingsOffset(time) + local_time_offset; } function LocalTimeNoCheck(time) { @@ -228,6 +244,8 @@ function LocalTimeNoCheck(time) { } // Inline the DST offset cache checks for speed. + // The cache is hit, or DaylightSavingsOffset is called, + // before local_time_offset is used. var cache = DST_offset_cache; if (cache.start <= time && time <= cache.end) { var dst_offset = cache.offset; @@ -240,6 +258,11 @@ function LocalTimeNoCheck(time) { function UTC(time) { if (NUMBER_IS_NAN(time)) return time; + // local_time_offset is needed before the call to DaylightSavingsOffset, + // so it may be uninitialized. + if (IS_UNDEFINED(local_time_offset)) { + local_time_offset = %DateLocalTimeOffset(); + } var tmp = time - local_time_offset; return tmp - DaylightSavingsOffset(tmp); } @@ -566,7 +589,7 @@ function TimeString(time) { function LocalTimezoneString(time) { var timezoneOffset = - (local_time_offset + DaylightSavingsOffset(time)) / msPerMinute; + (DaylightSavingsOffset(time) + local_time_offset) / msPerMinute; var sign = (timezoneOffset >= 0) ? 1 : -1; var hours = FLOOR((sign * timezoneOffset)/60); var min = FLOOR((sign * timezoneOffset)%60); diff --git a/deps/v8/src/frame-element.h b/deps/v8/src/frame-element.h index 83db5c3342..48bb354aa8 100644 --- a/deps/v8/src/frame-element.h +++ b/deps/v8/src/frame-element.h @@ -28,7 +28,7 @@ #ifndef V8_FRAME_ELEMENT_H_ #define V8_FRAME_ELEMENT_H_ -#include "type-info-inl.h" +#include "type-info.h" #include "macro-assembler.h" #include "zone.h" diff --git a/deps/v8/src/heap.cc b/deps/v8/src/heap.cc index 1f1599a79b..5421dcc195 100644 --- a/deps/v8/src/heap.cc +++ b/deps/v8/src/heap.cc @@ -1961,8 +1961,9 @@ Object* Heap::AllocateConsString(String* first, String* second) { return MakeOrFindTwoCharacterString(c1, c2); } - bool is_ascii = first->IsAsciiRepresentation() - && second->IsAsciiRepresentation(); + bool first_is_ascii = first->IsAsciiRepresentation(); + bool second_is_ascii = second->IsAsciiRepresentation(); + bool is_ascii = first_is_ascii && second_is_ascii; // Make sure that an out of memory exception is thrown if the length // of the new cons string is too large. @@ -1997,6 +1998,25 @@ Object* Heap::AllocateConsString(String* first, String* second) { for (int i = 0; i < second_length; i++) *dest++ = src[i]; return result; } else { + // For short external two-byte strings we check whether they can + // be represented using ascii. + if (!first_is_ascii) { + first_is_ascii = first->IsExternalTwoByteStringWithAsciiChars(); + } + if (first_is_ascii && !second_is_ascii) { + second_is_ascii = second->IsExternalTwoByteStringWithAsciiChars(); + } + if (first_is_ascii && second_is_ascii) { + Object* result = AllocateRawAsciiString(length); + if (result->IsFailure()) return result; + // Copy the characters into the new object. + char* dest = SeqAsciiString::cast(result)->GetChars(); + String::WriteToFlat(first, dest, 0, first_length); + String::WriteToFlat(second, dest + first_length, 0, second_length); + Counters::string_add_runtime_ext_to_ascii.Increment(); + return result; + } + Object* result = AllocateRawTwoByteString(length); if (result->IsFailure()) return result; // Copy the characters into the new object. diff --git a/deps/v8/src/objects-inl.h b/deps/v8/src/objects-inl.h index a26da7dd62..d363d4d6ab 100644 --- a/deps/v8/src/objects-inl.h +++ b/deps/v8/src/objects-inl.h @@ -255,6 +255,16 @@ bool String::IsTwoByteRepresentation() { } +bool String::IsExternalTwoByteStringWithAsciiChars() { + if (!IsExternalTwoByteString()) return false; + const uc16* data = ExternalTwoByteString::cast(this)->resource()->data(); + for (int i = 0, len = length(); i < len; i++) { + if (data[i] > kMaxAsciiCharCode) return false; + } + return true; +} + + bool StringShape::IsCons() { return (type_ & kStringRepresentationMask) == kConsStringTag; } @@ -732,7 +742,8 @@ Object* Object::GetProperty(String* key, PropertyAttributes* attributes) { } else { \ ASSERT(mode == SKIP_WRITE_BARRIER); \ ASSERT(Heap::InNewSpace(object) || \ - !Heap::InNewSpace(READ_FIELD(object, offset))); \ + !Heap::InNewSpace(READ_FIELD(object, offset)) || \ + Page::IsRSetSet(object->address(), offset)); \ } #define READ_DOUBLE_FIELD(p, offset) \ diff --git a/deps/v8/src/objects.cc b/deps/v8/src/objects.cc index a1fbc99277..02ea5b0455 100644 --- a/deps/v8/src/objects.cc +++ b/deps/v8/src/objects.cc @@ -4660,13 +4660,38 @@ bool String::IsEqualTo(Vector str) { } +template +static inline uint32_t HashSequentialString(const schar* chars, int length) { + StringHasher hasher(length); + if (!hasher.has_trivial_hash()) { + int i; + for (i = 0; hasher.is_array_index() && (i < length); i++) { + hasher.AddCharacter(chars[i]); + } + for (; i < length; i++) { + hasher.AddCharacterNoIndex(chars[i]); + } + } + return hasher.GetHashField(); +} + + uint32_t String::ComputeAndSetHash() { // Should only be called if hash code has not yet been computed. ASSERT(!(hash_field() & kHashComputedMask)); + const int len = length(); + // Compute the hash code. - StringInputBuffer buffer(this); - uint32_t field = ComputeHashField(&buffer, length()); + uint32_t field = 0; + if (StringShape(this).IsSequentialAscii()) { + field = HashSequentialString(SeqAsciiString::cast(this)->GetChars(), len); + } else if (StringShape(this).IsSequentialTwoByte()) { + field = HashSequentialString(SeqTwoByteString::cast(this)->GetChars(), len); + } else { + StringInputBuffer buffer(this); + field = ComputeHashField(&buffer, len); + } // Store the hash code in the object. set_hash_field(field); diff --git a/deps/v8/src/objects.h b/deps/v8/src/objects.h index 01977f0929..5a0db0177b 100644 --- a/deps/v8/src/objects.h +++ b/deps/v8/src/objects.h @@ -3919,6 +3919,13 @@ class String: public HeapObject { inline bool IsAsciiRepresentation(); inline bool IsTwoByteRepresentation(); + // Check whether this string is an external two-byte string that in + // fact contains only ascii characters. + // + // Such strings may appear when the embedder prefers two-byte + // representations even for ascii data. + inline bool IsExternalTwoByteStringWithAsciiChars(); + // Get and set individual two byte chars in the string. inline void Set(int index, uint16_t value); // Get individual two byte char in the string. Repeated calls diff --git a/deps/v8/src/regexp.js b/deps/v8/src/regexp.js index dc1b0429f7..e2492f7245 100644 --- a/deps/v8/src/regexp.js +++ b/deps/v8/src/regexp.js @@ -344,7 +344,6 @@ function RegExpToString() { // on the captures array of the last successful match and the subject string // of the last successful match. function RegExpGetLastMatch() { - if (lastMatchInfoOverride) { return lastMatchInfoOverride[0]; } var regExpSubject = LAST_SUBJECT(lastMatchInfo); return SubString(regExpSubject, lastMatchInfo[CAPTURE0], @@ -353,11 +352,6 @@ function RegExpGetLastMatch() { function RegExpGetLastParen() { - if (lastMatchInfoOverride) { - var override = lastMatchInfoOverride; - if (override.length <= 3) return ''; - return override[override.length - 3]; - } var length = NUMBER_OF_CAPTURES(lastMatchInfo); if (length <= 2) return ''; // There were no captures. // We match the SpiderMonkey behavior: return the substring defined by the @@ -374,32 +368,17 @@ function RegExpGetLastParen() { function RegExpGetLeftContext() { - var start_index; - var subject; - if (!lastMatchInfoOverride) { - start_index = lastMatchInfo[CAPTURE0]; - subject = LAST_SUBJECT(lastMatchInfo); - } else { - var override = lastMatchInfoOverride; - start_index = override[override.length - 2]; - subject = override[override.length - 1]; - } - return SubString(subject, 0, start_index); + return SubString(LAST_SUBJECT(lastMatchInfo), + 0, + lastMatchInfo[CAPTURE0]); } function RegExpGetRightContext() { - var start_index; - var subject; - if (!lastMatchInfoOverride) { - start_index = lastMatchInfo[CAPTURE1]; - subject = LAST_SUBJECT(lastMatchInfo); - } else { - var override = lastMatchInfoOverride; - subject = override[override.length - 1]; - start_index = override[override.length - 2] + subject.length; - } - return SubString(subject, start_index, subject.length); + var subject = LAST_SUBJECT(lastMatchInfo); + return SubString(subject, + lastMatchInfo[CAPTURE1], + subject.length); } @@ -408,10 +387,6 @@ function RegExpGetRightContext() { // called with indices from 1 to 9. function RegExpMakeCaptureGetter(n) { return function() { - if (lastMatchInfoOverride) { - if (n < lastMatchInfoOverride.length - 2) return lastMatchInfoOverride[n]; - return ''; - } var index = n * 2; if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; var matchStart = lastMatchInfo[CAPTURE(index)]; @@ -436,12 +411,6 @@ var lastMatchInfo = [ 0, // REGEXP_FIRST_CAPTURE + 1 ]; -// Override last match info with an array of actual substrings. -// Used internally by replace regexp with function. -// The array has the format of an "apply" argument for a replacement -// function. -var lastMatchInfoOverride = null; - // ------------------------------------------------------------------- function SetupRegExp() { diff --git a/deps/v8/src/register-allocator.h b/deps/v8/src/register-allocator.h index 0fbc83b821..4564533911 100644 --- a/deps/v8/src/register-allocator.h +++ b/deps/v8/src/register-allocator.h @@ -29,7 +29,7 @@ #define V8_REGISTER_ALLOCATOR_H_ #include "macro-assembler.h" -#include "type-info-inl.h" +#include "type-info.h" #if V8_TARGET_ARCH_IA32 #include "ia32/register-allocator-ia32.h" diff --git a/deps/v8/src/runtime.cc b/deps/v8/src/runtime.cc index c77d518371..b349815219 100644 --- a/deps/v8/src/runtime.cc +++ b/deps/v8/src/runtime.cc @@ -1567,91 +1567,9 @@ static Object* Runtime_CharFromCode(Arguments args) { return CharFromCode(args[0]); } - -class FixedArrayBuilder { - public: - explicit FixedArrayBuilder(int initial_capacity) - : array_(Factory::NewFixedArrayWithHoles(initial_capacity)), - length_(0) { - // Require a non-zero initial size. Ensures that doubling the size to - // extend the array will work. - ASSERT(initial_capacity > 0); - } - - explicit FixedArrayBuilder(Handle backing_store) - : array_(backing_store), - length_(0) { - // Require a non-zero initial size. Ensures that doubling the size to - // extend the array will work. - ASSERT(backing_store->length() > 0); - } - - bool HasCapacity(int elements) { - int length = array_->length(); - int required_length = length_ + elements; - return (length >= required_length); - } - - void EnsureCapacity(int elements) { - int length = array_->length(); - int required_length = length_ + elements; - if (length < required_length) { - int new_length = length; - do { - new_length *= 2; - } while (new_length < required_length); - Handle extended_array = - Factory::NewFixedArrayWithHoles(new_length); - array_->CopyTo(0, *extended_array, 0, length_); - array_ = extended_array; - } - } - - void Add(Object* value) { - ASSERT(length_ < capacity()); - array_->set(length_, value); - length_++; - } - - void Add(Smi* value) { - ASSERT(length_ < capacity()); - array_->set(length_, value); - length_++; - } - - Handle array() { - return array_; - } - - int length() { - return length_; - } - - int capacity() { - return array_->length(); - } - - Handle ToJSArray() { - Handle result_array = Factory::NewJSArrayWithElements(array_); - result_array->set_length(Smi::FromInt(length_)); - return result_array; - } - - Handle ToJSArray(Handle target_array) { - target_array->set_elements(*array_); - target_array->set_length(Smi::FromInt(length_)); - return target_array; - } - - private: - Handle array_; - int length_; -}; - - // Forward declarations. -const int kStringBuilderConcatHelperLengthBits = 11; -const int kStringBuilderConcatHelperPositionBits = 19; +static const int kStringBuilderConcatHelperLengthBits = 11; +static const int kStringBuilderConcatHelperPositionBits = 19; template static inline void StringBuilderConcatHelper(String*, @@ -1659,19 +1577,15 @@ static inline void StringBuilderConcatHelper(String*, FixedArray*, int); -typedef BitField - StringBuilderSubstringLength; -typedef BitField - StringBuilderSubstringPosition; - +typedef BitField StringBuilderSubstringLength; +typedef BitField StringBuilderSubstringPosition; class ReplacementStringBuilder { public: ReplacementStringBuilder(Handle subject, int estimated_part_count) - : array_builder_(estimated_part_count), - subject_(subject), + : subject_(subject), + parts_(Factory::NewFixedArray(estimated_part_count)), + part_count_(0), character_count_(0), is_ascii_(subject->IsAsciiRepresentation()) { // Require a non-zero initial size. Ensures that doubling the size to @@ -1679,35 +1593,38 @@ class ReplacementStringBuilder { ASSERT(estimated_part_count > 0); } - static inline void AddSubjectSlice(FixedArrayBuilder* builder, - int from, - int to) { + void EnsureCapacity(int elements) { + int length = parts_->length(); + int required_length = part_count_ + elements; + if (length < required_length) { + int new_length = length; + do { + new_length *= 2; + } while (new_length < required_length); + Handle extended_array = + Factory::NewFixedArray(new_length); + parts_->CopyTo(0, *extended_array, 0, part_count_); + parts_ = extended_array; + } + } + + void AddSubjectSlice(int from, int to) { ASSERT(from >= 0); int length = to - from; ASSERT(length > 0); + // Can we encode the slice in 11 bits for length and 19 bits for + // start position - as used by StringBuilderConcatHelper? if (StringBuilderSubstringLength::is_valid(length) && StringBuilderSubstringPosition::is_valid(from)) { int encoded_slice = StringBuilderSubstringLength::encode(length) | StringBuilderSubstringPosition::encode(from); - builder->Add(Smi::FromInt(encoded_slice)); + AddElement(Smi::FromInt(encoded_slice)); } else { // Otherwise encode as two smis. - builder->Add(Smi::FromInt(-length)); - builder->Add(Smi::FromInt(from)); + AddElement(Smi::FromInt(-length)); + AddElement(Smi::FromInt(from)); } - } - - - void EnsureCapacity(int elements) { - array_builder_.EnsureCapacity(elements); - } - - - void AddSubjectSlice(int from, int to) { - AddSubjectSlice(&array_builder_, from, to); - // Can we encode the slice in 11 bits for length and 19 bits for - // start position - as used by StringBuilderConcatHelper? - IncrementCharacterCount(to - from); + IncrementCharacterCount(length); } @@ -1723,7 +1640,7 @@ class ReplacementStringBuilder { Handle ToString() { - if (array_builder_.length() == 0) { + if (part_count_ == 0) { return Factory::empty_string(); } @@ -1735,8 +1652,8 @@ class ReplacementStringBuilder { char* char_buffer = seq->GetChars(); StringBuilderConcatHelper(*subject_, char_buffer, - *array_builder_.array(), - array_builder_.length()); + *parts_, + part_count_); } else { // Non-ASCII. joined_string = NewRawTwoByteString(character_count_); @@ -1745,8 +1662,8 @@ class ReplacementStringBuilder { uc16* char_buffer = seq->GetChars(); StringBuilderConcatHelper(*subject_, char_buffer, - *array_builder_.array(), - array_builder_.length()); + *parts_, + part_count_); } return joined_string; } @@ -1759,14 +1676,8 @@ class ReplacementStringBuilder { character_count_ += by; } - Handle GetParts() { - Handle result = - Factory::NewJSArrayWithElements(array_builder_.array()); - result->set_length(Smi::FromInt(array_builder_.length())); - return result; - } - private: + Handle NewRawAsciiString(int size) { CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(size), String); } @@ -1779,12 +1690,14 @@ class ReplacementStringBuilder { void AddElement(Object* element) { ASSERT(element->IsSmi() || element->IsString()); - ASSERT(array_builder_.capacity() > array_builder_.length()); - array_builder_.Add(element); + ASSERT(parts_->length() > part_count_); + parts_->set(part_count_, element); + part_count_++; } - FixedArrayBuilder array_builder_; Handle subject_; + Handle parts_; + int part_count_; int character_count_; bool is_ascii_; }; @@ -2192,6 +2105,7 @@ static Object* Runtime_StringReplaceRegExpWithString(Arguments args) { } + // Cap on the maximal shift in the Boyer-Moore implementation. By setting a // limit, we can fix the size of tables. static const int kBMMaxShift = 0xff; @@ -2955,468 +2869,6 @@ static Object* Runtime_StringMatch(Arguments args) { } -// Two smis before and after the match, for very long strings. -const int kMaxBuilderEntriesPerRegExpMatch = 5; - - -static void SetLastMatchInfoNoCaptures(Handle subject, - Handle last_match_info, - int match_start, - int match_end) { - // Fill last_match_info with a single capture. - last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead); - AssertNoAllocation no_gc; - FixedArray* elements = FixedArray::cast(last_match_info->elements()); - RegExpImpl::SetLastCaptureCount(elements, 2); - RegExpImpl::SetLastInput(elements, *subject); - RegExpImpl::SetLastSubject(elements, *subject); - RegExpImpl::SetCapture(elements, 0, match_start); - RegExpImpl::SetCapture(elements, 1, match_end); -} - - -template -static bool SearchCharMultiple(Vector subject, - String* pattern, - schar pattern_char, - FixedArrayBuilder* builder, - int* match_pos) { - // Position of last match. - int pos = *match_pos; - int subject_length = subject.length(); - while (pos < subject_length) { - int match_end = pos + 1; - if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) { - *match_pos = pos; - return false; - } - int new_pos = SingleCharIndexOf(subject, pattern_char, match_end); - if (new_pos >= 0) { - // Match has been found. - if (new_pos > match_end) { - ReplacementStringBuilder::AddSubjectSlice(builder, match_end, new_pos); - } - pos = new_pos; - builder->Add(pattern); - } else { - break; - } - } - if (pos + 1 < subject_length) { - ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1, subject_length); - } - *match_pos = pos; - return true; -} - - -static bool SearchCharMultiple(Handle subject, - Handle pattern, - Handle last_match_info, - FixedArrayBuilder* builder) { - ASSERT(subject->IsFlat()); - ASSERT_EQ(1, pattern->length()); - uc16 pattern_char = pattern->Get(0); - // Treating position before first as initial "previous match position". - int match_pos = -1; - - for (;;) { // Break when search complete. - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); - AssertNoAllocation no_gc; - if (subject->IsAsciiRepresentation()) { - if (pattern_char > String::kMaxAsciiCharCode) { - break; - } - Vector subject_vector = subject->ToAsciiVector(); - char pattern_ascii_char = static_cast(pattern_char); - bool complete = SearchCharMultiple(subject_vector, - *pattern, - pattern_ascii_char, - builder, - &match_pos); - if (complete) break; - } else { - Vector subject_vector = subject->ToUC16Vector(); - bool complete = SearchCharMultiple(subject_vector, - *pattern, - pattern_char, - builder, - &match_pos); - if (complete) break; - } - } - - if (match_pos >= 0) { - SetLastMatchInfoNoCaptures(subject, - last_match_info, - match_pos, - match_pos + 1); - return true; - } - return false; // No matches at all. -} - - -template -static bool SearchStringMultiple(Vector subject, - String* pattern, - Vector pattern_string, - FixedArrayBuilder* builder, - int* match_pos) { - int pos = *match_pos; - int subject_length = subject.length(); - int pattern_length = pattern_string.length(); - int max_search_start = subject_length - pattern_length; - bool is_ascii = (sizeof(schar) == 1); - StringSearchStrategy strategy = - InitializeStringSearch(pattern_string, is_ascii); - switch (strategy) { - case SEARCH_FAIL: return false; - case SEARCH_SHORT: - while (pos <= max_search_start) { - if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) { - *match_pos = pos; - return false; - } - // Position of end of previous match. - int match_end = pos + pattern_length; - int new_pos = SimpleIndexOf(subject, pattern_string, match_end); - if (new_pos >= 0) { - // A match. - if (new_pos > match_end) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - new_pos); - } - pos = new_pos; - builder->Add(pattern); - } else { - break; - } - } - break; - case SEARCH_LONG: - while (pos <= max_search_start) { - if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) { - *match_pos = pos; - return false; - } - int new_pos = ComplexIndexOf(subject, - pattern_string, - pos + pattern_length); - if (new_pos >= 0) { - // A match has been found. - if (new_pos > pos) { - ReplacementStringBuilder::AddSubjectSlice(builder, pos, new_pos); - } - pos = new_pos; - builder->Add(pattern); - } else { - break; - } - } - break; - } - if (pos < max_search_start) { - ReplacementStringBuilder::AddSubjectSlice(builder, - pos + pattern_length, - subject_length); - } - *match_pos = pos; - return true; -} - - -static bool SearchStringMultiple(Handle subject, - Handle pattern, - Handle last_match_info, - FixedArrayBuilder* builder) { - ASSERT(subject->IsFlat()); - ASSERT(pattern->IsFlat()); - ASSERT(pattern->length() > 1); - - // Treating as if a previous match was before first character. - int match_pos = -pattern->length(); - - for (;;) { // Break when search complete. - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); - AssertNoAllocation no_gc; - if (subject->IsAsciiRepresentation()) { - Vector subject_vector = subject->ToAsciiVector(); - if (pattern->IsAsciiRepresentation()) { - if (SearchStringMultiple(subject_vector, - *pattern, - pattern->ToAsciiVector(), - builder, - &match_pos)) break; - } else { - if (SearchStringMultiple(subject_vector, - *pattern, - pattern->ToUC16Vector(), - builder, - &match_pos)) break; - } - } else { - Vector subject_vector = subject->ToUC16Vector(); - if (pattern->IsAsciiRepresentation()) { - if (SearchStringMultiple(subject_vector, - *pattern, - pattern->ToAsciiVector(), - builder, - &match_pos)) break; - } else { - if (SearchStringMultiple(subject_vector, - *pattern, - pattern->ToUC16Vector(), - builder, - &match_pos)) break; - } - } - } - - if (match_pos >= 0) { - SetLastMatchInfoNoCaptures(subject, - last_match_info, - match_pos, - match_pos + pattern->length()); - return true; - } - return false; // No matches at all. -} - - -static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple( - Handle subject, - Handle regexp, - Handle last_match_array, - FixedArrayBuilder* builder) { - ASSERT(subject->IsFlat()); - int match_start = -1; - int match_end = 0; - int pos = 0; - int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); - if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; - - OffsetsVector registers(required_registers); - Vector register_vector(registers.vector(), registers.length()); - int subject_length = subject->length(); - - for (;;) { // Break on failure, return on exception. - RegExpImpl::IrregexpResult result = - RegExpImpl::IrregexpExecOnce(regexp, - subject, - pos, - register_vector); - if (result == RegExpImpl::RE_SUCCESS) { - match_start = register_vector[0]; - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); - if (match_end < match_start) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - match_start); - } - match_end = register_vector[1]; - HandleScope loop_scope; - builder->Add(*Factory::NewSubString(subject, match_start, match_end)); - if (match_start != match_end) { - pos = match_end; - } else { - pos = match_end + 1; - if (pos > subject_length) break; - } - } else if (result == RegExpImpl::RE_FAILURE) { - break; - } else { - ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); - return result; - } - } - - if (match_start >= 0) { - if (match_end < subject_length) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - subject_length); - } - SetLastMatchInfoNoCaptures(subject, - last_match_array, - match_start, - match_end); - return RegExpImpl::RE_SUCCESS; - } else { - return RegExpImpl::RE_FAILURE; // No matches at all. - } -} - - -static RegExpImpl::IrregexpResult SearchRegExpMultiple( - Handle subject, - Handle regexp, - Handle last_match_array, - FixedArrayBuilder* builder) { - - ASSERT(subject->IsFlat()); - int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); - if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; - - OffsetsVector registers(required_registers); - Vector register_vector(registers.vector(), registers.length()); - - RegExpImpl::IrregexpResult result = - RegExpImpl::IrregexpExecOnce(regexp, - subject, - 0, - register_vector); - - int capture_count = regexp->CaptureCount(); - int subject_length = subject->length(); - - // Position to search from. - int pos = 0; - // End of previous match. Differs from pos if match was empty. - int match_end = 0; - if (result == RegExpImpl::RE_SUCCESS) { - // Need to keep a copy of the previous match for creating last_match_info - // at the end, so we have two vectors that we swap between. - OffsetsVector registers2(required_registers); - Vector prev_register_vector(registers2.vector(), registers2.length()); - - do { - int match_start = register_vector[0]; - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); - if (match_end < match_start) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - match_start); - } - match_end = register_vector[1]; - - { - // Avoid accumulating new handles inside loop. - HandleScope temp_scope; - // Arguments array to replace function is match, captures, index and - // subject, i.e., 3 + capture count in total. - Handle elements = Factory::NewFixedArray(3 + capture_count); - elements->set(0, *Factory::NewSubString(subject, - match_start, - match_end)); - for (int i = 1; i <= capture_count; i++) { - Handle substring = - Factory::NewSubString(subject, - register_vector[i * 2], - register_vector[i * 2 + 1]); - elements->set(i, *substring); - } - elements->set(capture_count + 1, Smi::FromInt(match_start)); - elements->set(capture_count + 2, *subject); - builder->Add(*Factory::NewJSArrayWithElements(elements)); - } - // Swap register vectors, so the last successful match is in - // prev_register_vector. - Vector tmp = prev_register_vector; - prev_register_vector = register_vector; - register_vector = tmp; - - if (match_end > match_start) { - pos = match_end; - } else { - pos = match_end + 1; - if (pos > subject_length) { - break; - } - } - - result = RegExpImpl::IrregexpExecOnce(regexp, - subject, - pos, - register_vector); - } while (result == RegExpImpl::RE_SUCCESS); - - if (result != RegExpImpl::RE_EXCEPTION) { - // Finished matching, with at least one match. - if (match_end < subject_length) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - subject_length); - } - - int last_match_capture_count = (capture_count + 1) * 2; - int last_match_array_size = - last_match_capture_count + RegExpImpl::kLastMatchOverhead; - last_match_array->EnsureSize(last_match_array_size); - AssertNoAllocation no_gc; - FixedArray* elements = FixedArray::cast(last_match_array->elements()); - RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count); - RegExpImpl::SetLastSubject(elements, *subject); - RegExpImpl::SetLastInput(elements, *subject); - for (int i = 0; i < last_match_capture_count; i++) { - RegExpImpl::SetCapture(elements, i, prev_register_vector[i]); - } - return RegExpImpl::RE_SUCCESS; - } - } - // No matches at all, return failure or exception result directly. - return result; -} - - -static Object* Runtime_RegExpExecMultiple(Arguments args) { - ASSERT(args.length() == 4); - HandleScope handles; - - CONVERT_ARG_CHECKED(String, subject, 1); - if (!subject->IsFlat()) { FlattenString(subject); } - CONVERT_ARG_CHECKED(JSRegExp, regexp, 0); - CONVERT_ARG_CHECKED(JSArray, last_match_info, 2); - CONVERT_ARG_CHECKED(JSArray, result_array, 3); - - ASSERT(last_match_info->HasFastElements()); - ASSERT(regexp->GetFlags().is_global()); - Handle result_elements; - if (result_array->HasFastElements()) { - result_elements = - Handle(FixedArray::cast(result_array->elements())); - } else { - result_elements = Factory::NewFixedArrayWithHoles(16); - } - FixedArrayBuilder builder(result_elements); - - if (regexp->TypeTag() == JSRegExp::ATOM) { - Handle pattern( - String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); - int pattern_length = pattern->length(); - if (pattern_length == 1) { - if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) { - return *builder.ToJSArray(result_array); - } - return Heap::null_value(); - } - - if (!pattern->IsFlat()) FlattenString(pattern); - if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) { - return *builder.ToJSArray(result_array); - } - return Heap::null_value(); - } - - ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); - - RegExpImpl::IrregexpResult result; - if (regexp->CaptureCount() == 0) { - result = SearchRegExpNoCaptureMultiple(subject, - regexp, - last_match_info, - &builder); - } else { - result = SearchRegExpMultiple(subject, regexp, last_match_info, &builder); - } - if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array); - if (result == RegExpImpl::RE_FAILURE) return Heap::null_value(); - ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); - return Failure::Exception(); -} - - static Object* Runtime_NumberToRadixString(Arguments args) { NoHandleAllocation ha; ASSERT(args.length() == 2); diff --git a/deps/v8/src/runtime.h b/deps/v8/src/runtime.h index 42af3df88a..4175902c45 100644 --- a/deps/v8/src/runtime.h +++ b/deps/v8/src/runtime.h @@ -153,7 +153,6 @@ namespace internal { /* Regular expressions */ \ F(RegExpCompile, 3, 1) \ F(RegExpExec, 4, 1) \ - F(RegExpExecMultiple, 4, 1) \ \ /* Strings */ \ F(StringCharCodeAt, 2, 1) \ diff --git a/deps/v8/src/string.js b/deps/v8/src/string.js index f4489efa12..ca438fdde5 100644 --- a/deps/v8/src/string.js +++ b/deps/v8/src/string.js @@ -405,91 +405,97 @@ function addCaptureString(builder, matchInfo, index) { builder.addSpecialSlice(start, end); }; -// TODO(lrn): This array will survive indefinitely if replace is never -// called again. However, it will be empty, since the contents are cleared -// in the finally block. -var reusableReplaceArray = $Array(16); // Helper function for replacing regular expressions with the result of a -// function application in String.prototype.replace. +// function application in String.prototype.replace. The function application +// must be interleaved with the regexp matching (contrary to ECMA-262 +// 15.5.4.11) to mimic SpiderMonkey and KJS behavior when the function uses +// the static properties of the RegExp constructor. Example: +// 'abcd'.replace(/(.)/g, function() { return RegExp.$1; } +// should be 'abcd' and not 'dddd' (or anything else). function StringReplaceRegExpWithFunction(subject, regexp, replace) { + var matchInfo = DoRegExpExec(regexp, subject, 0); + if (IS_NULL(matchInfo)) return subject; + + var result = new ReplaceResultBuilder(subject); + // There's at least one match. If the regexp is global, we have to loop + // over all matches. The loop is not in C++ code here like the one in + // RegExp.prototype.exec, because of the interleaved function application. + // Unfortunately, that means this code is nearly duplicated, here and in + // jsregexp.cc. if (regexp.global) { - var resultArray = reusableReplaceArray; - if (resultArray) { - reusableReplaceArray = null; + var previous = 0; + var startOfMatch; + if (NUMBER_OF_CAPTURES(matchInfo) == 2) { + // Both branches contain essentially the same loop except for the call + // to the replace function. The branch is put outside of the loop for + // speed + do { + startOfMatch = matchInfo[CAPTURE0]; + result.addSpecialSlice(previous, startOfMatch); + previous = matchInfo[CAPTURE1]; + var match = SubString(subject, startOfMatch, previous); + // Don't call directly to avoid exposing the built-in global object. + result.add(replace.call(null, match, startOfMatch, subject)); + // Can't use matchInfo any more from here, since the function could + // overwrite it. + // Continue with the next match. + // Increment previous if we matched an empty string, as per ECMA-262 + // 15.5.4.10. + if (previous == startOfMatch) { + // Add the skipped character to the output, if any. + if (previous < subject.length) { + result.addSpecialSlice(previous, previous + 1); + } + previous++; + // Per ECMA-262 15.10.6.2, if the previous index is greater than the + // string length, there is no match + if (previous > subject.length) { + return result.generate(); + } + } + matchInfo = DoRegExpExec(regexp, subject, previous); + } while (!IS_NULL(matchInfo)); } else { - // Inside a nested replace (replace called from the replacement function - // of another replace) or we have failed to set the reusable array - // back due to an exception in a replacement function. Create a new - // array to use in the future, or until the original is written back. - resultArray = $Array(16); - } - try { - // Must handle exceptions thrown by the replace functions correctly, - // including unregistering global regexps. - var res = %RegExpExecMultiple(regexp, - subject, - lastMatchInfo, - resultArray); - regexp.lastIndex = 0; - if (IS_NULL(res)) { - // No matches at all. - return subject; - } - var len = res.length; - var i = 0; - if (NUMBER_OF_CAPTURES(lastMatchInfo) == 2) { - var match_start = 0; - while (i < len) { - var elem = res[i]; - if (%_IsSmi(elem)) { - if (elem > 0) { - match_start = (elem >> 11) + (elem & 0x7ff); - } else { - match_start = res[++i] - elem; - } - } else { - var func_result = replace.call(null, elem, match_start, subject); - if (!IS_STRING(func_result)) func_result = TO_STRING(func_result); - res[i] = func_result; - match_start += elem.length; + do { + startOfMatch = matchInfo[CAPTURE0]; + result.addSpecialSlice(previous, startOfMatch); + previous = matchInfo[CAPTURE1]; + result.add(ApplyReplacementFunction(replace, matchInfo, subject)); + // Can't use matchInfo any more from here, since the function could + // overwrite it. + // Continue with the next match. + // Increment previous if we matched an empty string, as per ECMA-262 + // 15.5.4.10. + if (previous == startOfMatch) { + // Add the skipped character to the output, if any. + if (previous < subject.length) { + result.addSpecialSlice(previous, previous + 1); } - i++; - } - } else { - while (i < len) { - var elem = res[i]; - if (!%_IsSmi(elem)) { - // elem must be an Array. - // Use the apply argument as backing for global RegExp properties. - lastMatchInfoOverride = elem; - var func_result = replace.apply(null, elem); - if (!IS_STRING(func_result)) func_result = TO_STRING(func_result); - res[i] = func_result; + previous++; + // Per ECMA-262 15.10.6.2, if the previous index is greater than the + // string length, there is no match + if (previous > subject.length) { + return result.generate(); } - i++; } - } - var result = new ReplaceResultBuilder(subject, res); - return result.generate(); - } finally { - lastMatchInfoOverride = null; - resultArray.length = 0; - reusableReplaceArray = resultArray; + matchInfo = DoRegExpExec(regexp, subject, previous); + } while (!IS_NULL(matchInfo)); } + + // Tack on the final right substring after the last match. + result.addSpecialSlice(previous, subject.length); + } else { // Not a global regexp, no need to loop. - var matchInfo = DoRegExpExec(regexp, subject, 0); - if (IS_NULL(matchInfo)) return subject; - - var result = new ReplaceResultBuilder(subject); result.addSpecialSlice(0, matchInfo[CAPTURE0]); var endOfMatch = matchInfo[CAPTURE1]; result.add(ApplyReplacementFunction(replace, matchInfo, subject)); // Can't use matchInfo any more from here, since the function could // overwrite it. result.addSpecialSlice(endOfMatch, subject.length); - return result.generate(); } + + return result.generate(); } @@ -888,11 +894,8 @@ function StringSup() { // ReplaceResultBuilder support. function ReplaceResultBuilder(str) { - if (%_ArgumentsLength() > 1) { - this.elements = %_Arguments(1); - } else { - this.elements = new $Array(); - } + this.__proto__ = void 0; + this.elements = new $Array(); this.special_string = str; } diff --git a/deps/v8/src/type-info-inl.h b/deps/v8/src/type-info.cc similarity index 95% rename from deps/v8/src/type-info-inl.h rename to deps/v8/src/type-info.cc index 90d3f55f9b..b1bde599f4 100644 --- a/deps/v8/src/type-info-inl.h +++ b/deps/v8/src/type-info.cc @@ -25,9 +25,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#ifndef V8_TYPE_INFO_INL_H_ -#define V8_TYPE_INFO_INL_H_ - +#include "v8.h" #include "type-info.h" #include "objects-inl.h" @@ -51,5 +49,3 @@ TypeInfo TypeInfo::TypeFromValue(Handle value) { } } // namespace v8::internal - -#endif // V8_TYPE_INFO_INL_H_ diff --git a/deps/v8/src/type-info.h b/deps/v8/src/type-info.h index 15bc128023..1d82634092 100644 --- a/deps/v8/src/type-info.h +++ b/deps/v8/src/type-info.h @@ -130,7 +130,7 @@ class TypeInfo { return false; } - static inline TypeInfo TypeFromValue(Handle value); + static TypeInfo TypeFromValue(Handle value); inline bool IsUnknown() { return type_ == kUnknownType; diff --git a/deps/v8/src/v8-counters.h b/deps/v8/src/v8-counters.h index a5f3594ca7..bd671a13fe 100644 --- a/deps/v8/src/v8-counters.h +++ b/deps/v8/src/v8-counters.h @@ -166,6 +166,7 @@ namespace internal { SC(generic_binary_stub_calls_regs, V8.GenericBinaryStubCallsRegs) \ SC(string_add_runtime, V8.StringAddRuntime) \ SC(string_add_native, V8.StringAddNative) \ + SC(string_add_runtime_ext_to_ascii, V8.StringAddRuntimeExtToAscii) \ SC(sub_string_runtime, V8.SubStringRuntime) \ SC(sub_string_native, V8.SubStringNative) \ SC(string_compare_native, V8.StringCompareNative) \ diff --git a/deps/v8/src/version.cc b/deps/v8/src/version.cc index e3d0887552..9d1aa72d5b 100644 --- a/deps/v8/src/version.cc +++ b/deps/v8/src/version.cc @@ -33,9 +33,9 @@ // NOTE these macros are used by the SCons build script so their names // cannot be changed without changing the SCons build script. #define MAJOR_VERSION 2 -#define MINOR_VERSION 1 -#define BUILD_NUMBER 10 -#define PATCH_LEVEL 0 +#define MINOR_VERSION 2 +#define BUILD_NUMBER 0 +#define PATCH_LEVEL 3 #define CANDIDATE_VERSION false // Define SONAME to have the SCons build the put a specific SONAME into the diff --git a/deps/v8/test/cctest/test-strings.cc b/deps/v8/test/cctest/test-strings.cc index 59a40af2a6..a87398740b 100644 --- a/deps/v8/test/cctest/test-strings.cc +++ b/deps/v8/test/cctest/test-strings.cc @@ -323,6 +323,7 @@ TEST(Utf8Conversion) { 0xE3, 0x81, 0x85, 0x00}; // The number of bytes expected to be written for each length const int lengths[12] = {0, 0, 2, 3, 3, 3, 6, 7, 7, 7, 10, 11}; + const int charLengths[12] = {0, 0, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5}; v8::Handle mixed = v8::String::New(mixed_string, 5); CHECK_EQ(10, mixed->Utf8Length()); // Try encoding the string with all capacities @@ -332,8 +333,10 @@ TEST(Utf8Conversion) { // Clear the buffer before reusing it for (int j = 0; j < 11; j++) buffer[j] = kNoChar; - int written = mixed->WriteUtf8(buffer, i); + int charsWritten; + int written = mixed->WriteUtf8(buffer, i, &charsWritten); CHECK_EQ(lengths[i], written); + CHECK_EQ(charLengths[i], charsWritten); // Check that the contents are correct for (int j = 0; j < lengths[i]; j++) CHECK_EQ(as_utf8[j], static_cast(buffer[j])); diff --git a/deps/v8/tools/gyp/v8.gyp b/deps/v8/tools/gyp/v8.gyp index d5bb0cb764..7d0e699528 100644 --- a/deps/v8/tools/gyp/v8.gyp +++ b/deps/v8/tools/gyp/v8.gyp @@ -388,7 +388,7 @@ '../../src/token.h', '../../src/top.cc', '../../src/top.h', - '../../src/type-info-inl.h', + '../../src/type-info.cc', '../../src/type-info.h', '../../src/unicode-inl.h', '../../src/unicode.cc', diff --git a/deps/v8/tools/visual_studio/v8_base.vcproj b/deps/v8/tools/visual_studio/v8_base.vcproj index 58bf92f8ff..5fa08f7038 100644 --- a/deps/v8/tools/visual_studio/v8_base.vcproj +++ b/deps/v8/tools/visual_studio/v8_base.vcproj @@ -945,7 +945,7 @@ >