// Copyright 2011 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_JSON_PARSER_H_ #define V8_JSON_PARSER_H_ #include "src/char-predicates.h" #include "src/conversions.h" #include "src/debug/debug.h" #include "src/factory.h" #include "src/messages.h" #include "src/scanner.h" #include "src/token.h" #include "src/transitions.h" #include "src/types.h" namespace v8 { namespace internal { enum ParseElementResult { kElementFound, kElementNotFound, kNullHandle }; // A simple json parser. template class JsonParser BASE_EMBEDDED { public: MUST_USE_RESULT static MaybeHandle Parse(Handle source) { return JsonParser(source).ParseJson(); } static const int kEndOfString = -1; private: explicit JsonParser(Handle source) : source_(source), source_length_(source->length()), isolate_(source->map()->GetHeap()->isolate()), factory_(isolate_->factory()), object_constructor_(isolate_->native_context()->object_function(), isolate_), position_(-1) { source_ = String::Flatten(source_); pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED; // Optimized fast case where we only have Latin1 characters. if (seq_one_byte) { seq_source_ = Handle::cast(source_); } } // Parse a string containing a single JSON value. MaybeHandle ParseJson(); inline void Advance() { position_++; if (position_ >= source_length_) { c0_ = kEndOfString; } else if (seq_one_byte) { c0_ = seq_source_->SeqOneByteStringGet(position_); } else { c0_ = source_->Get(position_); } } // The JSON lexical grammar is specified in the ECMAScript 5 standard, // section 15.12.1.1. The only allowed whitespace characters between tokens // are tab, carriage-return, newline and space. inline void AdvanceSkipWhitespace() { do { Advance(); } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r'); } inline void SkipWhitespace() { while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') { Advance(); } } inline uc32 AdvanceGetChar() { Advance(); return c0_; } // Checks that current charater is c. // If so, then consume c and skip whitespace. inline bool MatchSkipWhiteSpace(uc32 c) { if (c0_ == c) { AdvanceSkipWhitespace(); return true; } return false; } // A JSON string (production JSONString) is subset of valid JavaScript string // literals. The string must only be double-quoted (not single-quoted), and // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. Handle ParseJsonString() { return ScanJsonString(); } bool ParseJsonString(Handle expected) { int length = expected->length(); if (source_->length() - position_ - 1 > length) { DisallowHeapAllocation no_gc; String::FlatContent content = expected->GetFlatContent(); if (content.IsOneByte()) { DCHECK_EQ('"', c0_); const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1; const uint8_t* expected_chars = content.ToOneByteVector().start(); for (int i = 0; i < length; i++) { uint8_t c0 = input_chars[i]; if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') { return false; } } if (input_chars[length] == '"') { position_ = position_ + length + 1; AdvanceSkipWhitespace(); return true; } } } return false; } Handle ParseJsonInternalizedString() { return ScanJsonString(); } template Handle ScanJsonString(); // Creates a new string and copies prefix[start..end] into the beginning // of it. Then scans the rest of the string, adding characters after the // prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char. template Handle SlowScanJsonString(Handle prefix, int start, int end); // A JSON number (production JSONNumber) is a subset of the valid JavaScript // decimal number literals. // It includes an optional minus sign, must have at least one // digit before and after a decimal point, may not have prefixed zeros (unless // the integer part is zero), and may include an exponent part (e.g., "e-10"). // Hexadecimal and octal numbers are not allowed. Handle ParseJsonNumber(); // Parse a single JSON value from input (grammar production JSONValue). // A JSON value is either a (double-quoted) string literal, a number literal, // one of "true", "false", or "null", or an object or array literal. Handle ParseJsonValue(); // Parse a JSON object literal (grammar production JSONObject). // An object literal is a squiggly-braced and comma separated sequence // (possibly empty) of key/value pairs, where the key is a JSON string // literal, the value is a JSON value, and the two are separated by a colon. // A JSON array doesn't allow numbers and identifiers as keys, like a // JavaScript array. Handle ParseJsonObject(); // Helper for ParseJsonObject. Parses the form "123": obj, which is recorded // as an element, not a property. ParseElementResult ParseElement(Handle json_object); // Parses a JSON array literal (grammar production JSONArray). An array // literal is a square-bracketed and comma separated sequence (possibly empty) // of JSON values. // A JSON array doesn't allow leaving out values from the sequence, nor does // it allow a terminal comma, like a JavaScript array does. Handle ParseJsonArray(); // Mark that a parsing error has happened at the current token, and // return a null handle. Primarily for readability. inline Handle ReportUnexpectedCharacter() { return Handle::null(); } inline Isolate* isolate() { return isolate_; } inline Factory* factory() { return factory_; } inline Handle object_constructor() { return object_constructor_; } static const int kInitialSpecialStringLength = 32; static const int kPretenureTreshold = 100 * 1024; private: Zone* zone() { return &zone_; } void CommitStateToJsonObject(Handle json_object, Handle map, ZoneList >* properties); Handle source_; int source_length_; Handle seq_source_; PretenureFlag pretenure_; Isolate* isolate_; Factory* factory_; Zone zone_; Handle object_constructor_; uc32 c0_; int position_; }; template MaybeHandle JsonParser::ParseJson() { // Advance to the first character (possibly EOS) AdvanceSkipWhitespace(); Handle result = ParseJsonValue(); if (result.is_null() || c0_ != kEndOfString) { // Some exception (for example stack overflow) is already pending. if (isolate_->has_pending_exception()) return Handle::null(); // Parse failed. Current character is the unexpected token. Factory* factory = this->factory(); MessageTemplate::Template message; Handle argument; switch (c0_) { case kEndOfString: message = MessageTemplate::kUnexpectedEOS; break; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': message = MessageTemplate::kUnexpectedTokenNumber; break; case '"': message = MessageTemplate::kUnexpectedTokenString; break; default: message = MessageTemplate::kUnexpectedToken; argument = factory->LookupSingleCharacterStringFromCode(c0_); break; } Handle