// Copyright 2010 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef V8_PARSER_H_ #define V8_PARSER_H_ #include "allocation.h" #include "ast.h" #include "scanner.h" #include "scopes.h" namespace v8 { namespace internal { class CompilationInfo; class FuncNameInferrer; class ParserLog; class PositionStack; class Target; class TemporaryScope; template class ZoneListWrapper; class ParserMessage : public Malloced { public: ParserMessage(Scanner::Location loc, const char* message, Vector args) : loc_(loc), message_(message), args_(args) { } ~ParserMessage(); Scanner::Location location() { return loc_; } const char* message() { return message_; } Vector args() { return args_; } private: Scanner::Location loc_; const char* message_; Vector args_; }; class FunctionEntry BASE_EMBEDDED { public: explicit FunctionEntry(Vector backing) : backing_(backing) { } FunctionEntry() : backing_(Vector::empty()) { } int start_pos() { return backing_[kStartPosOffset]; } void set_start_pos(int value) { backing_[kStartPosOffset] = value; } int end_pos() { return backing_[kEndPosOffset]; } void set_end_pos(int value) { backing_[kEndPosOffset] = value; } int literal_count() { return backing_[kLiteralCountOffset]; } void set_literal_count(int value) { backing_[kLiteralCountOffset] = value; } int property_count() { return backing_[kPropertyCountOffset]; } void set_property_count(int value) { backing_[kPropertyCountOffset] = value; } bool is_valid() { return backing_.length() > 0; } static const int kSize = 4; private: Vector backing_; static const int kStartPosOffset = 0; static const int kEndPosOffset = 1; static const int kLiteralCountOffset = 2; static const int kPropertyCountOffset = 3; }; class ScriptDataImpl : public ScriptData { public: explicit ScriptDataImpl(Vector store) : store_(store), owns_store_(true) { } // Create an empty ScriptDataImpl that is guaranteed to not satisfy // a SanityCheck. ScriptDataImpl() : store_(Vector()), owns_store_(false) { } virtual ~ScriptDataImpl(); virtual int Length(); virtual const char* Data(); virtual bool HasError(); void Initialize(); void ReadNextSymbolPosition(); FunctionEntry GetFunctionEntry(int start); int GetSymbolIdentifier(); bool SanityCheck(); Scanner::Location MessageLocation(); const char* BuildMessage(); Vector BuildArgs(); int symbol_count() { return (store_.length() > kHeaderSize) ? store_[kSymbolCountOffset] : 0; } // The following functions should only be called if SanityCheck has // returned true. bool has_error() { return store_[kHasErrorOffset]; } unsigned magic() { return store_[kMagicOffset]; } unsigned version() { return store_[kVersionOffset]; } static const unsigned kMagicNumber = 0xBadDead; static const unsigned kCurrentVersion = 5; static const int kMagicOffset = 0; static const int kVersionOffset = 1; static const int kHasErrorOffset = 2; static const int kFunctionsSizeOffset = 3; static const int kSymbolCountOffset = 4; static const int kSizeOffset = 5; static const int kHeaderSize = 6; // If encoding a message, the following positions are fixed. static const int kMessageStartPos = 0; static const int kMessageEndPos = 1; static const int kMessageArgCountPos = 2; static const int kMessageTextPos = 3; static const byte kNumberTerminator = 0x80u; private: Vector store_; unsigned char* symbol_data_; unsigned char* symbol_data_end_; int function_index_; bool owns_store_; unsigned Read(int position); unsigned* ReadAddress(int position); // Reads a number from the current symbols int ReadNumber(byte** source); ScriptDataImpl(const char* backing_store, int length) : store_(reinterpret_cast(const_cast(backing_store)), length / static_cast(sizeof(unsigned))), owns_store_(false) { ASSERT_EQ(0, static_cast( reinterpret_cast(backing_store) % sizeof(unsigned))); } // Read strings written by ParserRecorder::WriteString. static const char* ReadString(unsigned* start, int* chars); friend class ScriptData; }; // Record only functions. class PartialParserRecorder { public: PartialParserRecorder(); void LogFunction(int start, int end, int literals, int properties) { function_store_.Add(start); function_store_.Add(end); function_store_.Add(literals); function_store_.Add(properties); } void LogSymbol(int start, const char* symbol, int length) { } // Logs an error message and marks the log as containing an error. // Further logging will be ignored, and ExtractData will return a vector // representing the error only. void LogMessage(int start, int end, const char* message, const char* argument_opt) { Scanner::Location location(start, end); Vector arguments; if (argument_opt != NULL) { arguments = Vector(&argument_opt, 1); } this->LogMessage(location, message, arguments); } int function_position() { return function_store_.size(); } void LogMessage(Scanner::Location loc, const char* message, Vector args); Vector ExtractData(); void PauseRecording() { pause_count_++; is_recording_ = false; } void ResumeRecording() { ASSERT(pause_count_ > 0); if (--pause_count_ == 0) is_recording_ = !has_error(); } int symbol_position() { return 0; } int symbol_ids() { return 0; } protected: bool has_error() { return static_cast(preamble_[ScriptDataImpl::kHasErrorOffset]); } bool is_recording() { return is_recording_; } void WriteString(Vector str); Collector function_store_; unsigned preamble_[ScriptDataImpl::kHeaderSize]; bool is_recording_; int pause_count_; #ifdef DEBUG int prev_start_; #endif }; // Record both functions and symbols. class CompleteParserRecorder: public PartialParserRecorder { public: CompleteParserRecorder(); void LogSymbol(int start, Vector literal); void LogSymbol(int start, const char* symbol, int length) { LogSymbol(start, Vector(symbol, length)); } Vector ExtractData(); int symbol_position() { return symbol_store_.size(); } int symbol_ids() { return symbol_id_; } private: static int vector_hash(Vector string) { int hash = 0; for (int i = 0; i < string.length(); i++) { int c = string[i]; hash += c; hash += (hash << 10); hash ^= (hash >> 6); } return hash; } static bool vector_compare(void* a, void* b) { Vector* string1 = reinterpret_cast* >(a); Vector* string2 = reinterpret_cast* >(b); int length = string1->length(); if (string2->length() != length) return false; return memcmp(string1->start(), string2->start(), length) == 0; } // Write a non-negative number to the symbol store. void WriteNumber(int number); Collector symbol_store_; Collector > symbol_entries_; HashMap symbol_table_; int symbol_id_; }; class ParserApi { public: // Parses the source code represented by the compilation info and sets its // function literal. Returns false (and deallocates any allocated AST // nodes) if parsing failed. static bool Parse(CompilationInfo* info); // Generic preparser generating full preparse data. static ScriptDataImpl* PreParse(Handle source, unibrow::CharacterStream* stream, v8::Extension* extension); // Preparser that only does preprocessing that makes sense if only used // immediately after. static ScriptDataImpl* PartialPreParse(Handle source, unibrow::CharacterStream* stream, v8::Extension* extension); }; // ---------------------------------------------------------------------------- // REGEXP PARSING // A BuffferedZoneList is an automatically growing list, just like (and backed // by) a ZoneList, that is optimized for the case of adding and removing // a single element. The last element added is stored outside the backing list, // and if no more than one element is ever added, the ZoneList isn't even // allocated. // Elements must not be NULL pointers. template class BufferedZoneList { public: BufferedZoneList() : list_(NULL), last_(NULL) {} // Adds element at end of list. This element is buffered and can // be read using last() or removed using RemoveLast until a new Add or until // RemoveLast or GetList has been called. void Add(T* value) { if (last_ != NULL) { if (list_ == NULL) { list_ = new ZoneList(initial_size); } list_->Add(last_); } last_ = value; } T* last() { ASSERT(last_ != NULL); return last_; } T* RemoveLast() { ASSERT(last_ != NULL); T* result = last_; if ((list_ != NULL) && (list_->length() > 0)) last_ = list_->RemoveLast(); else last_ = NULL; return result; } T* Get(int i) { ASSERT((0 <= i) && (i < length())); if (list_ == NULL) { ASSERT_EQ(0, i); return last_; } else { if (i == list_->length()) { ASSERT(last_ != NULL); return last_; } else { return list_->at(i); } } } void Clear() { list_ = NULL; last_ = NULL; } int length() { int length = (list_ == NULL) ? 0 : list_->length(); return length + ((last_ == NULL) ? 0 : 1); } ZoneList* GetList() { if (list_ == NULL) { list_ = new ZoneList(initial_size); } if (last_ != NULL) { list_->Add(last_); last_ = NULL; } return list_; } private: ZoneList* list_; T* last_; }; // Accumulates RegExp atoms and assertions into lists of terms and alternatives. class RegExpBuilder: public ZoneObject { public: RegExpBuilder(); void AddCharacter(uc16 character); // "Adds" an empty expression. Does nothing except consume a // following quantifier void AddEmpty(); void AddAtom(RegExpTree* tree); void AddAssertion(RegExpTree* tree); void NewAlternative(); // '|' void AddQuantifierToAtom(int min, int max, RegExpQuantifier::Type type); RegExpTree* ToRegExp(); private: void FlushCharacters(); void FlushText(); void FlushTerms(); bool pending_empty_; ZoneList* characters_; BufferedZoneList terms_; BufferedZoneList text_; BufferedZoneList alternatives_; #ifdef DEBUG enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; #define LAST(x) last_added_ = x; #else #define LAST(x) #endif }; class RegExpParser { public: RegExpParser(FlatStringReader* in, Handle* error, bool multiline_mode); static bool ParseRegExp(FlatStringReader* input, bool multiline, RegExpCompileData* result); RegExpTree* ParsePattern(); RegExpTree* ParseDisjunction(); RegExpTree* ParseGroup(); RegExpTree* ParseCharacterClass(); // Parses a {...,...} quantifier and stores the range in the given // out parameters. bool ParseIntervalQuantifier(int* min_out, int* max_out); // Parses and returns a single escaped character. The character // must not be 'b' or 'B' since they are usually handle specially. uc32 ParseClassCharacterEscape(); // Checks whether the following is a length-digit hexadecimal number, // and sets the value if it is. bool ParseHexEscape(int length, uc32* value); uc32 ParseControlLetterEscape(); uc32 ParseOctalLiteral(); // Tries to parse the input as a back reference. If successful it // stores the result in the output parameter and returns true. If // it fails it will push back the characters read so the same characters // can be reparsed. bool ParseBackReferenceIndex(int* index_out); CharacterRange ParseClassAtom(uc16* char_class); RegExpTree* ReportError(Vector message); void Advance(); void Advance(int dist); void Reset(int pos); // Reports whether the pattern might be used as a literal search string. // Only use if the result of the parse is a single atom node. bool simple(); bool contains_anchor() { return contains_anchor_; } void set_contains_anchor() { contains_anchor_ = true; } int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } int position() { return next_pos_ - 1; } bool failed() { return failed_; } static const int kMaxCaptures = 1 << 16; static const uc32 kEndMarker = (1 << 21); private: enum SubexpressionType { INITIAL, CAPTURE, // All positive values represent captures. POSITIVE_LOOKAHEAD, NEGATIVE_LOOKAHEAD, GROUPING }; class RegExpParserState : public ZoneObject { public: RegExpParserState(RegExpParserState* previous_state, SubexpressionType group_type, int disjunction_capture_index) : previous_state_(previous_state), builder_(new RegExpBuilder()), group_type_(group_type), disjunction_capture_index_(disjunction_capture_index) {} // Parser state of containing expression, if any. RegExpParserState* previous_state() { return previous_state_; } bool IsSubexpression() { return previous_state_ != NULL; } // RegExpBuilder building this regexp's AST. RegExpBuilder* builder() { return builder_; } // Type of regexp being parsed (parenthesized group or entire regexp). SubexpressionType group_type() { return group_type_; } // Index in captures array of first capture in this sub-expression, if any. // Also the capture index of this sub-expression itself, if group_type // is CAPTURE. int capture_index() { return disjunction_capture_index_; } private: // Linked list implementation of stack of states. RegExpParserState* previous_state_; // Builder for the stored disjunction. RegExpBuilder* builder_; // Stored disjunction type (capture, look-ahead or grouping), if any. SubexpressionType group_type_; // Stored disjunction's capture index (if any). int disjunction_capture_index_; }; uc32 current() { return current_; } bool has_more() { return has_more_; } bool has_next() { return next_pos_ < in()->length(); } uc32 Next(); FlatStringReader* in() { return in_; } void ScanForCaptures(); Handle* error_; ZoneList* captures_; FlatStringReader* in_; uc32 current_; int next_pos_; // The capture count is only valid after we have scanned for captures. int capture_count_; bool has_more_; bool multiline_; bool simple_; bool contains_anchor_; bool is_scanned_for_captures_; bool failed_; }; // ---------------------------------------------------------------------------- // JAVASCRIPT PARSING class Parser { public: Parser(Handle