From f070d3bdea93d11dfa1dece3d15acc501e060109 Mon Sep 17 00:00:00 2001 From: Christian Date: Thu, 9 Oct 2014 15:57:49 +0200 Subject: [PATCH] Initial implementation of Solidity parser finished, not yet tested much. --- libsolidity/AST.h | 110 +++++++++++++++---- libsolidity/Parser.cpp | 233 +++++++++++++++++++++++++++++++++++++++- libsolidity/Parser.h | 10 ++ libsolidity/grammar.txt | 13 ++- test/solidityParser.cpp | 78 ++++++++++++++ 5 files changed, 416 insertions(+), 28 deletions(-) diff --git a/libsolidity/AST.h b/libsolidity/AST.h index dad257f23..9a4b95215 100644 --- a/libsolidity/AST.h +++ b/libsolidity/AST.h @@ -54,6 +54,8 @@ public: : m_location(_location) {} + virtual ~ASTNode() {} + Location getLocation() const { return m_location; } private: Location m_location; @@ -146,9 +148,7 @@ private: class TypeName : public ASTNode { public: - explicit TypeName(Location const& _location) - : ASTNode(_location) - {} + explicit TypeName(Location const& _location) : ASTNode(_location) {} }; /// any pre-defined type that is not a mapping @@ -192,15 +192,13 @@ private: class Statement : public ASTNode { public: - explicit Statement(Location const& _location) - : ASTNode(_location) - {} + explicit Statement(Location const& _location) : ASTNode(_location) {} }; class Block : public Statement { public: - explicit Block(Location const& _location, vecptr const& _statements) + Block(Location const& _location, vecptr const& _statements) : Statement(_location), m_statements(_statements) {} private: @@ -209,7 +207,12 @@ private: class IfStatement : public Statement { - +public: + IfStatement(Location const& _location, ptr const& _condition, + ptr const& _trueBody, ptr const& _falseBody) + : Statement(_location), m_condition(_condition), + m_trueBody(_trueBody), m_falseBody(_falseBody) + {} private: ptr m_condition; ptr m_trueBody; @@ -218,11 +221,17 @@ private: class BreakableStatement : public Statement { - +public: + BreakableStatement(Location const& _location) : 
Statement(_location) {} }; class WhileStatement : public BreakableStatement { +public: + WhileStatement(Location const& _location, ptr const& _condition, + ptr const& _body) + : BreakableStatement(_location), m_condition(_condition), m_body(_body) + {} private: ptr m_condition; ptr m_body; @@ -230,31 +239,42 @@ private: class Continue : public Statement { - +public: + Continue(Location const& _location) : Statement(_location) {} }; class Break : public Statement { - +public: + Break(Location const& _location) : Statement(_location) {} }; class Return : public Statement { +public: + Return(Location const& _location, ptr _expression) + : Statement(_location), m_expression(_expression) + {} private: ptr m_expression; }; -class VariableAssignment : public Statement +class VariableDefinition : public Statement { +public: + VariableDefinition(Location const& _location, ptr _variable, + ptr _value) + : Statement(_location), m_variable(_variable), m_value(_value) + {} private: ptr m_variable; - Token::Value m_assigmentOperator; - ptr m_rightHandSide; ///< can be missing + ptr m_value; ///< can be missing }; class Expression : public Statement { -private: +public: + Expression(Location const& _location) : Statement(_location) {} }; /// @} @@ -264,6 +284,12 @@ private: class Assignment : public Expression { +public: + Assignment(Location const& _location, ptr const& _leftHandSide, + Token::Value _assignmentOperator, ptr const& _rightHandSide) + : Expression(_location), m_leftHandSide(_leftHandSide), + m_assigmentOperator(_assignmentOperator), m_rightHandSide(_rightHandSide) + {} private: ptr m_leftHandSide; Token::Value m_assigmentOperator; @@ -272,31 +298,52 @@ private: class UnaryOperation : public Expression { +public: + UnaryOperation(Location const& _location, Token::Value _operator, + ptr const& _subExpression, bool _isPrefix) + : Expression(_location), m_operator(_operator), + m_subExpression(_subExpression), m_isPrefix(_isPrefix) + {} + private: Token::Value 
m_operator; ptr m_subExpression; - bool isPrefix; + bool m_isPrefix; }; class BinaryOperation : public Expression { +public: + BinaryOperation(Location const& _location, ptr const& _left, + Token::Value _operator, ptr const& _right) + : Expression(_location), m_left(_left), m_operator(_operator), m_right(_right) + {} private: ptr m_left; - ptr m_right; Token::Value m_operator; + ptr m_right; }; /// Can be ordinary function call, type cast or struct construction. class FunctionCall : public Expression { +public: + FunctionCall(Location const& _location, ptr const& _expression, + vecptr const& _arguments) + : Expression(_location), m_expression(_expression), m_arguments(_arguments) + {} private: - // if m_functionName is the name of a type, store the token directly - std::string m_functionName; // "in place" calls of return values are not possible for now + ptr m_expression; vecptr m_arguments; }; class MemberAccess : public Expression { +public: + MemberAccess(Location const& _location, ptr _expression, + std::string const& _memberName) + : Expression(_location), m_expression(_expression), m_memberName(_memberName) + {} private: ptr m_expression; std::string m_memberName; @@ -304,23 +351,48 @@ private: class IndexAccess : public Expression { +public: + IndexAccess(Location const& _location, ptr const& _base, + ptr const& _index) + : Expression(_location), m_base(_base), m_index(_index) + {} +private: ptr m_base; ptr m_index; }; class PrimaryExpression : public Expression { +public: + PrimaryExpression(Location const& _location) : Expression(_location) {} }; class Identifier : public PrimaryExpression { +public: + Identifier(Location const& _location, std::string const& _name) + : PrimaryExpression(_location), m_name(_name) {} private: std::string m_name; }; +class ElementaryTypeNameExpression : public PrimaryExpression +{ +public: + ElementaryTypeNameExpression(Location const& _location, Token::Value _type) + : PrimaryExpression(_location), m_type(_type) {} 
+private: + Token::Value m_type; +}; + class Literal : public PrimaryExpression { +public: + Literal(Location const& _location, Token::Value _token, std::string const& _value) + : PrimaryExpression(_location), m_token(_token), m_value(_value) + {} private: + Token::Value m_token; std::string m_value; }; diff --git a/libsolidity/Parser.cpp b/libsolidity/Parser.cpp index 2886b2c18..24c8e599e 100644 --- a/libsolidity/Parser.cpp +++ b/libsolidity/Parser.cpp @@ -231,12 +231,10 @@ ptr Parser::parseParameterList() ptr Parser::parseBlock() { - ASTNodeFactory nodeFactory(*this); expectToken(Token::LBRACE); vecptr statements; while (m_scanner->getCurrentToken() != Token::RBRACE) { - m_scanner->next(); statements.push_back(parseStatement()); } nodeFactory.markEndPosition(); @@ -246,6 +244,7 @@ ptr Parser::parseBlock() ptr Parser::parseStatement() { + ptr statement; switch (m_scanner->getCurrentToken()) { case Token::IF: @@ -254,12 +253,229 @@ ptr Parser::parseStatement() return parseWhileStatement(); case Token::LBRACE: return parseBlock(); + // starting from here, all statements must be terminated by a semicolon - case Token::CONTINUE: // all following - return + case Token::CONTINUE: + statement = ASTNodeFactory(*this).createNode(); + break; + case Token::BREAK: + statement = ASTNodeFactory(*this).createNode(); + break; + case Token::RETURN: + { + ASTNodeFactory nodeFactory(*this); + ptr expression; + if (m_scanner->next() != Token::SEMICOLON) { + expression = parseExpression(); + nodeFactory.setEndPositionFromNode(expression); + } + statement = nodeFactory.createNode(expression); + } + break; + default: + // distinguish between variable definition (and potentially assignment) and expressions + // (which include assignments to other expressions and pre-declared variables) + // We have a variable definition if we get a keyword that specifies a type name, or + // in the case of a user-defined type, we have two identifiers following each other. 
+ if (m_scanner->getCurrentToken() == Token::MAPPING || + m_scanner->getCurrentToken() == Token::VAR || + Token::IsElementaryTypeName(m_scanner->getCurrentToken()) || + (m_scanner->getCurrentToken() == Token::IDENTIFIER && + m_scanner->peek() == Token::IDENTIFIER)) { + statement = parseVariableDefinition(); + } else { + // "ordinary" expression + statement = parseExpression(); + } + } + expectToken(Token::SEMICOLON); + return statement; +} + +ptr Parser::parseIfStatement() +{ + ASTNodeFactory nodeFactory(*this); + expectToken(Token::IF); + expectToken(Token::LPAREN); + ptr condition = parseExpression(); + expectToken(Token::RPAREN); + ptr trueBody = parseStatement(); + ptr falseBody; + if (m_scanner->getCurrentToken() == Token::ELSE) { + m_scanner->next(); + falseBody = parseStatement(); + nodeFactory.setEndPositionFromNode(falseBody); + } else { + nodeFactory.setEndPositionFromNode(trueBody); + } + return nodeFactory.createNode(condition, trueBody, falseBody); +} + +ptr Parser::parseWhileStatement() +{ + ASTNodeFactory nodeFactory(*this); + expectToken(Token::WHILE); + expectToken(Token::LPAREN); + ptr condition = parseExpression(); + expectToken(Token::RPAREN); + ptr body = parseStatement(); + nodeFactory.setEndPositionFromNode(body); + return nodeFactory.createNode(condition, body); +} + +ptr Parser::parseVariableDefinition() +{ + ASTNodeFactory nodeFactory(*this); + ptr variable = parseVariableDeclaration(); + ptr value; + if (m_scanner->getCurrentToken() == Token::ASSIGN) { + m_scanner->next(); + value = parseExpression(); + nodeFactory.setEndPositionFromNode(value); + } else { + nodeFactory.setEndPositionFromNode(variable); + } + return nodeFactory.createNode(variable, value); +} + +ptr Parser::parseExpression() +{ + ASTNodeFactory nodeFactory(*this); + ptr expression = parseBinaryExpression(); + if (!Token::IsAssignmentOp(m_scanner->getCurrentToken())) + return expression; + + Token::Value assignmentOperator = expectAssignmentOperator(); + ptr rightHandSide 
= parseExpression(); + nodeFactory.setEndPositionFromNode(rightHandSide); + return nodeFactory.createNode(expression, assignmentOperator, rightHandSide); +} + +ptr Parser::parseBinaryExpression(int _minPrecedence) +{ + ASTNodeFactory nodeFactory(*this); + ptr expression = parseUnaryExpression(); + int precedence = Token::Precedence(m_scanner->getCurrentToken()); + for (; precedence >= _minPrecedence; --precedence) { + while (Token::Precedence(m_scanner->getCurrentToken()) == precedence) { + Token::Value op = m_scanner->getCurrentToken(); + m_scanner->next(); + ptr right = parseBinaryExpression(precedence + 1); + nodeFactory.setEndPositionFromNode(right); + expression = nodeFactory.createNode(expression, op, right); + } + } + return expression; +} + +ptr Parser::parseUnaryExpression() +{ + ASTNodeFactory nodeFactory(*this); + Token::Value token = m_scanner->getCurrentToken(); + if (Token::IsUnaryOp(token) || Token::IsCountOp(token)) { + // prefix expression + m_scanner->next(); + ptr subExpression = parseUnaryExpression(); + nodeFactory.setEndPositionFromNode(subExpression); + return nodeFactory.createNode(token, subExpression, true); + } else { + // potential postfix expression + ptr subExpression = parseLeftHandSideExpression(); + token = m_scanner->getCurrentToken(); + if (!Token::IsCountOp(token)) + return subExpression; + nodeFactory.markEndPosition(); + m_scanner->next(); + return nodeFactory.createNode(token, subExpression, false); + } +} + +ptr Parser::parseLeftHandSideExpression() +{ + ASTNodeFactory nodeFactory(*this); + ptr expression = parsePrimaryExpression(); + + while (true) { + switch (m_scanner->getCurrentToken()) { + case Token::LBRACK: + { + m_scanner->next(); + ptr index = parseExpression(); + nodeFactory.markEndPosition(); + expectToken(Token::RBRACK); + expression = nodeFactory.createNode(expression, index); + } + break; + case Token::PERIOD: + { + m_scanner->next(); + nodeFactory.markEndPosition(); + std::string memberName = 
expectIdentifier(); + expression = nodeFactory.createNode(expression, memberName); + } + break; + case Token::LPAREN: + { + m_scanner->next(); + vecptr arguments = parseFunctionCallArguments(); + nodeFactory.markEndPosition(); + expectToken(Token::RPAREN); + expression = nodeFactory.createNode(expression, arguments); + } + break; + default: + return expression; + } + } +} + +ptr Parser::parsePrimaryExpression() +{ + Token::Value token = m_scanner->getCurrentToken(); + switch (token) { + case Token::TRUE_LITERAL: + case Token::FALSE_LITERAL: + m_scanner->next(); + return ASTNodeFactory(*this).createNode(token, std::string()); + case Token::NUMBER: + case Token::STRING_LITERAL: + m_scanner->next(); + return ASTNodeFactory(*this).createNode(token, m_scanner->getCurrentLiteral()); + case Token::IDENTIFIER: + m_scanner->next(); + return ASTNodeFactory(*this).createNode(m_scanner->getCurrentLiteral()); + case Token::LPAREN: + { + m_scanner->next(); + ptr expression = parseExpression(); + expectToken(Token::RPAREN); + return expression; + } + default: + if (Token::IsElementaryTypeName(token)) { + // used for casts + m_scanner->next(); + return ASTNodeFactory(*this).createNode(token); + } else { + throwExpectationError("Expected primary expression."); + return ptr(); // this is not reached + } } } +vecptr Parser::parseFunctionCallArguments() +{ + vecptr arguments; + if (m_scanner->getCurrentToken() != Token::RPAREN) { + arguments.push_back(parseExpression()); + while (m_scanner->getCurrentToken() != Token::RPAREN) { + expectToken(Token::COMMA); + arguments.push_back(parseExpression()); + } + } + return arguments; +} + void Parser::expectToken(Token::Value _value) { if (m_scanner->getCurrentToken() != _value) @@ -267,6 +483,15 @@ void Parser::expectToken(Token::Value _value) m_scanner->next(); } +Token::Value Parser::expectAssignmentOperator() +{ + Token::Value op = m_scanner->getCurrentToken(); + if (!Token::IsAssignmentOp(op)) + throwExpectationError(std::string("Expected 
assignment operator")); + m_scanner->next(); + return op; +} + std::string Parser::expectIdentifier() { if (m_scanner->getCurrentToken() != Token::IDENTIFIER) diff --git a/libsolidity/Parser.h b/libsolidity/Parser.h index 65409a296..7036c3c20 100644 --- a/libsolidity/Parser.h +++ b/libsolidity/Parser.h @@ -53,12 +53,22 @@ private: ptr parseParameterList(); ptr parseBlock(); ptr parseStatement(); + ptr parseIfStatement(); + ptr parseWhileStatement(); + ptr parseVariableDefinition(); + ptr parseExpression(); + ptr parseBinaryExpression(int _minPrecedence = 4); + ptr parseUnaryExpression(); + ptr parseLeftHandSideExpression(); + ptr parsePrimaryExpression(); + vecptr parseFunctionCallArguments(); /// @} /// Helper functions /// @{ /// If current token value is not _value, throw exception otherwise advance token. void expectToken(Token::Value _value); + Token::Value expectAssignmentOperator(); std::string expectIdentifier(); void throwExpectationError(const std::string& _description); /// @} diff --git a/libsolidity/grammar.txt b/libsolidity/grammar.txt index 1946325f4..c0ab06074 100644 --- a/libsolidity/grammar.txt +++ b/libsolidity/grammar.txt @@ -15,18 +15,21 @@ TypeName = ElementaryTypeName | Identifier | Mapping Mapping = 'mapping' '(' ElementaryTypeName '=>' TypeName ')' Block = '{' Statement* '}' -Statement = IfStatement | WhileStatement | Continue | Break | Return | VariableAssignment | Expression ';' | Block +Statement = IfStatement | WhileStatement | Block | + ( Continue | Break | Return | VariableDefinition | Expression ) ';' IfStatement = 'if' '(' Expression ')' Statement ( 'else' Statement )? WhileStatement = 'while' '(' Expression ')' Statement Continue = 'continue' ';' Break = 'break' ';' Return = 'return' Expression? ';' -VariableAssignment = VariableDeclaration ( AssignmentOp Expression )? ';' +VariableDefinition = VariableDeclaration ( '=' Expression )? 
';' -Expression = Assignment | UnaryOperation | BinaryOperation | FunctionCall | IndexAccess | MemberAccess | PrimaryExpression +Expression = Assignment | UnaryOperation | BinaryOperation | FunctionCall | IndexAccess | + MemberAccess | PrimaryExpression +// The expression syntax is actually much more complicated Assignment = Expression (AssignmentOp Expression) -FunctionCall = Identifier '(' ( Expression ( ',' Expression )* ) ')' +FunctionCall = Expression '(' ( Expression ( ',' Expression )* ) ')' MemberAccess = Expression '.' Identifier IndexAccess = Expression '[' Expresison ']' -PrimaryExpression = Identifier | NumberLiteral | StringLiteral | '(' Expression ')' +PrimaryExpression = Identifier | NumberLiteral | StringLiteral | ElementaryTypeName | '(' Expression ')' diff --git a/test/solidityParser.cpp b/test/solidityParser.cpp index 91247a3b7..86d09f170 100644 --- a/test/solidityParser.cpp +++ b/test/solidityParser.cpp @@ -130,6 +130,84 @@ BOOST_AUTO_TEST_CASE(mapping_to_mapping_in_struct) BOOST_CHECK_NO_THROW(parseText(text)); } +BOOST_AUTO_TEST_CASE(variable_definition) +{ + char const* text = "contract test {\n" + " function fun(uint256 a) {\n" + " var b;\n" + " uint256 c;\n" + " mapping(address=>hash) d;\n" + " customtype varname;\n" + " }\n" + "}\n"; + BOOST_CHECK_NO_THROW(parseText(text)); +} + +BOOST_AUTO_TEST_CASE(variable_definition_with_initialization) +{ + char const* text = "contract test {\n" + " function fun(uint256 a) {\n" + " var b = 2;\n" + " uint256 c = 0x87;\n" + " mapping(address=>hash) d;\n" + " string name = \"Solidity\";" + " customtype varname;\n" + " }\n" + "}\n"; + BOOST_CHECK_NO_THROW(parseText(text)); +} + +BOOST_AUTO_TEST_CASE(operator_expression) +{ + char const* text = "contract test {\n" + " function fun(uint256 a) {\n" + " uint256 x = (1 + 4) || false && (1 - 12) + -9;\n" + " }\n" + "}\n"; + BOOST_CHECK_NO_THROW(parseText(text)); +} + +BOOST_AUTO_TEST_CASE(complex_expression) +{ + char const* text = "contract test {\n" + " 
function fun(uint256 a) {\n" + " uint256 x = (1 + 4).member(++67)[a/=9] || true;\n" + " }\n" + "}\n"; + BOOST_CHECK_NO_THROW(parseText(text)); +} + +BOOST_AUTO_TEST_CASE(while_loop) +{ + char const* text = "contract test {\n" + " function fun(uint256 a) {\n" + " uint256 x = (1 + 4).member(++67) || true;\n" + " }\n" + "}\n"; + BOOST_CHECK_NO_THROW(parseText(text)); +} + +BOOST_AUTO_TEST_CASE(if_statement) +{ + char const* text = "contract test {\n" + " function fun(uint256 a) {\n" + " if (a >= 8) return 2; else { var b = 7; }\n" + " }\n" + "}\n"; + BOOST_CHECK_NO_THROW(parseText(text)); +} + +BOOST_AUTO_TEST_CASE(else_if_statement) +{ + char const* text = "contract test {\n" + " function fun(uint256 a) returns (address b) {\n" + " if (a < 0) b = 0x67; else if (a == 0) b = 0x12; else b = 0x78;\n" + " }\n" + "}\n"; + BOOST_CHECK_NO_THROW(parseText(text)); +} + + BOOST_AUTO_TEST_SUITE_END()