From 49712025fd85045c57a69cd2800a86f01520487e Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 23 Mar 2015 13:09:25 +0100 Subject: [PATCH] Pattern matching for expression simplification. --- libevmcore/Assembly.cpp | 145 +------ libevmcore/Assembly.h | 55 +-- libevmcore/AssemblyItem.cpp | 135 ++++++ libevmcore/AssemblyItem.h | 92 ++++ libevmcore/CommonSubexpressionEliminator.cpp | 2 +- libevmcore/ExpressionClasses.cpp | 418 ++++++++++++------- libevmcore/ExpressionClasses.h | 77 +++- test/SolidityOptimizer.cpp | 41 ++ 8 files changed, 635 insertions(+), 330 deletions(-) create mode 100644 libevmcore/AssemblyItem.cpp create mode 100644 libevmcore/AssemblyItem.h diff --git a/libevmcore/Assembly.cpp b/libevmcore/Assembly.cpp index bd504dc22..abe932282 100644 --- a/libevmcore/Assembly.cpp +++ b/libevmcore/Assembly.cpp @@ -28,122 +28,6 @@ using namespace std; using namespace dev; using namespace dev::eth; -unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const -{ - switch (m_type) - { - case Operation: - case Tag: // 1 byte for the JUMPDEST - return 1; - case PushString: - return 33; - case Push: - return 1 + max(1, dev::bytesRequired(m_data)); - case PushSubSize: - case PushProgramSize: - return 4; // worst case: a 16MB program - case PushTag: - case PushData: - case PushSub: - return 1 + _addressLength; - default: - break; - } - BOOST_THROW_EXCEPTION(InvalidOpcode()); -} - -int AssemblyItem::deposit() const -{ - switch (m_type) - { - case Operation: - return instructionInfo(instruction()).ret - instructionInfo(instruction()).args; - case Push: - case PushString: - case PushTag: - case PushData: - case PushSub: - case PushSubSize: - case PushProgramSize: - return 1; - case Tag: - return 0; - default:; - } - return 0; -} - -string AssemblyItem::getJumpTypeAsString() const -{ - switch (m_jumpType) - { - case JumpType::IntoFunction: - return "[in]"; - case JumpType::OutOfFunction: - return "[out]"; - case JumpType::Ordinary: - default: - return ""; - } -} - -ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) -{ - switch (_item.type()) - { - case Operation: - _out << " " << instructionInfo(_item.instruction()).name; - if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI) - _out << "\t" << _item.getJumpTypeAsString(); - break; - case Push: - _out << " PUSH " << hex << _item.data(); - break; - case PushString: - _out << " PushString" << hex << (unsigned)_item.data(); - break; - case PushTag: - _out << " PushTag " << _item.data(); - break; - case Tag: - _out << " Tag " << _item.data(); - break; - case PushData: - _out << " PushData " << hex << (unsigned)_item.data(); - break; - case PushSub: - _out << " PushSub " << hex << h256(_item.data()).abridged(); - break; - case PushSubSize: - _out << " PushSubSize " << hex << h256(_item.data()).abridged(); - break; - case PushProgramSize: - _out << " PushProgramSize"; - break; - case UndefinedItem: - _out << " ???"; - break; - default: - BOOST_THROW_EXCEPTION(InvalidOpcode()); - } - return _out; -} - -unsigned Assembly::bytesRequired() const -{ - for (unsigned br = 1;; ++br) - { - unsigned ret = 1; - for (auto const& i: m_data) - ret += i.second.size(); - - for (AssemblyItem const& i: m_items) - ret += i.bytesRequired(br); - if (dev::bytesRequired(ret) <= br) - return ret; - } -} - void Assembly::append(Assembly const& _a) { auto newDeposit = m_deposit + _a.deposit(); @@ -180,11 +64,19 @@ void Assembly::append(Assembly const& _a, int _deposit) } } -ostream& dev::eth::operator<<(ostream& _out, AssemblyItemsConstRef _i) +unsigned Assembly::bytesRequired() const { - for (AssemblyItem const& i: _i) - _out << i; - return _out; + for (unsigned br = 1;; ++br) + { + unsigned ret = 1; + for (auto const& i: m_data) + ret += i.second.size(); + + for (AssemblyItem const& i: m_items) + ret += i.bytesRequired(br); + if (dev::bytesRequired(ret) <= br) + return ret; + } } string Assembly::getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const @@ -295,14 +187,6 @@ Assembly& Assembly::optimise(bool _enable) { if (!_enable) return *this; - map> const c_associative = - { - { Instruction::ADD, [](u256 a, u256 b)->u256{return a + b;} }, - { Instruction::MUL, [](u256 a, u256 b)->u256{return a * b;} }, - { Instruction::AND, [](u256 a, u256 b)->u256{return a & b;} }, - { Instruction::OR, [](u256 a, u256 b)->u256{return a | b;} }, - { Instruction::XOR, [](u256 a, u256 b)->u256{return a ^ b;} }, - }; std::vector>> rules = { { { Push, Instruction::POP }, [](AssemblyItemsConstRef) -> AssemblyItems { return {}; } }, @@ -315,11 +199,6 @@ Assembly& Assembly::optimise(bool _enable) { { Instruction::ISZERO, Instruction::ISZERO }, [](AssemblyItemsConstRef) -> AssemblyItems { return {}; } }, }; - for (auto const& i: c_associative) - { - rules.push_back({ { Push, Push, i.first }, [&](AssemblyItemsConstRef m) -> AssemblyItems { return { i.second(m[1].data(), m[0].data()) }; } }); - rules.push_back({ { Push, i.first, Push, i.first }, [&](AssemblyItemsConstRef m) -> AssemblyItems { return { i.second(m[2].data(), m[0].data()), i.first }; } }); - } // jump to next instruction rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].m_data == m[2].m_data) return {m[2]}; else return m.toVector(); }}); diff --git a/libevmcore/Assembly.h b/libevmcore/Assembly.h index 280a2e81c..315e6aaed 100644 --- a/libevmcore/Assembly.h +++ b/libevmcore/Assembly.h @@ -27,6 +27,7 @@ #include #include #include +#include #include "Exceptions.h" namespace dev @@ -34,60 +35,6 @@ namespace dev namespace eth { -enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData }; - -class Assembly; - -class AssemblyItem -{ - friend class Assembly; - -public: - enum class JumpType { Ordinary, IntoFunction, OutOfFunction }; - - AssemblyItem(u256 _push): m_type(Push), m_data(_push) {} - AssemblyItem(Instruction _i): m_type(Operation), m_data((byte)_i) {} - AssemblyItem(AssemblyItemType _type, u256 _data = 0): m_type(_type), m_data(_data) {} - - AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } - AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } - - AssemblyItemType type() const { return m_type; } - u256 const& data() const { return m_data; } - /// @returns the instruction of this item (only valid if type() == Operation) - Instruction instruction() const { return Instruction(byte(m_data)); } - - /// @returns true iff the type and data of the items are equal. - bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } - bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } - - /// @returns an upper bound for the number of bytes required by this item, assuming that - /// the value of a jump tag takes @a _addressLength bytes. - unsigned bytesRequired(unsigned _addressLength) const; - int deposit() const; - - bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } - void setLocation(SourceLocation const& _location) { m_location = _location; } - SourceLocation const& getLocation() const { return m_location; } - - void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; } - JumpType getJumpType() const { return m_jumpType; } - std::string getJumpTypeAsString() const; - -private: - AssemblyItemType m_type; - u256 m_data; - SourceLocation m_location; - JumpType m_jumpType = JumpType::Ordinary; -}; - -using AssemblyItems = std::vector; -using AssemblyItemsConstRef = vector_ref; - -std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item); -std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i); -inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); } - class Assembly { public: diff --git a/libevmcore/AssemblyItem.cpp b/libevmcore/AssemblyItem.cpp new file mode 100644 index 000000000..a4485a144 --- /dev/null +++ b/libevmcore/AssemblyItem.cpp @@ -0,0 +1,135 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file Assembly.cpp + * @author Gav Wood + * @date 2014 + */ + +#include "AssemblyItem.h" +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const +{ + switch (m_type) + { + case Operation: + case Tag: // 1 byte for the JUMPDEST + return 1; + case PushString: + return 33; + case Push: + return 1 + max(1, dev::bytesRequired(m_data)); + case PushSubSize: + case PushProgramSize: + return 4; // worst case: a 16MB program + case PushTag: + case PushData: + case PushSub: + return 1 + _addressLength; + default: + break; + } + BOOST_THROW_EXCEPTION(InvalidOpcode()); +} + +int AssemblyItem::deposit() const +{ + switch (m_type) + { + case Operation: + return instructionInfo(instruction()).ret - instructionInfo(instruction()).args; + case Push: + case PushString: + case PushTag: + case PushData: + case PushSub: + case PushSubSize: + case PushProgramSize: + return 1; + case Tag: + return 0; + default:; + } + return 0; +} + +string AssemblyItem::getJumpTypeAsString() const +{ + switch (m_jumpType) + { + case JumpType::IntoFunction: + return "[in]"; + case JumpType::OutOfFunction: + return "[out]"; + case JumpType::Ordinary: + default: + return ""; + } +} + +ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) +{ + switch (_item.type()) + { + case Operation: + _out << " " << instructionInfo(_item.instruction()).name; + if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI) + _out << "\t" << _item.getJumpTypeAsString(); + break; + case Push: + _out << " PUSH " << hex << _item.data(); + break; + case PushString: + _out << " PushString" << hex << (unsigned)_item.data(); + break; + case PushTag: + _out << " PushTag " << _item.data(); + break; + case Tag: + _out << " Tag " << _item.data(); + break; + case PushData: + _out << " PushData " << hex << (unsigned)_item.data(); + break; + case PushSub: + _out << " PushSub " << hex << h256(_item.data()).abridged(); + break; + case PushSubSize: + _out << " PushSubSize " << hex << h256(_item.data()).abridged(); + break; + case PushProgramSize: + _out << " PushProgramSize"; + break; + case UndefinedItem: + _out << " ???"; + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + return _out; +} + +ostream& dev::eth::operator<<(ostream& _out, AssemblyItemsConstRef _i) +{ + for (AssemblyItem const& i: _i) + _out << i; + return _out; +} diff --git a/libevmcore/AssemblyItem.h b/libevmcore/AssemblyItem.h new file mode 100644 index 000000000..e7beced39 --- /dev/null +++ b/libevmcore/AssemblyItem.h @@ -0,0 +1,92 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file Assembly.h + * @author Gav Wood + * @date 2014 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "Exceptions.h" + +namespace dev +{ +namespace eth +{ + +enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData }; + +class Assembly; + +class AssemblyItem +{ + friend class Assembly; + +public: + enum class JumpType { Ordinary, IntoFunction, OutOfFunction }; + + AssemblyItem(u256 _push): m_type(Push), m_data(_push) {} + AssemblyItem(Instruction _i): m_type(Operation), m_data((byte)_i) {} + AssemblyItem(AssemblyItemType _type, u256 _data = 0): m_type(_type), m_data(_data) {} + + AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } + AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } + + AssemblyItemType type() const { return m_type; } + u256 const& data() const { return m_data; } + /// @returns the instruction of this item (only valid if type() == Operation) + Instruction instruction() const { return Instruction(byte(m_data)); } + + /// @returns true iff the type and data of the items are equal. + bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } + bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } + + /// @returns an upper bound for the number of bytes required by this item, assuming that + /// the value of a jump tag takes @a _addressLength bytes. + unsigned bytesRequired(unsigned _addressLength) const; + int deposit() const; + + bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } + void setLocation(SourceLocation const& _location) { m_location = _location; } + SourceLocation const& getLocation() const { return m_location; } + + void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; } + JumpType getJumpType() const { return m_jumpType; } + std::string getJumpTypeAsString() const; + +private: + AssemblyItemType m_type; + u256 m_data; + SourceLocation m_location; + JumpType m_jumpType = JumpType::Ordinary; +}; + +using AssemblyItems = std::vector; +using AssemblyItemsConstRef = vector_ref; + +std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item); +std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i); +inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); } + +} +} diff --git a/libevmcore/CommonSubexpressionEliminator.cpp b/libevmcore/CommonSubexpressionEliminator.cpp index 47251e5c8..7fed03b4e 100644 --- a/libevmcore/CommonSubexpressionEliminator.cpp +++ b/libevmcore/CommonSubexpressionEliminator.cpp @@ -43,7 +43,7 @@ vector CommonSubexpressionEliminator::getOptimizedItems() targetStackContents[height] = stackElement(height); // Debug info: - //stream(cout, currentStackContents, targetStackContents); + //stream(cout, initialStackContents, targetStackContents); return CSECodeGenerator(m_expressionClasses).generateCode(initialStackContents, targetStackContents); } diff --git a/libevmcore/ExpressionClasses.cpp b/libevmcore/ExpressionClasses.cpp index eebc98f66..49fd72c1b 100644 --- a/libevmcore/ExpressionClasses.cpp +++ b/libevmcore/ExpressionClasses.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,7 @@ bool ExpressionClasses::Expression::operator<(const ExpressionClasses::Expressio ExpressionClasses::Id ExpressionClasses::find(AssemblyItem const& _item, Ids const& _arguments) { Expression exp; + exp.id = Id(-1); exp.item = &_item; exp.arguments = _arguments; @@ -72,158 +74,168 @@ ExpressionClasses::Id ExpressionClasses::find(AssemblyItem const& _item, Ids con return exp.id; } -ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) +string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) { - if (_expr.item->type() != Operation) - return -1; + Expression const& expr = representative(_id); + stringstream str; + str << dec << expr.id << ":" << *expr.item << "("; + for (Id arg: expr.arguments) + str << fullDAGToString(arg) << ","; + str << ")"; + return str.str(); +} - // @todo: - // ISZERO ISZERO - // associative operations (as done in Assembly.cpp) - // 2 * x == x + x - - Id arg1; - Id arg2; - Id arg3; - u256 data1; - u256 data2; - u256 data3; - switch (_expr.arguments.size()) - { - default: - arg3 = _expr.arguments.at(2); - data3 = representative(arg3).item->data(); - case 2: - arg2 = _expr.arguments.at(1); - data2 = representative(arg2).item->data(); - case 1: - arg1 = _expr.arguments.at(0); - data1 = representative(arg1).item->data(); - case 0: - break; - } +class Rules: public boost::noncopyable +{ +public: + Rules(); + void resetMatchGroups() { m_matchGroups.clear(); } + vector>> rules() const { return m_rules; } - /** - * Simplification rule. If _strict is false, Push or a constant matches any constant, - * otherwise Push matches "0" and a constant matches itself. - * "UndefinedItem" matches any expression, but all of them must be equal inside one rule. - */ - struct Rule - { - Rule(AssemblyItems const& _pattern, bool _strict, function const& _action): - pattern(_pattern), - assemblyItemAction(_action), - strict(_strict) - {} - Rule(AssemblyItems const& _pattern, function _action): - Rule(_pattern, false, _action) - {} - Rule(AssemblyItems const& _pattern, bool _strict, function const& _action): - pattern(_pattern), - idAction(_action), - strict(_strict) - {} - Rule(AssemblyItems const& _pattern, function _action): - Rule(_pattern, false, _action) - {} - bool matches(ExpressionClasses const& _classes, Expression const& _expr) const - { - if (!_expr.item->match(pattern.front())) - return false; - assertThrow(_expr.arguments.size() == pattern.size() - 1, OptimizerException, ""); - Id argRequiredToBeEqual(-1); - for (size_t i = 1; i < pattern.size(); ++i) - { - Id arg = _expr.arguments[i - 1]; - if (pattern[i].type() == UndefinedItem) - { - if (argRequiredToBeEqual == Id(-1)) - argRequiredToBeEqual = arg; - else if (argRequiredToBeEqual != arg) - return false; - } - else - { - AssemblyItem const& argItem = *_classes.representative(arg).item; - if (strict && argItem != pattern[i]) - return false; - else if (!strict && !argItem.match(pattern[i])) - return false; - } - } - return true; - } +private: + using Expression = ExpressionClasses::Expression; + map m_matchGroups; + vector>> m_rules; +}; - AssemblyItems pattern; - function assemblyItemAction; - function idAction; - bool strict; - }; +Rules::Rules() +{ + // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Constants. + Pattern A(Push); + Pattern B(Push); + Pattern C(Push); + // Anything. + Pattern X; + Pattern Y; + Pattern Z; + A.setMatchGroup(1, m_matchGroups); + B.setMatchGroup(2, m_matchGroups); + C.setMatchGroup(3, m_matchGroups); + X.setMatchGroup(4, m_matchGroups); + Y.setMatchGroup(5, m_matchGroups); + Z.setMatchGroup(6, m_matchGroups); - vector c_singleLevel{ - // arithmetics on constants involving only stack variables - {{Instruction::ADD, Push, Push}, [&]{ return data1 + data2; }}, - {{Instruction::MUL, Push, Push}, [&]{ return data1 * data2; }}, - {{Instruction::SUB, Push, Push}, [&]{ return data1 - data2; }}, - {{Instruction::DIV, Push, Push}, [&]{ return data2 == 0 ? 0 : data1 / data2; }}, - {{Instruction::SDIV, Push, Push}, [&]{ return data2 == 0 ? 0 : s2u(u2s(data1) / u2s(data2)); }}, - {{Instruction::MOD, Push, Push}, [&]{ return data2 == 0 ? 0 : data1 % data2; }}, - {{Instruction::SMOD, Push, Push}, [&]{ return data2 == 0 ? 0 : s2u(u2s(data1) % u2s(data2)); }}, - {{Instruction::EXP, Push, Push}, [&]{ return u256(boost::multiprecision::powm(bigint(data1), bigint(data2), bigint(1) << 256)); }}, - {{Instruction::NOT, Push}, [&]{ return ~data1; }}, - {{Instruction::LT, Push, Push}, [&]() -> u256 { return data1 < data2 ? 1 : 0; }}, - {{Instruction::GT, Push, Push}, [&]() -> u256 { return data1 > data2 ? 1 : 0; }}, - {{Instruction::SLT, Push, Push}, [&]() -> u256 { return u2s(data1) < u2s( data2) ? 1 : 0; }}, - {{Instruction::SGT, Push, Push}, [&]() -> u256 { return u2s(data1) > u2s( data2) ? 1 : 0; }}, - {{Instruction::EQ, Push, Push}, [&]() -> u256 { return data1 == data2 ? 1 : 0; }}, - {{Instruction::ISZERO, Push}, [&]() -> u256 { return data1 == 0 ? 1 : 0; }}, - {{Instruction::AND, Push, Push}, [&]{ return data1 & data2; }}, - {{Instruction::OR, Push, Push}, [&]{ return data1 | data2; }}, - {{Instruction::XOR, Push, Push}, [&]{ return data1 ^ data2; }}, - {{Instruction::BYTE, Push, Push}, [&]{ return data1 >= 32 ? 0 : (data2 >> unsigned(8 * (31 - data1))) & 0xff; }}, - {{Instruction::ADDMOD, Push, Push, Push}, [&]{ return data3 == 0 ? 0 : u256((bigint(data1) + bigint(data2)) % data3); }}, - {{Instruction::MULMOD, Push, Push, Push}, [&]{ return data3 == 0 ? 0 : u256((bigint(data1) * bigint(data2)) % data3); }}, - {{Instruction::MULMOD, Push, Push, Push}, [&]{ return data1 * data2; }}, - {{Instruction::SIGNEXTEND, Push, Push}, [&]{ - if (data1 >= 31) - return data2; - unsigned testBit = unsigned(data1) * 8 + 7; + m_rules = vector>>{ + // arithmetics on constants + {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, + {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() / B.d(); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) / u2s(B.d())); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() % B.d(); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) % u2s(B.d())); }}, + {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, + {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, + {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, + {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, + {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, + {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, + {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, + {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, + {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, + {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, + {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, + {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, + {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { + if (A.d() >= 31) + return B.d(); + unsigned testBit = unsigned(A.d()) * 8 + 7; u256 mask = (u256(1) << testBit) - 1; - return u256(boost::multiprecision::bit_test(data2, testBit) ? data2 | ~mask : data2 & mask); + return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); }}, - {{Instruction::ADD, UndefinedItem, u256(0)}, true, [&]{ return arg1; }}, - {{Instruction::MUL, UndefinedItem, u256(1)}, true, [&]{ return arg1; }}, - {{Instruction::OR, UndefinedItem, u256(0)}, true, [&]{ return arg1; }}, - {{Instruction::XOR, UndefinedItem, u256(0)}, true, [&]{ return arg1; }}, - {{Instruction::AND, UndefinedItem, ~u256(0)}, true, [&]{ return arg1; }}, - {{Instruction::MUL, UndefinedItem, u256(0)}, true, [&]{ return u256(0); }}, - {{Instruction::DIV, UndefinedItem, u256(0)}, true, [&]{ return u256(0); }}, - {{Instruction::MOD, UndefinedItem, u256(0)}, true, [&]{ return u256(0); }}, - {{Instruction::MOD, u256(0), UndefinedItem}, true, [&]{ return u256(0); }}, - {{Instruction::AND, UndefinedItem, u256(0)}, true, [&]{ return u256(0); }}, - {{Instruction::OR, UndefinedItem, ~u256(0)}, true, [&]{ return ~u256(0); }}, - {{Instruction::AND, UndefinedItem, UndefinedItem}, true, [&]{ return arg1; }}, - {{Instruction::OR, UndefinedItem, UndefinedItem}, true, [&]{ return arg1; }}, - {{Instruction::SUB, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }}, - {{Instruction::EQ, UndefinedItem, UndefinedItem}, true, [&]{ return u256(1); }}, - {{Instruction::LT, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }}, - {{Instruction::SLT, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }}, - {{Instruction::GT, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }}, - {{Instruction::SGT, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }}, - {{Instruction::MOD, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }}, + + // invariants involving known constants + {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, + {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::OR, {X, 0}}, [=]{ return X; }}, + {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, + {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, + // operations involving an expression and itself + {{Instruction::AND, {X, X}}, [=]{ return X; }}, + {{Instruction::OR, {X, X}}, [=]{ return X; }}, + {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, + + {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, + }; + // Associative operations + for (auto const& opFun: vector>>{ + {Instruction::ADD, plus()}, + {Instruction::MUL, multiplies()}, + {Instruction::AND, bit_and()}, + {Instruction::OR, bit_or()}, + {Instruction::XOR, bit_xor()} + }) + { + auto op = opFun.first; + auto fun = opFun.second; + // Moving constants to the outside, order matters here! + // we need actions that return expressions (or patterns?) here, and we need also reversed rules + // (X+A)+B -> X+(A+B) + m_rules.push_back({ + {op, {{op, {X, A}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }); + // X+(Y+A) -> (X+Y)+A + m_rules.push_back({ + {op, {{op, {X, A}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }); + // For now, we still need explicit commutativity for the inner pattern + m_rules.push_back({ + {op, {{op, {A, X}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }); + m_rules.push_back({ + {op, {{op, {A, X}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }); }; - for (auto const& rule: c_singleLevel) - if (rule.matches(*this, _expr)) + //@todo: (x+8)-3 and other things +} + +ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) +{ + static Rules rules; + + if (_expr.item->type() != Operation) + return -1; + + for (auto const& rule: rules.rules()) + { + rules.resetMatchGroups(); + if (rule.first.matches(_expr, *this)) { - if (rule.idAction) - return rule.idAction(); - else - { - m_spareAssemblyItem.push_back(make_shared(rule.assemblyItemAction())); - return find(*m_spareAssemblyItem.back()); - } + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << rule.first.toString() << endl; + //ExpressionTemplate t(rule.second()); + //cout << "to" << rule.second().toString() << endl; + return rebuildExpression(ExpressionTemplate(rule.second())); } + } if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) { @@ -234,3 +246,127 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, return -1; } + +ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate const& _template) +{ + if (_template.hasId) + return _template.id; + + Ids arguments; + for (ExpressionTemplate const& t: _template.arguments) + arguments.push_back(rebuildExpression(t)); + m_spareAssemblyItem.push_back(make_shared(_template.item)); + return find(*m_spareAssemblyItem.back(), arguments); +} + + +Pattern::Pattern(Instruction _instruction, std::vector const& _arguments): + m_type(Operation), + m_requireDataMatch(true), + m_data(_instruction), + m_arguments(_arguments) +{ +} + +void Pattern::setMatchGroup(unsigned _group, map& _matchGroups) +{ + m_matchGroup = _group; + m_matchGroups = &_matchGroups; +} + +bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const +{ + if (!matchesBaseItem(*_expr.item)) + return false; + if (m_matchGroup) + { + if (!m_matchGroups->count(m_matchGroup)) + (*m_matchGroups)[m_matchGroup] = &_expr; + else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) + return false; + } + assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); + for (size_t i = 0; i < m_arguments.size(); ++i) + if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) + return false; + return true; +} + +string Pattern::toString() const +{ + stringstream s; + switch (m_type) + { + case Operation: + s << instructionInfo(Instruction(unsigned(m_data))).name; + break; + case Push: + s << "PUSH " << hex << m_data; + break; + case UndefinedItem: + s << "ANY"; + break; + default: + s << "t=" << dec << m_type << " d=" << hex << m_data; + break; + } + if (!m_requireDataMatch) + s << " ~"; + if (m_matchGroup) + s << "[" << dec << m_matchGroup << "]"; + s << "("; + for (Pattern const& p: m_arguments) + s << p.toString() << ", "; + s << ")"; + return s.str(); +} + +bool Pattern::matchesBaseItem(AssemblyItem const& _item) const +{ + if (m_type == UndefinedItem) + return true; + if (m_type != _item.type()) + return false; + if (m_requireDataMatch && m_data != _item.data()) + return false; + return true; +} + +Pattern::Expression const& Pattern::matchGroupValue() const +{ + assertThrow(m_matchGroup > 0, OptimizerException, ""); + assertThrow(!!m_matchGroups, OptimizerException, ""); + assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); + return *(*m_matchGroups)[m_matchGroup]; +} + + +ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern) +{ + if (_pattern.matchGroup()) + { + hasId = true; + id = _pattern.id(); + } + else + { + hasId = false; + item = _pattern.toAssemblyItem(); + } + for (auto const& arg: _pattern.arguments()) + arguments.push_back(ExpressionTemplate(arg)); +} + +string ExpressionTemplate::toString() const +{ + stringstream s; + if (hasId) + s << id; + else + s << item; + s << "("; + for (auto const& arg: arguments) + s << arg.toString(); + s << ")"; + return s.str(); +} diff --git a/libevmcore/ExpressionClasses.h b/libevmcore/ExpressionClasses.h index 71fc96109..ecd9c9250 100644 --- a/libevmcore/ExpressionClasses.h +++ b/libevmcore/ExpressionClasses.h @@ -26,13 +26,16 @@ #include #include #include +#include +#include namespace dev { namespace eth { -class AssemblyItem; +class Pattern; +struct ExpressionTemplate; /** * Collection of classes of equivalent expressions that can also determine the class of an expression. @@ -60,16 +63,88 @@ public: /// @returns the number of classes. Id size() const { return m_representatives.size(); } + std::string fullDAGToString(Id _id); + private: /// Tries to simplify the given expression. /// @returns its class if it possible or Id(-1) otherwise. /// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed Id tryToSimplify(Expression const& _expr, bool _secondRun = false); + /// Rebuilds an expression from a (matched) pattern. + Id rebuildExpression(ExpressionTemplate const& _template); + + std::vector>> createRules() const; + /// Expression equivalence class representatives - we only store one item of an equivalence. std::vector m_representatives; std::vector> m_spareAssemblyItem; }; +/** + * Pattern to match against an expression. + * Also stores matched expressions to retrieve them later, for constructing new expressions using + * ExpressionTemplate. + */ +class Pattern +{ +public: + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + + // Matches a specific constant value. + Pattern(unsigned _value): Pattern(u256(_value)) {} + // Matches a specific constant value. + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {} + // Matches a specific assembly item type or anything if not given. + Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} + // Matches a given instruction with given arguments + Pattern(Instruction _instruction, std::vector const& _arguments = {}); + /// Sets this pattern to be part of the match group with the identifier @a _group. + /// Inside one rule, all patterns in the same match group have to match expressions from the + /// same expression equivalence class. + void setMatchGroup(unsigned _group, std::map& _matchGroups); + unsigned matchGroup() const { return m_matchGroup; } + bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; + + AssemblyItem toAssemblyItem() const { return AssemblyItem(m_type, m_data); } + std::vector arguments() const { return m_arguments; } + + /// @returns the id of the matched expression if this pattern is part of a match group. + Id id() const { return matchGroupValue().id; } + /// @returns the data of the matched expression if this pattern is part of a match group. + u256 d() const { return matchGroupValue().item->data(); } + + std::string toString() const; + +private: + bool matchesBaseItem(AssemblyItem const& _item) const; + Expression const& matchGroupValue() const; + + AssemblyItemType m_type; + bool m_requireDataMatch = false; + u256 m_data = 0; + std::vector m_arguments; + unsigned m_matchGroup = 0; + std::map* m_matchGroups = nullptr; +}; + +/** + * Template for a new expression that can be built from matched patterns. + */ +struct ExpressionTemplate +{ + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + explicit ExpressionTemplate(Pattern const& _pattern); + std::string toString() const; + bool hasId = false; + /// Id of the matched expression, if available. + Id id = Id(-1); + // Otherwise, assembly item. + AssemblyItem item = UndefinedItem; + std::vector arguments; +}; + } } diff --git a/test/SolidityOptimizer.cpp b/test/SolidityOptimizer.cpp index 2ced97430..2d5cff7ac 100644 --- a/test/SolidityOptimizer.cpp +++ b/test/SolidityOptimizer.cpp @@ -240,6 +240,12 @@ BOOST_AUTO_TEST_CASE(cse_unneeded_items) checkCSE(input, input); } +BOOST_AUTO_TEST_CASE(cse_constant_addition) +{ + AssemblyItems input{u256(7), u256(8), Instruction::ADD}; + checkCSE(input, {u256(7 + 8)}); +} + BOOST_AUTO_TEST_CASE(cse_invariants) { AssemblyItems input{ @@ -262,6 +268,41 @@ BOOST_AUTO_TEST_CASE(cse_subother) checkCSE({Instruction::SUB}, {Instruction::SUB}); } +BOOST_AUTO_TEST_CASE(cse_double_negation) +{ + checkCSE({Instruction::DUP5, Instruction::NOT, Instruction::NOT}, {Instruction::DUP5}); +} + +BOOST_AUTO_TEST_CASE(cse_associativity) +{ + AssemblyItems input{ + Instruction::DUP1, + Instruction::DUP1, + u256(0), + Instruction::OR, + Instruction::OR + }; + checkCSE(input, {Instruction::DUP1}); +} + +BOOST_AUTO_TEST_CASE(cse_associativity2) +{ + AssemblyItems input{ + u256(0), + Instruction::DUP2, + u256(2), + u256(1), + Instruction::DUP6, + Instruction::ADD, + u256(2), + Instruction::ADD, + Instruction::ADD, + Instruction::ADD, + Instruction::ADD + }; + checkCSE(input, {Instruction::DUP2, Instruction::DUP2, Instruction::ADD, u256(5), Instruction::ADD}); +} + BOOST_AUTO_TEST_SUITE_END() }