Browse Source

Pattern matching for expression simplification.

cl-refactor
chriseth 10 years ago
parent
commit
49712025fd
  1. 145
      libevmcore/Assembly.cpp
  2. 55
      libevmcore/Assembly.h
  3. 135
      libevmcore/AssemblyItem.cpp
  4. 92
      libevmcore/AssemblyItem.h
  5. 2
      libevmcore/CommonSubexpressionEliminator.cpp
  6. 418
      libevmcore/ExpressionClasses.cpp
  7. 77
      libevmcore/ExpressionClasses.h
  8. 41
      test/SolidityOptimizer.cpp

145
libevmcore/Assembly.cpp

@ -28,122 +28,6 @@ using namespace std;
using namespace dev;
using namespace dev::eth;
/// @returns an upper bound for the number of bytes this item occupies, assuming every
/// tag / data / sub reference is encoded with @a _addressLength bytes.
/// Throws InvalidOpcode for item types that have no size rule here.
unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const
{
	if (m_type == Operation || m_type == Tag)
		return 1; // a Tag costs 1 byte for the JUMPDEST
	if (m_type == PushString)
		return 33;
	if (m_type == Push)
		return 1 + max<unsigned>(1, dev::bytesRequired(m_data));
	if (m_type == PushSubSize || m_type == PushProgramSize)
		return 4; // worst case: a 16MB program
	if (m_type == PushTag || m_type == PushData || m_type == PushSub)
		return 1 + _addressLength;
	BOOST_THROW_EXCEPTION(InvalidOpcode());
}
/// @returns `ret - args` of the instruction info for operations, 1 for every push-like
/// item, and 0 for tags and all remaining item types.
int AssemblyItem::deposit() const
{
	if (m_type == Operation)
	{
		auto const& info = instructionInfo(instruction());
		return info.ret - info.args;
	}
	bool const isPushLike =
		m_type == Push ||
		m_type == PushString ||
		m_type == PushTag ||
		m_type == PushData ||
		m_type == PushSub ||
		m_type == PushSubSize ||
		m_type == PushProgramSize;
	return isPushLike ? 1 : 0;
}
/// @returns the textual marker of the jump type: "[in]" for IntoFunction, "[out]" for
/// OutOfFunction and the empty string for everything else (including Ordinary).
string AssemblyItem::getJumpTypeAsString() const
{
	if (m_jumpType == JumpType::IntoFunction)
		return "[in]";
	if (m_jumpType == JumpType::OutOfFunction)
		return "[out]";
	return "";
}
/// Writes a human-readable representation of @a _item to @a _out.
/// Several item kinds print their payload in hexadecimal; the stream's formatting flags are
/// saved on entry and restored before returning so that the hex base does not leak into
/// subsequent output (e.g. the decimal tag numbers of following items).
/// Throws InvalidOpcode for unknown item types.
ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item)
{
	ios_base::fmtflags const callerFlags = _out.flags();
	switch (_item.type())
	{
	case Operation:
		_out << " " << instructionInfo(_item.instruction()).name;
		// Only jumps carry a jump type annotation.
		if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI)
			_out << "\t" << _item.getJumpTypeAsString();
		break;
	case Push:
		_out << " PUSH " << hex << _item.data();
		break;
	case PushString:
		_out << " PushString" << hex << (unsigned)_item.data();
		break;
	case PushTag:
		_out << " PushTag " << _item.data();
		break;
	case Tag:
		_out << " Tag " << _item.data();
		break;
	case PushData:
		_out << " PushData " << hex << (unsigned)_item.data();
		break;
	case PushSub:
		_out << " PushSub " << hex << h256(_item.data()).abridged();
		break;
	case PushSubSize:
		_out << " PushSubSize " << hex << h256(_item.data()).abridged();
		break;
	case PushProgramSize:
		_out << " PushProgramSize";
		break;
	case UndefinedItem:
		_out << " ???";
		break;
	default:
		// Nothing has been written and no flag has been touched on this path.
		BOOST_THROW_EXCEPTION(InvalidOpcode());
	}
	// Undo any `hex` applied above.
	_out.flags(callerFlags);
	return _out;
}
/// @returns the total size estimate of the assembly: one byte, plus the sizes of all data
/// entries, plus the per-item estimates. The address width is increased step by step until
/// the resulting total can itself be expressed in that many bytes.
unsigned Assembly::bytesRequired() const
{
	// The data entries do not depend on the address width, so they are summed only once.
	unsigned dataBytes = 0;
	for (auto const& entry: m_data)
		dataBytes += entry.second.size();

	unsigned addressWidth = 1;
	while (true)
	{
		unsigned total = 1 + dataBytes;
		for (AssemblyItem const& item: m_items)
			total += item.bytesRequired(addressWidth);
		if (dev::bytesRequired(total) <= addressWidth)
			return total;
		++addressWidth;
	}
}
void Assembly::append(Assembly const& _a)
{
auto newDeposit = m_deposit + _a.deposit();
@ -180,11 +64,19 @@ void Assembly::append(Assembly const& _a, int _deposit)
}
}
ostream& dev::eth::operator<<(ostream& _out, AssemblyItemsConstRef _i)
unsigned Assembly::bytesRequired() const
{
for (AssemblyItem const& i: _i)
_out << i;
return _out;
for (unsigned br = 1;; ++br)
{
unsigned ret = 1;
for (auto const& i: m_data)
ret += i.second.size();
for (AssemblyItem const& i: m_items)
ret += i.bytesRequired(br);
if (dev::bytesRequired(ret) <= br)
return ret;
}
}
string Assembly::getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const
@ -295,14 +187,6 @@ Assembly& Assembly::optimise(bool _enable)
{
if (!_enable)
return *this;
map<Instruction, function<u256(u256, u256)>> const c_associative =
{
{ Instruction::ADD, [](u256 a, u256 b)->u256{return a + b;} },
{ Instruction::MUL, [](u256 a, u256 b)->u256{return a * b;} },
{ Instruction::AND, [](u256 a, u256 b)->u256{return a & b;} },
{ Instruction::OR, [](u256 a, u256 b)->u256{return a | b;} },
{ Instruction::XOR, [](u256 a, u256 b)->u256{return a ^ b;} },
};
std::vector<pair<AssemblyItems, function<AssemblyItems(AssemblyItemsConstRef)>>> rules =
{
{ { Push, Instruction::POP }, [](AssemblyItemsConstRef) -> AssemblyItems { return {}; } },
@ -315,11 +199,6 @@ Assembly& Assembly::optimise(bool _enable)
{ { Instruction::ISZERO, Instruction::ISZERO }, [](AssemblyItemsConstRef) -> AssemblyItems { return {}; } },
};
for (auto const& i: c_associative)
{
rules.push_back({ { Push, Push, i.first }, [&](AssemblyItemsConstRef m) -> AssemblyItems { return { i.second(m[1].data(), m[0].data()) }; } });
rules.push_back({ { Push, i.first, Push, i.first }, [&](AssemblyItemsConstRef m) -> AssemblyItems { return { i.second(m[2].data(), m[0].data()), i.first }; } });
}
// jump to next instruction
rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].m_data == m[2].m_data) return {m[2]}; else return m.toVector(); }});

55
libevmcore/Assembly.h

@ -27,6 +27,7 @@
#include <libdevcore/Assertions.h>
#include <libevmcore/SourceLocation.h>
#include <libevmcore/Instruction.h>
#include <libevmcore/AssemblyItem.h>
#include "Exceptions.h"
namespace dev
@ -34,60 +35,6 @@ namespace dev
namespace eth
{
/// Discriminates the kinds of AssemblyItem. UndefinedItem additionally acts as a wildcard
/// when passed to AssemblyItem::match().
enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData };
class Assembly;
class AssemblyItem
{
friend class Assembly;
public:
enum class JumpType { Ordinary, IntoFunction, OutOfFunction };
AssemblyItem(u256 _push): m_type(Push), m_data(_push) {}
AssemblyItem(Instruction _i): m_type(Operation), m_data((byte)_i) {}
AssemblyItem(AssemblyItemType _type, u256 _data = 0): m_type(_type), m_data(_data) {}
AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); }
AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); }
AssemblyItemType type() const { return m_type; }
u256 const& data() const { return m_data; }
/// @returns the instruction of this item (only valid if type() == Operation)
Instruction instruction() const { return Instruction(byte(m_data)); }
/// @returns true iff the type and data of the items are equal.
bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; }
bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); }
/// @returns an upper bound for the number of bytes required by this item, assuming that
/// the value of a jump tag takes @a _addressLength bytes.
unsigned bytesRequired(unsigned _addressLength) const;
int deposit() const;
bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); }
void setLocation(SourceLocation const& _location) { m_location = _location; }
SourceLocation const& getLocation() const { return m_location; }
void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; }
JumpType getJumpType() const { return m_jumpType; }
std::string getJumpTypeAsString() const;
private:
AssemblyItemType m_type;
u256 m_data;
SourceLocation m_location;
JumpType m_jumpType = JumpType::Ordinary;
};
/// Sequence of assembly items.
using AssemblyItems = std::vector<AssemblyItem>;
/// vector_ref over constant assembly items, accepted by the streaming operator below.
using AssemblyItemsConstRef = vector_ref<AssemblyItem const>;
/// Streams a textual representation of a single item.
std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item);
/// Streams a sequence of items.
std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i);
/// Convenience overload: wraps @a _i in an AssemblyItemsConstRef and forwards to the overload above.
inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); }
class Assembly
{
public:

135
libevmcore/AssemblyItem.cpp

@ -0,0 +1,135 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file AssemblyItem.cpp
* @author Gav Wood <i@gavwood.com>
* @date 2014
*/
#include "AssemblyItem.h"
#include <fstream>
using namespace std;
using namespace dev;
using namespace dev::eth;
/// @returns an upper bound for the number of bytes this item occupies, assuming every
/// tag / data / sub reference is encoded with @a _addressLength bytes.
/// Throws InvalidOpcode for item types that have no size rule here.
unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const
{
	if (m_type == Operation || m_type == Tag)
		return 1; // a Tag costs 1 byte for the JUMPDEST
	if (m_type == PushString)
		return 33;
	if (m_type == Push)
		return 1 + max<unsigned>(1, dev::bytesRequired(m_data));
	if (m_type == PushSubSize || m_type == PushProgramSize)
		return 4; // worst case: a 16MB program
	if (m_type == PushTag || m_type == PushData || m_type == PushSub)
		return 1 + _addressLength;
	BOOST_THROW_EXCEPTION(InvalidOpcode());
}
/// @returns `ret - args` of the instruction info for operations, 1 for every push-like
/// item, and 0 for tags and all remaining item types.
int AssemblyItem::deposit() const
{
	if (m_type == Operation)
	{
		auto const& info = instructionInfo(instruction());
		return info.ret - info.args;
	}
	bool const isPushLike =
		m_type == Push ||
		m_type == PushString ||
		m_type == PushTag ||
		m_type == PushData ||
		m_type == PushSub ||
		m_type == PushSubSize ||
		m_type == PushProgramSize;
	return isPushLike ? 1 : 0;
}
/// @returns the textual marker of the jump type: "[in]" for IntoFunction, "[out]" for
/// OutOfFunction and the empty string for everything else (including Ordinary).
string AssemblyItem::getJumpTypeAsString() const
{
	if (m_jumpType == JumpType::IntoFunction)
		return "[in]";
	if (m_jumpType == JumpType::OutOfFunction)
		return "[out]";
	return "";
}
/// Writes a human-readable representation of @a _item to @a _out.
/// Several item kinds print their payload in hexadecimal; the stream's formatting flags are
/// saved on entry and restored before returning so that the hex base does not leak into
/// subsequent output (e.g. the decimal tag numbers of following items).
/// Throws InvalidOpcode for unknown item types.
ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item)
{
	ios_base::fmtflags const callerFlags = _out.flags();
	switch (_item.type())
	{
	case Operation:
		_out << " " << instructionInfo(_item.instruction()).name;
		// Only jumps carry a jump type annotation.
		if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI)
			_out << "\t" << _item.getJumpTypeAsString();
		break;
	case Push:
		_out << " PUSH " << hex << _item.data();
		break;
	case PushString:
		_out << " PushString" << hex << (unsigned)_item.data();
		break;
	case PushTag:
		_out << " PushTag " << _item.data();
		break;
	case Tag:
		_out << " Tag " << _item.data();
		break;
	case PushData:
		_out << " PushData " << hex << (unsigned)_item.data();
		break;
	case PushSub:
		_out << " PushSub " << hex << h256(_item.data()).abridged();
		break;
	case PushSubSize:
		_out << " PushSubSize " << hex << h256(_item.data()).abridged();
		break;
	case PushProgramSize:
		_out << " PushProgramSize";
		break;
	case UndefinedItem:
		_out << " ???";
		break;
	default:
		// Nothing has been written and no flag has been touched on this path.
		BOOST_THROW_EXCEPTION(InvalidOpcode());
	}
	// Undo any `hex` applied above.
	_out.flags(callerFlags);
	return _out;
}
/// Streams every item of @a _i to @a _out, in order, using the single-item operator.
ostream& dev::eth::operator<<(ostream& _out, AssemblyItemsConstRef _i)
{
	for (auto const& item: _i)
		_out << item;
	return _out;
}

92
libevmcore/AssemblyItem.h

@ -0,0 +1,92 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file AssemblyItem.h
* @author Gav Wood <i@gavwood.com>
* @date 2014
*/
#pragma once
#include <iostream>
#include <sstream>
#include <libdevcore/Common.h>
#include <libdevcore/Assertions.h>
#include <libevmcore/SourceLocation.h>
#include <libevmcore/Instruction.h>
#include "Exceptions.h"
namespace dev
{
namespace eth
{
/// Discriminates the kinds of AssemblyItem. UndefinedItem additionally acts as a wildcard
/// when passed to AssemblyItem::match().
enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData };
class Assembly;
class AssemblyItem
{
friend class Assembly;
public:
enum class JumpType { Ordinary, IntoFunction, OutOfFunction };
AssemblyItem(u256 _push): m_type(Push), m_data(_push) {}
AssemblyItem(Instruction _i): m_type(Operation), m_data((byte)_i) {}
AssemblyItem(AssemblyItemType _type, u256 _data = 0): m_type(_type), m_data(_data) {}
AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); }
AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); }
AssemblyItemType type() const { return m_type; }
u256 const& data() const { return m_data; }
/// @returns the instruction of this item (only valid if type() == Operation)
Instruction instruction() const { return Instruction(byte(m_data)); }
/// @returns true iff the type and data of the items are equal.
bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; }
bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); }
/// @returns an upper bound for the number of bytes required by this item, assuming that
/// the value of a jump tag takes @a _addressLength bytes.
unsigned bytesRequired(unsigned _addressLength) const;
int deposit() const;
bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); }
void setLocation(SourceLocation const& _location) { m_location = _location; }
SourceLocation const& getLocation() const { return m_location; }
void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; }
JumpType getJumpType() const { return m_jumpType; }
std::string getJumpTypeAsString() const;
private:
AssemblyItemType m_type;
u256 m_data;
SourceLocation m_location;
JumpType m_jumpType = JumpType::Ordinary;
};
/// Sequence of assembly items.
using AssemblyItems = std::vector<AssemblyItem>;
/// vector_ref over constant assembly items, accepted by the streaming operator below.
using AssemblyItemsConstRef = vector_ref<AssemblyItem const>;
/// Streams a textual representation of a single item.
std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item);
/// Streams a sequence of items.
std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i);
/// Convenience overload: wraps @a _i in an AssemblyItemsConstRef and forwards to the overload above.
inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); }
}
}

2
libevmcore/CommonSubexpressionEliminator.cpp

@ -43,7 +43,7 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
targetStackContents[height] = stackElement(height);
// Debug info:
//stream(cout, currentStackContents, targetStackContents);
//stream(cout, initialStackContents, targetStackContents);
return CSECodeGenerator(m_expressionClasses).generateCode(initialStackContents, targetStackContents);
}

418
libevmcore/ExpressionClasses.cpp

@ -26,6 +26,7 @@
#include <tuple>
#include <functional>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/noncopyable.hpp>
#include <libevmcore/Assembly.h>
#include <libevmcore/CommonSubexpressionEliminator.h>
@ -45,6 +46,7 @@ bool ExpressionClasses::Expression::operator<(const ExpressionClasses::Expressio
ExpressionClasses::Id ExpressionClasses::find(AssemblyItem const& _item, Ids const& _arguments)
{
Expression exp;
exp.id = Id(-1);
exp.item = &_item;
exp.arguments = _arguments;
@ -72,158 +74,168 @@ ExpressionClasses::Id ExpressionClasses::find(AssemblyItem const& _item, Ids con
return exp.id;
}
ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun)
string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id)
{
if (_expr.item->type() != Operation)
return -1;
Expression const& expr = representative(_id);
stringstream str;
str << dec << expr.id << ":" << *expr.item << "(";
for (Id arg: expr.arguments)
str << fullDAGToString(arg) << ",";
str << ")";
return str.str();
}
// @todo:
// ISZERO ISZERO
// associative operations (as done in Assembly.cpp)
// 2 * x == x + x
Id arg1;
Id arg2;
Id arg3;
u256 data1;
u256 data2;
u256 data3;
switch (_expr.arguments.size())
{
default:
arg3 = _expr.arguments.at(2);
data3 = representative(arg3).item->data();
case 2:
arg2 = _expr.arguments.at(1);
data2 = representative(arg2).item->data();
case 1:
arg1 = _expr.arguments.at(0);
data1 = representative(arg1).item->data();
case 0:
break;
}
class Rules: public boost::noncopyable
{
public:
Rules();
void resetMatchGroups() { m_matchGroups.clear(); }
vector<pair<Pattern, function<Pattern()>>> rules() const { return m_rules; }
/**
* Simplification rule. If _strict is false, Push or a constant matches any constant,
* otherwise Push matches "0" and a constant matches itself.
* "UndefinedItem" matches any expression, but all of them must be equal inside one rule.
*/
struct Rule
{
Rule(AssemblyItems const& _pattern, bool _strict, function<AssemblyItem()> const& _action):
pattern(_pattern),
assemblyItemAction(_action),
strict(_strict)
{}
Rule(AssemblyItems const& _pattern, function<AssemblyItem()> _action):
Rule(_pattern, false, _action)
{}
Rule(AssemblyItems const& _pattern, bool _strict, function<Id()> const& _action):
pattern(_pattern),
idAction(_action),
strict(_strict)
{}
Rule(AssemblyItems const& _pattern, function<Id()> _action):
Rule(_pattern, false, _action)
{}
bool matches(ExpressionClasses const& _classes, Expression const& _expr) const
{
if (!_expr.item->match(pattern.front()))
return false;
assertThrow(_expr.arguments.size() == pattern.size() - 1, OptimizerException, "");
Id argRequiredToBeEqual(-1);
for (size_t i = 1; i < pattern.size(); ++i)
{
Id arg = _expr.arguments[i - 1];
if (pattern[i].type() == UndefinedItem)
{
if (argRequiredToBeEqual == Id(-1))
argRequiredToBeEqual = arg;
else if (argRequiredToBeEqual != arg)
return false;
}
else
{
AssemblyItem const& argItem = *_classes.representative(arg).item;
if (strict && argItem != pattern[i])
return false;
else if (!strict && !argItem.match(pattern[i]))
return false;
}
}
return true;
}
private:
using Expression = ExpressionClasses::Expression;
map<unsigned, Expression const*> m_matchGroups;
vector<pair<Pattern, function<Pattern()>>> m_rules;
};
AssemblyItems pattern;
function<AssemblyItem()> assemblyItemAction;
function<Id()> idAction;
bool strict;
};
Rules::Rules()
{
// Multiple occurrences of one of these inside one rule must match the same equivalence class.
// Constants.
Pattern A(Push);
Pattern B(Push);
Pattern C(Push);
// Anything.
Pattern X;
Pattern Y;
Pattern Z;
A.setMatchGroup(1, m_matchGroups);
B.setMatchGroup(2, m_matchGroups);
C.setMatchGroup(3, m_matchGroups);
X.setMatchGroup(4, m_matchGroups);
Y.setMatchGroup(5, m_matchGroups);
Z.setMatchGroup(6, m_matchGroups);
vector<Rule> c_singleLevel{
// arithmetics on constants involving only stack variables
{{Instruction::ADD, Push, Push}, [&]{ return data1 + data2; }},
{{Instruction::MUL, Push, Push}, [&]{ return data1 * data2; }},
{{Instruction::SUB, Push, Push}, [&]{ return data1 - data2; }},
{{Instruction::DIV, Push, Push}, [&]{ return data2 == 0 ? 0 : data1 / data2; }},
{{Instruction::SDIV, Push, Push}, [&]{ return data2 == 0 ? 0 : s2u(u2s(data1) / u2s(data2)); }},
{{Instruction::MOD, Push, Push}, [&]{ return data2 == 0 ? 0 : data1 % data2; }},
{{Instruction::SMOD, Push, Push}, [&]{ return data2 == 0 ? 0 : s2u(u2s(data1) % u2s(data2)); }},
{{Instruction::EXP, Push, Push}, [&]{ return u256(boost::multiprecision::powm(bigint(data1), bigint(data2), bigint(1) << 256)); }},
{{Instruction::NOT, Push}, [&]{ return ~data1; }},
{{Instruction::LT, Push, Push}, [&]() -> u256 { return data1 < data2 ? 1 : 0; }},
{{Instruction::GT, Push, Push}, [&]() -> u256 { return data1 > data2 ? 1 : 0; }},
{{Instruction::SLT, Push, Push}, [&]() -> u256 { return u2s(data1) < u2s( data2) ? 1 : 0; }},
{{Instruction::SGT, Push, Push}, [&]() -> u256 { return u2s(data1) > u2s( data2) ? 1 : 0; }},
{{Instruction::EQ, Push, Push}, [&]() -> u256 { return data1 == data2 ? 1 : 0; }},
{{Instruction::ISZERO, Push}, [&]() -> u256 { return data1 == 0 ? 1 : 0; }},
{{Instruction::AND, Push, Push}, [&]{ return data1 & data2; }},
{{Instruction::OR, Push, Push}, [&]{ return data1 | data2; }},
{{Instruction::XOR, Push, Push}, [&]{ return data1 ^ data2; }},
{{Instruction::BYTE, Push, Push}, [&]{ return data1 >= 32 ? 0 : (data2 >> unsigned(8 * (31 - data1))) & 0xff; }},
{{Instruction::ADDMOD, Push, Push, Push}, [&]{ return data3 == 0 ? 0 : u256((bigint(data1) + bigint(data2)) % data3); }},
{{Instruction::MULMOD, Push, Push, Push}, [&]{ return data3 == 0 ? 0 : u256((bigint(data1) * bigint(data2)) % data3); }},
{{Instruction::MULMOD, Push, Push, Push}, [&]{ return data1 * data2; }},
{{Instruction::SIGNEXTEND, Push, Push}, [&]{
if (data1 >= 31)
return data2;
unsigned testBit = unsigned(data1) * 8 + 7;
m_rules = vector<pair<Pattern, function<Pattern()>>>{
// arithmetics on constants
{{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }},
{{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }},
{{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }},
{{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() / B.d(); }},
{{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) / u2s(B.d())); }},
{{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() % B.d(); }},
{{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) % u2s(B.d())); }},
{{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }},
{{Instruction::NOT, {A}}, [=]{ return ~A.d(); }},
{{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }},
{{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }},
{{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }},
{{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }},
{{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }},
{{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }},
{{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }},
{{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }},
{{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }},
{{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }},
{{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }},
{{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }},
{{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }},
{{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 {
if (A.d() >= 31)
return B.d();
unsigned testBit = unsigned(A.d()) * 8 + 7;
u256 mask = (u256(1) << testBit) - 1;
return u256(boost::multiprecision::bit_test(data2, testBit) ? data2 | ~mask : data2 & mask);
return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask);
}},
{{Instruction::ADD, UndefinedItem, u256(0)}, true, [&]{ return arg1; }},
{{Instruction::MUL, UndefinedItem, u256(1)}, true, [&]{ return arg1; }},
{{Instruction::OR, UndefinedItem, u256(0)}, true, [&]{ return arg1; }},
{{Instruction::XOR, UndefinedItem, u256(0)}, true, [&]{ return arg1; }},
{{Instruction::AND, UndefinedItem, ~u256(0)}, true, [&]{ return arg1; }},
{{Instruction::MUL, UndefinedItem, u256(0)}, true, [&]{ return u256(0); }},
{{Instruction::DIV, UndefinedItem, u256(0)}, true, [&]{ return u256(0); }},
{{Instruction::MOD, UndefinedItem, u256(0)}, true, [&]{ return u256(0); }},
{{Instruction::MOD, u256(0), UndefinedItem}, true, [&]{ return u256(0); }},
{{Instruction::AND, UndefinedItem, u256(0)}, true, [&]{ return u256(0); }},
{{Instruction::OR, UndefinedItem, ~u256(0)}, true, [&]{ return ~u256(0); }},
{{Instruction::AND, UndefinedItem, UndefinedItem}, true, [&]{ return arg1; }},
{{Instruction::OR, UndefinedItem, UndefinedItem}, true, [&]{ return arg1; }},
{{Instruction::SUB, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }},
{{Instruction::EQ, UndefinedItem, UndefinedItem}, true, [&]{ return u256(1); }},
{{Instruction::LT, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }},
{{Instruction::SLT, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }},
{{Instruction::GT, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }},
{{Instruction::SGT, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }},
{{Instruction::MOD, UndefinedItem, UndefinedItem}, true, [&]{ return u256(0); }},
// invariants involving known constants
{{Instruction::ADD, {X, 0}}, [=]{ return X; }},
{{Instruction::MUL, {X, 1}}, [=]{ return X; }},
{{Instruction::DIV, {X, 1}}, [=]{ return X; }},
{{Instruction::SDIV, {X, 1}}, [=]{ return X; }},
{{Instruction::OR, {X, 0}}, [=]{ return X; }},
{{Instruction::XOR, {X, 0}}, [=]{ return X; }},
{{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }},
{{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }},
{{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }},
{{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }},
{{Instruction::MOD, {0, X}}, [=]{ return u256(0); }},
{{Instruction::AND, {X, 0}}, [=]{ return u256(0); }},
{{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }},
// operations involving an expression and itself
{{Instruction::AND, {X, X}}, [=]{ return X; }},
{{Instruction::OR, {X, X}}, [=]{ return X; }},
{{Instruction::SUB, {X, X}}, [=]{ return u256(0); }},
{{Instruction::EQ, {X, X}}, [=]{ return u256(1); }},
{{Instruction::EQ, {X, X}}, [=]{ return u256(1); }},
{{Instruction::LT, {X, X}}, [=]{ return u256(0); }},
{{Instruction::SLT, {X, X}}, [=]{ return u256(0); }},
{{Instruction::GT, {X, X}}, [=]{ return u256(0); }},
{{Instruction::SGT, {X, X}}, [=]{ return u256(0); }},
{{Instruction::MOD, {X, X}}, [=]{ return u256(0); }},
{{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }},
};
// Associative operations
for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{
{Instruction::ADD, plus<u256>()},
{Instruction::MUL, multiplies<u256>()},
{Instruction::AND, bit_and<u256>()},
{Instruction::OR, bit_or<u256>()},
{Instruction::XOR, bit_xor<u256>()}
})
{
auto op = opFun.first;
auto fun = opFun.second;
// Moving constants to the outside, order matters here!
// we need actions that return expressions (or patterns?) here, and we need also reversed rules
// (X+A)+B -> X+(A+B)
m_rules.push_back({
{op, {{op, {X, A}}, B}},
[=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
});
// (X+A)+Y -> (X+Y)+A
m_rules.push_back({
{op, {{op, {X, A}}, Y}},
[=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
});
// For now, we still need explicit commutativity for the inner pattern
m_rules.push_back({
{op, {{op, {A, X}}, B}},
[=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
});
m_rules.push_back({
{op, {{op, {A, X}}, Y}},
[=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
});
};
for (auto const& rule: c_singleLevel)
if (rule.matches(*this, _expr))
//@todo: (x+8)-3 and other things
}
ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun)
{
static Rules rules;
if (_expr.item->type() != Operation)
return -1;
for (auto const& rule: rules.rules())
{
rules.resetMatchGroups();
if (rule.first.matches(_expr, *this))
{
if (rule.idAction)
return rule.idAction();
else
{
m_spareAssemblyItem.push_back(make_shared<AssemblyItem>(rule.assemblyItemAction()));
return find(*m_spareAssemblyItem.back());
}
// Debug info
//cout << "Simplifying " << *_expr.item << "(";
//for (Id arg: _expr.arguments)
// cout << fullDAGToString(arg) << ", ";
//cout << ")" << endl;
//cout << "with rule " << rule.first.toString() << endl;
//ExpressionTemplate t(rule.second());
//cout << "to" << rule.second().toString() << endl;
return rebuildExpression(ExpressionTemplate(rule.second()));
}
}
if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item))
{
@ -234,3 +246,127 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr,
return -1;
}
/// Turns @a _template into an expression class id. A template that already carries an id
/// yields that id; otherwise its arguments are rebuilt recursively, the template's item is
/// stored in m_spareAssemblyItem and the id is looked up via find().
ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate const& _template)
{
	if (!_template.hasId)
	{
		Ids argumentIds;
		for (ExpressionTemplate const& argument: _template.arguments)
			argumentIds.push_back(rebuildExpression(argument));
		// find() takes the item by reference, so the item is kept in m_spareAssemblyItem.
		m_spareAssemblyItem.push_back(make_shared<AssemblyItem>(_template.item));
		AssemblyItem const& storedItem = *m_spareAssemblyItem.back();
		return find(storedItem, argumentIds);
	}
	return _template.id;
}
/// Creates a pattern for the operation @a _instruction whose arguments have to match
/// @a _arguments. The instruction is stored as the pattern's data and must match exactly.
Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments):
	m_type(Operation), m_requireDataMatch(true), m_data(_instruction), m_arguments(_arguments)
{}
/// Assigns this pattern to match group @a _group and remembers the address of the map
/// @a _matchGroups in which matched expressions of that group are recorded.
void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups)
{
	m_matchGroups = &_matchGroups;
	m_matchGroup = _group;
}
/// @returns true iff @a _expr matches this pattern, including all argument patterns.
/// If the pattern belongs to a match group, the first matched expression is recorded for
/// that group and every further match must have the same expression id.
bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const
{
	if (!matchesBaseItem(*_expr.item))
		return false;

	if (m_matchGroup != 0)
	{
		auto const recorded = m_matchGroups->find(m_matchGroup);
		if (recorded == m_matchGroups->end())
			(*m_matchGroups)[m_matchGroup] = &_expr;
		else if (recorded->second->id != _expr.id)
			return false;
	}

	// A pattern without argument patterns accepts any number of arguments.
	assertThrow(m_arguments.empty() || _expr.arguments.size() == m_arguments.size(), OptimizerException, "");
	size_t position = 0;
	for (Pattern const& argumentPattern: m_arguments)
	{
		if (!argumentPattern.matches(_classes.representative(_expr.arguments[position]), _classes))
			return false;
		++position;
	}
	return true;
}
/// @returns a textual form of the pattern: the base item, " ~" if the data need not match,
/// the match group in brackets (if any) and the argument patterns in parentheses.
string Pattern::toString() const
{
	stringstream out;
	if (m_type == Operation)
		out << instructionInfo(Instruction(unsigned(m_data))).name;
	else if (m_type == Push)
		out << "PUSH " << hex << m_data;
	else if (m_type == UndefinedItem)
		out << "ANY";
	else
		out << "t=" << dec << m_type << " d=" << hex << m_data;

	if (!m_requireDataMatch)
		out << " ~";
	if (m_matchGroup)
		out << "[" << dec << m_matchGroup << "]";

	out << "(";
	for (Pattern const& argument: m_arguments)
		out << argument.toString() << ", ";
	out << ")";
	return out.str();
}
/// @returns true iff @a _item is compatible with this pattern ignoring arguments and match
/// groups: an UndefinedItem pattern accepts everything, otherwise the types must be equal
/// and, if a data match is required, the data as well.
bool Pattern::matchesBaseItem(AssemblyItem const& _item) const
{
	return
		m_type == UndefinedItem ||
		(m_type == _item.type() && (!m_requireDataMatch || m_data == _item.data()));
}
/// @returns the expression recorded for this pattern's match group.
/// Throws OptimizerException if the pattern has no match group, no match group map, or if
/// nothing has been recorded for the group yet.
Pattern::Expression const& Pattern::matchGroupValue() const
{
	assertThrow(m_matchGroup > 0, OptimizerException, "");
	assertThrow(!!m_matchGroups, OptimizerException, "");
	// Look the group up with find(): operator[] would insert a null entry for a missing
	// group, which Pattern::matches() would later treat as recorded and dereference.
	auto const recorded = m_matchGroups->find(m_matchGroup);
	assertThrow(recorded != m_matchGroups->end() && !!recorded->second, OptimizerException, "");
	return *recorded->second;
}
/// Builds a template from @a _pattern: patterns that belong to a match group contribute the
/// id of their matched expression, all others contribute their assembly item. The pattern's
/// arguments are converted recursively.
ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern)
{
	hasId = _pattern.matchGroup() != 0;
	if (hasId)
		id = _pattern.id();
	else
		item = _pattern.toAssemblyItem();

	for (auto const& argumentPattern: _pattern.arguments())
		arguments.emplace_back(argumentPattern);
}
/// @returns the id (if present) or the item, followed by the argument templates in parentheses.
string ExpressionTemplate::toString() const
{
	stringstream out;
	if (!hasId)
		out << item;
	else
		out << id;
	out << "(";
	for (ExpressionTemplate const& argument: arguments)
		out << argument.toString();
	out << ")";
	return out.str();
}

77
libevmcore/ExpressionClasses.h

@ -26,13 +26,16 @@
#include <vector>
#include <map>
#include <memory>
#include <libdevcore/Common.h>
#include <libevmcore/AssemblyItem.h>
namespace dev
{
namespace eth
{
class AssemblyItem;
class Pattern;
struct ExpressionTemplate;
/**
* Collection of classes of equivalent expressions that can also determine the class of an expression.
@ -60,16 +63,88 @@ public:
/// @returns the number of classes.
Id size() const { return m_representatives.size(); }
std::string fullDAGToString(Id _id);
private:
/// Tries to simplify the given expression.
/// @returns its class if possible or Id(-1) otherwise.
/// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed
Id tryToSimplify(Expression const& _expr, bool _secondRun = false);
/// Rebuilds an expression from a (matched) pattern.
Id rebuildExpression(ExpressionTemplate const& _template);
std::vector<std::pair<Pattern, std::function<Pattern()>>> createRules() const;
/// Expression equivalence class representatives - we only store one item of an equivalence.
std::vector<Expression> m_representatives;
std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItem;
};
/**
* Pattern to match against an expression.
* Also stores matched expressions to retrieve them later, for constructing new expressions using
* ExpressionTemplate.
*/
class Pattern
{
public:
using Expression = ExpressionClasses::Expression;
using Id = ExpressionClasses::Id;
/// Matches a specific constant value (delegates to the u256 constructor).
Pattern(unsigned _value): Pattern(u256(_value)) {}
/// Matches a Push item whose data is exactly @a _value.
Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {}
/// Matches a specific assembly item type or anything if not given.
/// The item's data is not compared for patterns created this way.
Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {}
/// Matches a given instruction with given arguments.
Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {});
/// Sets this pattern to be part of the match group with the identifier @a _group.
/// Inside one rule, all patterns in the same match group have to match expressions from the
/// same expression equivalence class.
void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups);
/// @returns the match group identifier, or 0 if the pattern is not part of a match group.
unsigned matchGroup() const { return m_matchGroup; }
/// @returns true iff @a _expr (with arguments resolved through @a _classes) matches this pattern.
bool matches(Expression const& _expr, ExpressionClasses const& _classes) const;
/// @returns an assembly item built from the pattern's type and data.
AssemblyItem toAssemblyItem() const { return AssemblyItem(m_type, m_data); }
/// @returns a copy of the argument patterns.
std::vector<Pattern> arguments() const { return m_arguments; }
/// @returns the id of the matched expression if this pattern is part of a match group.
Id id() const { return matchGroupValue().id; }
/// @returns the data of the matched expression if this pattern is part of a match group.
u256 d() const { return matchGroupValue().item->data(); }
/// @returns a textual representation of the pattern.
std::string toString() const;
private:
/// Compares only type and (if required) data of @a _item, ignoring arguments and match groups.
bool matchesBaseItem(AssemblyItem const& _item) const;
/// @returns the expression recorded for this pattern's match group.
Expression const& matchGroupValue() const;
AssemblyItemType m_type;
/// If true, the data of a matched item has to be equal to m_data.
bool m_requireDataMatch = false;
u256 m_data = 0;
std::vector<Pattern> m_arguments;
/// Match group identifier; 0 means "no match group".
unsigned m_matchGroup = 0;
/// Map of recorded matches per group, supplied through setMatchGroup(); not owned by the pattern.
std::map<unsigned, Expression const*>* m_matchGroups = nullptr;
};
/**
* Template for a new expression that can be built from matched patterns.
*/
struct ExpressionTemplate
{
using Expression = ExpressionClasses::Expression;
using Id = ExpressionClasses::Id;
/// Converts @a _pattern (and, recursively, its arguments) into a template.
explicit ExpressionTemplate(Pattern const& _pattern);
/// @returns a textual representation of the template.
std::string toString() const;
/// True iff @a id is set; in that case @a item is not used.
bool hasId = false;
/// Id of the matched expression, if available.
Id id = Id(-1);
/// Otherwise, assembly item.
AssemblyItem item = UndefinedItem;
/// Templates for the arguments of the expression.
std::vector<ExpressionTemplate> arguments;
};
}
}

41
test/SolidityOptimizer.cpp

@ -240,6 +240,12 @@ BOOST_AUTO_TEST_CASE(cse_unneeded_items)
checkCSE(input, input);
}
BOOST_AUTO_TEST_CASE(cse_constant_addition)
{
	// Adding two pushed constants is expected to collapse into a single pushed constant.
	AssemblyItems input{u256(7), u256(8), Instruction::ADD};
	AssemblyItems const expectation{u256(7 + 8)};
	checkCSE(input, expectation);
}
BOOST_AUTO_TEST_CASE(cse_invariants)
{
AssemblyItems input{
@ -262,6 +268,41 @@ BOOST_AUTO_TEST_CASE(cse_subother)
checkCSE({Instruction::SUB}, {Instruction::SUB});
}
BOOST_AUTO_TEST_CASE(cse_double_negation)
{
	// NOT applied twice is expected to vanish, leaving only the DUP5.
	AssemblyItems const input{Instruction::DUP5, Instruction::NOT, Instruction::NOT};
	AssemblyItems const expectation{Instruction::DUP5};
	checkCSE(input, expectation);
}
BOOST_AUTO_TEST_CASE(cse_associativity)
{
	// x | (x | 0) is expected to reduce to a plain copy of x.
	AssemblyItems input{
		Instruction::DUP1,
		Instruction::DUP1,
		u256(0),
		Instruction::OR,
		Instruction::OR
	};
	AssemblyItems const expectation{Instruction::DUP1};
	checkCSE(input, expectation);
}
BOOST_AUTO_TEST_CASE(cse_associativity2)
{
	// The constants 0, 2, 1 and 2 scattered over a chain of additions are expected to be
	// folded into a single constant 5 that is added last.
	AssemblyItems input{
		u256(0),
		Instruction::DUP2,
		u256(2),
		u256(1),
		Instruction::DUP6,
		Instruction::ADD,
		u256(2),
		Instruction::ADD,
		Instruction::ADD,
		Instruction::ADD,
		Instruction::ADD
	};
	AssemblyItems const expectation{
		Instruction::DUP2,
		Instruction::DUP2,
		Instruction::ADD,
		u256(5),
		Instruction::ADD
	};
	checkCSE(input, expectation);
}
BOOST_AUTO_TEST_SUITE_END()
}

Loading…
Cancel
Save