Browse Source

Code generation part 2.

cl-refactor
chriseth 10 years ago
parent
commit
e8463940d6
  1. 108
      libevmcore/Assembly.cpp
  2. 320
      libevmcore/CommonSubexpressionEliminator.cpp
  3. 37
      libevmcore/CommonSubexpressionEliminator.h
  4. 163
      test/SolidityOptimizer.cpp

108
libevmcore/Assembly.cpp

@ -288,12 +288,6 @@ inline bool matches(AssemblyItemsConstRef _a, AssemblyItemsConstRef _b)
return true;
}
/// @returns true iff @a _post contains strictly more POP items than @a _pre.
inline bool popCountIncreased(AssemblyItemsConstRef _pre, AssemblyItems const& _post)
{
	// Predicate: does the item match a plain POP instruction?
	auto matchesPop = [](AssemblyItem const& _candidate) -> bool
	{
		return _candidate.match(AssemblyItem(Instruction::POP));
	};
	auto const popsAfter = count_if(begin(_post), end(_post), matchesPop);
	auto const popsBefore = count_if(begin(_pre), end(_pre), matchesPop);
	return popsAfter > popsBefore;
}
//@todo this has to move to a special optimizer class soon
template<class Iterator>
unsigned bytesRequiredBySlice(Iterator _begin, Iterator _end)
@ -313,29 +307,6 @@ Assembly& Assembly::optimise(bool _enable)
{
if (!_enable)
return *this;
auto signextend = [](u256 a, u256 b) -> u256
{
if (a >= 31)
return b;
unsigned testBit = unsigned(a) * 8 + 7;
u256 mask = (u256(1) << testBit) - 1;
return boost::multiprecision::bit_test(b, testBit) ? b | ~mask : b & mask;
};
map<Instruction, function<u256(u256, u256)>> const c_simple =
{
{ Instruction::SUB, [](u256 a, u256 b)->u256{return a - b;} },
{ Instruction::DIV, [](u256 a, u256 b)->u256{return a / b;} },
{ Instruction::SDIV, [](u256 a, u256 b)->u256{return s2u(u2s(a) / u2s(b));} },
{ Instruction::MOD, [](u256 a, u256 b)->u256{return a % b;} },
{ Instruction::SMOD, [](u256 a, u256 b)->u256{return s2u(u2s(a) % u2s(b));} },
{ Instruction::EXP, [](u256 a, u256 b)->u256{return (u256)boost::multiprecision::powm((bigint)a, (bigint)b, bigint(1) << 256);} },
{ Instruction::SIGNEXTEND, signextend },
{ Instruction::LT, [](u256 a, u256 b)->u256{return a < b ? 1 : 0;} },
{ Instruction::GT, [](u256 a, u256 b)->u256{return a > b ? 1 : 0;} },
{ Instruction::SLT, [](u256 a, u256 b)->u256{return u2s(a) < u2s(b) ? 1 : 0;} },
{ Instruction::SGT, [](u256 a, u256 b)->u256{return u2s(a) > u2s(b) ? 1 : 0;} },
{ Instruction::EQ, [](u256 a, u256 b)->u256{return a == b ? 1 : 0;} },
};
map<Instruction, function<u256(u256, u256)>> const c_associative =
{
{ Instruction::ADD, [](u256 a, u256 b)->u256{return a + b;} },
@ -358,8 +329,6 @@ Assembly& Assembly::optimise(bool _enable)
{ { Instruction::ISZERO, Instruction::ISZERO }, [](AssemblyItemsConstRef) -> AssemblyItems { return {}; } },
};
for (auto const& i: c_simple)
rules.push_back({ { Push, Push, i.first }, [&](AssemblyItemsConstRef m) -> AssemblyItems { return { i.second(m[1].data(), m[0].data()) }; } });
for (auto const& i: c_associative)
{
rules.push_back({ { Push, Push, i.first }, [&](AssemblyItemsConstRef m) -> AssemblyItems { return { i.second(m[1].data(), m[0].data()) }; } });
@ -371,64 +340,33 @@ Assembly& Assembly::optimise(bool _enable)
// jump to next instruction
rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].m_data == m[2].m_data) return {m[2]}; else return m.toVector(); }});
// pop optimization, do not compute values that are popped again anyway
rules.push_back({ { AssemblyItem(UndefinedItem), Instruction::POP }, [](AssemblyItemsConstRef m) -> AssemblyItems
{
if (m[0].type() != Operation)
return m.toVector();
Instruction instr = m[0].instruction();
if (Instruction::DUP1 <= instr && instr <= Instruction::DUP16)
return {};
InstructionInfo info = instructionInfo(instr);
if (info.sideEffects || info.additional != 0 || info.ret != 1)
return m.toVector();
return AssemblyItems(info.args, Instruction::POP);
} });
// compute constants close to powers of two by expressions
auto computeConstants = [](AssemblyItemsConstRef m) -> AssemblyItems
{
u256 const& c = m[0].data();
unsigned const minBits = 4 * 8;
if (c < (bigint(1) << minBits))
return m.toVector(); // we need at least "PUSH1 <bits> PUSH1 <2> EXP"
if (c == u256(-1))
return {u256(0), Instruction::NOT};
for (unsigned bits = minBits; bits < 256; ++bits)
{
bigint const diff = c - (bigint(1) << bits);
if (abs(diff) > 0xff)
continue;
AssemblyItems powerOfTwo{u256(bits), u256(2), Instruction::EXP};
if (diff == 0)
return powerOfTwo;
return AssemblyItems{u256(abs(diff))} + powerOfTwo +
AssemblyItems{diff > 0 ? Instruction::ADD : Instruction::SUB};
}
return m.toVector();
};
rules.push_back({{Push}, computeConstants});
copt << *this;
unsigned total = 0;
for (unsigned count = 1; count > 0; total += count)
{
count = 0;
copt << "Performing common subexpression elimination...";
AssemblyItems optimizedItems;
for (auto iter = m_items.begin(); iter != m_items.end(); ++iter)
for (auto iter = m_items.begin(); iter != m_items.end();)
{
CommonSubexpressionEliminator eliminator;
auto orig = iter;
iter = eliminator.feedItems(iter, m_items.end());
optimizedItems += eliminator.getOptimizedItems();
AssemblyItems optItems = eliminator.getOptimizedItems();
copt << "Old size: " << (iter - orig) << ", new size: " << optItems.size();
if (optItems.size() < size_t(iter - orig))
{
// replace items
count++;
for (auto moveIter = optItems.begin(); moveIter != optItems.end(); ++orig, ++moveIter)
*orig = move(*moveIter);
iter = m_items.erase(orig, iter);
}
if (iter != m_items.end())
optimizedItems.push_back(*iter);
++iter;
}
copt << "Old size: " << m_items.size() << ", new size: " << optimizedItems.size();
// swap(m_items, optimizedItems);
copt << *this;
unsigned total = 0;
for (unsigned count = 1; count > 0; total += count)
{
count = 0;
for (unsigned i = 0; i < m_items.size(); ++i)
{
for (auto const& r: rules)
@ -437,12 +375,10 @@ Assembly& Assembly::optimise(bool _enable)
if (matches(vr, &r.first))
{
auto rw = r.second(vr);
unsigned const vrSizeInBytes = bytesRequiredBySlice(vr.begin(), vr.end());
unsigned const rwSizeInBytes = bytesRequiredBySlice(rw.begin(), rw.end());
if (rwSizeInBytes < vrSizeInBytes || (rwSizeInBytes == vrSizeInBytes && popCountIncreased(vr, rw)))
if (rw.size() < vr.size())
{
copt << vr << "matches" << AssemblyItemsConstRef(&r.first) << "becomes...";
copt << AssemblyItemsConstRef(&rw);
copt << "Rule " << vr << " matches " << AssemblyItemsConstRef(&r.first) << " becomes...";
copt << AssemblyItemsConstRef(&rw) << "\n";
if (rw.size() > vr.size())
{
// create hole in the vector
@ -456,7 +392,7 @@ Assembly& Assembly::optimise(bool _enable)
copy(rw.begin(), rw.end(), m_items.begin() + i);
count++;
copt << "Now:\n" << m_items;
copt << "Now:" << m_items;
}
}
}

320
libevmcore/CommonSubexpressionEliminator.cpp

@ -31,10 +31,37 @@ using namespace dev;
using namespace dev::eth;
/// Builds two stack layouts from the analysis state - the "current" one (before the
/// analysed items) and the "target" one (after them) - and passes them, together with
/// the equivalence classes, to a fresh CSECodeGenerator.
/// @returns the items produced by CSECodeGenerator::generateCode.
vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
{
map<int, EquivalenceClassId> currentStackContents;
map<int, EquivalenceClassId> targetStackContents;
// Start one above the final height, or lower if m_stackElements has an entry further down.
// NOTE(review): this relies on m_stackElements.begin() holding the smallest height, i.e. on
// an ordered container keyed by (height, sequence) - confirm against the declaration.
int minHeight = m_stackHeight + 1;
if (!m_stackElements.empty())
minHeight = min(minHeight, m_stackElements.begin()->first.first);
// Always walk up to at least height 0 so that every slot at or below 0 in range is
// entered in the current layout, even when the final height is negative.
for (int height = minHeight; height <= max(0, m_stackHeight); ++height)
{
// make sure it is created
EquivalenceClassId c = getStackElement(height);
// Heights <= 0 are recorded in the current layout as the class of DUP(1 - height).
if (height <= 0)
currentStackContents[height] = getClass(AssemblyItem(dupInstruction(1 - height)));
// Only slots up to the final stack height are part of the target layout.
if (height <= m_stackHeight)
targetStackContents[height] = c;
}
// Debug info:
//stream(cout, currentStackContents, targetStackContents);
return CSECodeGenerator().generateCode(currentStackContents, targetStackContents, m_equivalenceClasses);
}
ostream& CommonSubexpressionEliminator::stream(
ostream& _out,
map<int, EquivalenceClassId> _currentStack,
map<int, EquivalenceClassId> _targetStack
) const
{
auto streamEquivalenceClass = [this](ostream& _out, EquivalenceClassId _id)
{
auto const& eqClass = m_equivalenceClasses[_id];
auto const& eqClass = m_equivalenceClasses.at(_id);
_out << " " << _id << ": " << *eqClass.first;
_out << "(";
for (EquivalenceClassId arg: eqClass.second)
@ -42,59 +69,36 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
_out << ")" << endl;
};
cout << dec;
cout << "Optimizer results:" << endl;
cout << "Final stack height: " << m_stackHeight << endl;
cout << "Stack elements: " << endl;
_out << "Optimizer analysis:" << endl;
_out << "Final stack height: " << dec << m_stackHeight << endl;
_out << "Stack elements: " << endl;
for (auto const& it: m_stackElements)
{
cout
<< " " << dec << it.first.first << "(" << it.first.second << ") = ";
streamEquivalenceClass(cout, it.second);
_out << " " << dec << it.first.first << "(" << it.first.second << ") = ";
streamEquivalenceClass(_out, it.second);
}
cout << "Equivalence classes: " << endl;
_out << "Equivalence classes: " << endl;
for (EquivalenceClassId eqClass = 0; eqClass < m_equivalenceClasses.size(); ++eqClass)
streamEquivalenceClass(cout, eqClass);
cout << "----------------------------" << endl;
streamEquivalenceClass(_out, eqClass);
map<int, EquivalenceClassId> currentStackContents;
map<int, EquivalenceClassId> targetStackContents;
int minStackHeight = m_stackHeight;
if (m_stackElements.size() > 0)
minStackHeight = min(minStackHeight, m_stackElements.begin()->first.first);
for (int stackHeight = minStackHeight; stackHeight <= m_stackHeight; ++stackHeight)
_out << "Current stack: " << endl;
for (auto const& it: _currentStack)
{
if (stackHeight <= 0)
currentStackContents[stackHeight] = getClass(AssemblyItem(dupInstruction(1 - stackHeight)));
targetStackContents[stackHeight] = getStackElement(stackHeight);
_out << " " << dec << it.first << ": ";
streamEquivalenceClass(_out, it.second);
}
return CSECodeGenerator().generateCode(currentStackContents, targetStackContents, m_equivalenceClasses);
}
bool CommonSubexpressionEliminator::breaksBasicBlock(AssemblyItem const& _item)
{
switch (_item.type())
_out << "Target stack: " << endl;
for (auto const& it: _targetStack)
{
case UndefinedItem:
case Tag:
return true;
case Push:
case PushString:
case PushTag:
case PushSub:
case PushSubSize:
case PushProgramSize:
case PushData:
return false;
case Operation:
return instructionInfo(_item.instruction()).sideEffects;
_out << " " << dec << it.first << ": ";
streamEquivalenceClass(_out, it.second);
}
return _out;
}
void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item)
{
cout << _item << endl;
if (_item.type() != Operation)
{
if (_item.deposit() != 1)
@ -105,12 +109,12 @@ void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item)
{
Instruction instruction = _item.instruction();
InstructionInfo info = instructionInfo(instruction);
if (Instruction::DUP1 <= instruction && instruction <= Instruction::DUP16)
if (SemanticInformation::isDupInstruction(_item))
setStackElement(
m_stackHeight + 1,
getStackElement(m_stackHeight - int(instruction) + int(Instruction::DUP1))
);
else if (Instruction::SWAP1 <= instruction && instruction <= Instruction::SWAP16)
else if (SemanticInformation::isSwapInstruction(_item))
swapStackElements(
m_stackHeight,
m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1)
@ -168,61 +172,75 @@ EquivalenceClassId CommonSubexpressionEliminator::getClass(
EquivalenceClassIds const& _arguments
)
{
// do a clever search, i.e.
// TODO: do a clever search, i.e.
// - check for the presence of constants in the argument classes and do arithmetic
// - check whether the two items are equal for a SUB instruction
// - check whether 0 or 1 is in one of the classes for a MUL
// - for commutative opcodes, sort the arguments before searching
EquivalenceClassIds args = _arguments;
if (SemanticInformation::isCommutativeOperation(_item))
sort(args.begin(), args.end());
//@todo use a better data structure for search here
for (EquivalenceClassId c = 0; c < m_equivalenceClasses.size(); ++c)
{
AssemblyItem const& classItem = *m_equivalenceClasses[c].first;
AssemblyItem const& classItem = *m_equivalenceClasses.at(c).first;
if (classItem != _item)
continue;
if (_arguments.size() != m_equivalenceClasses[c].second.size())
BOOST_THROW_EXCEPTION(
OptimizerException() <<
errinfo_comment("Equal assembly items with different number of arguments.")
assertThrow(
args.size() == m_equivalenceClasses.at(c).second.size(),
OptimizerException,
"Equal assembly items with different number of arguments."
);
if (equal(_arguments.begin(), _arguments.end(), m_equivalenceClasses[c].second.begin()))
if (equal(args.begin(), args.end(), m_equivalenceClasses.at(c).second.begin()))
return c;
}
if (_item.type() == Operation && _arguments.size() == 2 && all_of(
_arguments.begin(),
_arguments.end(),
[this](EquivalenceClassId eqc) { return m_equivalenceClasses[eqc].first->match(Push); }))
// constant folding
if (_item.type() == Operation && args.size() == 2 && all_of(
args.begin(),
args.end(),
[this](EquivalenceClassId eqc) { return m_equivalenceClasses.at(eqc).first->match(Push); }))
{
auto signextend = [](u256 a, u256 b) -> u256
{
if (a >= 31)
return b;
unsigned testBit = unsigned(a) * 8 + 7;
u256 mask = (u256(1) << testBit) - 1;
return boost::multiprecision::bit_test(b, testBit) ? b | ~mask : b & mask;
};
map<Instruction, function<u256(u256, u256)>> const arithmetics =
{
//@todo these are not correct (e.g. for div by zero)
{ Instruction::SUB, [](u256 a, u256 b)->u256{return a - b;} },
{ Instruction::DIV, [](u256 a, u256 b)->u256{return a / b;} },
{ Instruction::SDIV, [](u256 a, u256 b)->u256{return s2u(u2s(a) / u2s(b));} },
{ Instruction::MOD, [](u256 a, u256 b)->u256{return a % b;} },
{ Instruction::SMOD, [](u256 a, u256 b)->u256{return s2u(u2s(a) % u2s(b));} },
{ Instruction::EXP, [](u256 a, u256 b)->u256{return (u256)boost::multiprecision::powm((bigint)a, (bigint)b, bigint(1) << 256);} },
//{ Instruction::SIGNEXTEND, signextend },
{ Instruction::LT, [](u256 a, u256 b)->u256{return a < b ? 1 : 0;} },
{ Instruction::GT, [](u256 a, u256 b)->u256{return a > b ? 1 : 0;} },
{ Instruction::SLT, [](u256 a, u256 b)->u256{return u2s(a) < u2s(b) ? 1 : 0;} },
{ Instruction::SGT, [](u256 a, u256 b)->u256{return u2s(a) > u2s(b) ? 1 : 0;} },
{ Instruction::EQ, [](u256 a, u256 b)->u256{return a == b ? 1 : 0;} },
{ Instruction::ADD, [](u256 a, u256 b)->u256{return a + b;} },
{ Instruction::MUL, [](u256 a, u256 b)->u256{return a * b;} },
{ Instruction::AND, [](u256 a, u256 b)->u256{return a & b;} },
{ Instruction::OR, [](u256 a, u256 b)->u256{return a | b;} },
{ Instruction::XOR, [](u256 a, u256 b)->u256{return a ^ b;} },
{ Instruction::SUB, [](u256 a, u256 b) -> u256 {return a - b; } },
{ Instruction::DIV, [](u256 a, u256 b) -> u256 {return b == 0 ? 0 : a / b; } },
{ Instruction::SDIV, [](u256 a, u256 b) -> u256 { return b == 0 ? 0 : s2u(u2s(a) / u2s(b)); } },
{ Instruction::MOD, [](u256 a, u256 b) -> u256 { return b == 0 ? 0 : a % b; } },
{ Instruction::SMOD, [](u256 a, u256 b) -> u256 { return b == 0 ? 0 : s2u(u2s(a) % u2s(b)); } },
{ Instruction::EXP, [](u256 a, u256 b) -> u256 { return (u256)boost::multiprecision::powm(bigint(a), bigint(b), bigint(1) << 256); } },
{ Instruction::SIGNEXTEND, signextend },
{ Instruction::LT, [](u256 a, u256 b) -> u256 { return a < b ? 1 : 0; } },
{ Instruction::GT, [](u256 a, u256 b) -> u256 { return a > b ? 1 : 0; } },
{ Instruction::SLT, [](u256 a, u256 b) -> u256 { return u2s(a) < u2s(b) ? 1 : 0; } },
{ Instruction::SGT, [](u256 a, u256 b) -> u256 { return u2s(a) > u2s(b) ? 1 : 0; } },
{ Instruction::EQ, [](u256 a, u256 b) -> u256 { return a == b ? 1 : 0; } },
{ Instruction::ADD, [](u256 a, u256 b) -> u256 { return a + b; } },
{ Instruction::MUL, [](u256 a, u256 b) -> u256 { return a * b; } },
{ Instruction::AND, [](u256 a, u256 b) -> u256 { return a & b; } },
{ Instruction::OR, [](u256 a, u256 b) -> u256 { return a | b; } },
{ Instruction::XOR, [](u256 a, u256 b) -> u256 { return a ^ b; } },
};
if (arithmetics.count(_item.instruction()))
{
u256 result = arithmetics.at(_item.instruction())(
m_equivalenceClasses[_arguments[0]].first->data(),
m_equivalenceClasses[_arguments[1]].first->data()
m_equivalenceClasses.at(args[0]).first->data(),
m_equivalenceClasses.at(args[1]).first->data()
);
m_spareAssemblyItem.push_back(make_shared<AssemblyItem>(result));
return getClass(*m_spareAssemblyItem.back());
}
}
m_equivalenceClasses.push_back(make_pair(&_item, _arguments));
m_equivalenceClasses.push_back(make_pair(&_item, args));
return m_equivalenceClasses.size() - 1;
}
@ -238,6 +256,64 @@ unsigned CommonSubexpressionEliminator::getNextStackElementSequence(int _stackHe
return 0;
}
/// @returns true if @a _item ends the basic block that is being analysed.
/// Tags and undefined items always do; pushes of any kind never do. An operation does
/// if it has side effects or takes more than two arguments, except that DUP and SWAP
/// instructions never break the block. Any item type not listed in the switch is
/// conservatively reported as breaking the block.
bool SemanticInformation::breaksBasicBlock(AssemblyItem const& _item)
{
	switch (_item.type())
	{
	case UndefinedItem:
	case Tag:
		return true;
	case Push:
	case PushString:
	case PushTag:
	case PushSub:
	case PushSubSize:
	case PushProgramSize:
	case PushData:
		return false;
	case Operation:
	{
		if (isSwapInstruction(_item) || isDupInstruction(_item))
			return false;
		InstructionInfo info = instructionInfo(_item.instruction());
		// the second requirement will be lifted once it is implemented
		return info.sideEffects || info.args > 2;
	}
	}
	// Reached only for an item type that has no case above. Without this return, control
	// would flow off the end of a non-void function (undefined behaviour). It is placed
	// after the switch rather than as a default label so that compilers can still warn
	// about enumerators that are missing a case.
	return true;
}
/// @returns true if @a _item is one of the operations ADD, MUL, EQ, AND, OR or XOR;
/// false for every other operation and for all non-operation items.
bool SemanticInformation::isCommutativeOperation(AssemblyItem const& _item)
{
	// Only query the instruction once we know the item actually is an operation.
	if (_item.type() != Operation)
		return false;
	Instruction const op = _item.instruction();
	return
		op == Instruction::ADD ||
		op == Instruction::MUL ||
		op == Instruction::EQ ||
		op == Instruction::AND ||
		op == Instruction::OR ||
		op == Instruction::XOR;
}
/// @returns true if @a _item is an operation in the range DUP1 to DUP16 (inclusive).
bool SemanticInformation::isDupInstruction(AssemblyItem const& _item)
{
	if (_item.type() == Operation)
	{
		Instruction const op = _item.instruction();
		return op >= Instruction::DUP1 && op <= Instruction::DUP16;
	}
	return false;
}
/// @returns true if @a _item is an operation in the range SWAP1 to SWAP16 (inclusive).
bool SemanticInformation::isSwapInstruction(AssemblyItem const& _item)
{
	if (_item.type() == Operation)
	{
		Instruction const op = _item.instruction();
		return op >= Instruction::SWAP1 && op <= Instruction::SWAP16;
	}
	return false;
}
AssemblyItems CSECodeGenerator::generateCode(
map<int, EquivalenceClassId> const& _currentStack,
map<int, EquivalenceClassId> const& _targetStackContents,
@ -249,24 +325,48 @@ AssemblyItems CSECodeGenerator::generateCode(
m_stack = _currentStack;
m_equivalenceClasses = _equivalenceClasses;
for (auto const& item: m_stack)
if (!m_classPositions.count(item.second))
m_classPositions[item.second] = item.first;
// @todo: provide information about the positions of copies of class elements
// generate the dependency graph
for (auto const& stackContent: _targetStackContents)
for (auto const& targetItem: _targetStackContents)
{
m_finalClasses.insert(stackContent.second);
addDependencies(stackContent.second);
m_finalClasses.insert(targetItem.second);
addDependencies(targetItem.second);
}
for (auto const& cid: m_finalClasses)
generateClassElement(cid);
// generate the actual elements
for (auto const& targetItem: _targetStackContents)
{
removeStackTopIfPossible();
int position = generateClassElement(targetItem.second);
if (position == targetItem.first)
continue;
if (position < targetItem.first)
// it is already at its target, we need another copy
appendDup(position);
else
appendSwap(position);
appendSwap(targetItem.first);
}
// @TODO shuffle and copy the elements
// remove surplus elements
while (removeStackTopIfPossible())
{
// no-op
}
cout << "--------------- generated code: ---------------" << endl;
for (auto const& it: m_generatedItems)
cout << it << endl;
cout << "-----------------------------" << endl;
// check validity
int finalHeight = 0;
if (!_targetStackContents.empty())
finalHeight = (--_targetStackContents.end())->first;
else if (!_currentStack.empty())
finalHeight = _currentStack.begin()->first - 1;
else
finalHeight = 0;
assertThrow(finalHeight == m_stackHeight, OptimizerException, "Incorrect final stack height.");
return m_generatedItems;
}
@ -275,7 +375,7 @@ void CSECodeGenerator::addDependencies(EquivalenceClassId _c)
{
if (m_neededBy.count(_c))
return;
for (EquivalenceClassId argument: m_equivalenceClasses[_c].second)
for (EquivalenceClassId argument: m_equivalenceClasses.at(_c).second)
{
addDependencies(argument);
m_neededBy.insert(make_pair(argument, _c));
@ -285,13 +385,15 @@ void CSECodeGenerator::addDependencies(EquivalenceClassId _c)
int CSECodeGenerator::generateClassElement(EquivalenceClassId _c)
{
if (m_classPositions.count(_c))
return m_classPositions[_c];
{
assertThrow(
m_classPositions[_c] != c_invalidPosition,
OptimizerException,
"Element already removed but still needed."
);
EquivalenceClassIds const& arguments = m_equivalenceClasses[_c].second;
return m_classPositions[_c];
}
EquivalenceClassIds const& arguments = m_equivalenceClasses.at(_c).second;
for (EquivalenceClassId arg: boost::adaptors::reverse(arguments))
generateClassElement(arg);
@ -312,10 +414,10 @@ int CSECodeGenerator::generateClassElement(EquivalenceClassId _c)
else if (canBeRemoved(arguments[0], _c))
{
appendSwap(m_stackHeight - 1);
appendSwap(generateClassElement(arguments[1]));
appendSwap(generateClassElement(arguments[0]));
}
else
appendDup(generateClassElement(arguments[1]));
appendDup(generateClassElement(arguments[0]));
}
else
{
@ -343,10 +445,20 @@ int CSECodeGenerator::generateClassElement(EquivalenceClassId _c)
OptimizerException,
"Opcodes with more than two arguments not implemented yet."
);
for (size_t i = 0; i < arguments.size(); ++i)
assertThrow(m_stack[m_stackHeight - i] == arguments[i], OptimizerException, "Expected arguments not present." );
AssemblyItem const& item = *m_equivalenceClasses.at(_c).first;
while (SemanticInformation::isCommutativeOperation(item) &&
!m_generatedItems.empty() &&
m_generatedItems.back() == AssemblyItem(Instruction::SWAP1))
appendSwap(m_stackHeight - 1);
for (auto arg: arguments)
if (canBeRemoved(arg, _c))
m_classPositions[arguments[1]] = c_invalidPosition;
appendItem(*m_equivalenceClasses[_c].first);
m_classPositions[arg] = c_invalidPosition;
for (size_t i = 0; i < arguments.size(); ++i)
m_stack.erase(m_stackHeight - i);
appendItem(*m_equivalenceClasses.at(_c).first);
m_stack[m_stackHeight] = _c;
return m_classPositions[_c] = m_stackHeight;
}
@ -365,9 +477,22 @@ bool CSECodeGenerator::canBeRemoved(EquivalenceClassId _element, EquivalenceClas
return true;
}
bool CSECodeGenerator::removeStackTopIfPossible()
{
if (m_stack.empty())
return false;
assertThrow(m_stack.count(m_stackHeight), OptimizerException, "");
EquivalenceClassId top = m_stack[m_stackHeight];
if (!canBeRemoved(top))
return false;
m_generatedItems.push_back(AssemblyItem(Instruction::POP));
m_stack.erase(m_stackHeight);
m_stackHeight--;
return true;
}
void CSECodeGenerator::appendDup(int _fromPosition)
{
m_generatedItems.push_back(AssemblyItem(swapInstruction(1 + m_stackHeight - _fromPosition)));
int nr = 1 + m_stackHeight - _fromPosition;
assertThrow(1 <= nr && nr <= 16, OptimizerException, "Stack too deep.");
m_generatedItems.push_back(AssemblyItem(dupInstruction(nr)));
@ -388,6 +513,13 @@ void CSECodeGenerator::appendSwap(int _fromPosition)
if (m_classPositions[m_stack[_fromPosition]] == _fromPosition)
m_classPositions[m_stack[_fromPosition]] = m_stackHeight;
swap(m_stack[m_stackHeight], m_stack[_fromPosition]);
if (m_generatedItems.size() >= 2 &&
SemanticInformation::isSwapInstruction(m_generatedItems.back()) &&
*(m_generatedItems.end() - 2) == m_generatedItems.back())
{
m_generatedItems.pop_back();
m_generatedItems.pop_back();
}
}
void CSECodeGenerator::appendItem(AssemblyItem const& _item)

37
libevmcore/CommonSubexpressionEliminator.h

@ -25,6 +25,7 @@
#include <vector>
#include <map>
#include <ostream>
#include <libdevcore/CommonIO.h>
#include <libdevcore/Exceptions.h>
@ -63,9 +64,14 @@ public:
/// @returns the resulting items after optimization.
AssemblyItems getOptimizedItems();
/// Streams debugging information to @a _out.
std::ostream& stream(
std::ostream& _out,
std::map<int, EquivalenceClassId> _currentStack = {},
std::map<int, EquivalenceClassId> _targetStack = {}
) const;
private:
/// @returns true if the given items starts a new basic block
bool breaksBasicBlock(AssemblyItem const& _item);
/// Feeds the item into the system for analysis.
void feedItem(AssemblyItem const& _item);
@ -92,9 +98,26 @@ private:
std::vector<std::pair<AssemblyItem const*, EquivalenceClassIds>> m_equivalenceClasses;
/// List of items generated during analysis.
std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItem;
};
/**
* Helper functions to provide context-independent information about assembly items.
* All members are static and take the item to inspect as their only argument.
*/
struct SemanticInformation
{
/// @returns true if the given item ends the basic block being analysed: tags and undefined
/// items do, pushes do not, and operations do if they have side effects or take more than
/// two arguments (DUP and SWAP instructions never do).
static bool breaksBasicBlock(AssemblyItem const& _item);
/// @returns true if the item is a two-argument operation whose value does not depend on the
/// order of its arguments.
static bool isCommutativeOperation(AssemblyItem const& _item);
/// @returns true if the item is one of the operations DUP1 to DUP16.
static bool isDupInstruction(AssemblyItem const& _item);
/// @returns true if the item is one of the operations SWAP1 to SWAP16.
static bool isSwapInstruction(AssemblyItem const& _item);
};
/**
* Unit that generates code from current stack layout, target stack layout and information about
* the equivalence classes.
*/
class CSECodeGenerator
{
public:
@ -116,8 +139,11 @@ private:
/// @returns the stack position of the element.
int generateClassElement(EquivalenceClassId _c);
/// @returns true if @a _element can be removed while computing @a _result.
bool canBeRemoved(EquivalenceClassId _element, EquivalenceClassId _result);
/// @returns true if @a _element can be removed - in general or, if given, while computing @a _result.
bool canBeRemoved(EquivalenceClassId _element, EquivalenceClassId _result = EquivalenceClassId(-1));
/// Appends code to remove the topmost stack element if it can be removed.
bool removeStackTopIfPossible();
/// Appends a dup instruction to m_generatedItems to retrieve the element at the given stack position.
void appendDup(int _fromPosition);
@ -150,8 +176,7 @@ _AssemblyItemIterator CommonSubexpressionEliminator::feedItems(
_AssemblyItemIterator _end
)
{
std::cout << "---------------Feeding items to the CSE engine:--------------" << std::endl;
for (; _iterator != _end && !breaksBasicBlock(*_iterator); ++_iterator)
for (; _iterator != _end && !SemanticInformation::breaksBasicBlock(*_iterator); ++_iterator)
feedItem(*_iterator);
return _iterator;
}

163
test/SolidityOptimizer.cpp

@ -26,8 +26,11 @@
#include <boost/test/unit_test.hpp>
#include <boost/lexical_cast.hpp>
#include <test/solidityExecutionFramework.h>
#include <libevmcore/CommonSubexpressionEliminator.h>
#include <libevmcore/Assembly.h>
using namespace std;
using namespace dev::eth;
namespace dev
{
@ -41,16 +44,21 @@ class OptimizerTestFramework: public ExecutionFramework
public:
OptimizerTestFramework() { }
/// Compiles the source code with and without optimizing.
void compileBothVersions(unsigned _expectedSizeDecrease, std::string const& _sourceCode, u256 const& _value = 0, std::string const& _contractName = "") {
void compileBothVersions(
std::string const& _sourceCode,
u256 const& _value = 0,
std::string const& _contractName = ""
)
{
m_optimize = false;
bytes nonOptimizedBytecode = compileAndRun(_sourceCode, _value, _contractName);
m_nonOptimizedContract = m_contractAddress;
m_optimize = true;
bytes optimizedBytecode = compileAndRun(_sourceCode, _value, _contractName);
int sizeDiff = nonOptimizedBytecode.size() - optimizedBytecode.size();
BOOST_CHECK_MESSAGE(sizeDiff == int(_expectedSizeDecrease), "Bytecode shrank by "
+ boost::lexical_cast<string>(sizeDiff) + " bytes, expected: "
+ boost::lexical_cast<string>(_expectedSizeDecrease));
BOOST_CHECK_MESSAGE(
nonOptimizedBytecode.size() > optimizedBytecode.size(),
"Optimizer did not reduce bytecode size."
);
m_optimizedContract = m_contractAddress;
}
@ -81,24 +89,11 @@ BOOST_AUTO_TEST_CASE(smoke_test)
return a;
}
})";
compileBothVersions(29, sourceCode);
compileBothVersions(sourceCode);
compareVersions("f(uint256)", u256(7));
}
BOOST_AUTO_TEST_CASE(large_integers)
{
char const* sourceCode = R"(
contract test {
function f() returns (uint a, uint b) {
a = 0x234234872642837426347000000;
b = 0x10000000000000000000000002;
}
})";
compileBothVersions(36, sourceCode);
compareVersions("f()");
}
BOOST_AUTO_TEST_CASE(invariants)
BOOST_AUTO_TEST_CASE(identities)
{
char const* sourceCode = R"(
contract test {
@ -106,7 +101,7 @@ BOOST_AUTO_TEST_CASE(invariants)
return int(0) | (int(1) * (int(0) ^ (0 + a)));
}
})";
compileBothVersions(41, sourceCode);
compileBothVersions(sourceCode);
compareVersions("f(uint256)", u256(0x12334664));
}
@ -120,7 +115,7 @@ BOOST_AUTO_TEST_CASE(unused_expressions)
data;
}
})";
compileBothVersions(36, sourceCode);
compileBothVersions(sourceCode);
compareVersions("f()");
}
@ -135,10 +130,132 @@ BOOST_AUTO_TEST_CASE(constant_folding_both_sides)
return 98 ^ (7 * ((1 | (x | 1000)) * 40) ^ 102);
}
})";
compileBothVersions(37, sourceCode);
compileBothVersions(sourceCode);
compareVersions("f(uint256)");
}
/// Contract that reads and writes several elements of a uint8 storage array in one function.
/// Compiles it with and without the optimizer and then calls compareVersions for f(uint256)
/// (presumably comparing the results of both builds - compareVersions is defined elsewhere).
BOOST_AUTO_TEST_CASE(storage_access)
{
char const* sourceCode = R"(
contract test {
uint8[40] data;
function f(uint x) returns (uint y) {
data[2] = data[7] = uint8(x);
data[4] = data[2] * 10 + data[3];
}
}
)";
compileBothVersions(sourceCode);
compareVersions("f(uint256)");
}
/// Contract that fills a bytes2[] array from msg.data in a loop and copies it into a bytes5[]
/// array. Compiles it with and without the optimizer and calls compareVersions for f(uint256)
/// with the arguments 0, 10 and 36 (presumably comparing both builds - compareVersions is
/// defined elsewhere).
BOOST_AUTO_TEST_CASE(array_copy)
{
char const* sourceCode = R"(
contract test {
bytes2[] data1;
bytes5[] data2;
function f(uint x) returns (uint l, uint y) {
for (uint i = 0; i < msg.data.length; ++i)
data1[i] = msg.data[i];
data2 = data1;
l = data2.length;
y = uint(data2[x]);
}
}
)";
compileBothVersions(sourceCode);
compareVersions("f(uint256)", 0);
compareVersions("f(uint256)", 10);
compareVersions("f(uint256)", 36);
}
/// Contract whose function f calls f1 both directly and via this.f1. Compiles it with and
/// without the optimizer and calls compareVersions for f(uint256) with the arguments 0, 10
/// and 36 (presumably comparing both builds - compareVersions is defined elsewhere).
BOOST_AUTO_TEST_CASE(function_calls)
{
char const* sourceCode = R"(
contract test {
function f1(uint x) returns (uint) { return x*x; }
function f(uint x) returns (uint) { return f1(7+x) - this.f1(x**9); }
}
)";
compileBothVersions(sourceCode);
compareVersions("f(uint256)", 0);
compareVersions("f(uint256)", 10);
compareVersions("f(uint256)", 36);
}
/// Feeds a sequence with several interleaved SWAP1 instructions directly into the CSE.
/// Requires that every item is consumed (feedItems returns input.end()) and only checks
/// that code generation yields a non-empty result; the exact output is not pinned.
BOOST_AUTO_TEST_CASE(cse_intermediate_swap)
{
eth::CommonSubexpressionEliminator cse;
AssemblyItems input{
Instruction::SWAP1, Instruction::POP, Instruction::ADD, u256(0), Instruction::SWAP1,
Instruction::SLOAD, Instruction::SWAP1, u256(100), Instruction::EXP, Instruction::SWAP1,
Instruction::DIV, u256(0xff), Instruction::AND
};
BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end());
AssemblyItems output = cse.getOptimizedItems();
BOOST_CHECK(!output.empty());
}
/// Feeds DUP2 followed by a push of 0 into the CSE, so the first item already refers to a
/// stack slot that was not produced by the fed items. Requires that all items are consumed
/// and checks that the generated items are identical to the input.
BOOST_AUTO_TEST_CASE(cse_negative_stack_access)
{
eth::CommonSubexpressionEliminator cse;
AssemblyItems input{AssemblyItem(Instruction::DUP2), AssemblyItem(u256(0))};
BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end());
AssemblyItems output = cse.getOptimizedItems();
BOOST_CHECK_EQUAL_COLLECTIONS(input.begin(), input.end(), output.begin(), output.end());
}
/// Feeds a single ADD into the CSE, i.e. a sequence that ends with fewer stack elements
/// than it started with. Requires that the item is consumed and checks that the generated
/// items are identical to the input.
BOOST_AUTO_TEST_CASE(cse_negative_stack_end)
{
eth::CommonSubexpressionEliminator cse;
AssemblyItems input{
AssemblyItem(Instruction::ADD)
};
BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end());
AssemblyItems output = cse.getOptimizedItems();
BOOST_CHECK_EQUAL_COLLECTIONS(input.begin(), input.end(), output.begin(), output.end());
}
/// Feeds ADD, a push of 1 and DUP2 into the CSE: the stack first shrinks below its initial
/// height and then grows again. Requires that all items are consumed and checks that the
/// generated items are identical to the input.
BOOST_AUTO_TEST_CASE(cse_intermediate_negative_stack)
{
eth::CommonSubexpressionEliminator cse;
AssemblyItems input{
AssemblyItem(Instruction::ADD),
AssemblyItem(u256(1)),
AssemblyItem(Instruction::DUP2)
};
BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end());
AssemblyItems output = cse.getOptimizedItems();
BOOST_CHECK_EQUAL_COLLECTIONS(input.begin(), input.end(), output.begin(), output.end());
}
/// Feeds a single POP into the CSE. Requires that the item is consumed and checks that the
/// generated items are identical to the input.
BOOST_AUTO_TEST_CASE(cse_pop)
{
eth::CommonSubexpressionEliminator cse;
AssemblyItems input{
AssemblyItem(Instruction::POP)
};
BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end());
AssemblyItems output = cse.getOptimizedItems();
BOOST_CHECK_EQUAL_COLLECTIONS(input.begin(), input.end(), output.begin(), output.end());
}
/// Feeds ADD, SWAP1, POP and two pushes (7 and 8) into the CSE. Requires that all items are
/// consumed and checks that the generated items are identical to the input, i.e. that the
/// code generator reproduces the sequence exactly.
BOOST_AUTO_TEST_CASE(cse_unneeded_items)
{
eth::CommonSubexpressionEliminator cse;
AssemblyItems input{
AssemblyItem(Instruction::ADD),
AssemblyItem(Instruction::SWAP1),
AssemblyItem(Instruction::POP),
AssemblyItem(u256(7)),
AssemblyItem(u256(8)),
};
BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end());
AssemblyItems output = cse.getOptimizedItems();
BOOST_CHECK_EQUAL_COLLECTIONS(input.begin(), input.end(), output.begin(), output.end());
}
BOOST_AUTO_TEST_SUITE_END()
}

Loading…
Cancel
Save