Browse Source

Merge pull request #1455 from chriseth/sol_optimizeSha

Optimization routines for SHA3
cl-refactor
chriseth 10 years ago
parent
commit
8a9bb8f6bd
  1. 1
      libevmcore/CMakeLists.txt
  2. 140
      libevmcore/CommonSubexpressionEliminator.cpp
  3. 61
      libevmcore/CommonSubexpressionEliminator.h
  4. 31
      libevmcore/ExpressionClasses.cpp
  5. 11
      libevmcore/ExpressionClasses.h
  6. 42
      libevmcore/Instruction.cpp
  7. 4
      libevmcore/Instruction.h
  8. 2
      libevmcore/SemanticInformation.cpp
  9. 1
      libsolidity/CMakeLists.txt
  10. 168
      test/SolidityOptimizer.cpp

1
libevmcore/CMakeLists.txt

@ -25,6 +25,7 @@ else()
endif()
target_link_libraries(${EXECUTABLE} devcore)
target_link_libraries(${EXECUTABLE} devcrypto)
install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )

140
libevmcore/CommonSubexpressionEliminator.cpp

@ -23,6 +23,7 @@
#include <functional>
#include <boost/range/adaptor/reversed.hpp>
#include <libdevcrypto/SHA3.h>
#include <libevmcore/CommonSubexpressionEliminator.h>
#include <libevmcore/AssemblyItem.h>
@ -34,8 +35,8 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
{
optimizeBreakingItem();
map<int, ExpressionClasses::Id> initialStackContents;
map<int, ExpressionClasses::Id> targetStackContents;
map<int, Id> initialStackContents;
map<int, Id> targetStackContents;
int minHeight = m_stackHeight + 1;
if (!m_stackElements.empty())
minHeight = min(minHeight, m_stackElements.begin()->first);
@ -58,18 +59,18 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
ostream& CommonSubexpressionEliminator::stream(
ostream& _out,
map<int, ExpressionClasses::Id> _initialStack,
map<int, ExpressionClasses::Id> _targetStack
map<int, Id> _initialStack,
map<int, Id> _targetStack
) const
{
auto streamExpressionClass = [this](ostream& _out, ExpressionClasses::Id _id)
auto streamExpressionClass = [this](ostream& _out, Id _id)
{
auto const& expr = m_expressionClasses.representative(_id);
_out << " " << dec << _id << ": " << *expr.item;
if (expr.sequenceNumber)
_out << "@" << dec << expr.sequenceNumber;
_out << "(";
for (ExpressionClasses::Id arg: expr.arguments)
for (Id arg: expr.arguments)
_out << dec << arg << ",";
_out << ")" << endl;
};
@ -77,7 +78,7 @@ ostream& CommonSubexpressionEliminator::stream(
_out << "Optimizer analysis:" << endl;
_out << "Final stack height: " << dec << m_stackHeight << endl;
_out << "Equivalence classes: " << endl;
for (ExpressionClasses::Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass)
for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass)
streamExpressionClass(_out, eqClass);
_out << "Initial stack: " << endl;
@ -119,7 +120,7 @@ void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _co
);
else if (instruction != Instruction::POP)
{
vector<ExpressionClasses::Id> arguments(info.args);
vector<Id> arguments(info.args);
for (int i = 0; i < info.args; ++i)
arguments[i] = stackElement(m_stackHeight - i);
if (_item.instruction() == Instruction::SSTORE)
@ -130,6 +131,8 @@ void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _co
storeInMemory(arguments[0], arguments[1]);
else if (_item.instruction() == Instruction::MLOAD)
setStackElement(m_stackHeight + _item.deposit(), loadFromMemory(arguments[0]));
else if (_item.instruction() == Instruction::SHA3)
setStackElement(m_stackHeight + _item.deposit(), applySha3(arguments.at(0), arguments.at(1)));
else
setStackElement(m_stackHeight + _item.deposit(), m_expressionClasses.find(_item, arguments, _copyItem));
}
@ -142,7 +145,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
if (!m_breakingItem || *m_breakingItem != AssemblyItem(Instruction::JUMPI))
return;
using Id = ExpressionClasses::Id;
static AssemblyItem s_jump = Instruction::JUMP;
Id condition = stackElement(m_stackHeight - 1);
@ -163,7 +165,7 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
}
}
void CommonSubexpressionEliminator::setStackElement(int _stackHeight, ExpressionClasses::Id _class)
void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class)
{
m_stackElements[_stackHeight] = _class;
}
@ -194,26 +196,28 @@ ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement(int _st
return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight)));
}
void CommonSubexpressionEliminator::storeInStorage(ExpressionClasses::Id _slot, ExpressionClasses::Id _value)
void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value)
{
if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
// do not execute the storage if we know that the value is already there
return;
m_sequenceNumber++;
decltype(m_storageContent) storageContents;
// copy over values at points where we know that they are different from _slot
// Copy over all values (i.e. retain knowledge about them) where we know that this store
// operation will not destroy the knowledge. Specifically, we copy storage locations we know
// are different from _slot or locations where we know that the stored value is equal to _value.
for (auto const& storageItem: m_storageContent)
if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot))
if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
storageContents.insert(storageItem);
m_storageContent = move(storageContents);
ExpressionClasses::Id id = m_expressionClasses.find(Instruction::SSTORE, {_slot, _value}, true, m_sequenceNumber);
Id id = m_expressionClasses.find(Instruction::SSTORE, {_slot, _value}, true, m_sequenceNumber);
m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id));
m_storageContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
}
ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(ExpressionClasses::Id _slot)
ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot)
{
if (m_storageContent.count(_slot))
return m_storageContent.at(_slot);
@ -221,7 +225,7 @@ ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(ExpressionC
return m_storageContent[_slot] = m_expressionClasses.find(Instruction::SLOAD, {_slot}, true, m_sequenceNumber);
}
void CommonSubexpressionEliminator::storeInMemory(ExpressionClasses::Id _slot, ExpressionClasses::Id _value)
void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value)
{
if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
// do not execute the store if we know that the value is already there
@ -233,14 +237,14 @@ void CommonSubexpressionEliminator::storeInMemory(ExpressionClasses::Id _slot, E
if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot))
memoryContents.insert(memoryItem);
m_memoryContent = move(memoryContents);
ExpressionClasses::Id id = m_expressionClasses.find(Instruction::MSTORE, {_slot, _value}, true, m_sequenceNumber);
Id id = m_expressionClasses.find(Instruction::MSTORE, {_slot, _value}, true, m_sequenceNumber);
m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
m_memoryContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
}
ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(ExpressionClasses::Id _slot)
ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot)
{
if (m_memoryContent.count(_slot))
return m_memoryContent.at(_slot);
@ -248,6 +252,37 @@ ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(ExpressionCl
return m_memoryContent[_slot] = m_expressionClasses.find(Instruction::MLOAD, {_slot}, true, m_sequenceNumber);
}
CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3(Id _start, Id _length)
{
// Special logic if length is a short constant, otherwise we cannot tell.
u256 const* l = m_expressionClasses.knownConstant(_length);
// unknown or too large length
if (!l || *l > 128)
return m_expressionClasses.find(Instruction::SHA3, {_start, _length}, true, m_sequenceNumber);
vector<Id> arguments;
for (u256 i = 0; i < *l; i += 32)
{
Id slot = m_expressionClasses.find(Instruction::ADD, {_start, m_expressionClasses.find(i)});
arguments.push_back(loadFromMemory(slot));
}
if (m_knownSha3Hashes.count(arguments))
return m_knownSha3Hashes.at(arguments);
Id v;
// If all arguments are known constants, compute the sha3 here
if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); }))
{
bytes data;
for (Id a: arguments)
data += toBigEndian(*m_expressionClasses.knownConstant(a));
data.resize(size_t(*l));
v = m_expressionClasses.find(u256(sha3(data)));
}
else
v = m_expressionClasses.find(Instruction::SHA3, {_start, _length}, true, m_sequenceNumber);
return m_knownSha3Hashes[arguments] = v;
}
CSECodeGenerator::CSECodeGenerator(
ExpressionClasses& _expressionClasses,
vector<CSECodeGenerator::StoreOperation> const& _storeOperations
@ -259,8 +294,8 @@ CSECodeGenerator::CSECodeGenerator(
}
AssemblyItems CSECodeGenerator::generateCode(
map<int, ExpressionClasses::Id> const& _initialStack,
map<int, ExpressionClasses::Id> const& _targetStackContents
map<int, Id> const& _initialStack,
map<int, Id> const& _targetStackContents
)
{
m_stack = _initialStack;
@ -280,7 +315,7 @@ AssemblyItems CSECodeGenerator::generateCode(
}
// store all needed sequenced expressions
set<pair<unsigned, ExpressionClasses::Id>> sequencedExpressions;
set<pair<unsigned, Id>> sequencedExpressions;
for (auto const& p: m_neededBy)
for (auto id: {p.first, p.second})
if (unsigned seqNr = m_expressionClasses.representative(id).sequenceNumber)
@ -327,19 +362,20 @@ AssemblyItems CSECodeGenerator::generateCode(
return m_generatedItems;
}
void CSECodeGenerator::addDependencies(ExpressionClasses::Id _c)
void CSECodeGenerator::addDependencies(Id _c)
{
if (m_neededBy.count(_c))
return; // we already computed the dependencies for _c
ExpressionClasses::Expression expr = m_expressionClasses.representative(_c);
for (ExpressionClasses::Id argument: expr.arguments)
for (Id argument: expr.arguments)
{
addDependencies(argument);
m_neededBy.insert(make_pair(argument, _c));
}
if (expr.item->type() == Operation && (
expr.item->instruction() == Instruction::SLOAD ||
expr.item->instruction() == Instruction::MLOAD
expr.item->instruction() == Instruction::MLOAD ||
expr.item->instruction() == Instruction::SHA3
))
{
// this loads an unknown value from storage or memory and thus, in addition to its
@ -347,22 +383,52 @@ void CSECodeGenerator::addDependencies(ExpressionClasses::Id _c)
// they are different that occur before this load
StoreOperation::Target target = expr.item->instruction() == Instruction::SLOAD ?
StoreOperation::Storage : StoreOperation::Memory;
ExpressionClasses::Id slotToLoadFrom = expr.arguments.at(0);
Id slotToLoadFrom = expr.arguments.at(0);
for (auto const& p: m_storeOperations)
{
if (p.first.first != target)
continue;
ExpressionClasses::Id slot = p.first.second;
Id slot = p.first.second;
StoreOperations const& storeOps = p.second;
if (storeOps.front().sequenceNumber > expr.sequenceNumber)
continue;
if (
(target == StoreOperation::Memory && m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom)) ||
(target == StoreOperation::Storage && m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom))
)
bool knownToBeIndependent = false;
switch (expr.item->instruction())
{
case Instruction::SLOAD:
knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom);
break;
case Instruction::MLOAD:
knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom);
break;
case Instruction::SHA3:
{
Id length = expr.arguments.at(1);
Id offsetToStart = m_expressionClasses.find(Instruction::SUB, {slot, slotToLoadFrom});
u256 const* o = m_expressionClasses.knownConstant(offsetToStart);
u256 const* l = m_expressionClasses.knownConstant(length);
if (l && *l == 0)
knownToBeIndependent = true;
else if (o)
{
// We could get problems here if both *o and *l are larger than 2**254
// but it is probably ok for the optimizer to produce wrong code for such cases
// which cannot be executed anyway because of the non-payable price.
if (u2s(*o) <= -32)
knownToBeIndependent = true;
else if (l && u2s(*o) >= 0 && *o >= *l)
knownToBeIndependent = true;
}
break;
}
default:
break;
}
if (knownToBeIndependent)
continue;
// note that store and load never have the same sequence number
ExpressionClasses::Id latestStore = storeOps.front().expression;
Id latestStore = storeOps.front().expression;
for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it)
if (it->sequenceNumber < expr.sequenceNumber)
latestStore = it->expression;
@ -372,7 +438,7 @@ void CSECodeGenerator::addDependencies(ExpressionClasses::Id _c)
}
}
int CSECodeGenerator::generateClassElement(ExpressionClasses::Id _c, bool _allowSequenced)
int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced)
{
// do some cleanup
removeStackTopIfPossible();
@ -392,8 +458,8 @@ int CSECodeGenerator::generateClassElement(ExpressionClasses::Id _c, bool _allow
OptimizerException,
"Sequence constrained operation requested out of sequence."
);
ExpressionClasses::Ids const& arguments = expr.arguments;
for (ExpressionClasses::Id arg: boost::adaptors::reverse(arguments))
vector<Id> const& arguments = expr.arguments;
for (Id arg: boost::adaptors::reverse(arguments))
generateClassElement(arg);
// The arguments are somewhere on the stack now, so it remains to move them at the correct place.
@ -478,7 +544,7 @@ int CSECodeGenerator::generateClassElement(ExpressionClasses::Id _c, bool _allow
}
}
int CSECodeGenerator::classElementPosition(ExpressionClasses::Id _id) const
int CSECodeGenerator::classElementPosition(Id _id) const
{
assertThrow(
m_classPositions.count(_id) && m_classPositions.at(_id) != c_invalidPosition,
@ -488,7 +554,7 @@ int CSECodeGenerator::classElementPosition(ExpressionClasses::Id _id) const
return m_classPositions.at(_id);
}
bool CSECodeGenerator::canBeRemoved(ExpressionClasses::Id _element, ExpressionClasses::Id _result)
bool CSECodeGenerator::canBeRemoved(Id _element, Id _result)
{
// Returns false if _element is finally needed or is needed by a class that has not been
// computed yet. Note that m_classPositions also includes classes that were deleted in the meantime.
@ -507,7 +573,7 @@ bool CSECodeGenerator::removeStackTopIfPossible()
if (m_stack.empty())
return false;
assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, "");
ExpressionClasses::Id top = m_stack[m_stackHeight];
Id top = m_stack[m_stackHeight];
if (!canBeRemoved(top))
return false;
m_generatedItems.push_back(AssemblyItem(Instruction::POP));

61
libevmcore/CommonSubexpressionEliminator.h

@ -57,19 +57,20 @@ using AssemblyItems = std::vector<AssemblyItem>;
class CommonSubexpressionEliminator
{
public:
using Id = ExpressionClasses::Id;
struct StoreOperation
{
enum Target { Memory, Storage };
StoreOperation(
Target _target,
ExpressionClasses::Id _slot,
Id _slot,
unsigned _sequenceNumber,
ExpressionClasses::Id _expression
Id _expression
): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
Target target;
ExpressionClasses::Id slot;
Id slot;
unsigned sequenceNumber;
ExpressionClasses::Id expression;
Id expression;
};
/// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first
@ -83,8 +84,8 @@ public:
/// Streams debugging information to @a _out.
std::ostream& stream(
std::ostream& _out,
std::map<int, ExpressionClasses::Id> _initialStack = std::map<int, ExpressionClasses::Id>(),
std::map<int, ExpressionClasses::Id> _targetStack = std::map<int, ExpressionClasses::Id>()
std::map<int, Id> _initialStack = std::map<int, Id>(),
std::map<int, Id> _targetStack = std::map<int, Id>()
) const;
private:
@ -96,39 +97,42 @@ private:
/// Simplifies the given item using
/// Assigns a new equivalence class to the next sequence number of the given stack element.
void setStackElement(int _stackHeight, ExpressionClasses::Id _class);
void setStackElement(int _stackHeight, Id _class);
/// Swaps the given stack elements in their next sequence number.
void swapStackElements(int _stackHeightA, int _stackHeightB);
/// Retrieves the current equivalence class fo the given stack element (or generates a new
/// one if it does not exist yet).
ExpressionClasses::Id stackElement(int _stackHeight);
Id stackElement(int _stackHeight);
/// @returns the equivalence class id of the special initial stack element at the given height
/// (must not be positive).
ExpressionClasses::Id initialStackElement(int _stackHeight);
Id initialStackElement(int _stackHeight);
/// Increments the sequence number, deletes all storage information that might be overwritten
/// and stores the new value at the given slot.
void storeInStorage(ExpressionClasses::Id _slot, ExpressionClasses::Id _value);
void storeInStorage(Id _slot, Id _value);
/// Retrieves the current value at the given slot in storage or creates a new special sload class.
ExpressionClasses::Id loadFromStorage(ExpressionClasses::Id _slot);
Id loadFromStorage(Id _slot);
/// Increments the sequence number, deletes all memory information that might be overwritten
/// and stores the new value at the given slot.
void storeInMemory(ExpressionClasses::Id _slot, ExpressionClasses::Id _value);
void storeInMemory(Id _slot, Id _value);
/// Retrieves the current value at the given slot in memory or creates a new special mload class.
ExpressionClasses::Id loadFromMemory(ExpressionClasses::Id _slot);
Id loadFromMemory(Id _slot);
/// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
Id applySha3(Id _start, Id _length);
/// Current stack height, can be negative.
int m_stackHeight = 0;
/// Current stack layout, mapping stack height -> equivalence class
std::map<int, ExpressionClasses::Id> m_stackElements;
std::map<int, Id> m_stackElements;
/// Current sequence number, this is incremented with each modification to storage or memory.
unsigned m_sequenceNumber = 1;
/// Knowledge about storage content.
std::map<ExpressionClasses::Id, ExpressionClasses::Id> m_storageContent;
std::map<Id, Id> m_storageContent;
/// Knowledge about memory content. Keys are memory addresses, note that the values overlap
/// and are not contained here if they are not completely known.
std::map<ExpressionClasses::Id, ExpressionClasses::Id> m_memoryContent;
std::map<Id, Id> m_memoryContent;
/// Keeps record of all sha3 hashes that are computed.
std::map<std::vector<Id>, Id> m_knownSha3Hashes;
/// Keeps information about which storage or memory slots were written to at which sequence
/// number with what instruction.
std::vector<StoreOperation> m_storeOperations;
@ -149,6 +153,7 @@ class CSECodeGenerator
public:
using StoreOperation = CommonSubexpressionEliminator::StoreOperation;
using StoreOperations = std::vector<StoreOperation>;
using Id = ExpressionClasses::Id;
/// Initializes the code generator with the given classes and store operations.
/// The store operations have to be sorted by sequence number in ascending order.
@ -159,25 +164,25 @@ public:
/// @param _targetStackContents final contents of the stack, by stack height relative to initial
/// @note should only be called once on each object.
AssemblyItems generateCode(
std::map<int, ExpressionClasses::Id> const& _initialStack,
std::map<int, ExpressionClasses::Id> const& _targetStackContents
std::map<int, Id> const& _initialStack,
std::map<int, Id> const& _targetStackContents
);
private:
/// Recursively discovers all dependencies to @a m_requests.
void addDependencies(ExpressionClasses::Id _c);
void addDependencies(Id _c);
/// Produce code that generates the given element if it is not yet present.
/// @returns the stack position of the element or c_invalidPosition if it does not actually
/// generate a value on the stack.
/// @param _allowSequenced indicates that sequence-constrained operations are allowed
int generateClassElement(ExpressionClasses::Id _c, bool _allowSequenced = false);
int generateClassElement(Id _c, bool _allowSequenced = false);
/// @returns the position of the representative of the given id on the stack.
/// @note throws an exception if it is not on the stack.
int classElementPosition(ExpressionClasses::Id _id) const;
int classElementPosition(Id _id) const;
/// @returns true if @a _element can be removed - in general or, if given, while computing @a _result.
bool canBeRemoved(ExpressionClasses::Id _element, ExpressionClasses::Id _result = ExpressionClasses::Id(-1));
bool canBeRemoved(Id _element, Id _result = Id(-1));
/// Appends code to remove the topmost stack element if it can be removed.
bool removeStackTopIfPossible();
@ -196,19 +201,19 @@ private:
/// Current height of the stack relative to the start.
int m_stackHeight = 0;
/// If (b, a) is in m_requests then b is needed to compute a.
std::multimap<ExpressionClasses::Id, ExpressionClasses::Id> m_neededBy;
std::multimap<Id, Id> m_neededBy;
/// Current content of the stack.
std::map<int, ExpressionClasses::Id> m_stack;
std::map<int, Id> m_stack;
/// Current positions of equivalence classes, equal to c_invalidPosition if already deleted.
std::map<ExpressionClasses::Id, int> m_classPositions;
std::map<Id, int> m_classPositions;
/// The actual eqivalence class items and how to compute them.
ExpressionClasses& m_expressionClasses;
/// Keeps information about which storage or memory slots were written to by which operations.
/// The operations are sorted ascendingly by sequence number.
std::map<std::pair<StoreOperation::Target, ExpressionClasses::Id>, StoreOperations> m_storeOperations;
std::map<std::pair<StoreOperation::Target, Id>, StoreOperations> m_storeOperations;
/// The set of equivalence classes that should be present on the stack at the end.
std::set<ExpressionClasses::Id> m_finalClasses;
std::set<Id> m_finalClasses;
};
template <class _AssemblyItemIterator>

31
libevmcore/ExpressionClasses.cpp

@ -84,24 +84,35 @@ ExpressionClasses::Id ExpressionClasses::find(
bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b)
{
// Try to simplify "_a - _b" and return true iff the value is a non-zero constant.
map<unsigned, Expression const*> matchGroups;
Pattern constant(Push);
constant.setMatchGroup(1, matchGroups);
Id difference = find(Instruction::SUB, {_a, _b});
return constant.matches(representative(difference), *this) && constant.d() != u256(0);
return knownNonZero(find(Instruction::SUB, {_a, _b}));
}
bool ExpressionClasses::knownToBeDifferentBy32(ExpressionClasses::Id _a, ExpressionClasses::Id _b)
{
// Try to simplify "_a - _b" and return true iff the value is at least 32 away from zero.
u256 const* v = knownConstant(find(Instruction::SUB, {_a, _b}));
// forbidden interval is ["-31", 31]
return v && *v + 31 > u256(62);
}
bool ExpressionClasses::knownZero(Id _c)
{
return Pattern(u256(0)).matches(representative(_c), *this);
}
bool ExpressionClasses::knownNonZero(Id _c)
{
return Pattern(u256(0)).matches(representative(find(Instruction::ISZERO, {_c})), *this);
}
u256 const* ExpressionClasses::knownConstant(Id _c)
{
map<unsigned, Expression const*> matchGroups;
Pattern constant(Push);
constant.setMatchGroup(1, matchGroups);
Id difference = find(Instruction::SUB, {_a, _b});
if (!constant.matches(representative(difference), *this))
return false;
// forbidden interval is ["-31", 31]
return constant.d() + 31 > u256(62);
if (!constant.matches(representative(_c), *this))
return nullptr;
return &constant.d();
}
string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const

11
libevmcore/ExpressionClasses.h

@ -78,6 +78,15 @@ public:
bool knownToBeDifferent(Id _a, Id _b);
/// Similar to @a knownToBeDifferent but require that abs(_a - b) >= 32.
bool knownToBeDifferentBy32(Id _a, Id _b);
/// @returns true if the value of the given class is known to be zero.
/// @note that this is not the negation of knownNonZero
bool knownZero(Id _c);
/// @returns true if the value of the given class is known to be nonzero.
/// @note that this is not the negation of knownZero
bool knownNonZero(Id _c);
/// @returns a pointer to the value if the given class is known to be a constant,
/// and a nullptr otherwise.
u256 const* knownConstant(Id _c);
std::string fullDAGToString(Id _id) const;
@ -131,7 +140,7 @@ public:
/// @returns the id of the matched expression if this pattern is part of a match group.
Id id() const { return matchGroupValue().id; }
/// @returns the data of the matched expression if this pattern is part of a match group.
u256 d() const { return matchGroupValue().item->data(); }
u256 const& d() const { return matchGroupValue().item->data(); }
std::string toString() const;

42
libevmcore/Instruction.cpp

@ -21,6 +21,7 @@
#include "Instruction.h"
#include <functional>
#include <libdevcore/Common.h>
#include <libdevcore/CommonIO.h>
#include <libdevcore/Log.h>
@ -294,27 +295,42 @@ static const std::map<Instruction, InstructionInfo> c_instructionInfo =
{ Instruction::SUICIDE, { "SUICIDE", 0, 1, 0, true, ZeroTier } }
};
string dev::eth::disassemble(bytes const& _mem)
void dev::eth::eachInstruction(
bytes const& _mem,
function<void(Instruction,u256 const&)> const& _onInstruction
)
{
stringstream ret;
unsigned numerics = 0;
for (auto it = _mem.begin(); it != _mem.end(); ++it)
{
byte n = *it;
auto iit = c_instructionInfo.find((Instruction)n);
if (numerics || iit == c_instructionInfo.end() || (byte)iit->first != n) // not an instruction or expecting an argument...
Instruction instr = Instruction(*it);
size_t additional = 0;
if (isValidInstruction(instr))
additional = instructionInfo(instr).additional;
u256 data;
for (size_t i = 0; i < additional; ++i)
{
if (numerics)
numerics--;
ret << "0x" << hex << (int)n << " ";
data <<= 8;
if (it != _mem.end() && ++it != _mem.end())
data |= *it;
}
_onInstruction(instr, data);
}
}
string dev::eth::disassemble(bytes const& _mem)
{
stringstream ret;
eachInstruction(_mem, [&](Instruction _instr, u256 const& _data) {
if (!isValidInstruction(_instr))
ret << "0x" << hex << int(_instr) << " ";
else
{
auto const& ii = iit->second;
ret << ii.name << " ";
numerics = ii.additional;
}
InstructionInfo info = instructionInfo(_instr);
ret << info.name << " ";
if (info.additional)
ret << "0x" << hex << _data << " ";
}
});
return ret.str();
}

4
libevmcore/Instruction.h

@ -21,6 +21,7 @@
#pragma once
#include <functional>
#include <libdevcore/Common.h>
#include <libdevcore/Assertions.h>
#include <libevmcore/Exceptions.h>
@ -253,6 +254,9 @@ bool isValidInstruction(Instruction _inst);
/// Convert from string mnemonic to Instruction type.
extern const std::map<std::string, Instruction> c_instructions;
/// Iterate through EVM code and call a function on each instruction.
void eachInstruction(bytes const& _mem, std::function<void(Instruction,u256 const&)> const& _onInstruction);
/// Convert from EVM code to simple EVM assembly language.
std::string disassemble(bytes const& _mem);

2
libevmcore/SemanticInformation.cpp

@ -52,8 +52,6 @@ bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item)
return true; // GAS and PC assume a specific order of opcodes
if (_item.instruction() == Instruction::MSIZE)
return true; // msize is modified already by memory access, avoid that for now
if (_item.instruction() == Instruction::SHA3)
return true; //@todo: we have to compare sha3's not based on their memory addresses but on the memory content.
InstructionInfo info = instructionInfo(_item.instruction());
if (_item.instruction() == Instruction::SSTORE)
return false;

1
libsolidity/CMakeLists.txt

@ -27,7 +27,6 @@ endif()
target_link_libraries(${EXECUTABLE} ${JSONCPP_LIBRARIES})
target_link_libraries(${EXECUTABLE} evmcore)
target_link_libraries(${EXECUTABLE} devcore)
target_link_libraries(${EXECUTABLE} devcrypto)
install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )

168
test/SolidityOptimizer.cpp

@ -56,8 +56,16 @@ public:
m_nonOptimizedContract = m_contractAddress;
m_optimize = true;
bytes optimizedBytecode = compileAndRun(_sourceCode, _value, _contractName);
size_t nonOptimizedSize = 0;
eth::eachInstruction(nonOptimizedBytecode, [&](Instruction, u256 const&) {
nonOptimizedSize++;
});
size_t optimizedSize = 0;
eth::eachInstruction(optimizedBytecode, [&](Instruction, u256 const&) {
optimizedSize++;
});
BOOST_CHECK_MESSAGE(
nonOptimizedBytecode.size() > optimizedBytecode.size(),
nonOptimizedSize > optimizedSize,
"Optimizer did not reduce bytecode size."
);
m_optimizedContract = m_contractAddress;
@ -75,11 +83,16 @@ public:
"\nOptimized: " + toHex(optimizedOutput));
}
void checkCSE(AssemblyItems const& _input, AssemblyItems const& _expectation)
AssemblyItems getCSE(AssemblyItems const& _input)
{
eth::CommonSubexpressionEliminator cse;
BOOST_REQUIRE(cse.feedItems(_input.begin(), _input.end()) == _input.end());
AssemblyItems output = cse.getOptimizedItems();
return cse.getOptimizedItems();
}
void checkCSE(AssemblyItems const& _input, AssemblyItems const& _expectation)
{
AssemblyItems output = getCSE(_input);
BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end());
}
@ -569,6 +582,155 @@ BOOST_AUTO_TEST_CASE(cse_jumpi_jump)
});
}
BOOST_AUTO_TEST_CASE(cse_empty_sha3)
{
AssemblyItems input{
u256(0),
Instruction::DUP2,
Instruction::SHA3
};
checkCSE(input, {
u256(sha3(bytesConstRef()))
});
}
BOOST_AUTO_TEST_CASE(cse_partial_sha3)
{
AssemblyItems input{
u256(0xabcd) << (256 - 16),
u256(0),
Instruction::MSTORE,
u256(2),
u256(0),
Instruction::SHA3
};
checkCSE(input, {
u256(0xabcd) << (256 - 16),
u256(0),
Instruction::MSTORE,
u256(sha3(bytes{0xab, 0xcd}))
});
}
BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_location)
{
// sha3 twice from same dynamic location
AssemblyItems input{
Instruction::DUP2,
Instruction::DUP1,
Instruction::MSTORE,
u256(64),
Instruction::DUP2,
Instruction::SHA3,
u256(64),
Instruction::DUP3,
Instruction::SHA3
};
checkCSE(input, {
Instruction::DUP2,
Instruction::DUP1,
Instruction::MSTORE,
u256(64),
Instruction::DUP2,
Instruction::SHA3,
Instruction::DUP1
});
}
BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content)
{
// sha3 twice from different dynamic location but with same content
AssemblyItems input{
Instruction::DUP1,
u256(0x80),
Instruction::MSTORE, // m[128] = DUP1
u256(0x20),
u256(0x80),
Instruction::SHA3, // sha3(m[128..(128+32)])
Instruction::DUP2,
u256(12),
Instruction::MSTORE, // m[12] = DUP1
u256(0x20),
u256(12),
Instruction::SHA3 // sha3(m[12..(12+32)])
};
checkCSE(input, {
u256(0x80),
Instruction::DUP2,
Instruction::DUP2,
Instruction::MSTORE,
u256(0x20),
Instruction::SWAP1,
Instruction::SHA3,
u256(12),
Instruction::DUP3,
Instruction::SWAP1,
Instruction::MSTORE,
Instruction::DUP1
});
}
BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_dynamic_store_in_between)
{
// sha3 twice from different dynamic location but with same content,
// dynamic mstore in between, which forces us to re-calculate the sha3
AssemblyItems input{
u256(0x80),
Instruction::DUP2,
Instruction::DUP2,
Instruction::MSTORE, // m[128] = DUP1
u256(0x20),
Instruction::DUP1,
Instruction::DUP3,
Instruction::SHA3, // sha3(m[128..(128+32)])
u256(12),
Instruction::DUP5,
Instruction::DUP2,
Instruction::MSTORE, // m[12] = DUP1
Instruction::DUP12,
Instruction::DUP14,
Instruction::MSTORE, // destroys memory knowledge
Instruction::SWAP2,
Instruction::SWAP1,
Instruction::SWAP2,
Instruction::SHA3 // sha3(m[12..(12+32)])
};
checkCSE(input, input);
}
BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_noninterfering_store_in_between)
{
// sha3 twice from different dynamic location but with same content,
// dynamic mstore in between, but does not force us to re-calculate the sha3
AssemblyItems input{
u256(0x80),
Instruction::DUP2,
Instruction::DUP2,
Instruction::MSTORE, // m[128] = DUP1
u256(0x20),
Instruction::DUP1,
Instruction::DUP3,
Instruction::SHA3, // sha3(m[128..(128+32)])
u256(12),
Instruction::DUP5,
Instruction::DUP2,
Instruction::MSTORE, // m[12] = DUP1
Instruction::DUP12,
u256(12 + 32),
Instruction::MSTORE, // does not destoy memory knowledge
Instruction::DUP13,
u256(128 - 32),
Instruction::MSTORE, // does not destoy memory knowledge
u256(0x20),
u256(12),
Instruction::SHA3 // sha3(m[12..(12+32)])
};
// if this changes too often, only count the number of SHA3 and MSTORE instructions
AssemblyItems output = getCSE(input);
BOOST_CHECK_EQUAL(4, count(output.begin(), output.end(), AssemblyItem(Instruction::MSTORE)));
BOOST_CHECK_EQUAL(1, count(output.begin(), output.end(), AssemblyItem(Instruction::SHA3)));
}
BOOST_AUTO_TEST_SUITE_END()
}

Loading…
Cancel
Save