diff --git a/libevmcore/CMakeLists.txt b/libevmcore/CMakeLists.txt index b57d52fa3..6a834936b 100644 --- a/libevmcore/CMakeLists.txt +++ b/libevmcore/CMakeLists.txt @@ -25,6 +25,7 @@ else() endif() target_link_libraries(${EXECUTABLE} devcore) +target_link_libraries(${EXECUTABLE} devcrypto) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) diff --git a/libevmcore/CommonSubexpressionEliminator.cpp b/libevmcore/CommonSubexpressionEliminator.cpp index caeab4619..d857158b9 100644 --- a/libevmcore/CommonSubexpressionEliminator.cpp +++ b/libevmcore/CommonSubexpressionEliminator.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -34,8 +35,8 @@ vector CommonSubexpressionEliminator::getOptimizedItems() { optimizeBreakingItem(); - map initialStackContents; - map targetStackContents; + map initialStackContents; + map targetStackContents; int minHeight = m_stackHeight + 1; if (!m_stackElements.empty()) minHeight = min(minHeight, m_stackElements.begin()->first); @@ -58,18 +59,18 @@ vector CommonSubexpressionEliminator::getOptimizedItems() ostream& CommonSubexpressionEliminator::stream( ostream& _out, - map _initialStack, - map _targetStack + map _initialStack, + map _targetStack ) const { - auto streamExpressionClass = [this](ostream& _out, ExpressionClasses::Id _id) + auto streamExpressionClass = [this](ostream& _out, Id _id) { auto const& expr = m_expressionClasses.representative(_id); _out << " " << dec << _id << ": " << *expr.item; if (expr.sequenceNumber) _out << "@" << dec << expr.sequenceNumber; _out << "("; - for (ExpressionClasses::Id arg: expr.arguments) + for (Id arg: expr.arguments) _out << dec << arg << ","; _out << ")" << endl; }; @@ -77,7 +78,7 @@ ostream& CommonSubexpressionEliminator::stream( _out << "Optimizer analysis:" << endl; _out << "Final stack height: " << dec << m_stackHeight << endl; _out << "Equivalence classes: " << endl; - for (ExpressionClasses::Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass) + for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass) streamExpressionClass(_out, eqClass); _out << "Initial stack: " << endl; @@ -119,7 +120,7 @@ void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _co ); else if (instruction != Instruction::POP) { - vector arguments(info.args); + vector arguments(info.args); for (int i = 0; i < info.args; ++i) arguments[i] = stackElement(m_stackHeight - i); if (_item.instruction() == Instruction::SSTORE) @@ -130,6 +131,8 @@ void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _co storeInMemory(arguments[0], arguments[1]); else if (_item.instruction() == Instruction::MLOAD) setStackElement(m_stackHeight + _item.deposit(), loadFromMemory(arguments[0])); + else if (_item.instruction() == Instruction::SHA3) + setStackElement(m_stackHeight + _item.deposit(), applySha3(arguments.at(0), arguments.at(1))); else setStackElement(m_stackHeight + _item.deposit(), m_expressionClasses.find(_item, arguments, _copyItem)); } @@ -142,7 +145,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() if (!m_breakingItem || *m_breakingItem != AssemblyItem(Instruction::JUMPI)) return; - using Id = ExpressionClasses::Id; static AssemblyItem s_jump = Instruction::JUMP; Id condition = stackElement(m_stackHeight - 1); @@ -163,7 +165,7 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() } } -void CommonSubexpressionEliminator::setStackElement(int _stackHeight, ExpressionClasses::Id _class) +void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class) { m_stackElements[_stackHeight] = _class; } @@ -194,26 +196,28 @@ ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement(int _st return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight))); } -void CommonSubexpressionEliminator::storeInStorage(ExpressionClasses::Id _slot, ExpressionClasses::Id _value) +void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value) { if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) // do not execute the storage if we know that the value is already there return; m_sequenceNumber++; decltype(m_storageContent) storageContents; - // copy over values at points where we know that they are different from _slot + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. for (auto const& storageItem: m_storageContent) - if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot)) + if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) storageContents.insert(storageItem); m_storageContent = move(storageContents); - ExpressionClasses::Id id = m_expressionClasses.find(Instruction::SSTORE, {_slot, _value}, true, m_sequenceNumber); + Id id = m_expressionClasses.find(Instruction::SSTORE, {_slot, _value}, true, m_sequenceNumber); m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id)); m_storageContent[_slot] = _value; // increment a second time so that we get unique sequence numbers for writes m_sequenceNumber++; } -ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(ExpressionClasses::Id _slot) +ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot) { if (m_storageContent.count(_slot)) return m_storageContent.at(_slot); @@ -221,7 +225,7 @@ ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(ExpressionC return m_storageContent[_slot] = m_expressionClasses.find(Instruction::SLOAD, {_slot}, true, m_sequenceNumber); } -void CommonSubexpressionEliminator::storeInMemory(ExpressionClasses::Id _slot, ExpressionClasses::Id _value) +void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value) { if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) // do not execute the store if we know that the value is already there @@ -233,14 +237,14 @@ void CommonSubexpressionEliminator::storeInMemory(ExpressionClasses::Id _slot, E if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot)) memoryContents.insert(memoryItem); m_memoryContent = move(memoryContents); - ExpressionClasses::Id id = m_expressionClasses.find(Instruction::MSTORE, {_slot, _value}, true, m_sequenceNumber); + Id id = m_expressionClasses.find(Instruction::MSTORE, {_slot, _value}, true, m_sequenceNumber); m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); m_memoryContent[_slot] = _value; // increment a second time so that we get unique sequence numbers for writes m_sequenceNumber++; } -ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(ExpressionClasses::Id _slot) +ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot) { if (m_memoryContent.count(_slot)) return m_memoryContent.at(_slot); @@ -248,6 +252,37 @@ ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(ExpressionCl return m_memoryContent[_slot] = m_expressionClasses.find(Instruction::MLOAD, {_slot}, true, m_sequenceNumber); } +CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3(Id _start, Id _length) +{ + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses.knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses.find(Instruction::SHA3, {_start, _length}, true, m_sequenceNumber); + + vector arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses.find(Instruction::ADD, {_start, m_expressionClasses.find(i)}); + arguments.push_back(loadFromMemory(slot)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses.knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses.find(u256(sha3(data))); + } + else + v = m_expressionClasses.find(Instruction::SHA3, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + CSECodeGenerator::CSECodeGenerator( ExpressionClasses& _expressionClasses, vector const& _storeOperations @@ -259,8 +294,8 @@ CSECodeGenerator::CSECodeGenerator( } AssemblyItems CSECodeGenerator::generateCode( - map const& _initialStack, - map const& _targetStackContents + map const& _initialStack, + map const& _targetStackContents ) { m_stack = _initialStack; @@ -280,7 +315,7 @@ AssemblyItems CSECodeGenerator::generateCode( } // store all needed sequenced expressions - set> sequencedExpressions; + set> sequencedExpressions; for (auto const& p: m_neededBy) for (auto id: {p.first, p.second}) if (unsigned seqNr = m_expressionClasses.representative(id).sequenceNumber) @@ -327,19 +362,20 @@ AssemblyItems CSECodeGenerator::generateCode( return m_generatedItems; } -void CSECodeGenerator::addDependencies(ExpressionClasses::Id _c) +void CSECodeGenerator::addDependencies(Id _c) { if (m_neededBy.count(_c)) return; // we already computed the dependencies for _c ExpressionClasses::Expression expr = m_expressionClasses.representative(_c); - for (ExpressionClasses::Id argument: expr.arguments) + for (Id argument: expr.arguments) { addDependencies(argument); m_neededBy.insert(make_pair(argument, _c)); } if (expr.item->type() == Operation && ( expr.item->instruction() == Instruction::SLOAD || - expr.item->instruction() == Instruction::MLOAD + expr.item->instruction() == Instruction::MLOAD || + expr.item->instruction() == Instruction::SHA3 )) { // this loads an unknown value from storage or memory and thus, in addition to its @@ -347,22 +383,52 @@ void CSECodeGenerator::addDependencies(ExpressionClasses::Id _c) // they are different that occur before this load StoreOperation::Target target = expr.item->instruction() == Instruction::SLOAD ? StoreOperation::Storage : StoreOperation::Memory; - ExpressionClasses::Id slotToLoadFrom = expr.arguments.at(0); + Id slotToLoadFrom = expr.arguments.at(0); for (auto const& p: m_storeOperations) { if (p.first.first != target) continue; - ExpressionClasses::Id slot = p.first.second; + Id slot = p.first.second; StoreOperations const& storeOps = p.second; if (storeOps.front().sequenceNumber > expr.sequenceNumber) continue; - if ( - (target == StoreOperation::Memory && m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom)) || - (target == StoreOperation::Storage && m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom)) - ) + bool knownToBeIndependent = false; + switch (expr.item->instruction()) + { + case Instruction::SLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom); + break; + case Instruction::MLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom); + break; + case Instruction::SHA3: + { + Id length = expr.arguments.at(1); + Id offsetToStart = m_expressionClasses.find(Instruction::SUB, {slot, slotToLoadFrom}); + u256 const* o = m_expressionClasses.knownConstant(offsetToStart); + u256 const* l = m_expressionClasses.knownConstant(length); + if (l && *l == 0) + knownToBeIndependent = true; + else if (o) + { + // We could get problems here if both *o and *l are larger than 2**254 + // but it is probably ok for the optimizer to produce wrong code for such cases + // which cannot be executed anyway because of the non-payable price. + if (u2s(*o) <= -32) + knownToBeIndependent = true; + else if (l && u2s(*o) >= 0 && *o >= *l) + knownToBeIndependent = true; + } + break; + } + default: + break; + } + if (knownToBeIndependent) continue; + // note that store and load never have the same sequence number - ExpressionClasses::Id latestStore = storeOps.front().expression; + Id latestStore = storeOps.front().expression; for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it) if (it->sequenceNumber < expr.sequenceNumber) latestStore = it->expression; @@ -372,7 +438,7 @@ void CSECodeGenerator::addDependencies(ExpressionClasses::Id _c) } } -int CSECodeGenerator::generateClassElement(ExpressionClasses::Id _c, bool _allowSequenced) +int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) { // do some cleanup removeStackTopIfPossible(); @@ -392,8 +458,8 @@ int CSECodeGenerator::generateClassElement(ExpressionClasses::Id _c, bool _allow OptimizerException, "Sequence constrained operation requested out of sequence." ); - ExpressionClasses::Ids const& arguments = expr.arguments; - for (ExpressionClasses::Id arg: boost::adaptors::reverse(arguments)) + vector const& arguments = expr.arguments; + for (Id arg: boost::adaptors::reverse(arguments)) generateClassElement(arg); // The arguments are somewhere on the stack now, so it remains to move them at the correct place. @@ -478,7 +544,7 @@ int CSECodeGenerator::generateClassElement(ExpressionClasses::Id _c, bool _allow } } -int CSECodeGenerator::classElementPosition(ExpressionClasses::Id _id) const +int CSECodeGenerator::classElementPosition(Id _id) const { assertThrow( m_classPositions.count(_id) && m_classPositions.at(_id) != c_invalidPosition, @@ -488,7 +554,7 @@ int CSECodeGenerator::classElementPosition(ExpressionClasses::Id _id) const return m_classPositions.at(_id); } -bool CSECodeGenerator::canBeRemoved(ExpressionClasses::Id _element, ExpressionClasses::Id _result) +bool CSECodeGenerator::canBeRemoved(Id _element, Id _result) { // Returns false if _element is finally needed or is needed by a class that has not been // computed yet. Note that m_classPositions also includes classes that were deleted in the meantime. @@ -507,7 +573,7 @@ bool CSECodeGenerator::removeStackTopIfPossible() if (m_stack.empty()) return false; assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, ""); - ExpressionClasses::Id top = m_stack[m_stackHeight]; + Id top = m_stack[m_stackHeight]; if (!canBeRemoved(top)) return false; m_generatedItems.push_back(AssemblyItem(Instruction::POP)); diff --git a/libevmcore/CommonSubexpressionEliminator.h b/libevmcore/CommonSubexpressionEliminator.h index 0dbb47b29..660e9dacc 100644 --- a/libevmcore/CommonSubexpressionEliminator.h +++ b/libevmcore/CommonSubexpressionEliminator.h @@ -57,19 +57,20 @@ using AssemblyItems = std::vector; class CommonSubexpressionEliminator { public: + using Id = ExpressionClasses::Id; struct StoreOperation { enum Target { Memory, Storage }; StoreOperation( Target _target, - ExpressionClasses::Id _slot, + Id _slot, unsigned _sequenceNumber, - ExpressionClasses::Id _expression + Id _expression ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} Target target; - ExpressionClasses::Id slot; + Id slot; unsigned sequenceNumber; - ExpressionClasses::Id expression; + Id expression; }; /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first @@ -83,8 +84,8 @@ public: /// Streams debugging information to @a _out. std::ostream& stream( std::ostream& _out, - std::map _initialStack = std::map(), - std::map _targetStack = std::map() + std::map _initialStack = std::map(), + std::map _targetStack = std::map() ) const; private: @@ -96,39 +97,42 @@ private: /// Simplifies the given item using /// Assigns a new equivalence class to the next sequence number of the given stack element. - void setStackElement(int _stackHeight, ExpressionClasses::Id _class); + void setStackElement(int _stackHeight, Id _class); /// Swaps the given stack elements in their next sequence number. void swapStackElements(int _stackHeightA, int _stackHeightB); /// Retrieves the current equivalence class fo the given stack element (or generates a new /// one if it does not exist yet). - ExpressionClasses::Id stackElement(int _stackHeight); + Id stackElement(int _stackHeight); /// @returns the equivalence class id of the special initial stack element at the given height /// (must not be positive). - ExpressionClasses::Id initialStackElement(int _stackHeight); + Id initialStackElement(int _stackHeight); /// Increments the sequence number, deletes all storage information that might be overwritten /// and stores the new value at the given slot. - void storeInStorage(ExpressionClasses::Id _slot, ExpressionClasses::Id _value); + void storeInStorage(Id _slot, Id _value); /// Retrieves the current value at the given slot in storage or creates a new special sload class. - ExpressionClasses::Id loadFromStorage(ExpressionClasses::Id _slot); + Id loadFromStorage(Id _slot); /// Increments the sequence number, deletes all memory information that might be overwritten /// and stores the new value at the given slot. - void storeInMemory(ExpressionClasses::Id _slot, ExpressionClasses::Id _value); + void storeInMemory(Id _slot, Id _value); /// Retrieves the current value at the given slot in memory or creates a new special mload class. - ExpressionClasses::Id loadFromMemory(ExpressionClasses::Id _slot); - + Id loadFromMemory(Id _slot); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length); /// Current stack height, can be negative. int m_stackHeight = 0; /// Current stack layout, mapping stack height -> equivalence class - std::map m_stackElements; + std::map m_stackElements; /// Current sequence number, this is incremented with each modification to storage or memory. unsigned m_sequenceNumber = 1; /// Knowledge about storage content. - std::map m_storageContent; + std::map m_storageContent; /// Knowledge about memory content. Keys are memory addresses, note that the values overlap /// and are not contained here if they are not completely known. - std::map m_memoryContent; + std::map m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map, Id> m_knownSha3Hashes; /// Keeps information about which storage or memory slots were written to at which sequence /// number with what instruction. std::vector m_storeOperations; @@ -149,6 +153,7 @@ class CSECodeGenerator public: using StoreOperation = CommonSubexpressionEliminator::StoreOperation; using StoreOperations = std::vector; + using Id = ExpressionClasses::Id; /// Initializes the code generator with the given classes and store operations. /// The store operations have to be sorted by sequence number in ascending order. @@ -159,25 +164,25 @@ public: /// @param _targetStackContents final contents of the stack, by stack height relative to initial /// @note should only be called once on each object. AssemblyItems generateCode( - std::map const& _initialStack, - std::map const& _targetStackContents + std::map const& _initialStack, + std::map const& _targetStackContents ); private: /// Recursively discovers all dependencies to @a m_requests. - void addDependencies(ExpressionClasses::Id _c); + void addDependencies(Id _c); /// Produce code that generates the given element if it is not yet present. /// @returns the stack position of the element or c_invalidPosition if it does not actually /// generate a value on the stack. /// @param _allowSequenced indicates that sequence-constrained operations are allowed - int generateClassElement(ExpressionClasses::Id _c, bool _allowSequenced = false); + int generateClassElement(Id _c, bool _allowSequenced = false); /// @returns the position of the representative of the given id on the stack. /// @note throws an exception if it is not on the stack. - int classElementPosition(ExpressionClasses::Id _id) const; + int classElementPosition(Id _id) const; /// @returns true if @a _element can be removed - in general or, if given, while computing @a _result. - bool canBeRemoved(ExpressionClasses::Id _element, ExpressionClasses::Id _result = ExpressionClasses::Id(-1)); + bool canBeRemoved(Id _element, Id _result = Id(-1)); /// Appends code to remove the topmost stack element if it can be removed. bool removeStackTopIfPossible(); @@ -196,19 +201,19 @@ private: /// Current height of the stack relative to the start. int m_stackHeight = 0; /// If (b, a) is in m_requests then b is needed to compute a. - std::multimap m_neededBy; + std::multimap m_neededBy; /// Current content of the stack. - std::map m_stack; + std::map m_stack; /// Current positions of equivalence classes, equal to c_invalidPosition if already deleted. - std::map m_classPositions; + std::map m_classPositions; /// The actual eqivalence class items and how to compute them. ExpressionClasses& m_expressionClasses; /// Keeps information about which storage or memory slots were written to by which operations. /// The operations are sorted ascendingly by sequence number. - std::map, StoreOperations> m_storeOperations; + std::map, StoreOperations> m_storeOperations; /// The set of equivalence classes that should be present on the stack at the end. - std::set m_finalClasses; + std::set m_finalClasses; }; template diff --git a/libevmcore/ExpressionClasses.cpp b/libevmcore/ExpressionClasses.cpp index 1c7a79e6b..81beaac7e 100644 --- a/libevmcore/ExpressionClasses.cpp +++ b/libevmcore/ExpressionClasses.cpp @@ -84,24 +84,35 @@ ExpressionClasses::Id ExpressionClasses::find( bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) { // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. - map matchGroups; - Pattern constant(Push); - constant.setMatchGroup(1, matchGroups); - Id difference = find(Instruction::SUB, {_a, _b}); - return constant.matches(representative(difference), *this) && constant.d() != u256(0); + return knownNonZero(find(Instruction::SUB, {_a, _b})); } bool ExpressionClasses::knownToBeDifferentBy32(ExpressionClasses::Id _a, ExpressionClasses::Id _b) { // Try to simplify "_a - _b" and return true iff the value is at least 32 away from zero. + u256 const* v = knownConstant(find(Instruction::SUB, {_a, _b})); + // forbidden interval is ["-31", 31] + return v && *v + 31 > u256(62); +} + +bool ExpressionClasses::knownZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(_c), *this); +} + +bool ExpressionClasses::knownNonZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(find(Instruction::ISZERO, {_c})), *this); +} + +u256 const* ExpressionClasses::knownConstant(Id _c) +{ map matchGroups; Pattern constant(Push); constant.setMatchGroup(1, matchGroups); - Id difference = find(Instruction::SUB, {_a, _b}); - if (!constant.matches(representative(difference), *this)) - return false; - // forbidden interval is ["-31", 31] - return constant.d() + 31 > u256(62); + if (!constant.matches(representative(_c), *this)) + return nullptr; + return &constant.d(); } string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const diff --git a/libevmcore/ExpressionClasses.h b/libevmcore/ExpressionClasses.h index 5179845f6..dba7384ec 100644 --- a/libevmcore/ExpressionClasses.h +++ b/libevmcore/ExpressionClasses.h @@ -78,6 +78,15 @@ public: bool knownToBeDifferent(Id _a, Id _b); /// Similar to @a knownToBeDifferent but require that abs(_a - b) >= 32. bool knownToBeDifferentBy32(Id _a, Id _b); + /// @returns true if the value of the given class is known to be zero. + /// @note that this is not the negation of knownNonZero + bool knownZero(Id _c); + /// @returns true if the value of the given class is known to be nonzero. + /// @note that this is not the negation of knownZero + bool knownNonZero(Id _c); + /// @returns a pointer to the value if the given class is known to be a constant, + /// and a nullptr otherwise. + u256 const* knownConstant(Id _c); std::string fullDAGToString(Id _id) const; @@ -131,7 +140,7 @@ public: /// @returns the id of the matched expression if this pattern is part of a match group. Id id() const { return matchGroupValue().id; } /// @returns the data of the matched expression if this pattern is part of a match group. - u256 d() const { return matchGroupValue().item->data(); } + u256 const& d() const { return matchGroupValue().item->data(); } std::string toString() const; diff --git a/libevmcore/Instruction.cpp b/libevmcore/Instruction.cpp index 23f19ac94..03b6ccf2f 100644 --- a/libevmcore/Instruction.cpp +++ b/libevmcore/Instruction.cpp @@ -21,6 +21,7 @@ #include "Instruction.h" +#include #include #include #include @@ -294,27 +295,42 @@ static const std::map c_instructionInfo = { Instruction::SUICIDE, { "SUICIDE", 0, 1, 0, true, ZeroTier } } }; -string dev::eth::disassemble(bytes const& _mem) +void dev::eth::eachInstruction( + bytes const& _mem, + function const& _onInstruction +) { - stringstream ret; - unsigned numerics = 0; for (auto it = _mem.begin(); it != _mem.end(); ++it) { - byte n = *it; - auto iit = c_instructionInfo.find((Instruction)n); - if (numerics || iit == c_instructionInfo.end() || (byte)iit->first != n) // not an instruction or expecting an argument... + Instruction instr = Instruction(*it); + size_t additional = 0; + if (isValidInstruction(instr)) + additional = instructionInfo(instr).additional; + u256 data; + for (size_t i = 0; i < additional; ++i) { - if (numerics) - numerics--; - ret << "0x" << hex << (int)n << " "; + data <<= 8; + if (it != _mem.end() && ++it != _mem.end()) + data |= *it; } + _onInstruction(instr, data); + } +} + +string dev::eth::disassemble(bytes const& _mem) +{ + stringstream ret; + eachInstruction(_mem, [&](Instruction _instr, u256 const& _data) { + if (!isValidInstruction(_instr)) + ret << "0x" << hex << int(_instr) << " "; else { - auto const& ii = iit->second; - ret << ii.name << " "; - numerics = ii.additional; + InstructionInfo info = instructionInfo(_instr); + ret << info.name << " "; + if (info.additional) + ret << "0x" << hex << _data << " "; } - } + }); return ret.str(); } diff --git a/libevmcore/Instruction.h b/libevmcore/Instruction.h index 07c7b52fd..2ccf5bc34 100644 --- a/libevmcore/Instruction.h +++ b/libevmcore/Instruction.h @@ -21,6 +21,7 @@ #pragma once +#include #include #include #include @@ -253,6 +254,9 @@ bool isValidInstruction(Instruction _inst); /// Convert from string mnemonic to Instruction type. extern const std::map c_instructions; +/// Iterate through EVM code and call a function on each instruction. +void eachInstruction(bytes const& _mem, std::function const& _onInstruction); + /// Convert from EVM code to simple EVM assembly language. std::string disassemble(bytes const& _mem); diff --git a/libevmcore/SemanticInformation.cpp b/libevmcore/SemanticInformation.cpp index e561e7554..139c99ce2 100644 --- a/libevmcore/SemanticInformation.cpp +++ b/libevmcore/SemanticInformation.cpp @@ -52,8 +52,6 @@ bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item) return true; // GAS and PC assume a specific order of opcodes if (_item.instruction() == Instruction::MSIZE) return true; // msize is modified already by memory access, avoid that for now - if (_item.instruction() == Instruction::SHA3) - return true; //@todo: we have to compare sha3's not based on their memory addresses but on the memory content. InstructionInfo info = instructionInfo(_item.instruction()); if (_item.instruction() == Instruction::SSTORE) return false; diff --git a/libsolidity/CMakeLists.txt b/libsolidity/CMakeLists.txt index b42b6e5c0..0abd188c1 100644 --- a/libsolidity/CMakeLists.txt +++ b/libsolidity/CMakeLists.txt @@ -27,7 +27,6 @@ endif() target_link_libraries(${EXECUTABLE} ${JSONCPP_LIBRARIES}) target_link_libraries(${EXECUTABLE} evmcore) -target_link_libraries(${EXECUTABLE} devcore) target_link_libraries(${EXECUTABLE} devcrypto) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) diff --git a/test/SolidityOptimizer.cpp b/test/SolidityOptimizer.cpp index 4fedd642d..f523847f1 100644 --- a/test/SolidityOptimizer.cpp +++ b/test/SolidityOptimizer.cpp @@ -56,8 +56,16 @@ public: m_nonOptimizedContract = m_contractAddress; m_optimize = true; bytes optimizedBytecode = compileAndRun(_sourceCode, _value, _contractName); + size_t nonOptimizedSize = 0; + eth::eachInstruction(nonOptimizedBytecode, [&](Instruction, u256 const&) { + nonOptimizedSize++; + }); + size_t optimizedSize = 0; + eth::eachInstruction(optimizedBytecode, [&](Instruction, u256 const&) { + optimizedSize++; + }); BOOST_CHECK_MESSAGE( - nonOptimizedBytecode.size() > optimizedBytecode.size(), + nonOptimizedSize > optimizedSize, "Optimizer did not reduce bytecode size." ); m_optimizedContract = m_contractAddress; @@ -75,11 +83,16 @@ public: "\nOptimized: " + toHex(optimizedOutput)); } - void checkCSE(AssemblyItems const& _input, AssemblyItems const& _expectation) + AssemblyItems getCSE(AssemblyItems const& _input) { eth::CommonSubexpressionEliminator cse; BOOST_REQUIRE(cse.feedItems(_input.begin(), _input.end()) == _input.end()); - AssemblyItems output = cse.getOptimizedItems(); + return cse.getOptimizedItems(); + } + + void checkCSE(AssemblyItems const& _input, AssemblyItems const& _expectation) + { + AssemblyItems output = getCSE(_input); BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end()); } @@ -569,6 +582,155 @@ BOOST_AUTO_TEST_CASE(cse_jumpi_jump) }); } +BOOST_AUTO_TEST_CASE(cse_empty_sha3) +{ + AssemblyItems input{ + u256(0), + Instruction::DUP2, + Instruction::SHA3 + }; + checkCSE(input, { + u256(sha3(bytesConstRef())) + }); +} + +BOOST_AUTO_TEST_CASE(cse_partial_sha3) +{ + AssemblyItems input{ + u256(0xabcd) << (256 - 16), + u256(0), + Instruction::MSTORE, + u256(2), + u256(0), + Instruction::SHA3 + }; + checkCSE(input, { + u256(0xabcd) << (256 - 16), + u256(0), + Instruction::MSTORE, + u256(sha3(bytes{0xab, 0xcd})) + }); +} + +BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_location) +{ + // sha3 twice from same dynamic location + AssemblyItems input{ + Instruction::DUP2, + Instruction::DUP1, + Instruction::MSTORE, + u256(64), + Instruction::DUP2, + Instruction::SHA3, + u256(64), + Instruction::DUP3, + Instruction::SHA3 + }; + checkCSE(input, { + Instruction::DUP2, + Instruction::DUP1, + Instruction::MSTORE, + u256(64), + Instruction::DUP2, + Instruction::SHA3, + Instruction::DUP1 + }); +} + +BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content) +{ + // sha3 twice from different dynamic location but with same content + AssemblyItems input{ + Instruction::DUP1, + u256(0x80), + Instruction::MSTORE, // m[128] = DUP1 + u256(0x20), + u256(0x80), + Instruction::SHA3, // sha3(m[128..(128+32)]) + Instruction::DUP2, + u256(12), + Instruction::MSTORE, // m[12] = DUP1 + u256(0x20), + u256(12), + Instruction::SHA3 // sha3(m[12..(12+32)]) + }; + checkCSE(input, { + u256(0x80), + Instruction::DUP2, + Instruction::DUP2, + Instruction::MSTORE, + u256(0x20), + Instruction::SWAP1, + Instruction::SHA3, + u256(12), + Instruction::DUP3, + Instruction::SWAP1, + Instruction::MSTORE, + Instruction::DUP1 + }); +} + +BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_dynamic_store_in_between) +{ + // sha3 twice from different dynamic location but with same content, + // dynamic mstore in between, which forces us to re-calculate the sha3 + AssemblyItems input{ + u256(0x80), + Instruction::DUP2, + Instruction::DUP2, + Instruction::MSTORE, // m[128] = DUP1 + u256(0x20), + Instruction::DUP1, + Instruction::DUP3, + Instruction::SHA3, // sha3(m[128..(128+32)]) + u256(12), + Instruction::DUP5, + Instruction::DUP2, + Instruction::MSTORE, // m[12] = DUP1 + Instruction::DUP12, + Instruction::DUP14, + Instruction::MSTORE, // destroys memory knowledge + Instruction::SWAP2, + Instruction::SWAP1, + Instruction::SWAP2, + Instruction::SHA3 // sha3(m[12..(12+32)]) + }; + checkCSE(input, input); +} + +BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_noninterfering_store_in_between) +{ + // sha3 twice from different dynamic location but with same content, + // dynamic mstore in between, but does not force us to re-calculate the sha3 + AssemblyItems input{ + u256(0x80), + Instruction::DUP2, + Instruction::DUP2, + Instruction::MSTORE, // m[128] = DUP1 + u256(0x20), + Instruction::DUP1, + Instruction::DUP3, + Instruction::SHA3, // sha3(m[128..(128+32)]) + u256(12), + Instruction::DUP5, + Instruction::DUP2, + Instruction::MSTORE, // m[12] = DUP1 + Instruction::DUP12, + u256(12 + 32), + Instruction::MSTORE, // does not destoy memory knowledge + Instruction::DUP13, + u256(128 - 32), + Instruction::MSTORE, // does not destoy memory knowledge + u256(0x20), + u256(12), + Instruction::SHA3 // sha3(m[12..(12+32)]) + }; + // if this changes too often, only count the number of SHA3 and MSTORE instructions + AssemblyItems output = getCSE(input); + BOOST_CHECK_EQUAL(4, count(output.begin(), output.end(), AssemblyItem(Instruction::MSTORE))); + BOOST_CHECK_EQUAL(1, count(output.begin(), output.end(), AssemblyItem(Instruction::SHA3))); +} + BOOST_AUTO_TEST_SUITE_END() }