diff --git a/libevmcore/CMakeLists.txt b/libevmcore/CMakeLists.txt index b57d52fa3..6a834936b 100644 --- a/libevmcore/CMakeLists.txt +++ b/libevmcore/CMakeLists.txt @@ -25,6 +25,7 @@ else() endif() target_link_libraries(${EXECUTABLE} devcore) +target_link_libraries(${EXECUTABLE} devcrypto) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) diff --git a/libevmcore/CommonSubexpressionEliminator.cpp b/libevmcore/CommonSubexpressionEliminator.cpp index ec1bf3443..9b9862166 100644 --- a/libevmcore/CommonSubexpressionEliminator.cpp +++ b/libevmcore/CommonSubexpressionEliminator.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -130,6 +131,8 @@ void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _co storeInMemory(arguments[0], arguments[1]); else if (_item.instruction() == Instruction::MLOAD) setStackElement(m_stackHeight + _item.deposit(), loadFromMemory(arguments[0])); + else if (_item.instruction() == Instruction::SHA3) + setStackElement(m_stackHeight + _item.deposit(), applySha3(arguments.at(0), arguments.at(1))); else setStackElement(m_stackHeight + _item.deposit(), m_expressionClasses.find(_item, arguments, _copyItem)); } @@ -200,9 +203,10 @@ void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value) return; m_sequenceNumber++; decltype(m_storageContent) storageContents; - // copy over values at points where we know that they are different from _slot + // copy over values at points where we know that they are different from _slot or that we know + // that the value is equal for (auto const& storageItem: m_storageContent) - if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot)) + if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) storageContents.insert(storageItem); m_storageContent = move(storageContents); Id id = m_expressionClasses.find(Instruction::SSTORE, {_slot, _value}, true, m_sequenceNumber); @@ -247,6 +251,37 @@ ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot) return m_memoryContent[_slot] = m_expressionClasses.find(Instruction::MLOAD, {_slot}, true, m_sequenceNumber); } +CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3(Id _start, Id _length) +{ + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses.knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses.find(Instruction::SHA3, {_start, _length}, true, m_sequenceNumber); + + vector arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses.find(Instruction::ADD, {_start, m_expressionClasses.find(i)}); + arguments.push_back(loadFromMemory(slot)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses.knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses.find(u256(sha3(data))); + } + else + v = m_expressionClasses.find(Instruction::SHA3, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + CSECodeGenerator::CSECodeGenerator( ExpressionClasses& _expressionClasses, vector const& _storeOperations @@ -338,7 +373,8 @@ void CSECodeGenerator::addDependencies(Id _c) } if (expr.item->type() == Operation && ( expr.item->instruction() == Instruction::SLOAD || - expr.item->instruction() == Instruction::MLOAD + expr.item->instruction() == Instruction::MLOAD || + expr.item->instruction() == Instruction::SHA3 )) { // this loads an unknown value from storage or memory and thus, in addition to its @@ -355,11 +391,41 @@ void CSECodeGenerator::addDependencies(Id _c) StoreOperations const& storeOps = p.second; if (storeOps.front().sequenceNumber > expr.sequenceNumber) continue; - if ( - (target == StoreOperation::Memory && m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom)) || - (target == StoreOperation::Storage && m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom)) - ) + bool knownToBeIndependent = false; + switch (expr.item->instruction()) + { + case Instruction::SLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom); + break; + case Instruction::MLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom); + break; + case Instruction::SHA3: + { + Id length = expr.arguments.at(1); + Id offsetToStart = m_expressionClasses.find(Instruction::SUB, {slot, slotToLoadFrom}); + u256 const* o = m_expressionClasses.knownConstant(offsetToStart); + u256 const* l = m_expressionClasses.knownConstant(length); + if (l && *l == 0) + knownToBeIndependent = true; + else if (o) + { + // We could get problems here if both *o and *l are larger than 2**254 + // but it is probably ok for the optimizer to produce wrong code for such cases + // which cannot be executed anyway because of the non-payable price. + if (u2s(*o) <= -32) + knownToBeIndependent = true; + else if (l && u2s(*o) >= 0 && *o >= *l) + knownToBeIndependent = true; + } + break; + } + default: + break; + } + if (knownToBeIndependent) continue; + // note that store and load never have the same sequence number Id latestStore = storeOps.front().expression; for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it) diff --git a/libevmcore/CommonSubexpressionEliminator.h b/libevmcore/CommonSubexpressionEliminator.h index aba452d2c..660e9dacc 100644 --- a/libevmcore/CommonSubexpressionEliminator.h +++ b/libevmcore/CommonSubexpressionEliminator.h @@ -117,6 +117,8 @@ private: void storeInMemory(Id _slot, Id _value); /// Retrieves the current value at the given slot in memory or creates a new special mload class. Id loadFromMemory(Id _slot); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length); /// Current stack height, can be negative. int m_stackHeight = 0; @@ -129,6 +131,8 @@ private: /// Knowledge about memory content. Keys are memory addresses, note that the values overlap /// and are not contained here if they are not completely known. std::map m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map, Id> m_knownSha3Hashes; /// Keeps information about which storage or memory slots were written to at which sequence /// number with what instruction. std::vector m_storeOperations; diff --git a/libevmcore/SemanticInformation.cpp b/libevmcore/SemanticInformation.cpp index e561e7554..139c99ce2 100644 --- a/libevmcore/SemanticInformation.cpp +++ b/libevmcore/SemanticInformation.cpp @@ -52,8 +52,6 @@ bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item) return true; // GAS and PC assume a specific order of opcodes if (_item.instruction() == Instruction::MSIZE) return true; // msize is modified already by memory access, avoid that for now - if (_item.instruction() == Instruction::SHA3) - return true; //@todo: we have to compare sha3's not based on their memory addresses but on the memory content. InstructionInfo info = instructionInfo(_item.instruction()); if (_item.instruction() == Instruction::SSTORE) return false; diff --git a/libsolidity/CMakeLists.txt b/libsolidity/CMakeLists.txt index b42b6e5c0..0abd188c1 100644 --- a/libsolidity/CMakeLists.txt +++ b/libsolidity/CMakeLists.txt @@ -27,7 +27,6 @@ endif() target_link_libraries(${EXECUTABLE} ${JSONCPP_LIBRARIES}) target_link_libraries(${EXECUTABLE} evmcore) -target_link_libraries(${EXECUTABLE} devcore) target_link_libraries(${EXECUTABLE} devcrypto) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) diff --git a/test/SolidityOptimizer.cpp b/test/SolidityOptimizer.cpp index 4fedd642d..ca72f4fe1 100644 --- a/test/SolidityOptimizer.cpp +++ b/test/SolidityOptimizer.cpp @@ -56,8 +56,16 @@ public: m_nonOptimizedContract = m_contractAddress; m_optimize = true; bytes optimizedBytecode = compileAndRun(_sourceCode, _value, _contractName); + size_t nonOptimizedSize = 0; + eth::eachInstruction(nonOptimizedBytecode, [&](Instruction, u256 const&) { + nonOptimizedSize++; + }); + size_t optimizedSize = 0; + eth::eachInstruction(optimizedBytecode, [&](Instruction, u256 const&) { + optimizedSize++; + }); BOOST_CHECK_MESSAGE( - nonOptimizedBytecode.size() > optimizedBytecode.size(), + nonOptimizedSize > optimizedSize, "Optimizer did not reduce bytecode size." ); m_optimizedContract = m_contractAddress; @@ -75,11 +83,16 @@ public: "\nOptimized: " + toHex(optimizedOutput)); } - void checkCSE(AssemblyItems const& _input, AssemblyItems const& _expectation) + AssemblyItems getCSE(AssemblyItems const& _input) { eth::CommonSubexpressionEliminator cse; BOOST_REQUIRE(cse.feedItems(_input.begin(), _input.end()) == _input.end()); - AssemblyItems output = cse.getOptimizedItems(); + return cse.getOptimizedItems(); + } + + void checkCSE(AssemblyItems const& _input, AssemblyItems const& _expectation) + { + AssemblyItems output = getCSE(_input); BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end()); } @@ -569,6 +582,155 @@ BOOST_AUTO_TEST_CASE(cse_jumpi_jump) }); } +BOOST_AUTO_TEST_CASE(cse_empty_sha3) +{ + AssemblyItems input{ + u256(0), + Instruction::DUP2, + Instruction::SHA3 + }; + checkCSE(input, { + u256(sha3(bytesConstRef())) + }); +} + +BOOST_AUTO_TEST_CASE(cse_partial_sha3) +{ + AssemblyItems input{ + u256(0xabcd) << (256 - 16), + u256(0), + Instruction::MSTORE, + u256(2), + u256(0), + Instruction::SHA3 + }; + checkCSE(input, { + u256(0xabcd) << (256 - 16), + u256(0), + Instruction::MSTORE, + u256(sha3(bytes{0xab, 0xcd})) + }); +} + +BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_location) +{ + // sha3 twice from same dynamic location + AssemblyItems input{ + Instruction::DUP2, + Instruction::DUP1, + Instruction::MSTORE, + u256(64), + Instruction::DUP2, + Instruction::SHA3, + u256(64), + Instruction::DUP3, + Instruction::SHA3 + }; + checkCSE(input, { + Instruction::DUP2, + Instruction::DUP1, + Instruction::MSTORE, + u256(64), + Instruction::DUP2, + Instruction::SHA3, + Instruction::DUP1 + }); +} + +BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content) +{ + // sha3 twice from different dynamic location but with same content + AssemblyItems input{ + Instruction::DUP1, + u256(0x80), + Instruction::MSTORE, // m[128] = DUP1 + u256(0x20), + u256(0x80), + Instruction::SHA3, // sha3(m[128..(128+32)]) + Instruction::DUP2, + u256(12), + Instruction::MSTORE, // m[12] = DUP1 + u256(0x20), + u256(12), + Instruction::SHA3 // sha3(m[12..(12+32)]) + }; + checkCSE(input, { + u256(0x80), + Instruction::DUP2, + Instruction::DUP2, + Instruction::MSTORE, + u256(0x20), + Instruction::SWAP1, + Instruction::SHA3, + u256(12), + Instruction::DUP3, + Instruction::SWAP1, + Instruction::MSTORE, + Instruction::DUP1 + }); +} + +BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_dynamic_store_in_between) +{ + // sha3 twice from different dynamic location but with same content, + // dynamic mstore in between, which forces us to re-calculate the sha3 + AssemblyItems input{ + u256(0x80), + Instruction::DUP2, + Instruction::DUP2, + Instruction::MSTORE, // m[128] = DUP1 + u256(0x20), + Instruction::DUP1, + Instruction::DUP3, + Instruction::SHA3, // sha3(m[128..(128+32)]) + u256(12), + Instruction::DUP5, + Instruction::DUP2, + Instruction::MSTORE, // m[12] = DUP1 + Instruction::DUP12, + Instruction::DUP14, + Instruction::MSTORE, // destroys memory knowledge + Instruction::SWAP2, + Instruction::SWAP1, + Instruction::SWAP2, + Instruction::SHA3 // sha3(m[12..(12+32)]) + }; + checkCSE(input, input); +} + +BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_noninterfering_store_in_between) +{ + // sha3 twice from different dynamic location but with same content, + // dynamic mstore in between, but does not force us to re-calculate the sha3 + AssemblyItems input{ + u256(0x80), + Instruction::DUP2, + Instruction::DUP2, + Instruction::MSTORE, // m[128] = DUP1 + u256(0x20), + Instruction::DUP1, + Instruction::DUP3, + Instruction::SHA3, // sha3(m[128..(128+32)]) + u256(12), + Instruction::DUP5, + Instruction::DUP2, + Instruction::MSTORE, // m[12] = DUP1 + Instruction::DUP12, + u256(12 + 32), + Instruction::MSTORE, // does not destoy memory knowledge + Instruction::DUP13, + u256(128 - 32), + Instruction::MSTORE, // does not destoy memory knowledge + u256(0x20), + u256(12), + Instruction::SHA3 // sha3(m[12..(12+32)]) + }; + // if this changes too often, only count the number of SHA3 and MSTORE instructions + AssemblyItems output = getCSE(input); + BOOST_CHECK_EQUAL(4, count(output.begin(), output.end(), AssemblyItem(Instruction::MSTORE))); + BOOST_CHECK_EQUAL(1, count(output.begin(), output.end(), AssemblyItem(Instruction::SHA3))); +} + BOOST_AUTO_TEST_SUITE_END() }