diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 9530ded49..abcd44516 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -311,54 +311,45 @@ Assembly& Assembly::optimise(bool _enable) copt << toString(*this); count = 0; - //@todo CFG interface should be a generator, that returns an item and a pointer to a - // knownstate, which has to replace the current state if it is not null. - // Feed these items to the CSE, but also store them and replace the stored version - // if the items generated by the CSE are shorter. (or even use less gas?) - copt << "Performing control flow analysis..."; + copt << "Performing optimisation..."; { ControlFlowGraph cfg(m_items); - AssemblyItems optItems; + AssemblyItems optimisedItems; for (BasicBlock const& block: cfg.optimisedBlocks()) - copy(m_items.begin() + block.begin, m_items.begin() + block.end, - back_inserter(optItems)); - if (optItems.size() < m_items.size()) { - copt << "Old size: " << m_items.size() << ", new size: " << optItems.size(); - m_items = move(optItems); - count++; - } - } - - copt << "Performing common subexpression elimination..."; - for (auto iter = m_items.begin(); iter != m_items.end();) - { - //@todo use only a single state / expression classes instance. - KnownState state(make_shared()); - CommonSubexpressionEliminator eliminator(state); - auto orig = iter; - iter = eliminator.feedItems(iter, m_items.end()); - AssemblyItems optItems; - bool shouldReplace = false; - try - { - optItems = eliminator.getOptimizedItems(); - shouldReplace = (optItems.size() < size_t(iter - orig)); - } - catch (StackTooDeepException const&) - { - // This might happen if the opcode reconstruction is not as efficient - // as the hand-crafted code. - } - - if (shouldReplace) - { - copt << "Old size: " << (iter - orig) << ", new size: " << optItems.size(); - count++; - for (auto moveIter = optItems.begin(); moveIter != optItems.end(); ++orig, ++moveIter) - *orig = move(*moveIter); - iter = m_items.erase(orig, iter); + assertThrow(!!block.startState, OptimizerException, ""); + CommonSubexpressionEliminator eliminator(*block.startState); + auto iter = m_items.begin() + block.begin; + auto const end = m_items.begin() + block.end; + while (iter < end) + { + auto orig = iter; + iter = eliminator.feedItems(iter, end); + bool shouldReplace = false; + AssemblyItems optimisedChunk; + try + { + optimisedChunk = eliminator.getOptimizedItems(); + shouldReplace = (optimisedChunk.size() < size_t(iter - orig)); + } + catch (StackTooDeepException const&) + { + // This might happen if the opcode reconstruction is not as efficient + // as the hand-crafted code. + } + + if (shouldReplace) + { + copt << "Old size: " << (iter - orig) << ", new size: " << optimisedChunk.size(); + count++; + optimisedItems += optimisedChunk; + } + else + copy(orig, iter, back_inserter(optimisedItems)); + } } + if (optimisedItems.size() < m_items.size()) + m_items = move(optimisedItems); } } @@ -461,7 +452,8 @@ bytes Assembly::assemble() const for (auto const& i: tagRef) { bytesRef r(ret.data() + i.first, bytesPerTag); - toBigEndian(tagPos[i.second], r); + //@todo in the failure case, we could use the position of the invalid jumpdest + toBigEndian(i.second < tagPos.size() ? tagPos[i.second] : (1 << (8 * bytesPerTag)) - 1, r); } if (!m_data.empty()) diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp index 4b85eba40..e369c9dbc 100644 --- a/libevmasm/CommonSubexpressionEliminator.cpp +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -45,16 +45,22 @@ vector CommonSubexpressionEliminator::getOptimizedItems() for (int height = minHeight; height <= m_state.stackHeight(); ++height) targetStackContents[height] = m_state.stackElement(height, SourceLocation()); - // Debug info: - //stream(cout, initialStackContents, targetStackContents); - AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( m_initialState.stackHeight(), initialStackContents, targetStackContents ); if (m_breakingItem) + { items.push_back(*m_breakingItem); + m_state.feedItem(*m_breakingItem); + } + + // cleanup + m_initialState = m_state; + m_breakingItem = nullptr; + m_storeOperations.clear(); + return items; } @@ -113,16 +119,14 @@ AssemblyItems CSECodeGenerator::generateCode( { m_stackHeight = _initialStackHeight; m_stack = _initialStack; + m_targetStack = _targetStackContents; for (auto const& item: m_stack) - if (!m_classPositions.count(item.second)) - m_classPositions[item.second] = item.first; - - // @todo: provide information about the positions of copies of class elements + m_classPositions[item.second].insert(item.first); // generate the dependency graph starting from final storage and memory writes and target stack contents for (auto const& p: m_storeOperations) addDependencies(p.second.back().expression); - for (auto const& targetItem: _targetStackContents) + for (auto const& targetItem: m_targetStack) { m_finalClasses.insert(targetItem.second); addDependencies(targetItem.second); @@ -141,13 +145,16 @@ AssemblyItems CSECodeGenerator::generateCode( generateClassElement(seqAndId.second, true); // generate the target stack elements - for (auto const& targetItem: _targetStackContents) + for (auto const& targetItem: m_targetStack) { - int position = generateClassElement(targetItem.second); - assertThrow(position != c_invalidPosition, OptimizerException, ""); - if (position == targetItem.first) + if (m_stack.count(targetItem.first) && m_stack.at(targetItem.first) == targetItem.second) + continue; // already there + generateClassElement(targetItem.second); + assertThrow(!m_classPositions[targetItem.second].empty(), OptimizerException, ""); + if (m_classPositions[targetItem.second].count(targetItem.first)) continue; SourceLocation const& location = m_expressionClasses.representative(targetItem.second).item->getLocation(); + int position = classElementPosition(targetItem.second); if (position < targetItem.first) // it is already at its target, we need another copy appendDup(position, location); @@ -164,21 +171,24 @@ AssemblyItems CSECodeGenerator::generateCode( // check validity int finalHeight = 0; - if (!_targetStackContents.empty()) + if (!m_targetStack.empty()) // have target stack, so its height should be the final height - finalHeight = (--_targetStackContents.end())->first; + finalHeight = (--m_targetStack.end())->first; else if (!_initialStack.empty()) // no target stack, only erase the initial stack finalHeight = _initialStack.begin()->first - 1; else // neither initial no target stack, no change in height - finalHeight = 0; + finalHeight = _initialStackHeight; assertThrow(finalHeight == m_stackHeight, OptimizerException, "Incorrect final stack height."); + return m_generatedItems; } void CSECodeGenerator::addDependencies(Id _c) { + if (m_classPositions.count(_c)) + return; // it is already on the stack if (m_neededBy.count(_c)) return; // we already computed the dependencies for _c ExpressionClasses::Expression expr = m_expressionClasses.representative(_c); @@ -254,19 +264,23 @@ void CSECodeGenerator::addDependencies(Id _c) } } -int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) +void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) { + for (auto it: m_classPositions) + for (auto p: it.second) + if (p > m_stackHeight) + assertThrow(false, OptimizerException, ""); // do some cleanup removeStackTopIfPossible(); if (m_classPositions.count(_c)) { assertThrow( - m_classPositions[_c] != c_invalidPosition, + !m_classPositions[_c].empty(), OptimizerException, "Element already removed but still needed." ); - return m_classPositions[_c]; + return; } ExpressionClasses::Expression const& expr = m_expressionClasses.representative(_c); assertThrow( @@ -339,16 +353,16 @@ int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) m_generatedItems.back() == AssemblyItem(Instruction::SWAP1)) // this will not append a swap but remove the one that is already there appendOrRemoveSwap(m_stackHeight - 1, location); - for (auto arg: arguments) - if (canBeRemoved(arg, _c)) - m_classPositions[arg] = c_invalidPosition; for (size_t i = 0; i < arguments.size(); ++i) + { + m_classPositions[m_stack[m_stackHeight - i]].erase(m_stackHeight - i); m_stack.erase(m_stackHeight - i); + } appendItem(*expr.item); if (expr.item->type() != Operation || instructionInfo(expr.item->instruction()).ret == 1) { m_stack[m_stackHeight] = _c; - return m_classPositions[_c] = m_stackHeight; + m_classPositions[_c].insert(m_stackHeight); } else { @@ -357,31 +371,39 @@ int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) OptimizerException, "Invalid number of return values." ); - return m_classPositions[_c] = c_invalidPosition; + m_classPositions[_c]; // ensure it is created to mark the expression as generated } } int CSECodeGenerator::classElementPosition(Id _id) const { assertThrow( - m_classPositions.count(_id) && m_classPositions.at(_id) != c_invalidPosition, + m_classPositions.count(_id) && !m_classPositions.at(_id).empty(), OptimizerException, "Element requested but is not present." ); - return m_classPositions.at(_id); + return *max_element(m_classPositions.at(_id).begin(), m_classPositions.at(_id).end()); } -bool CSECodeGenerator::canBeRemoved(Id _element, Id _result) +bool CSECodeGenerator::canBeRemoved(Id _element, Id _result, int _fromPosition) { - // Returns false if _element is finally needed or is needed by a class that has not been - // computed yet. Note that m_classPositions also includes classes that were deleted in the meantime. - if (m_finalClasses.count(_element)) - return false; + // Default for _fromPosition is the canonical position of the element. + if (_fromPosition == c_invalidPosition) + _fromPosition = classElementPosition(_element); - auto range = m_neededBy.equal_range(_element); - for (auto it = range.first; it != range.second; ++it) - if (it->second != _result && !m_classPositions.count(it->second)) - return false; + bool haveCopy = m_classPositions.at(_element).size() > 1; + if (m_finalClasses.count(_element)) + // It is part of the target stack. It can be removed if it is a copy that is not in the target position. + return haveCopy && (!m_targetStack.count(_fromPosition) || m_targetStack[_fromPosition] != _element); + else if (!haveCopy) + { + // Can be removed unless it is needed by a class that has not been computed yet. + // Note that m_classPositions also includes classes that were deleted in the meantime. + auto range = m_neededBy.equal_range(_element); + for (auto it = range.first; it != range.second; ++it) + if (it->second != _result && !m_classPositions.count(it->second)) + return false; + } return true; } @@ -391,11 +413,11 @@ bool CSECodeGenerator::removeStackTopIfPossible() return false; assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, ""); Id top = m_stack[m_stackHeight]; - if (!canBeRemoved(top)) + if (!canBeRemoved(top, Id(-1), m_stackHeight)) return false; - m_generatedItems.push_back(AssemblyItem(Instruction::POP)); + m_classPositions[m_stack[m_stackHeight]].erase(m_stackHeight); m_stack.erase(m_stackHeight); - m_stackHeight--; + appendItem(AssemblyItem(Instruction::POP)); return true; } @@ -407,6 +429,7 @@ void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _locat assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); appendItem(AssemblyItem(dupInstruction(instructionNum), _location)); m_stack[m_stackHeight] = m_stack[_fromPosition]; + m_classPositions[m_stack[m_stackHeight]].insert(m_stackHeight); } void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location) @@ -418,13 +441,15 @@ void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation cons assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); appendItem(AssemblyItem(swapInstruction(instructionNum), _location)); - // The value of a class can be present in multiple locations on the stack. We only update the - // "canonical" one that is tracked by m_classPositions - if (m_classPositions[m_stack[m_stackHeight]] == m_stackHeight) - m_classPositions[m_stack[m_stackHeight]] = _fromPosition; - if (m_classPositions[m_stack[_fromPosition]] == _fromPosition) - m_classPositions[m_stack[_fromPosition]] = m_stackHeight; - swap(m_stack[m_stackHeight], m_stack[_fromPosition]); + + if (m_stack[m_stackHeight] != m_stack[_fromPosition]) + { + m_classPositions[m_stack[m_stackHeight]].erase(m_stackHeight); + m_classPositions[m_stack[m_stackHeight]].insert(_fromPosition); + m_classPositions[m_stack[_fromPosition]].erase(_fromPosition); + m_classPositions[m_stack[_fromPosition]].insert(m_stackHeight); + swap(m_stack[m_stackHeight], m_stack[_fromPosition]); + } if (m_generatedItems.size() >= 2 && SemanticInformation::isSwapInstruction(m_generatedItems.back()) && *(m_generatedItems.end() - 2) == m_generatedItems.back()) diff --git a/libevmasm/CommonSubexpressionEliminator.h b/libevmasm/CommonSubexpressionEliminator.h index 6e1ba40b3..a35e31d90 100644 --- a/libevmasm/CommonSubexpressionEliminator.h +++ b/libevmasm/CommonSubexpressionEliminator.h @@ -71,13 +71,6 @@ public: /// @returns the resulting items after optimization. AssemblyItems getOptimizedItems(); - /// Streams debugging information to @a _out. - std::ostream& stream( - std::ostream& _out, - std::map _initialStack = std::map(), - std::map _targetStack = std::map() - ) const; - private: /// Feeds the item into the system for analysis. void feedItem(AssemblyItem const& _item, bool _copyItem = false); @@ -126,16 +119,15 @@ private: void addDependencies(Id _c); /// Produce code that generates the given element if it is not yet present. - /// @returns the stack position of the element or c_invalidPosition if it does not actually - /// generate a value on the stack. /// @param _allowSequenced indicates that sequence-constrained operations are allowed - int generateClassElement(Id _c, bool _allowSequenced = false); + void generateClassElement(Id _c, bool _allowSequenced = false); /// @returns the position of the representative of the given id on the stack. /// @note throws an exception if it is not on the stack. int classElementPosition(Id _id) const; - /// @returns true if @a _element can be removed - in general or, if given, while computing @a _result. - bool canBeRemoved(Id _element, Id _result = Id(-1)); + /// @returns true if the copy of @a _element can be removed from stack position _fromPosition + /// - in general or, if given, while computing @a _result. + bool canBeRemoved(Id _element, Id _result = Id(-1), int _fromPosition = c_invalidPosition); /// Appends code to remove the topmost stack element if it can be removed. bool removeStackTopIfPossible(); @@ -157,8 +149,8 @@ private: std::multimap m_neededBy; /// Current content of the stack. std::map m_stack; - /// Current positions of equivalence classes, equal to c_invalidPosition if already deleted. - std::map m_classPositions; + /// Current positions of equivalence classes, equal to the empty set if already deleted. + std::map> m_classPositions; /// The actual eqivalence class items and how to compute them. ExpressionClasses& m_expressionClasses; @@ -167,6 +159,7 @@ private: std::map, StoreOperations> m_storeOperations; /// The set of equivalence classes that should be present on the stack at the end. std::set m_finalClasses; + std::map m_targetStack; }; template @@ -175,6 +168,7 @@ _AssemblyItemIterator CommonSubexpressionEliminator::feedItems( _AssemblyItemIterator _end ) { + assertThrow(!m_breakingItem, OptimizerException, "Invalid use of CommonSubexpressionEliminator."); for (; _iterator != _end && !SemanticInformation::breaksCSEAnalysisBlock(*_iterator); ++_iterator) feedItem(*_iterator); if (_iterator != _end) diff --git a/libevmasm/ControlFlowGraph.cpp b/libevmasm/ControlFlowGraph.cpp index 2e28317a3..cc68b2af8 100644 --- a/libevmasm/ControlFlowGraph.cpp +++ b/libevmasm/ControlFlowGraph.cpp @@ -142,7 +142,7 @@ void ControlFlowGraph::removeUnusedBlocks() BasicBlock const& block = m_blocks.at(blocksToProcess.back()); blocksToProcess.pop_back(); for (BlockId tag: block.pushedTags) - if (!neededBlocks.count(tag)) + if (!neededBlocks.count(tag) && m_blocks.count(tag)) { neededBlocks.insert(tag); blocksToProcess.push_back(tag); @@ -191,12 +191,12 @@ void ControlFlowGraph::setPrevLinks() if (push.type() != PushTag) continue; BlockId nextId(push.data()); - if (m_blocks.at(nextId).prev) + if (m_blocks.count(nextId) && m_blocks.at(nextId).prev) continue; bool hasLoop = false; - for (BlockId id = nextId; id && !hasLoop; id = m_blocks.at(id).next) + for (BlockId id = nextId; id && m_blocks.count(id) && !hasLoop; id = m_blocks.at(id).next) hasLoop = (id == blockId); - if (hasLoop) + if (hasLoop || !m_blocks.count(nextId)) continue; m_blocks[nextId].prev = blockId; @@ -225,6 +225,8 @@ void ControlFlowGraph::gatherKnowledge() { //@todo we might have to do something like incrementing the sequence number for each JUMPDEST assertThrow(!!workQueue.back().first, OptimizerException, ""); + if (!m_blocks.count(workQueue.back().first)) + continue; // too bad, we do not know the tag, probably an invalid jump BasicBlock& block = m_blocks.at(workQueue.back().first); KnownStatePointer state = workQueue.back().second; workQueue.pop_back(); @@ -281,6 +283,15 @@ void ControlFlowGraph::gatherKnowledge() ) workQueue.push_back(make_pair(block.next, state->copy())); } + + // Remove all blocks we never visited here. This might happen because a tag is pushed but + // never used for a JUMP. + // Note that this invalidates some contents of pushedTags + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (!it->second.startState) + it = m_blocks.erase(it); + else + it++; } BasicBlocks ControlFlowGraph::rebuildCode() @@ -288,7 +299,8 @@ BasicBlocks ControlFlowGraph::rebuildCode() map pushes; for (auto& idAndBlock: m_blocks) for (BlockId ref: idAndBlock.second.pushedTags) - pushes[ref]++; + if (m_blocks.count(ref)) + pushes[ref]++; set blocksToAdd; for (auto it: m_blocks) diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp index 41ac4802b..5a70a74fb 100644 --- a/libevmasm/KnownState.cpp +++ b/libevmasm/KnownState.cpp @@ -160,23 +160,51 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool return op; } -void KnownState::reduceToCommonKnowledge(KnownState const& /*_other*/) +/// Helper function for KnownState::reduceToCommonKnowledge, removes everything from +/// _this which is not in or not equal to the value in _other. +template void intersect( + _Mapping& _this, + _Mapping const& _other, + function<_KeyType(_KeyType)> const& _keyTrans = [](_KeyType _k) { return _k; } +) +{ + for (auto it = _this.begin(); it != _this.end();) + if (_other.count(_keyTrans(it->first)) && _other.at(_keyTrans(it->first)) == it->second) + ++it; + else + it = _this.erase(it); +} + +template void intersect(_Mapping& _this, _Mapping const& _other) { - //@todo - *this = KnownState(m_expressionClasses); + intersect<_Mapping, ExpressionClasses::Id>(_this, _other, [](ExpressionClasses::Id _k) { return _k; }); +} + +void KnownState::reduceToCommonKnowledge(KnownState const& _other) +{ + int stackDiff = m_stackHeight - _other.m_stackHeight; + function stackKeyTransform = [=](int _key) -> int { return _key - stackDiff; }; + intersect(m_stackElements, _other.m_stackElements, stackKeyTransform); + // Use the smaller stack height. Essential to terminate in case of loops. + if (m_stackHeight > _other.m_stackHeight) + { + map shiftedStack; + for (auto const& stackElement: m_stackElements) + shiftedStack[stackElement.first - stackDiff] = stackElement.second; + m_stackElements = move(shiftedStack); + m_stackHeight = _other.m_stackHeight; + } + + intersect(m_storageContent, _other.m_storageContent); + intersect(m_memoryContent, _other.m_memoryContent); } bool KnownState::operator==(const KnownState& _other) const { - //@todo - return ( - m_stackElements.empty() && - _other.m_stackElements.empty() && - m_storageContent.empty() && - _other.m_storageContent.empty() && - m_memoryContent.empty() && - _other.m_memoryContent.empty() - ); + return m_storageContent == _other.m_storageContent && + m_memoryContent == _other.m_memoryContent && + m_stackHeight == _other.m_stackHeight && + m_stackElements == _other.m_stackElements; } ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) diff --git a/test/libsolidity/SolidityOptimizer.cpp b/test/libsolidity/SolidityOptimizer.cpp index 3cb6a536a..e50469dd6 100644 --- a/test/libsolidity/SolidityOptimizer.cpp +++ b/test/libsolidity/SolidityOptimizer.cpp @@ -251,6 +251,63 @@ BOOST_AUTO_TEST_CASE(function_calls) compareVersions("f(uint256)", 36); } +BOOST_AUTO_TEST_CASE(storage_write_in_loops) +{ + char const* sourceCode = R"( + contract test { + uint d; + function f(uint a) returns (uint r) { + var x = d; + for (uint i = 1; i < a * a; i++) { + r = d; + d = i; + } + + } + } + )"; + compileBothVersions(sourceCode); + compareVersions("f(uint256)", 0); + compareVersions("f(uint256)", 10); + compareVersions("f(uint256)", 36); +} + +BOOST_AUTO_TEST_CASE(retain_information_in_branches) +{ + // This tests that the optimizer knows that we already have "z == sha3(y)" inside both branches. + char const* sourceCode = R"( + contract c { + bytes32 d; + uint a; + function f(uint x, bytes32 y) returns (uint r_a, bytes32 r_d) { + bytes32 z = sha3(y); + if (x > 8) { + z = sha3(y); + a = x; + } else { + z = sha3(y); + a = x; + } + r_a = a; + r_d = d; + } + } + )"; + compileBothVersions(sourceCode); + compareVersions("f(uint256,bytes32)", 0, "abc"); + compareVersions("f(uint256,bytes32)", 8, "def"); + compareVersions("f(uint256,bytes32)", 10, "ghi"); + + m_optimize = true; + bytes optimizedBytecode = compileAndRun(sourceCode, 0, "c"); + size_t numSHA3s = 0; + eth::eachInstruction(optimizedBytecode, [&](Instruction _instr, u256 const&) { + if (_instr == eth::Instruction::SHA3) + numSHA3s++; + }); + BOOST_CHECK_EQUAL(1, numSHA3s); +} + BOOST_AUTO_TEST_CASE(cse_intermediate_swap) { eth::KnownState state;