From 30c832e3582f341c4fbddc90fd9a0e528349d423 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 29 Apr 2015 18:16:05 +0200 Subject: [PATCH 1/8] Split known state from common subexpression eliminator. --- libevmasm/Assembly.cpp | 3 +- libevmasm/CommonSubexpressionEliminator.cpp | 267 ++----------------- libevmasm/CommonSubexpressionEliminator.h | 59 +---- libevmasm/KnownState.cpp | 278 ++++++++++++++++++++ libevmasm/KnownState.h | 149 +++++++++++ test/libsolidity/SolidityOptimizer.cpp | 6 +- 6 files changed, 455 insertions(+), 307 deletions(-) create mode 100644 libevmasm/KnownState.cpp create mode 100644 libevmasm/KnownState.h diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 6cc09a4bc..c7253622e 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -329,7 +329,8 @@ Assembly& Assembly::optimise(bool _enable) copt << "Performing common subexpression elimination..."; for (auto iter = m_items.begin(); iter != m_items.end();) { - CommonSubexpressionEliminator eliminator; + KnownState state; + CommonSubexpressionEliminator eliminator(state); auto orig = iter; iter = eliminator.feedItems(iter, m_items.end()); AssemblyItems optItems; diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp index 63524d6f3..645a426d9 100644 --- a/libevmasm/CommonSubexpressionEliminator.cpp +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -37,18 +37,19 @@ vector CommonSubexpressionEliminator::getOptimizedItems() map initialStackContents; map targetStackContents; - int minHeight = m_stackHeight + 1; - if (!m_stackElements.empty()) - minHeight = min(minHeight, m_stackElements.begin()->first); + int minHeight = m_state.stackHeight() + 1; + if (!m_state.stackElements().empty()) + minHeight = min(minHeight, m_state.stackElements().begin()->first); for (int height = minHeight; height <= 0; ++height) - initialStackContents[height] = initialStackElement(height, SourceLocation()); - for (int height = minHeight; height 
<= m_stackHeight; ++height) - targetStackContents[height] = stackElement(height, SourceLocation()); + //@todo this is not nice as it is here - should be "unknownStackElement" - but is it really unknown? + initialStackContents[height] = m_state.initialStackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_state.stackHeight(); ++height) + targetStackContents[height] = m_state.stackElement(height, SourceLocation()); // Debug info: //stream(cout, initialStackContents, targetStackContents); - AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode( + AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( initialStackContents, targetStackContents ); @@ -57,103 +58,11 @@ vector CommonSubexpressionEliminator::getOptimizedItems() return items; } -ostream& CommonSubexpressionEliminator::stream( - ostream& _out, - map _initialStack, - map _targetStack -) const -{ - auto streamExpressionClass = [this](ostream& _out, Id _id) - { - auto const& expr = m_expressionClasses.representative(_id); - _out << " " << dec << _id << ": " << *expr.item; - if (expr.sequenceNumber) - _out << "@" << dec << expr.sequenceNumber; - _out << "("; - for (Id arg: expr.arguments) - _out << dec << arg << ","; - _out << ")" << endl; - }; - - _out << "Optimizer analysis:" << endl; - _out << "Final stack height: " << dec << m_stackHeight << endl; - _out << "Equivalence classes: " << endl; - for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass) - streamExpressionClass(_out, eqClass); - - _out << "Initial stack: " << endl; - for (auto const& it: _initialStack) - { - _out << " " << dec << it.first << ": "; - streamExpressionClass(_out, it.second); - } - _out << "Target stack: " << endl; - for (auto const& it: _targetStack) - { - _out << " " << dec << it.first << ": "; - streamExpressionClass(_out, it.second); - } - - return _out; -} - void 
CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem) { - if (_item.type() != Operation) - { - assertThrow(_item.deposit() == 1, InvalidDeposit, ""); - setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem)); - } - else - { - Instruction instruction = _item.instruction(); - InstructionInfo info = instructionInfo(instruction); - if (SemanticInformation::isDupInstruction(_item)) - setStackElement( - m_stackHeight + 1, - stackElement( - m_stackHeight - int(instruction) + int(Instruction::DUP1), - _item.getLocation() - ) - ); - else if (SemanticInformation::isSwapInstruction(_item)) - swapStackElements( - m_stackHeight, - m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), - _item.getLocation() - ); - else if (instruction != Instruction::POP) - { - vector arguments(info.args); - for (int i = 0; i < info.args; ++i) - arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); - if (_item.instruction() == Instruction::SSTORE) - storeInStorage(arguments[0], arguments[1], _item.getLocation()); - else if (_item.instruction() == Instruction::SLOAD) - setStackElement( - m_stackHeight + _item.deposit(), - loadFromStorage(arguments[0], _item.getLocation()) - ); - else if (_item.instruction() == Instruction::MSTORE) - storeInMemory(arguments[0], arguments[1], _item.getLocation()); - else if (_item.instruction() == Instruction::MLOAD) - setStackElement( - m_stackHeight + _item.deposit(), - loadFromMemory(arguments[0], _item.getLocation()) - ); - else if (_item.instruction() == Instruction::SHA3) - setStackElement( - m_stackHeight + _item.deposit(), - applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) - ); - else - setStackElement( - m_stackHeight + _item.deposit(), - m_expressionClasses.find(_item, arguments, _copyItem) - ); - } - m_stackHeight += _item.deposit(); - } + StoreOperation op = m_state.feedItem(_item, _copyItem); + if (op.isValid()) + m_storeOperations.push_back(op); } void 
CommonSubexpressionEliminator::optimizeBreakingItem() @@ -164,20 +73,20 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() SourceLocation const& location = m_breakingItem->getLocation(); AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); - Id condition = stackElement(m_stackHeight - 1, location); - Id zero = m_expressionClasses.find(u256(0)); - if (m_expressionClasses.knownToBeDifferent(condition, zero)) + Id condition = m_state.stackElement(m_state.stackHeight() - 1, location); + Id zero = m_state.expressionClasses().find(u256(0)); + if (m_state.expressionClasses().knownToBeDifferent(condition, zero)) { feedItem(AssemblyItem(Instruction::SWAP1, location), true); feedItem(AssemblyItem(Instruction::POP, location), true); AssemblyItem item(Instruction::JUMP, location); item.setJumpType(jumpType); - m_breakingItem = m_expressionClasses.storeItem(item); + m_breakingItem = m_state.expressionClasses().storeItem(item); return; } - Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition}); - if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero)) + Id negatedCondition = m_state.expressionClasses().find(Instruction::ISZERO, {condition}); + if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero)) { AssemblyItem it(Instruction::POP, location); feedItem(it, true); @@ -186,148 +95,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() } } -void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class) -{ - m_stackElements[_stackHeight] = _class; -} - -void CommonSubexpressionEliminator::swapStackElements( - int _stackHeightA, - int _stackHeightB, - SourceLocation const& _location -) -{ - assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); - // ensure they are created - stackElement(_stackHeightA, _location); - stackElement(_stackHeightB, _location); - - swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); -} - 
-ExpressionClasses::Id CommonSubexpressionEliminator::stackElement( - int _stackHeight, - SourceLocation const& _location -) -{ - if (m_stackElements.count(_stackHeight)) - return m_stackElements.at(_stackHeight); - // Stack element not found (not assigned yet), create new equivalence class. - return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); -} - -ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement( - int _stackHeight, - SourceLocation const& _location -) -{ - assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); - assertThrow(_stackHeight > -16, StackTooDeepException, ""); - // This is a special assembly item that refers to elements pre-existing on the initial stack. - return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); -} - -void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location) -{ - if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) - // do not execute the storage if we know that the value is already there - return; - m_sequenceNumber++; - decltype(m_storageContent) storageContents; - // Copy over all values (i.e. retain knowledge about them) where we know that this store - // operation will not destroy the knowledge. Specifically, we copy storage locations we know - // are different from _slot or locations where we know that the stored value is equal to _value. 
- for (auto const& storageItem: m_storageContent) - if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) - storageContents.insert(storageItem); - m_storageContent = move(storageContents); - - AssemblyItem item(Instruction::SSTORE, _location); - Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); - m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id)); - m_storageContent[_slot] = _value; - // increment a second time so that we get unique sequence numbers for writes - m_sequenceNumber++; -} - -ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location) -{ - if (m_storageContent.count(_slot)) - return m_storageContent.at(_slot); - - AssemblyItem item(Instruction::SLOAD, _location); - return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); -} - -void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) -{ - if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) - // do not execute the store if we know that the value is already there - return; - m_sequenceNumber++; - decltype(m_memoryContent) memoryContents; - // copy over values at points where we know that they are different from _slot by at least 32 - for (auto const& memoryItem: m_memoryContent) - if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot)) - memoryContents.insert(memoryItem); - m_memoryContent = move(memoryContents); - - AssemblyItem item(Instruction::MSTORE, _location); - Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); - m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); - m_memoryContent[_slot] = _value; - // increment a second time so that we get unique sequence numbers for writes - m_sequenceNumber++; -} - -ExpressionClasses::Id 
CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location) -{ - if (m_memoryContent.count(_slot)) - return m_memoryContent.at(_slot); - - AssemblyItem item(Instruction::MLOAD, _location); - return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); -} - -CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3( - Id _start, - Id _length, - SourceLocation const& _location -) -{ - AssemblyItem sha3Item(Instruction::SHA3, _location); - // Special logic if length is a short constant, otherwise we cannot tell. - u256 const* l = m_expressionClasses.knownConstant(_length); - // unknown or too large length - if (!l || *l > 128) - return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); - - vector arguments; - for (u256 i = 0; i < *l; i += 32) - { - Id slot = m_expressionClasses.find( - AssemblyItem(Instruction::ADD, _location), - {_start, m_expressionClasses.find(i)} - ); - arguments.push_back(loadFromMemory(slot, _location)); - } - if (m_knownSha3Hashes.count(arguments)) - return m_knownSha3Hashes.at(arguments); - Id v; - // If all arguments are known constants, compute the sha3 here - if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); })) - { - bytes data; - for (Id a: arguments) - data += toBigEndian(*m_expressionClasses.knownConstant(a)); - data.resize(size_t(*l)); - v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location)); - } - else - v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); - return m_knownSha3Hashes[arguments] = v; -} - CSECodeGenerator::CSECodeGenerator( ExpressionClasses& _expressionClasses, vector const& _storeOperations diff --git a/libevmasm/CommonSubexpressionEliminator.h b/libevmasm/CommonSubexpressionEliminator.h index 6156bc81a..2ed926401 100644 --- a/libevmasm/CommonSubexpressionEliminator.h +++ 
b/libevmasm/CommonSubexpressionEliminator.h @@ -32,6 +32,7 @@ #include #include #include +#include namespace dev { @@ -58,20 +59,9 @@ class CommonSubexpressionEliminator { public: using Id = ExpressionClasses::Id; - struct StoreOperation - { - enum Target { Memory, Storage }; - StoreOperation( - Target _target, - Id _slot, - unsigned _sequenceNumber, - Id _expression - ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} - Target target; - Id slot; - unsigned sequenceNumber; - Id expression; - }; + using StoreOperation = KnownState::StoreOperation; + + CommonSubexpressionEliminator(KnownState const& _state): m_state(_state) {} /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first /// item that must be fed into a new instance of the eliminator. @@ -95,49 +85,10 @@ private: /// Tries to optimize the item that breaks the basic block at the end. void optimizeBreakingItem(); - /// Simplifies the given item using - /// Assigns a new equivalence class to the next sequence number of the given stack element. - void setStackElement(int _stackHeight, Id _class); - /// Swaps the given stack elements in their next sequence number. - void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); - /// Retrieves the current equivalence class fo the given stack element (or generates a new - /// one if it does not exist yet). - Id stackElement(int _stackHeight, SourceLocation const& _location); - /// @returns the equivalence class id of the special initial stack element at the given height - /// (must not be positive). - Id initialStackElement(int _stackHeight, SourceLocation const& _location); - - /// Increments the sequence number, deletes all storage information that might be overwritten - /// and stores the new value at the given slot. 
- void storeInStorage(Id _slot, Id _value, SourceLocation const& _location); - /// Retrieves the current value at the given slot in storage or creates a new special sload class. - Id loadFromStorage(Id _slot, SourceLocation const& _location); - /// Increments the sequence number, deletes all memory information that might be overwritten - /// and stores the new value at the given slot. - void storeInMemory(Id _slot, Id _value, SourceLocation const& _location); - /// Retrieves the current value at the given slot in memory or creates a new special mload class. - Id loadFromMemory(Id _slot, SourceLocation const& _location); - /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. - Id applySha3(Id _start, Id _length, SourceLocation const& _location); - - /// Current stack height, can be negative. - int m_stackHeight = 0; - /// Current stack layout, mapping stack height -> equivalence class - std::map m_stackElements; - /// Current sequence number, this is incremented with each modification to storage or memory. - unsigned m_sequenceNumber = 1; - /// Knowledge about storage content. - std::map m_storageContent; - /// Knowledge about memory content. Keys are memory addresses, note that the values overlap - /// and are not contained here if they are not completely known. - std::map m_memoryContent; - /// Keeps record of all sha3 hashes that are computed. - std::map, Id> m_knownSha3Hashes; + KnownState m_state; /// Keeps information about which storage or memory slots were written to at which sequence /// number with what instruction. std::vector m_storeOperations; - /// Structure containing the classes of equivalent expressions. - ExpressionClasses m_expressionClasses; /// The item that breaks the basic block, can be nullptr. /// It is usually appended to the block but can be optimized in some cases. 
diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp new file mode 100644 index 000000000..244270fb6 --- /dev/null +++ b/libevmasm/KnownState.cpp @@ -0,0 +1,278 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file KnownState.cpp + * @author Christian + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#include "KnownState.h" +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +ostream& KnownState::stream( + ostream& _out, + map _initialStack, + map _targetStack +) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses->representative(_id); + _out << " " << dec << _id << ": " << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "Optimizer analysis:" << endl; + _out << "Final stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Initial stack: " << endl; + for (auto const& it: _initialStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out,
it.second); + } + _out << "Target stack: " << endl; + for (auto const& it: _targetStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + +KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + StoreOperation op; + if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.getLocation() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.getLocation() + ); + else if (instruction != Instruction::POP) + { + vector arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + if (_item.instruction() == Instruction::SSTORE) + op = storeInStorage(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::MSTORE) + op = storeInMemory(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) + ); + else + setStackElement( + m_stackHeight + 
_item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); + } + m_stackHeight += _item.deposit(); + } + return op; +} + +ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new equivalence class. + return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); +} + +ExpressionClasses::Id KnownState::initialStackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); + assertThrow(_stackHeight > -16, StackTooDeepException, ""); + // This is a special assembly item that refers to elements pre-existing on the initial stack. + return m_expressionClasses->find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); +} + +void KnownState::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} + +void KnownState::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} + +KnownState::StoreOperation KnownState::storeInStorage( + Id _slot, + Id _value, + SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. 
retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. + for (auto const& storageItem: m_storageContent) + if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + + return operation; +} + +ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + 
StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + return operation; +} + +ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::Id KnownState::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses->knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses->find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses->find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses->knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + diff --git a/libevmasm/KnownState.h b/libevmasm/KnownState.h new file mode 100644 index 
000000000..c6dfcee6b --- /dev/null +++ b/libevmasm/KnownState.h @@ -0,0 +1,149 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file KnownState.h + * @author Christian + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; + +/** + * Class to infer and store knowledge about the state of the virtual machine at a specific + * instruction. + * + * The general workings are that for each assembly item that is fed, an equivalence class is + * derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some + * arithmetic instructions are used to infer equivalences while these classes are determined.
+ */ +class KnownState +{ +public: + using Id = ExpressionClasses::Id; + struct StoreOperation + { + enum Target { Invalid, Memory, Storage }; + StoreOperation(): target(Invalid), sequenceNumber(-1) {} + StoreOperation( + Target _target, + Id _slot, + unsigned _sequenceNumber, + Id _expression + ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} + bool isValid() const { return target != Invalid; } + Target target; + Id slot; + unsigned sequenceNumber; + Id expression; + }; + + KnownState(): m_expressionClasses(std::make_shared()) {} + + /// Streams debugging information to @a _out. + std::ostream& stream( + std::ostream& _out, + std::map _initialStack = std::map(), + std::map _targetStack = std::map() + ) const; + + /// Feeds the item into the system for analysis. + /// @returns a possible store operation + StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Resets any knowledge about storage. + void resetStorage() { m_storageContent.clear(); } + /// Resets any knowledge about memory. + void resetMemory() { m_memoryContent.clear(); } + /// Resets any knowledge about the current stack. + void resetStack() { m_stackElements.clear(); m_stackHeight = 0; } + /// Resets any knowledge. + void reset() { resetStorage(); resetMemory(); resetStack(); } + + ///@todo the sequence numbers in two copies of this class should never be the same. + /// might be doable using two-dimensional sequence numbers, where the first value is incremented + /// for each copy + + /// Retrieves the current equivalence class for the given stack element (or generates a new + /// one if it does not exist yet). + Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the equivalence class id of the special initial stack element at the given height + /// (must not be positive).
+ Id initialStackElement(int _stackHeight, SourceLocation const& _location); + + int stackHeight() const { return m_stackHeight; } + std::map const& stackElements() const { return m_stackElements; } + ExpressionClasses& expressionClasses() const { return *m_expressionClasses; } + +private: + /// Assigns a new equivalence class to the next sequence number of the given stack element. + void setStackElement(int _stackHeight, Id _class); + /// Swaps the given stack elements in their next sequence number. + void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); + + /// Increments the sequence number, deletes all storage information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if storage was not modified + StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in storage or creates a new special sload class. + Id loadFromStorage(Id _slot, SourceLocation const& _location); + /// Increments the sequence number, deletes all memory information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if memory was not modified + StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in memory or creates a new special mload class. + Id loadFromMemory(Id _slot, SourceLocation const& _location); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length, SourceLocation const& _location); + + /// Current stack height, can be negative. 
+ int m_stackHeight = 0; + /// Current stack layout, mapping stack height -> equivalence class + std::map m_stackElements; + /// Current sequence number, this is incremented with each modification to storage or memory. + unsigned m_sequenceNumber = 1; + /// Knowledge about storage content. + std::map m_storageContent; + /// Knowledge about memory content. Keys are memory addresses, note that the values overlap + /// and are not contained here if they are not completely known. + std::map m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map, Id> m_knownSha3Hashes; + /// Structure containing the classes of equivalent expressions. + std::shared_ptr m_expressionClasses; +}; + +} +} diff --git a/test/libsolidity/SolidityOptimizer.cpp b/test/libsolidity/SolidityOptimizer.cpp index 9cdaa5886..71463f919 100644 --- a/test/libsolidity/SolidityOptimizer.cpp +++ b/test/libsolidity/SolidityOptimizer.cpp @@ -90,7 +90,8 @@ public: for (AssemblyItem& item: input) item.setLocation(SourceLocation(1, 3, make_shared(""))); - eth::CommonSubexpressionEliminator cse; + eth::KnownState state; + eth::CommonSubexpressionEliminator cse(state); BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end()); AssemblyItems output = cse.getOptimizedItems(); @@ -231,7 +232,8 @@ BOOST_AUTO_TEST_CASE(function_calls) BOOST_AUTO_TEST_CASE(cse_intermediate_swap) { - eth::CommonSubexpressionEliminator cse; + eth::KnownState state; + eth::CommonSubexpressionEliminator cse(state); AssemblyItems input{ Instruction::SWAP1, Instruction::POP, Instruction::ADD, u256(0), Instruction::SWAP1, Instruction::SLOAD, Instruction::SWAP1, u256(100), Instruction::EXP, Instruction::SWAP1, From ce15b5bbc9df87c72aaf3faa28f91db17f6cb624 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 30 Apr 2015 11:40:43 +0200 Subject: [PATCH 2/8] More flexible way to approach unknown stack elements. 
--- libevmasm/ExpressionClasses.cpp | 36 ++++++++++++++++++++++++--------- libevmasm/ExpressionClasses.h | 8 ++++++-- libevmasm/KnownState.cpp | 2 +- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index 1e60a7fe8..8d0785d37 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -37,6 +37,7 @@ using namespace dev::eth; bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const { + assertThrow(!!item && !!_other.item, OptimizerException, ""); auto type = item->type(); auto otherType = _other.item->type(); return std::tie(type, item->data(), arguments, sequenceNumber) < @@ -78,6 +79,15 @@ ExpressionClasses::Id ExpressionClasses::find( return exp.id; } +ExpressionClasses::Id ExpressionClasses::newId() +{ + // Note that we cannot insert it in m_expressions because this requires item to be set. + Expression exp; + exp.id = m_representatives.size(); + m_representatives.push_back(exp); + return exp.id; +} + bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) { // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. 
@@ -122,10 +132,16 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const { Expression const& expr = representative(_id); stringstream str; - str << dec << expr.id << ":" << *expr.item << "("; - for (Id arg: expr.arguments) - str << fullDAGToString(arg) << ","; - str << ")"; + str << dec << expr.id << ":"; + if (expr.item) + { + str << *expr.item << "("; + for (Id arg: expr.arguments) + str << fullDAGToString(arg) << ","; + str << ")"; + } + else + str << " UNIQUE"; return str.str(); } @@ -279,7 +295,7 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, { static Rules rules; - if (_expr.item->type() != Operation) + if (!_expr.item || _expr.item->type() != Operation) return -1; for (auto const& rule: rules.rules()) @@ -337,7 +353,7 @@ void Pattern::setMatchGroup(unsigned _group, map& _ bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const { - if (!matchesBaseItem(*_expr.item)) + if (!matchesBaseItem(_expr.item)) return false; if (m_matchGroup) { @@ -387,13 +403,15 @@ string Pattern::toString() const return s.str(); } -bool Pattern::matchesBaseItem(AssemblyItem const& _item) const +bool Pattern::matchesBaseItem(AssemblyItem const* _item) const { if (m_type == UndefinedItem) return true; - if (m_type != _item.type()) + if (!_item) + return false; + if (m_type != _item->type()) return false; - if (m_requireDataMatch && m_data != _item.data()) + if (m_requireDataMatch && m_data != _item->data()) return false; return true; } diff --git a/libevmasm/ExpressionClasses.h b/libevmasm/ExpressionClasses.h index 2f720f606..5d32c0f71 100644 --- a/libevmasm/ExpressionClasses.h +++ b/libevmasm/ExpressionClasses.h @@ -50,7 +50,7 @@ public: struct Expression { Id id; - AssemblyItem const* item; + AssemblyItem const* item = nullptr; Ids arguments; unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions. 
/// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber). @@ -68,6 +68,10 @@ public: bool _copyItem = true, unsigned _sequenceNumber = 0 ); + /// @returns a new unique class id which does not and will never have a representative containing + /// an AssemblyItem, i.e. its value cannot be generated, instead it has to be assumed to be + /// already present. + Id newId(); /// @returns the canonical representative of an expression class. Expression const& representative(Id _id) const { return m_representatives.at(_id); } /// @returns the number of classes. @@ -149,7 +153,7 @@ public: std::string toString() const; private: - bool matchesBaseItem(AssemblyItem const& _item) const; + bool matchesBaseItem(AssemblyItem const* _item) const; Expression const& matchGroupValue() const; AssemblyItemType m_type; diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp index 244270fb6..e83810d43 100644 --- a/libevmasm/KnownState.cpp +++ b/libevmasm/KnownState.cpp @@ -136,7 +136,7 @@ ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation if (m_stackElements.count(_stackHeight)) return m_stackElements.at(_stackHeight); // Stack element not found (not assigned yet), create new equivalence class. - return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); + return m_stackElements[_stackHeight] = m_expressionClasses->newId(); } ExpressionClasses::Id KnownState::initialStackElement( From e768959342dca9320670874fbdd353b8789854e8 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 30 Apr 2015 14:41:55 +0200 Subject: [PATCH 3/8] Common subexpression elimination ready for using pre-known state. 
--- libevmasm/CommonSubexpressionEliminator.cpp | 8 ++-- libevmasm/CommonSubexpressionEliminator.h | 6 ++- libevmasm/ExpressionClasses.cpp | 9 ---- libevmasm/ExpressionClasses.h | 4 -- libevmasm/KnownState.cpp | 10 ++--- test/libsolidity/SolidityOptimizer.cpp | 49 +++++++++++++++++++-- 6 files changed, 59 insertions(+), 27 deletions(-) diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp index 645a426d9..4b85eba40 100644 --- a/libevmasm/CommonSubexpressionEliminator.cpp +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -40,9 +40,8 @@ vector CommonSubexpressionEliminator::getOptimizedItems() int minHeight = m_state.stackHeight() + 1; if (!m_state.stackElements().empty()) minHeight = min(minHeight, m_state.stackElements().begin()->first); - for (int height = minHeight; height <= 0; ++height) - //@todo this is not nice as it is here - should be "unknownStackElement" - but is it really unknown? - initialStackContents[height] = m_state.initialStackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_initialState.stackHeight(); ++height) + initialStackContents[height] = m_initialState.stackElement(height, SourceLocation()); for (int height = minHeight; height <= m_state.stackHeight(); ++height) targetStackContents[height] = m_state.stackElement(height, SourceLocation()); @@ -50,6 +49,7 @@ vector CommonSubexpressionEliminator::getOptimizedItems() //stream(cout, initialStackContents, targetStackContents); AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( + m_initialState.stackHeight(), initialStackContents, targetStackContents ); @@ -106,10 +106,12 @@ CSECodeGenerator::CSECodeGenerator( } AssemblyItems CSECodeGenerator::generateCode( + int _initialStackHeight, map const& _initialStack, map const& _targetStackContents ) { + m_stackHeight = _initialStackHeight; m_stack = _initialStack; for (auto const& item: m_stack) if 
(!m_classPositions.count(item.second)) diff --git a/libevmasm/CommonSubexpressionEliminator.h b/libevmasm/CommonSubexpressionEliminator.h index 2ed926401..6e1ba40b3 100644 --- a/libevmasm/CommonSubexpressionEliminator.h +++ b/libevmasm/CommonSubexpressionEliminator.h @@ -61,7 +61,7 @@ public: using Id = ExpressionClasses::Id; using StoreOperation = KnownState::StoreOperation; - CommonSubexpressionEliminator(KnownState const& _state): m_state(_state) {} + CommonSubexpressionEliminator(KnownState const& _state): m_initialState(_state), m_state(_state) {} /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first /// item that must be fed into a new instance of the eliminator. @@ -85,6 +85,7 @@ private: /// Tries to optimize the item that breaks the basic block at the end. void optimizeBreakingItem(); + KnownState m_initialState; KnownState m_state; /// Keeps information about which storage or memory slots were written to at which sequence /// number with what instruction. @@ -115,6 +116,7 @@ public: /// @param _targetStackContents final contents of the stack, by stack height relative to initial /// @note should only be called once on each object. AssemblyItems generateCode( + int _initialStackHeight, std::map const& _initialStack, std::map const& _targetStackContents ); @@ -150,7 +152,7 @@ private: AssemblyItems m_generatedItems; /// Current height of the stack relative to the start. - int m_stackHeight = 0; + int m_stackHeight; /// If (b, a) is in m_requests then b is needed to compute a. std::multimap m_neededBy; /// Current content of the stack. 
diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index 8d0785d37..e62f75264 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -79,15 +79,6 @@ ExpressionClasses::Id ExpressionClasses::find( return exp.id; } -ExpressionClasses::Id ExpressionClasses::newId() -{ - // Note that we cannot insert it in m_expressions because this requires item to be set. - Expression exp; - exp.id = m_representatives.size(); - m_representatives.push_back(exp); - return exp.id; -} - bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) { // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. diff --git a/libevmasm/ExpressionClasses.h b/libevmasm/ExpressionClasses.h index 5d32c0f71..c83520300 100644 --- a/libevmasm/ExpressionClasses.h +++ b/libevmasm/ExpressionClasses.h @@ -68,10 +68,6 @@ public: bool _copyItem = true, unsigned _sequenceNumber = 0 ); - /// @returns a new unique class id which does not and will never have a representative containing - /// an AssemblyItem, i.e. its value cannot be generated, instead it has to be assumed to be - /// already present. - Id newId(); /// @returns the canonical representative of an expression class. Expression const& representative(Id _id) const { return m_representatives.at(_id); } /// @returns the number of classes. diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp index e83810d43..02c6ee136 100644 --- a/libevmasm/KnownState.cpp +++ b/libevmasm/KnownState.cpp @@ -135,8 +135,10 @@ ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation { if (m_stackElements.count(_stackHeight)) return m_stackElements.at(_stackHeight); - // Stack element not found (not assigned yet), create new equivalence class. - return m_stackElements[_stackHeight] = m_expressionClasses->newId(); + // Stack element not found (not assigned yet), create new unknown equivalence class. 
+ //@todo check that we do not infer incorrect equivalences when the stack is cleared partially + //in between. + return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); } ExpressionClasses::Id KnownState::initialStackElement( @@ -144,10 +146,8 @@ ExpressionClasses::Id KnownState::initialStackElement( SourceLocation const& _location ) { - assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); - assertThrow(_stackHeight > -16, StackTooDeepException, ""); // This is a special assembly item that refers to elements pre-existing on the initial stack. - return m_expressionClasses->find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); + return m_expressionClasses->find(AssemblyItem(UndefinedItem, u256(_stackHeight), _location)); } void KnownState::setStackElement(int _stackHeight, Id _class) diff --git a/test/libsolidity/SolidityOptimizer.cpp b/test/libsolidity/SolidityOptimizer.cpp index 71463f919..59e8f04a8 100644 --- a/test/libsolidity/SolidityOptimizer.cpp +++ b/test/libsolidity/SolidityOptimizer.cpp @@ -83,15 +83,28 @@ public: "\nOptimized: " + toHex(optimizedOutput)); } - AssemblyItems getCSE(AssemblyItems const& _input) + AssemblyItems addDummyLocations(AssemblyItems const& _input) { // add dummy locations to each item so that we can check that they are not deleted AssemblyItems input = _input; for (AssemblyItem& item: input) item.setLocation(SourceLocation(1, 3, make_shared(""))); + return input; + } + eth::KnownState createInitialState(AssemblyItems const& _input) + { eth::KnownState state; - eth::CommonSubexpressionEliminator cse(state); + for (auto const& item: addDummyLocations(_input)) + state.feedItem(item); + return state; + } + + AssemblyItems getCSE(AssemblyItems const& _input, eth::KnownState const& _state = eth::KnownState()) + { + AssemblyItems input = addDummyLocations(_input); + + eth::CommonSubexpressionEliminator cse(_state); 
BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end()); AssemblyItems output = cse.getOptimizedItems(); @@ -102,9 +115,13 @@ public: return output; } - void checkCSE(AssemblyItems const& _input, AssemblyItems const& _expectation) + void checkCSE( + AssemblyItems const& _input, + AssemblyItems const& _expectation, + KnownState const& _state = eth::KnownState() + ) { - AssemblyItems output = getCSE(_input); + AssemblyItems output = getCSE(_input, _state); BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end()); } @@ -756,6 +773,30 @@ BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_noninterfering_store_in_between BOOST_CHECK_EQUAL(1, count(output.begin(), output.end(), AssemblyItem(Instruction::SHA3))); } +BOOST_AUTO_TEST_CASE(cse_with_initially_known_stack) +{ + eth::KnownState state = createInitialState(AssemblyItems{ + u256(0x12), + u256(0x20), + Instruction::ADD + }); + AssemblyItems input{ + u256(0x12 + 0x20) + }; + checkCSE(input, AssemblyItems{Instruction::DUP1}, state); +} + +BOOST_AUTO_TEST_CASE(cse_equality_on_initially_known_stack) +{ + eth::KnownState state = createInitialState(AssemblyItems{Instruction::DUP1}); + AssemblyItems input{ + Instruction::EQ + }; + AssemblyItems output = getCSE(input, state); + // check that it directly pushes 1 (true) + BOOST_CHECK(find(output.begin(), output.end(), AssemblyItem(u256(1))) != output.end()); +} + BOOST_AUTO_TEST_CASE(control_flow_graph_remove_unused) { // remove parts of the code that are unused From 4988a6766fb26a47320725138451ff6b477b9018 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 30 Apr 2015 15:31:16 +0200 Subject: [PATCH 4/8] Make KnownState work with all instructions. 
--- libevmasm/ExpressionClasses.cpp | 19 +++++++---- libevmasm/KnownState.cpp | 7 ++++ libevmasm/SemanticInformation.cpp | 54 +++++++++++++++++++++++++++++++ libevmasm/SemanticInformation.h | 9 ++++++ 4 files changed, 83 insertions(+), 6 deletions(-) diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index e62f75264..cfbeba7fa 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -57,12 +57,15 @@ ExpressionClasses::Id ExpressionClasses::find( exp.arguments = _arguments; exp.sequenceNumber = _sequenceNumber; - if (SemanticInformation::isCommutativeOperation(_item)) - sort(exp.arguments.begin(), exp.arguments.end()); + if (SemanticInformation::isDeterministic(_item)) + { + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); - auto it = m_expressions.find(exp); - if (it != m_expressions.end()) - return it->id; + auto it = m_expressions.find(exp); + if (it != m_expressions.end()) + return it->id; + } if (_copyItem) exp.item = storeItem(_item); @@ -286,7 +289,11 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, { static Rules rules; - if (!_expr.item || _expr.item->type() != Operation) + if ( + !_expr.item || + _expr.item->type() != Operation || + !SemanticInformation::isDeterministic(*_expr.item) + ) return -1; for (auto const& rule: rules.rules()) diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp index 02c6ee136..632777c82 100644 --- a/libevmasm/KnownState.cpp +++ b/libevmasm/KnownState.cpp @@ -101,6 +101,7 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool vector arguments(info.args); for (int i = 0; i < info.args; ++i) arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + if (_item.instruction() == Instruction::SSTORE) op = storeInStorage(arguments[0], arguments[1], _item.getLocation()); else if (_item.instruction() == Instruction::SLOAD) @@ -121,10 +122,16 
@@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) ); else + { + if (SemanticInformation::invalidatesMemory(_item.instruction())) + resetMemory(); + if (SemanticInformation::invalidatesStorage(_item.instruction())) + resetStorage(); setStackElement( m_stackHeight + _item.deposit(), m_expressionClasses->find(_item, arguments, _copyItem) ); + } } m_stackHeight += _item.deposit(); } diff --git a/libevmasm/SemanticInformation.cpp b/libevmasm/SemanticInformation.cpp index 83d59efc7..40c36f9e3 100644 --- a/libevmasm/SemanticInformation.cpp +++ b/libevmasm/SemanticInformation.cpp @@ -122,3 +122,57 @@ bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) return false; } } + + +bool SemanticInformation::isDeterministic(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return true; + assertThrow(!altersControlFlow(_item), OptimizerException, ""); + + switch (_item.instruction()) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::CREATE: + case Instruction::GAS: + case Instruction::PC: + case Instruction::MSIZE: // depends on previous writes and reads, not only on content + case Instruction::BALANCE: // depends on previous calls + case Instruction::EXTCODESIZE: + return false; + default: + return true; + } +} + +bool SemanticInformation::invalidatesMemory(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALLDATACOPY: + case Instruction::CODECOPY: + case Instruction::EXTCODECOPY: + case Instruction::MSTORE: + case Instruction::MSTORE8: + case Instruction::CALL: + case Instruction::CALLCODE: + return true; + default: + return false; + } +} + +bool SemanticInformation::invalidatesStorage(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::CREATE: + case Instruction::SSTORE: + return true; + default: + return false; + 
} +} diff --git a/libevmasm/SemanticInformation.h b/libevmasm/SemanticInformation.h index 27aa6f1a4..b14ddb65a 100644 --- a/libevmasm/SemanticInformation.h +++ b/libevmasm/SemanticInformation.h @@ -23,6 +23,7 @@ #pragma once +#include namespace dev { @@ -45,6 +46,14 @@ struct SemanticInformation static bool isSwapInstruction(AssemblyItem const& _item); static bool isJumpInstruction(AssemblyItem const& _item); static bool altersControlFlow(AssemblyItem const& _item); + /// @returns false if the value put on the stack by _item depends on anything else than + /// the information in the current block header, memory, storage or stack. + /// @note should not be called for instructions that alter the control flow. + static bool isDeterministic(AssemblyItem const& _item); + /// @returns true if the given instruction modifies memory. + static bool invalidatesMemory(Instruction _instruction); + /// @returns true if the given instruction modifies storage (even indirectly). + static bool invalidatesStorage(Instruction _instruction); }; } From 1f149925bc6be94e4d07e887b3d7848f50b6cc0b Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 4 May 2015 10:15:41 +0200 Subject: [PATCH 5/8] Gather knowledge about the state during control flow analysis. 
--- libevmasm/Assembly.cpp | 7 ++- libevmasm/ControlFlowGraph.cpp | 91 ++++++++++++++++++++++++++++++- libevmasm/ControlFlowGraph.h | 22 ++++++-- libevmasm/KnownState.cpp | 75 +++++++++++++++++++------ libevmasm/KnownState.h | 30 +++++++--- libevmasm/SemanticInformation.cpp | 1 - libevmasm/SemanticInformation.h | 1 - 7 files changed, 192 insertions(+), 35 deletions(-) diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index c7253622e..1c5391168 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -314,6 +314,10 @@ Assembly& Assembly::optimise(bool _enable) copt << toString(*this); count = 0; + //@todo CFG interface should be a generator, that returns an item and a pointer to a + // knownstate, which has to replace the current state if it is not null. + // Feed these items to the CSE, but also store them and replace the stored version + // if the items generated by the CSE are shorter. (or even use less gas?) copt << "Performing control flow analysis..."; { ControlFlowGraph cfg(m_items); @@ -329,7 +333,8 @@ Assembly& Assembly::optimise(bool _enable) copt << "Performing common subexpression elimination..."; for (auto iter = m_items.begin(); iter != m_items.end();) { - KnownState state; + //@todo use only a single state / expression classes instance. 
+ KnownState state(make_shared()); CommonSubexpressionEliminator eliminator(state); auto orig = iter; iter = eliminator.feedItems(iter, m_items.end()); diff --git a/libevmasm/ControlFlowGraph.cpp b/libevmasm/ControlFlowGraph.cpp index cc4367e64..0b0c757d6 100644 --- a/libevmasm/ControlFlowGraph.cpp +++ b/libevmasm/ControlFlowGraph.cpp @@ -23,9 +23,11 @@ #include #include +#include #include #include #include +#include using namespace std; using namespace dev; @@ -46,6 +48,7 @@ AssemblyItems ControlFlowGraph::optimisedItems() resolveNextLinks(); removeUnusedBlocks(); setPrevLinks(); + gatherKnowledge(); return rebuildCode(); } @@ -209,6 +212,77 @@ void ControlFlowGraph::setPrevLinks() } } +void ControlFlowGraph::gatherKnowledge() +{ + // @todo actually we know that memory is filled with zeros at the beginning, + // we could make use of that. + shared_ptr emptyState = make_shared(); + ExpressionClasses& expr = emptyState->expressionClasses(); + bool unknownJumpEncountered = false; + + vector>> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); + while (!workQueue.empty()) + { + //@todo we might have to do something like incrementing the sequence number for each JUMPDEST + assertThrow(!!workQueue.back().first, OptimizerException, ""); + BasicBlock& block = m_blocks.at(workQueue.back().first); + shared_ptr state = workQueue.back().second; + workQueue.pop_back(); + if (block.startState) + { + state->reduceToCommonKnowledge(*block.startState); + if (*state == *block.startState) + continue; + } + + block.startState = state->copy(); + //@todo we might know the return address for the first pass, but not anymore for the second, + // -> store knowledge about tags as a union. + + // Feed all items except for the final jump yet because it will erase the target tag. 
+ unsigned pc = block.begin; + while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc))) + state->feedItem(m_items.at(pc++)); + + if ( + block.endType == BasicBlock::EndType::JUMP || + block.endType == BasicBlock::EndType::JUMPI + ) + { + assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, ""); + //@todo in the case of JUMPI, add knowledge about the condition to the state + // (for both values of the condition) + BlockId nextBlock = expressionClassToBlockId( + state->stackElement(state->stackHeight(), SourceLocation()), + expr + ); + state->feedItem(m_items.at(pc++)); + if (nextBlock) + workQueue.push_back(make_pair(nextBlock, state->copy())); + else if (!unknownJumpEncountered) + { + // We do not know where this jump goes, so we have to reset the states of all + // JUMPDESTs. + unknownJumpEncountered = true; + for (auto const& it: m_blocks) + if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) + workQueue.push_back(make_pair(it.first, emptyState->copy())); + } + } + else if (block.begin <= pc && pc < block.end) + state->feedItem(m_items.at(pc++)); + assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, ""); + + block.endState = state; + + if ( + block.endType == BasicBlock::EndType::HANDOVER || + block.endType == BasicBlock::EndType::JUMPI + ) + workQueue.push_back(make_pair(block.next, state->copy())); + } +} + AssemblyItems ControlFlowGraph::rebuildCode() { map pushes; @@ -233,7 +307,7 @@ AssemblyItems ControlFlowGraph::rebuildCode() blockId = m_blocks.at(blockId).prev; for (; blockId; blockId = m_blocks.at(blockId).next) { - BasicBlock const& block = m_blocks.at(blockId); + BasicBlock& block = m_blocks.at(blockId); blocksToAdd.erase(blockId); blocksAdded.insert(blockId); @@ -243,7 +317,10 @@ AssemblyItems ControlFlowGraph::rebuildCode() continue; // If block starts with unused tag, skip it. 
if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) + { ++begin; + ++block.begin; + } previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); copy(begin, end, back_inserter(code)); } @@ -252,6 +329,18 @@ AssemblyItems ControlFlowGraph::rebuildCode() return code; } +BlockId ControlFlowGraph::expressionClassToBlockId( + ExpressionClasses::Id _id, + ExpressionClasses& _exprClasses +) +{ + ExpressionClasses::Expression expr = _exprClasses.representative(_id); + if (expr.item && expr.item->type() == PushTag) + return BlockId(expr.item->data()); + else + return BlockId::invalid(); +} + BlockId ControlFlowGraph::generateNewId() { BlockId id = BlockId(++m_lastUsedId); diff --git a/libevmasm/ControlFlowGraph.h b/libevmasm/ControlFlowGraph.h index 5d16df327..4310d6642 100644 --- a/libevmasm/ControlFlowGraph.h +++ b/libevmasm/ControlFlowGraph.h @@ -24,16 +24,17 @@ #pragma once #include +#include #include #include +#include namespace dev { namespace eth { -class AssemblyItem; -using AssemblyItems = std::vector; +class KnownState; /** * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special @@ -69,14 +70,20 @@ struct BasicBlock unsigned end = 0; /// Tags pushed inside this block, with multiplicity. std::vector pushedTags; - /// ID of the block that always follows this one (either JUMP or flow into new block), - /// or BlockId::invalid() otherwise + /// ID of the block that always follows this one (either non-branching part of JUMPI or flow + /// into new block), or BlockId::invalid() otherwise BlockId next = BlockId::invalid(); - /// ID of the block that has to precede this one. + /// ID of the block that has to precede this one (because control flows into it). BlockId prev = BlockId::invalid(); enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; EndType endType = EndType::HANDOVER; + + /// Knowledge about the state when this block is entered. Intersection of all possible ways + /// to enter this block. 
+ std::shared_ptr startState; + /// Knowledge about the state at the end of this block. + std::shared_ptr endState; }; class ControlFlowGraph @@ -93,9 +100,14 @@ private: void splitBlocks(); void resolveNextLinks(); void removeUnusedBlocks(); + void gatherKnowledge(); void setPrevLinks(); AssemblyItems rebuildCode(); + /// @returns the corresponding BlockId if _id is a pushed jump tag, + /// and an invalid BlockId otherwise. + BlockId expressionClassToBlockId(ExpressionClasses::Id _id, ExpressionClasses& _exprClasses); + BlockId generateNewId(); unsigned m_lastUsedId = 0; diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp index 632777c82..7ff0143e1 100644 --- a/libevmasm/KnownState.cpp +++ b/libevmasm/KnownState.cpp @@ -30,16 +30,18 @@ using namespace std; using namespace dev; using namespace dev::eth; -ostream& KnownState::stream( - ostream& _out, - map _initialStack, - map _targetStack -) const +ostream& KnownState::stream(ostream& _out) const { auto streamExpressionClass = [this](ostream& _out, Id _id) { auto const& expr = m_expressionClasses->representative(_id); - _out << " " << dec << _id << ": " << *expr.item; + _out << " " << dec << _id << ": "; + if (!expr.item) + _out << " no item"; + else if (expr.item->type() == UndefinedItem) + _out << " unknown " << int(expr.item->data()); + else + _out << *expr.item; if (expr.sequenceNumber) _out << "@" << dec << expr.sequenceNumber; _out << "("; @@ -48,22 +50,32 @@ ostream& KnownState::stream( _out << ")" << endl; }; - _out << "Optimizer analysis:" << endl; - _out << "Final stack height: " << dec << m_stackHeight << endl; + _out << "=== State ===" << endl; + _out << "Stack height: " << dec << m_stackHeight << endl; _out << "Equivalence classes: " << endl; for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) streamExpressionClass(_out, eqClass); - _out << "Initial stack: " << endl; - for (auto const& it: _initialStack) + _out << "Stack: " << endl; + for (auto const& it: 
m_stackElements) { _out << " " << dec << it.first << ": "; streamExpressionClass(_out, it.second); } - _out << "Target stack: " << endl; - for (auto const& it: _targetStack) + _out << "Storage: " << endl; + for (auto const& it: m_storageContent) { - _out << " " << dec << it.first << ": "; + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Memory: " << endl; + for (auto const& it: m_memoryContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; streamExpressionClass(_out, it.second); } @@ -73,7 +85,11 @@ ostream& KnownState::stream( KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) { StoreOperation op; - if (_item.type() != Operation) + if (_item.type() == Tag) + { + // can be ignored + } + else if (_item.type() != Operation) { assertThrow(_item.deposit() == 1, InvalidDeposit, ""); setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); @@ -127,17 +143,40 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool resetMemory(); if (SemanticInformation::invalidatesStorage(_item.instruction())) resetStorage(); - setStackElement( - m_stackHeight + _item.deposit(), - m_expressionClasses->find(_item, arguments, _copyItem) - ); + assertThrow(info.ret <= 1, InvalidDeposit, ""); + if (info.ret == 1) + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); } } + for (int p = m_stackHeight; p > m_stackHeight + _item.deposit(); --p) + m_stackElements.erase(p); m_stackHeight += _item.deposit(); } return op; } +void KnownState::reduceToCommonKnowledge(KnownState const& /*_other*/) +{ + //@todo + *this = KnownState(m_expressionClasses); +} + +bool KnownState::operator==(const KnownState& _other) const +{ + //@todo + return ( + m_stackElements.empty() && + _other.m_stackElements.empty() && + m_storageContent.empty() && + 
_other.m_storageContent.empty() && + m_memoryContent.empty() && + _other.m_memoryContent.empty() + ); +} + ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) { if (m_stackElements.count(_stackHeight)) diff --git a/libevmasm/KnownState.h b/libevmasm/KnownState.h index c6dfcee6b..f7a3dd675 100644 --- a/libevmasm/KnownState.h +++ b/libevmasm/KnownState.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -70,14 +71,14 @@ public: Id expression; }; - KnownState(): m_expressionClasses(std::make_shared()) {} + explicit KnownState( + std::shared_ptr _expressionClasses = std::make_shared() + ): m_expressionClasses(_expressionClasses) + { + } /// Streams debugging information to @a _out. - std::ostream& stream( - std::ostream& _out, - std::map _initialStack = std::map(), - std::map _targetStack = std::map() - ) const; + std::ostream& stream(std::ostream& _out) const; /// Feeds the item into the system for analysis. /// @returns a possible store operation @@ -92,6 +93,20 @@ public: /// Resets any knowledge. void reset() { resetStorage(); resetMemory(); resetStack(); } + /// Manually increments the storage and memory sequence number. + void incrementSequenceNumber() { m_sequenceNumber += 2; } + + /// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained. + /// If the stack heighht is different, the smaller one is used and the stack is compared + /// relatively. + void reduceToCommonKnowledge(KnownState const& _other); + + /// @returns a shared pointer to a copy of this state. + std::shared_ptr copy() const { return std::make_shared(*this); } + + /// @returns true if the knowledge about the state of both objects is (known to be) equal. + bool operator==(KnownState const& _other) const; + ///@todo the sequence numbers in two copies of this class should never be the same. 
/// might be doable using two-dimensional sequence numbers, where the first value is incremented /// for each copy @@ -99,8 +114,7 @@ public: /// Retrieves the current equivalence class fo the given stack element (or generates a new /// one if it does not exist yet). Id stackElement(int _stackHeight, SourceLocation const& _location); - /// @returns the equivalence class id of the special initial stack element at the given height - /// (must not be positive). + /// @returns the equivalence class id of the special initial stack element at the given height. Id initialStackElement(int _stackHeight, SourceLocation const& _location); int stackHeight() const { return m_stackHeight; } diff --git a/libevmasm/SemanticInformation.cpp b/libevmasm/SemanticInformation.cpp index 40c36f9e3..056162b5f 100644 --- a/libevmasm/SemanticInformation.cpp +++ b/libevmasm/SemanticInformation.cpp @@ -128,7 +128,6 @@ bool SemanticInformation::isDeterministic(AssemblyItem const& _item) { if (_item.type() != Operation) return true; - assertThrow(!altersControlFlow(_item), OptimizerException, ""); switch (_item.instruction()) { diff --git a/libevmasm/SemanticInformation.h b/libevmasm/SemanticInformation.h index b14ddb65a..094f45912 100644 --- a/libevmasm/SemanticInformation.h +++ b/libevmasm/SemanticInformation.h @@ -48,7 +48,6 @@ struct SemanticInformation static bool altersControlFlow(AssemblyItem const& _item); /// @returns false if the value put on the stack by _item depends on anything else than /// the information in the current block header, memory, storage or stack. - /// @note should not be called for instructions that alter the control flow. static bool isDeterministic(AssemblyItem const& _item); /// @returns true if the given instruction modifies memory. static bool invalidatesMemory(Instruction _instruction); From ec7425fecde40ccf38bd5cd6d348c840e480ee25 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 5 May 2015 17:03:07 +0200 Subject: [PATCH 6/8] Remove unused old optimizer rule. 
--- libevmasm/Assembly.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 1c5391168..aec06aef6 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -304,9 +304,6 @@ Assembly& Assembly::optimise(bool _enable) { if (!_enable) return *this; - std::vector>> rules; - // jump to next instruction - rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].data() == m[2].data()) return {m[2]}; else return m.toVector(); }}); unsigned total = 0; for (unsigned count = 1; count > 0; total += count) From 1933aa5c2cb41300001daad91779591d7aa0b970 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 5 May 2015 17:45:58 +0200 Subject: [PATCH 7/8] CFG returns vector of blocks instead of assembly items. --- libevmasm/Assembly.cpp | 5 ++++- libevmasm/ControlFlowGraph.cpp | 28 +++++++++++--------------- libevmasm/ControlFlowGraph.h | 14 ++++++++----- test/libsolidity/SolidityOptimizer.cpp | 8 ++++++-- 4 files changed, 31 insertions(+), 24 deletions(-) diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index aec06aef6..9530ded49 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -318,7 +318,10 @@ Assembly& Assembly::optimise(bool _enable) copt << "Performing control flow analysis..."; { ControlFlowGraph cfg(m_items); - AssemblyItems optItems = cfg.optimisedItems(); + AssemblyItems optItems; + for (BasicBlock const& block: cfg.optimisedBlocks()) + copy(m_items.begin() + block.begin, m_items.begin() + block.end, + back_inserter(optItems)); if (optItems.size() < m_items.size()) { copt << "Old size: " << m_items.size() << ", new size: " << optItems.size(); diff --git a/libevmasm/ControlFlowGraph.cpp b/libevmasm/ControlFlowGraph.cpp index 0b0c757d6..2e28317a3 100644 --- a/libevmasm/ControlFlowGraph.cpp +++ b/libevmasm/ControlFlowGraph.cpp @@ -38,10 +38,10 @@ BlockId::BlockId(u256 const& _id): m_id(_id) assertThrow( _id < initial().m_id, 
OptimizerException, "Tag number too large."); } -AssemblyItems ControlFlowGraph::optimisedItems() +BasicBlocks ControlFlowGraph::optimisedBlocks() { if (m_items.empty()) - return m_items; + return BasicBlocks(); findLargestTag(); splitBlocks(); @@ -216,17 +216,17 @@ void ControlFlowGraph::gatherKnowledge() { // @todo actually we know that memory is filled with zeros at the beginning, // we could make use of that. - shared_ptr emptyState = make_shared(); + KnownStatePointer emptyState = make_shared(); ExpressionClasses& expr = emptyState->expressionClasses(); bool unknownJumpEncountered = false; - vector>> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); + vector> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); while (!workQueue.empty()) { //@todo we might have to do something like incrementing the sequence number for each JUMPDEST assertThrow(!!workQueue.back().first, OptimizerException, ""); BasicBlock& block = m_blocks.at(workQueue.back().first); - shared_ptr state = workQueue.back().second; + KnownStatePointer state = workQueue.back().second; workQueue.pop_back(); if (block.startState) { @@ -283,7 +283,7 @@ void ControlFlowGraph::gatherKnowledge() } } -AssemblyItems ControlFlowGraph::rebuildCode() +BasicBlocks ControlFlowGraph::rebuildCode() { map pushes; for (auto& idAndBlock: m_blocks) @@ -294,7 +294,7 @@ AssemblyItems ControlFlowGraph::rebuildCode() for (auto it: m_blocks) blocksToAdd.insert(it.first); set blocksAdded; - AssemblyItems code; + BasicBlocks blocks; for ( BlockId blockId = BlockId::initial(); @@ -311,22 +311,18 @@ AssemblyItems ControlFlowGraph::rebuildCode() blocksToAdd.erase(blockId); blocksAdded.insert(blockId); - auto begin = m_items.begin() + block.begin; - auto end = m_items.begin() + block.end; - if (begin == end) + if (block.begin == block.end) continue; // If block starts with unused tag, skip it. 
- if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) - { - ++begin; + if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag) ++block.begin; - } + if (block.begin < block.end) + blocks.push_back(block); previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); - copy(begin, end, back_inserter(code)); } } - return code; + return blocks; } BlockId ControlFlowGraph::expressionClassToBlockId( diff --git a/libevmasm/ControlFlowGraph.h b/libevmasm/ControlFlowGraph.h index 4310d6642..3366dc45f 100644 --- a/libevmasm/ControlFlowGraph.h +++ b/libevmasm/ControlFlowGraph.h @@ -35,6 +35,7 @@ namespace eth { class KnownState; +using KnownStatePointer = std::shared_ptr; /** * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special @@ -81,19 +82,22 @@ struct BasicBlock /// Knowledge about the state when this block is entered. Intersection of all possible ways /// to enter this block. - std::shared_ptr startState; + KnownStatePointer startState; /// Knowledge about the state at the end of this block. - std::shared_ptr endState; + KnownStatePointer endState; }; +using BasicBlocks = std::vector; + class ControlFlowGraph { public: /// Initializes the control flow graph. /// @a _items has to persist across the usage of this class. ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {} - /// @returns the collection of optimised items, should be called only once. - AssemblyItems optimisedItems(); + /// @returns vector of basic blocks in the order they should be used in the final code. + /// Should be called only once. + BasicBlocks optimisedBlocks(); private: void findLargestTag(); @@ -102,7 +106,7 @@ private: void removeUnusedBlocks(); void gatherKnowledge(); void setPrevLinks(); - AssemblyItems rebuildCode(); + BasicBlocks rebuildCode(); /// @returns the corresponding BlockId if _id is a pushed jump tag, /// and an invalid BlockId otherwise. 
diff --git a/test/libsolidity/SolidityOptimizer.cpp b/test/libsolidity/SolidityOptimizer.cpp index 59e8f04a8..3cb6a536a 100644 --- a/test/libsolidity/SolidityOptimizer.cpp +++ b/test/libsolidity/SolidityOptimizer.cpp @@ -131,8 +131,12 @@ public: // Running it four times should be enough for these tests. for (unsigned i = 0; i < 4; ++i) { - eth::ControlFlowGraph cfg(output); - output = cfg.optimisedItems(); + ControlFlowGraph cfg(output); + AssemblyItems optItems; + for (BasicBlock const& block: cfg.optimisedBlocks()) + copy(output.begin() + block.begin, output.begin() + block.end, + back_inserter(optItems)); + output = move(optItems); } BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end()); } From 365372799e5f4d158dfb86cf6d0d2f6d6ecc5708 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 8 May 2015 18:07:56 +0200 Subject: [PATCH 8/8] Use range-based erase. --- libevmasm/KnownState.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp index 7ff0143e1..41ac4802b 100644 --- a/libevmasm/KnownState.cpp +++ b/libevmasm/KnownState.cpp @@ -151,8 +151,10 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool ); } } - for (int p = m_stackHeight; p > m_stackHeight + _item.deposit(); --p) - m_stackElements.erase(p); + m_stackElements.erase( + m_stackElements.upper_bound(m_stackHeight + _item.deposit()), + m_stackElements.end() + ); m_stackHeight += _item.deposit(); } return op;