From 30c832e3582f341c4fbddc90fd9a0e528349d423 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 29 Apr 2015 18:16:05 +0200 Subject: [PATCH] Split known state from common subexpression eliminator. --- libevmasm/Assembly.cpp | 3 +- libevmasm/CommonSubexpressionEliminator.cpp | 267 ++----------------- libevmasm/CommonSubexpressionEliminator.h | 59 +---- libevmasm/KnownState.cpp | 278 ++++++++++++++++++++ libevmasm/KnownState.h | 149 +++++++++++ test/libsolidity/SolidityOptimizer.cpp | 6 +- 6 files changed, 455 insertions(+), 307 deletions(-) create mode 100644 libevmasm/KnownState.cpp create mode 100644 libevmasm/KnownState.h diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 6cc09a4bc..c7253622e 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -329,7 +329,8 @@ Assembly& Assembly::optimise(bool _enable) copt << "Performing common subexpression elimination..."; for (auto iter = m_items.begin(); iter != m_items.end();) { - CommonSubexpressionEliminator eliminator; + KnownState state; + CommonSubexpressionEliminator eliminator(state); auto orig = iter; iter = eliminator.feedItems(iter, m_items.end()); AssemblyItems optItems; diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp index 63524d6f3..645a426d9 100644 --- a/libevmasm/CommonSubexpressionEliminator.cpp +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -37,18 +37,19 @@ vector CommonSubexpressionEliminator::getOptimizedItems() map initialStackContents; map targetStackContents; - int minHeight = m_stackHeight + 1; - if (!m_stackElements.empty()) - minHeight = min(minHeight, m_stackElements.begin()->first); + int minHeight = m_state.stackHeight() + 1; + if (!m_state.stackElements().empty()) + minHeight = min(minHeight, m_state.stackElements().begin()->first); for (int height = minHeight; height <= 0; ++height) - initialStackContents[height] = initialStackElement(height, SourceLocation()); - for (int height = minHeight; height <= m_stackHeight; ++height) - targetStackContents[height] = stackElement(height, SourceLocation()); + //@todo this is not nice as it is here - should be "unknownStackElement" - but is it really unknown? + initialStackContents[height] = m_state.initialStackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_state.stackHeight(); ++height) + targetStackContents[height] = m_state.stackElement(height, SourceLocation()); // Debug info: //stream(cout, initialStackContents, targetStackContents); - AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode( + AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( initialStackContents, targetStackContents ); @@ -57,103 +58,11 @@ vector CommonSubexpressionEliminator::getOptimizedItems() return items; } -ostream& CommonSubexpressionEliminator::stream( - ostream& _out, - map _initialStack, - map _targetStack -) const -{ - auto streamExpressionClass = [this](ostream& _out, Id _id) - { - auto const& expr = m_expressionClasses.representative(_id); - _out << " " << dec << _id << ": " << *expr.item; - if (expr.sequenceNumber) - _out << "@" << dec << expr.sequenceNumber; - _out << "("; - for (Id arg: expr.arguments) - _out << dec << arg << ","; - _out << ")" << endl; - }; - - _out << "Optimizer analysis:" << endl; - _out << "Final stack height: " << dec << m_stackHeight << endl; - _out << "Equivalence classes: " << endl; - for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass) - streamExpressionClass(_out, eqClass); - - _out << "Initial stack: " << endl; - for (auto const& it: _initialStack) - { - _out << " " << dec << it.first << ": "; - streamExpressionClass(_out, it.second); - } - _out << "Target stack: " << endl; - for (auto const& it: _targetStack) - { - _out << " " << dec << it.first << ": "; - streamExpressionClass(_out, it.second); - } - - return _out; -} - void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem) { - if (_item.type() != Operation) - { - assertThrow(_item.deposit() == 1, InvalidDeposit, ""); - setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem)); - } - else - { - Instruction instruction = _item.instruction(); - InstructionInfo info = instructionInfo(instruction); - if (SemanticInformation::isDupInstruction(_item)) - setStackElement( - m_stackHeight + 1, - stackElement( - m_stackHeight - int(instruction) + int(Instruction::DUP1), - _item.getLocation() - ) - ); - else if (SemanticInformation::isSwapInstruction(_item)) - swapStackElements( - m_stackHeight, - m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), - _item.getLocation() - ); - else if (instruction != Instruction::POP) - { - vector arguments(info.args); - for (int i = 0; i < info.args; ++i) - arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); - if (_item.instruction() == Instruction::SSTORE) - storeInStorage(arguments[0], arguments[1], _item.getLocation()); - else if (_item.instruction() == Instruction::SLOAD) - setStackElement( - m_stackHeight + _item.deposit(), - loadFromStorage(arguments[0], _item.getLocation()) - ); - else if (_item.instruction() == Instruction::MSTORE) - storeInMemory(arguments[0], arguments[1], _item.getLocation()); - else if (_item.instruction() == Instruction::MLOAD) - setStackElement( - m_stackHeight + _item.deposit(), - loadFromMemory(arguments[0], _item.getLocation()) - ); - else if (_item.instruction() == Instruction::SHA3) - setStackElement( - m_stackHeight + _item.deposit(), - applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) - ); - else - setStackElement( - m_stackHeight + _item.deposit(), - m_expressionClasses.find(_item, arguments, _copyItem) - ); - } - m_stackHeight += _item.deposit(); - } + StoreOperation op = m_state.feedItem(_item, _copyItem); + if (op.isValid()) + m_storeOperations.push_back(op); } void CommonSubexpressionEliminator::optimizeBreakingItem() @@ -164,20 +73,20 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() SourceLocation const& location = m_breakingItem->getLocation(); AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); - Id condition = stackElement(m_stackHeight - 1, location); - Id zero = m_expressionClasses.find(u256(0)); - if (m_expressionClasses.knownToBeDifferent(condition, zero)) + Id condition = m_state.stackElement(m_state.stackHeight() - 1, location); + Id zero = m_state.expressionClasses().find(u256(0)); + if (m_state.expressionClasses().knownToBeDifferent(condition, zero)) { feedItem(AssemblyItem(Instruction::SWAP1, location), true); feedItem(AssemblyItem(Instruction::POP, location), true); AssemblyItem item(Instruction::JUMP, location); item.setJumpType(jumpType); - m_breakingItem = m_expressionClasses.storeItem(item); + m_breakingItem = m_state.expressionClasses().storeItem(item); return; } - Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition}); - if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero)) + Id negatedCondition = m_state.expressionClasses().find(Instruction::ISZERO, {condition}); + if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero)) { AssemblyItem it(Instruction::POP, location); feedItem(it, true); @@ -186,148 +95,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() } } -void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class) -{ - m_stackElements[_stackHeight] = _class; -} - -void CommonSubexpressionEliminator::swapStackElements( - int _stackHeightA, - int _stackHeightB, - SourceLocation const& _location -) -{ - assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); - // ensure they are created - stackElement(_stackHeightA, _location); - stackElement(_stackHeightB, _location); - - swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); -} - -ExpressionClasses::Id CommonSubexpressionEliminator::stackElement( - int _stackHeight, - SourceLocation const& _location -) -{ - if (m_stackElements.count(_stackHeight)) - return m_stackElements.at(_stackHeight); - // Stack element not found (not assigned yet), create new equivalence class. - return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); -} - -ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement( - int _stackHeight, - SourceLocation const& _location -) -{ - assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); - assertThrow(_stackHeight > -16, StackTooDeepException, ""); - // This is a special assembly item that refers to elements pre-existing on the initial stack. - return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); -} - -void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location) -{ - if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) - // do not execute the storage if we know that the value is already there - return; - m_sequenceNumber++; - decltype(m_storageContent) storageContents; - // Copy over all values (i.e. retain knowledge about them) where we know that this store - // operation will not destroy the knowledge. Specifically, we copy storage locations we know - // are different from _slot or locations where we know that the stored value is equal to _value. - for (auto const& storageItem: m_storageContent) - if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) - storageContents.insert(storageItem); - m_storageContent = move(storageContents); - - AssemblyItem item(Instruction::SSTORE, _location); - Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); - m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id)); - m_storageContent[_slot] = _value; - // increment a second time so that we get unique sequence numbers for writes - m_sequenceNumber++; -} - -ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location) -{ - if (m_storageContent.count(_slot)) - return m_storageContent.at(_slot); - - AssemblyItem item(Instruction::SLOAD, _location); - return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); -} - -void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) -{ - if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) - // do not execute the store if we know that the value is already there - return; - m_sequenceNumber++; - decltype(m_memoryContent) memoryContents; - // copy over values at points where we know that they are different from _slot by at least 32 - for (auto const& memoryItem: m_memoryContent) - if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot)) - memoryContents.insert(memoryItem); - m_memoryContent = move(memoryContents); - - AssemblyItem item(Instruction::MSTORE, _location); - Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); - m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); - m_memoryContent[_slot] = _value; - // increment a second time so that we get unique sequence numbers for writes - m_sequenceNumber++; -} - -ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location) -{ - if (m_memoryContent.count(_slot)) - return m_memoryContent.at(_slot); - - AssemblyItem item(Instruction::MLOAD, _location); - return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); -} - -CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3( - Id _start, - Id _length, - SourceLocation const& _location -) -{ - AssemblyItem sha3Item(Instruction::SHA3, _location); - // Special logic if length is a short constant, otherwise we cannot tell. - u256 const* l = m_expressionClasses.knownConstant(_length); - // unknown or too large length - if (!l || *l > 128) - return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); - - vector arguments; - for (u256 i = 0; i < *l; i += 32) - { - Id slot = m_expressionClasses.find( - AssemblyItem(Instruction::ADD, _location), - {_start, m_expressionClasses.find(i)} - ); - arguments.push_back(loadFromMemory(slot, _location)); - } - if (m_knownSha3Hashes.count(arguments)) - return m_knownSha3Hashes.at(arguments); - Id v; - // If all arguments are known constants, compute the sha3 here - if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); })) - { - bytes data; - for (Id a: arguments) - data += toBigEndian(*m_expressionClasses.knownConstant(a)); - data.resize(size_t(*l)); - v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location)); - } - else - v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); - return m_knownSha3Hashes[arguments] = v; -} - CSECodeGenerator::CSECodeGenerator( ExpressionClasses& _expressionClasses, vector const& _storeOperations diff --git a/libevmasm/CommonSubexpressionEliminator.h b/libevmasm/CommonSubexpressionEliminator.h index 6156bc81a..2ed926401 100644 --- a/libevmasm/CommonSubexpressionEliminator.h +++ b/libevmasm/CommonSubexpressionEliminator.h @@ -32,6 +32,7 @@ #include #include #include +#include namespace dev { @@ -58,20 +59,9 @@ class CommonSubexpressionEliminator { public: using Id = ExpressionClasses::Id; - struct StoreOperation - { - enum Target { Memory, Storage }; - StoreOperation( - Target _target, - Id _slot, - unsigned _sequenceNumber, - Id _expression - ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} - Target target; - Id slot; - unsigned sequenceNumber; - Id expression; - }; + using StoreOperation = KnownState::StoreOperation; + + CommonSubexpressionEliminator(KnownState const& _state): m_state(_state) {} /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first /// item that must be fed into a new instance of the eliminator. @@ -95,49 +85,10 @@ private: /// Tries to optimize the item that breaks the basic block at the end. void optimizeBreakingItem(); - /// Simplifies the given item using - /// Assigns a new equivalence class to the next sequence number of the given stack element. - void setStackElement(int _stackHeight, Id _class); - /// Swaps the given stack elements in their next sequence number. - void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); - /// Retrieves the current equivalence class fo the given stack element (or generates a new - /// one if it does not exist yet). - Id stackElement(int _stackHeight, SourceLocation const& _location); - /// @returns the equivalence class id of the special initial stack element at the given height - /// (must not be positive). - Id initialStackElement(int _stackHeight, SourceLocation const& _location); - - /// Increments the sequence number, deletes all storage information that might be overwritten - /// and stores the new value at the given slot. - void storeInStorage(Id _slot, Id _value, SourceLocation const& _location); - /// Retrieves the current value at the given slot in storage or creates a new special sload class. - Id loadFromStorage(Id _slot, SourceLocation const& _location); - /// Increments the sequence number, deletes all memory information that might be overwritten - /// and stores the new value at the given slot. - void storeInMemory(Id _slot, Id _value, SourceLocation const& _location); - /// Retrieves the current value at the given slot in memory or creates a new special mload class. - Id loadFromMemory(Id _slot, SourceLocation const& _location); - /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. - Id applySha3(Id _start, Id _length, SourceLocation const& _location); - - /// Current stack height, can be negative. - int m_stackHeight = 0; - /// Current stack layout, mapping stack height -> equivalence class - std::map m_stackElements; - /// Current sequence number, this is incremented with each modification to storage or memory. - unsigned m_sequenceNumber = 1; - /// Knowledge about storage content. - std::map m_storageContent; - /// Knowledge about memory content. Keys are memory addresses, note that the values overlap - /// and are not contained here if they are not completely known. - std::map m_memoryContent; - /// Keeps record of all sha3 hashes that are computed. - std::map, Id> m_knownSha3Hashes; + KnownState m_state; /// Keeps information about which storage or memory slots were written to at which sequence /// number with what instruction. std::vector m_storeOperations; - /// Structure containing the classes of equivalent expressions. - ExpressionClasses m_expressionClasses; /// The item that breaks the basic block, can be nullptr. /// It is usually appended to the block but can be optimized in some cases. diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp new file mode 100644 index 000000000..244270fb6 --- /dev/null +++ b/libevmasm/KnownState.cpp @@ -0,0 +1,278 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file KnownState.cpp + * @author Christian + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#include "KnownState.h" +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +ostream& KnownState::stream( + ostream& _out, + map _initialStack, + map _targetStack +) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses->representative(_id); + _out << " " << dec << _id << ": " << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "Optimizer analysis:" << endl; + _out << "Final stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Initial stack: " << endl; + for (auto const& it: _initialStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Target stack: " << endl; + for (auto const& it: _targetStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + +KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + StoreOperation op; + if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.getLocation() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.getLocation() + ); + else if (instruction != Instruction::POP) + { + vector arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + if (_item.instruction() == Instruction::SSTORE) + op = storeInStorage(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::MSTORE) + op = storeInMemory(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) + ); + else + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); + } + m_stackHeight += _item.deposit(); + } + return op; +} + +ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new equivalence class. + return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); +} + +ExpressionClasses::Id KnownState::initialStackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); + assertThrow(_stackHeight > -16, StackTooDeepException, ""); + // This is a special assembly item that refers to elements pre-existing on the initial stack. + return m_expressionClasses->find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); +} + +void KnownState::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} + +void KnownState::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} + +KnownState::StoreOperation KnownState::storeInStorage( + Id _slot, + Id _value, + SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. + for (auto const& storageItem: m_storageContent) + if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + + return operation; +} + +ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + return operation; +} + +ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::Id KnownState::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses->knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses->find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses->find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses->knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + diff --git a/libevmasm/KnownState.h b/libevmasm/KnownState.h new file mode 100644 index 000000000..c6dfcee6b --- /dev/null +++ b/libevmasm/KnownState.h @@ -0,0 +1,149 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file KnownState.h + * @author Christian + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; + +/** + * Class to infer and store knowledge about the state of the virtual machine at a specific + * instruction. + * + * The general workings are that for each assembly item that is fed, an equivalence class is + * derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some + * arithmetic instructions are used to infer equivalences while these classes are determined. + */ +class KnownState +{ +public: + using Id = ExpressionClasses::Id; + struct StoreOperation + { + enum Target { Invalid, Memory, Storage }; + StoreOperation(): target(Invalid), sequenceNumber(-1) {} + StoreOperation( + Target _target, + Id _slot, + unsigned _sequenceNumber, + Id _expression + ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} + bool isValid() const { return target != Invalid; } + Target target; + Id slot; + unsigned sequenceNumber; + Id expression; + }; + + KnownState(): m_expressionClasses(std::make_shared()) {} + + /// Streams debugging information to @a _out. + std::ostream& stream( + std::ostream& _out, + std::map _initialStack = std::map(), + std::map _targetStack = std::map() + ) const; + + /// Feeds the item into the system for analysis. + /// @returns a possible store operation + StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Resets any knowledge about storage. + void resetStorage() { m_storageContent.clear(); } + /// Resets any knowledge about storage. + void resetMemory() { m_memoryContent.clear(); } + /// Resets any knowledge about the current stack. + void resetStack() { m_stackElements.clear(); m_stackHeight = 0; } + /// Resets any knowledge. + void reset() { resetStorage(); resetMemory(); resetStack(); } + + ///@todo the sequence numbers in two copies of this class should never be the same. + /// might be doable using two-dimensional sequence numbers, where the first value is incremented + /// for each copy + + /// Retrieves the current equivalence class fo the given stack element (or generates a new + /// one if it does not exist yet). + Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the equivalence class id of the special initial stack element at the given height + /// (must not be positive). + Id initialStackElement(int _stackHeight, SourceLocation const& _location); + + int stackHeight() const { return m_stackHeight; } + std::map const& stackElements() const { return m_stackElements; } + ExpressionClasses& expressionClasses() const { return *m_expressionClasses; } + +private: + /// Assigns a new equivalence class to the next sequence number of the given stack element. + void setStackElement(int _stackHeight, Id _class); + /// Swaps the given stack elements in their next sequence number. + void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); + + /// Increments the sequence number, deletes all storage information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if storage was not modified + StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in storage or creates a new special sload class. + Id loadFromStorage(Id _slot, SourceLocation const& _location); + /// Increments the sequence number, deletes all memory information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if memory was not modified + StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in memory or creates a new special mload class. + Id loadFromMemory(Id _slot, SourceLocation const& _location); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length, SourceLocation const& _location); + + /// Current stack height, can be negative. + int m_stackHeight = 0; + /// Current stack layout, mapping stack height -> equivalence class + std::map m_stackElements; + /// Current sequence number, this is incremented with each modification to storage or memory. + unsigned m_sequenceNumber = 1; + /// Knowledge about storage content. + std::map m_storageContent; + /// Knowledge about memory content. Keys are memory addresses, note that the values overlap + /// and are not contained here if they are not completely known. + std::map m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map, Id> m_knownSha3Hashes; + /// Structure containing the classes of equivalent expressions. + std::shared_ptr m_expressionClasses; +}; + +} +} diff --git a/test/libsolidity/SolidityOptimizer.cpp b/test/libsolidity/SolidityOptimizer.cpp index 9cdaa5886..71463f919 100644 --- a/test/libsolidity/SolidityOptimizer.cpp +++ b/test/libsolidity/SolidityOptimizer.cpp @@ -90,7 +90,8 @@ public: for (AssemblyItem& item: input) item.setLocation(SourceLocation(1, 3, make_shared(""))); - eth::CommonSubexpressionEliminator cse; + eth::KnownState state; + eth::CommonSubexpressionEliminator cse(state); BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end()); AssemblyItems output = cse.getOptimizedItems(); @@ -231,7 +232,8 @@ BOOST_AUTO_TEST_CASE(function_calls) BOOST_AUTO_TEST_CASE(cse_intermediate_swap) { - eth::CommonSubexpressionEliminator cse; + eth::KnownState state; + eth::CommonSubexpressionEliminator cse(state); AssemblyItems input{ Instruction::SWAP1, Instruction::POP, Instruction::ADD, u256(0), Instruction::SWAP1, Instruction::SLOAD, Instruction::SWAP1, u256(100), Instruction::EXP, Instruction::SWAP1,