Browse Source

Split known state from common subexpression eliminator.

cl-refactor
chriseth 10 years ago
parent
commit
30c832e358
  1. 3
      libevmasm/Assembly.cpp
  2. 267
      libevmasm/CommonSubexpressionEliminator.cpp
  3. 59
      libevmasm/CommonSubexpressionEliminator.h
  4. 278
      libevmasm/KnownState.cpp
  5. 149
      libevmasm/KnownState.h
  6. 6
      test/libsolidity/SolidityOptimizer.cpp

3
libevmasm/Assembly.cpp

@ -329,7 +329,8 @@ Assembly& Assembly::optimise(bool _enable)
copt << "Performing common subexpression elimination...";
for (auto iter = m_items.begin(); iter != m_items.end();)
{
CommonSubexpressionEliminator eliminator;
KnownState state;
CommonSubexpressionEliminator eliminator(state);
auto orig = iter;
iter = eliminator.feedItems(iter, m_items.end());
AssemblyItems optItems;

267
libevmasm/CommonSubexpressionEliminator.cpp

@ -37,18 +37,19 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
map<int, Id> initialStackContents;
map<int, Id> targetStackContents;
int minHeight = m_stackHeight + 1;
if (!m_stackElements.empty())
minHeight = min(minHeight, m_stackElements.begin()->first);
int minHeight = m_state.stackHeight() + 1;
if (!m_state.stackElements().empty())
minHeight = min(minHeight, m_state.stackElements().begin()->first);
for (int height = minHeight; height <= 0; ++height)
initialStackContents[height] = initialStackElement(height, SourceLocation());
for (int height = minHeight; height <= m_stackHeight; ++height)
targetStackContents[height] = stackElement(height, SourceLocation());
//@todo this is not nice as it is here - should be "unknownStackElement" - but is it really unknown?
initialStackContents[height] = m_state.initialStackElement(height, SourceLocation());
for (int height = minHeight; height <= m_state.stackHeight(); ++height)
targetStackContents[height] = m_state.stackElement(height, SourceLocation());
// Debug info:
//stream(cout, initialStackContents, targetStackContents);
AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode(
AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode(
initialStackContents,
targetStackContents
);
@ -57,103 +58,11 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
return items;
}
ostream& CommonSubexpressionEliminator::stream(
ostream& _out,
map<int, Id> _initialStack,
map<int, Id> _targetStack
) const
{
auto streamExpressionClass = [this](ostream& _out, Id _id)
{
auto const& expr = m_expressionClasses.representative(_id);
_out << " " << dec << _id << ": " << *expr.item;
if (expr.sequenceNumber)
_out << "@" << dec << expr.sequenceNumber;
_out << "(";
for (Id arg: expr.arguments)
_out << dec << arg << ",";
_out << ")" << endl;
};
_out << "Optimizer analysis:" << endl;
_out << "Final stack height: " << dec << m_stackHeight << endl;
_out << "Equivalence classes: " << endl;
for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass)
streamExpressionClass(_out, eqClass);
_out << "Initial stack: " << endl;
for (auto const& it: _initialStack)
{
_out << " " << dec << it.first << ": ";
streamExpressionClass(_out, it.second);
}
_out << "Target stack: " << endl;
for (auto const& it: _targetStack)
{
_out << " " << dec << it.first << ": ";
streamExpressionClass(_out, it.second);
}
return _out;
}
void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem)
{
if (_item.type() != Operation)
{
assertThrow(_item.deposit() == 1, InvalidDeposit, "");
setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem));
}
else
{
Instruction instruction = _item.instruction();
InstructionInfo info = instructionInfo(instruction);
if (SemanticInformation::isDupInstruction(_item))
setStackElement(
m_stackHeight + 1,
stackElement(
m_stackHeight - int(instruction) + int(Instruction::DUP1),
_item.getLocation()
)
);
else if (SemanticInformation::isSwapInstruction(_item))
swapStackElements(
m_stackHeight,
m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
_item.getLocation()
);
else if (instruction != Instruction::POP)
{
vector<Id> arguments(info.args);
for (int i = 0; i < info.args; ++i)
arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
if (_item.instruction() == Instruction::SSTORE)
storeInStorage(arguments[0], arguments[1], _item.getLocation());
else if (_item.instruction() == Instruction::SLOAD)
setStackElement(
m_stackHeight + _item.deposit(),
loadFromStorage(arguments[0], _item.getLocation())
);
else if (_item.instruction() == Instruction::MSTORE)
storeInMemory(arguments[0], arguments[1], _item.getLocation());
else if (_item.instruction() == Instruction::MLOAD)
setStackElement(
m_stackHeight + _item.deposit(),
loadFromMemory(arguments[0], _item.getLocation())
);
else if (_item.instruction() == Instruction::SHA3)
setStackElement(
m_stackHeight + _item.deposit(),
applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
);
else
setStackElement(
m_stackHeight + _item.deposit(),
m_expressionClasses.find(_item, arguments, _copyItem)
);
}
m_stackHeight += _item.deposit();
}
StoreOperation op = m_state.feedItem(_item, _copyItem);
if (op.isValid())
m_storeOperations.push_back(op);
}
void CommonSubexpressionEliminator::optimizeBreakingItem()
@ -164,20 +73,20 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
SourceLocation const& location = m_breakingItem->getLocation();
AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType();
Id condition = stackElement(m_stackHeight - 1, location);
Id zero = m_expressionClasses.find(u256(0));
if (m_expressionClasses.knownToBeDifferent(condition, zero))
Id condition = m_state.stackElement(m_state.stackHeight() - 1, location);
Id zero = m_state.expressionClasses().find(u256(0));
if (m_state.expressionClasses().knownToBeDifferent(condition, zero))
{
feedItem(AssemblyItem(Instruction::SWAP1, location), true);
feedItem(AssemblyItem(Instruction::POP, location), true);
AssemblyItem item(Instruction::JUMP, location);
item.setJumpType(jumpType);
m_breakingItem = m_expressionClasses.storeItem(item);
m_breakingItem = m_state.expressionClasses().storeItem(item);
return;
}
Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition});
if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero))
Id negatedCondition = m_state.expressionClasses().find(Instruction::ISZERO, {condition});
if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero))
{
AssemblyItem it(Instruction::POP, location);
feedItem(it, true);
@ -186,148 +95,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
}
}
void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class)
{
m_stackElements[_stackHeight] = _class;
}
void CommonSubexpressionEliminator::swapStackElements(
int _stackHeightA,
int _stackHeightB,
SourceLocation const& _location
)
{
assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
// ensure they are created
stackElement(_stackHeightA, _location);
stackElement(_stackHeightB, _location);
swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
}
ExpressionClasses::Id CommonSubexpressionEliminator::stackElement(
int _stackHeight,
SourceLocation const& _location
)
{
if (m_stackElements.count(_stackHeight))
return m_stackElements.at(_stackHeight);
// Stack element not found (not assigned yet), create new equivalence class.
return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
}
ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement(
int _stackHeight,
SourceLocation const& _location
)
{
assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested.");
assertThrow(_stackHeight > -16, StackTooDeepException, "");
// This is a special assembly item that refers to elements pre-existing on the initial stack.
return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location));
}
void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location)
{
if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
// do not execute the storage if we know that the value is already there
return;
m_sequenceNumber++;
decltype(m_storageContent) storageContents;
// Copy over all values (i.e. retain knowledge about them) where we know that this store
// operation will not destroy the knowledge. Specifically, we copy storage locations we know
// are different from _slot or locations where we know that the stored value is equal to _value.
for (auto const& storageItem: m_storageContent)
if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
storageContents.insert(storageItem);
m_storageContent = move(storageContents);
AssemblyItem item(Instruction::SSTORE, _location);
Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id));
m_storageContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
}
ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location)
{
if (m_storageContent.count(_slot))
return m_storageContent.at(_slot);
AssemblyItem item(Instruction::SLOAD, _location);
return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
}
void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
{
if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
// do not execute the store if we know that the value is already there
return;
m_sequenceNumber++;
decltype(m_memoryContent) memoryContents;
// copy over values at points where we know that they are different from _slot by at least 32
for (auto const& memoryItem: m_memoryContent)
if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot))
memoryContents.insert(memoryItem);
m_memoryContent = move(memoryContents);
AssemblyItem item(Instruction::MSTORE, _location);
Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
m_memoryContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
}
ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location)
{
if (m_memoryContent.count(_slot))
return m_memoryContent.at(_slot);
AssemblyItem item(Instruction::MLOAD, _location);
return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
}
CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3(
Id _start,
Id _length,
SourceLocation const& _location
)
{
AssemblyItem sha3Item(Instruction::SHA3, _location);
// Special logic if length is a short constant, otherwise we cannot tell.
u256 const* l = m_expressionClasses.knownConstant(_length);
// unknown or too large length
if (!l || *l > 128)
return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
vector<Id> arguments;
for (u256 i = 0; i < *l; i += 32)
{
Id slot = m_expressionClasses.find(
AssemblyItem(Instruction::ADD, _location),
{_start, m_expressionClasses.find(i)}
);
arguments.push_back(loadFromMemory(slot, _location));
}
if (m_knownSha3Hashes.count(arguments))
return m_knownSha3Hashes.at(arguments);
Id v;
// If all arguments are known constants, compute the sha3 here
if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); }))
{
bytes data;
for (Id a: arguments)
data += toBigEndian(*m_expressionClasses.knownConstant(a));
data.resize(size_t(*l));
v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location));
}
else
v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
return m_knownSha3Hashes[arguments] = v;
}
CSECodeGenerator::CSECodeGenerator(
ExpressionClasses& _expressionClasses,
vector<CSECodeGenerator::StoreOperation> const& _storeOperations

59
libevmasm/CommonSubexpressionEliminator.h

@ -32,6 +32,7 @@
#include <libdevcore/Exceptions.h>
#include <libevmasm/ExpressionClasses.h>
#include <libevmasm/SemanticInformation.h>
#include <libevmasm/KnownState.h>
namespace dev
{
@ -58,20 +59,9 @@ class CommonSubexpressionEliminator
{
public:
using Id = ExpressionClasses::Id;
struct StoreOperation
{
enum Target { Memory, Storage };
StoreOperation(
Target _target,
Id _slot,
unsigned _sequenceNumber,
Id _expression
): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
Target target;
Id slot;
unsigned sequenceNumber;
Id expression;
};
using StoreOperation = KnownState::StoreOperation;
CommonSubexpressionEliminator(KnownState const& _state): m_state(_state) {}
/// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first
/// item that must be fed into a new instance of the eliminator.
@ -95,49 +85,10 @@ private:
/// Tries to optimize the item that breaks the basic block at the end.
void optimizeBreakingItem();
/// Simplifies the given item using
/// Assigns a new equivalence class to the next sequence number of the given stack element.
void setStackElement(int _stackHeight, Id _class);
/// Swaps the given stack elements in their next sequence number.
void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
/// Retrieves the current equivalence class fo the given stack element (or generates a new
/// one if it does not exist yet).
Id stackElement(int _stackHeight, SourceLocation const& _location);
/// @returns the equivalence class id of the special initial stack element at the given height
/// (must not be positive).
Id initialStackElement(int _stackHeight, SourceLocation const& _location);
/// Increments the sequence number, deletes all storage information that might be overwritten
/// and stores the new value at the given slot.
void storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
/// Retrieves the current value at the given slot in storage or creates a new special sload class.
Id loadFromStorage(Id _slot, SourceLocation const& _location);
/// Increments the sequence number, deletes all memory information that might be overwritten
/// and stores the new value at the given slot.
void storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
/// Retrieves the current value at the given slot in memory or creates a new special mload class.
Id loadFromMemory(Id _slot, SourceLocation const& _location);
/// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
Id applySha3(Id _start, Id _length, SourceLocation const& _location);
/// Current stack height, can be negative.
int m_stackHeight = 0;
/// Current stack layout, mapping stack height -> equivalence class
std::map<int, Id> m_stackElements;
/// Current sequence number, this is incremented with each modification to storage or memory.
unsigned m_sequenceNumber = 1;
/// Knowledge about storage content.
std::map<Id, Id> m_storageContent;
/// Knowledge about memory content. Keys are memory addresses, note that the values overlap
/// and are not contained here if they are not completely known.
std::map<Id, Id> m_memoryContent;
/// Keeps record of all sha3 hashes that are computed.
std::map<std::vector<Id>, Id> m_knownSha3Hashes;
KnownState m_state;
/// Keeps information about which storage or memory slots were written to at which sequence
/// number with what instruction.
std::vector<StoreOperation> m_storeOperations;
/// Structure containing the classes of equivalent expressions.
ExpressionClasses m_expressionClasses;
/// The item that breaks the basic block, can be nullptr.
/// It is usually appended to the block but can be optimized in some cases.

278
libevmasm/KnownState.cpp

@ -0,0 +1,278 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file KnownState.cpp
* @author Christian <c@ethdev.com>
* @date 2015
* Contains knowledge about the state of the virtual machine at a specific instruction.
*/
#include "KnownState.h"
#include <functional>
#include <libdevcrypto/SHA3.h>
#include <libevmasm/AssemblyItem.h>
using namespace std;
using namespace dev;
using namespace dev::eth;
ostream& KnownState::stream(
ostream& _out,
map<int, Id> _initialStack,
map<int, Id> _targetStack
) const
{
auto streamExpressionClass = [this](ostream& _out, Id _id)
{
auto const& expr = m_expressionClasses->representative(_id);
_out << " " << dec << _id << ": " << *expr.item;
if (expr.sequenceNumber)
_out << "@" << dec << expr.sequenceNumber;
_out << "(";
for (Id arg: expr.arguments)
_out << dec << arg << ",";
_out << ")" << endl;
};
_out << "Optimizer analysis:" << endl;
_out << "Final stack height: " << dec << m_stackHeight << endl;
_out << "Equivalence classes: " << endl;
for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass)
streamExpressionClass(_out, eqClass);
_out << "Initial stack: " << endl;
for (auto const& it: _initialStack)
{
_out << " " << dec << it.first << ": ";
streamExpressionClass(_out, it.second);
}
_out << "Target stack: " << endl;
for (auto const& it: _targetStack)
{
_out << " " << dec << it.first << ": ";
streamExpressionClass(_out, it.second);
}
return _out;
}
KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem)
{
StoreOperation op;
if (_item.type() != Operation)
{
assertThrow(_item.deposit() == 1, InvalidDeposit, "");
setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem));
}
else
{
Instruction instruction = _item.instruction();
InstructionInfo info = instructionInfo(instruction);
if (SemanticInformation::isDupInstruction(_item))
setStackElement(
m_stackHeight + 1,
stackElement(
m_stackHeight - int(instruction) + int(Instruction::DUP1),
_item.getLocation()
)
);
else if (SemanticInformation::isSwapInstruction(_item))
swapStackElements(
m_stackHeight,
m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
_item.getLocation()
);
else if (instruction != Instruction::POP)
{
vector<Id> arguments(info.args);
for (int i = 0; i < info.args; ++i)
arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
if (_item.instruction() == Instruction::SSTORE)
op = storeInStorage(arguments[0], arguments[1], _item.getLocation());
else if (_item.instruction() == Instruction::SLOAD)
setStackElement(
m_stackHeight + _item.deposit(),
loadFromStorage(arguments[0], _item.getLocation())
);
else if (_item.instruction() == Instruction::MSTORE)
op = storeInMemory(arguments[0], arguments[1], _item.getLocation());
else if (_item.instruction() == Instruction::MLOAD)
setStackElement(
m_stackHeight + _item.deposit(),
loadFromMemory(arguments[0], _item.getLocation())
);
else if (_item.instruction() == Instruction::SHA3)
setStackElement(
m_stackHeight + _item.deposit(),
applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
);
else
setStackElement(
m_stackHeight + _item.deposit(),
m_expressionClasses->find(_item, arguments, _copyItem)
);
}
m_stackHeight += _item.deposit();
}
return op;
}
ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
{
if (m_stackElements.count(_stackHeight))
return m_stackElements.at(_stackHeight);
// Stack element not found (not assigned yet), create new equivalence class.
return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
}
ExpressionClasses::Id KnownState::initialStackElement(
int _stackHeight,
SourceLocation const& _location
)
{
assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested.");
assertThrow(_stackHeight > -16, StackTooDeepException, "");
// This is a special assembly item that refers to elements pre-existing on the initial stack.
return m_expressionClasses->find(AssemblyItem(dupInstruction(1 - _stackHeight), _location));
}
void KnownState::setStackElement(int _stackHeight, Id _class)
{
m_stackElements[_stackHeight] = _class;
}
void KnownState::swapStackElements(
int _stackHeightA,
int _stackHeightB,
SourceLocation const& _location
)
{
assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
// ensure they are created
stackElement(_stackHeightA, _location);
stackElement(_stackHeightB, _location);
swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
}
KnownState::StoreOperation KnownState::storeInStorage(
Id _slot,
Id _value,
SourceLocation const& _location)
{
if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
// do not execute the storage if we know that the value is already there
return StoreOperation();
m_sequenceNumber++;
decltype(m_storageContent) storageContents;
// Copy over all values (i.e. retain knowledge about them) where we know that this store
// operation will not destroy the knowledge. Specifically, we copy storage locations we know
// are different from _slot or locations where we know that the stored value is equal to _value.
for (auto const& storageItem: m_storageContent)
if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
storageContents.insert(storageItem);
m_storageContent = move(storageContents);
AssemblyItem item(Instruction::SSTORE, _location);
Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id);
m_storageContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
return operation;
}
ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location)
{
if (m_storageContent.count(_slot))
return m_storageContent.at(_slot);
AssemblyItem item(Instruction::SLOAD, _location);
return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
}
KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
{
if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
// do not execute the store if we know that the value is already there
return StoreOperation();
m_sequenceNumber++;
decltype(m_memoryContent) memoryContents;
// copy over values at points where we know that they are different from _slot by at least 32
for (auto const& memoryItem: m_memoryContent)
if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot))
memoryContents.insert(memoryItem);
m_memoryContent = move(memoryContents);
AssemblyItem item(Instruction::MSTORE, _location);
Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
m_memoryContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
return operation;
}
ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location)
{
if (m_memoryContent.count(_slot))
return m_memoryContent.at(_slot);
AssemblyItem item(Instruction::MLOAD, _location);
return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
}
KnownState::Id KnownState::applySha3(
Id _start,
Id _length,
SourceLocation const& _location
)
{
AssemblyItem sha3Item(Instruction::SHA3, _location);
// Special logic if length is a short constant, otherwise we cannot tell.
u256 const* l = m_expressionClasses->knownConstant(_length);
// unknown or too large length
if (!l || *l > 128)
return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
vector<Id> arguments;
for (u256 i = 0; i < *l; i += 32)
{
Id slot = m_expressionClasses->find(
AssemblyItem(Instruction::ADD, _location),
{_start, m_expressionClasses->find(i)}
);
arguments.push_back(loadFromMemory(slot, _location));
}
if (m_knownSha3Hashes.count(arguments))
return m_knownSha3Hashes.at(arguments);
Id v;
// If all arguments are known constants, compute the sha3 here
if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); }))
{
bytes data;
for (Id a: arguments)
data += toBigEndian(*m_expressionClasses->knownConstant(a));
data.resize(size_t(*l));
v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location));
}
else
v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
return m_knownSha3Hashes[arguments] = v;
}

149
libevmasm/KnownState.h

@ -0,0 +1,149 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file KnownState.h
* @author Christian <c@ethdev.com>
* @date 2015
* Contains knowledge about the state of the virtual machine at a specific instruction.
*/
#pragma once
#include <vector>
#include <map>
#include <set>
#include <tuple>
#include <ostream>
#include <libdevcore/CommonIO.h>
#include <libdevcore/Exceptions.h>
#include <libevmasm/ExpressionClasses.h>
#include <libevmasm/SemanticInformation.h>
namespace dev
{
namespace eth
{
class AssemblyItem;
using AssemblyItems = std::vector<AssemblyItem>;
/**
* Class to infer and store knowledge about the state of the virtual machine at a specific
* instruction.
*
* The general workings are that for each assembly item that is fed, an equivalence class is
* derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some
* arithmetic instructions are used to infer equivalences while these classes are determined.
*/
class KnownState
{
public:
using Id = ExpressionClasses::Id;
struct StoreOperation
{
enum Target { Invalid, Memory, Storage };
StoreOperation(): target(Invalid), sequenceNumber(-1) {}
StoreOperation(
Target _target,
Id _slot,
unsigned _sequenceNumber,
Id _expression
): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
bool isValid() const { return target != Invalid; }
Target target;
Id slot;
unsigned sequenceNumber;
Id expression;
};
KnownState(): m_expressionClasses(std::make_shared<ExpressionClasses>()) {}
/// Streams debugging information to @a _out.
std::ostream& stream(
std::ostream& _out,
std::map<int, Id> _initialStack = std::map<int, Id>(),
std::map<int, Id> _targetStack = std::map<int, Id>()
) const;
/// Feeds the item into the system for analysis.
/// @returns a possible store operation
StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false);
/// Resets any knowledge about storage.
void resetStorage() { m_storageContent.clear(); }
/// Resets any knowledge about storage.
void resetMemory() { m_memoryContent.clear(); }
/// Resets any knowledge about the current stack.
void resetStack() { m_stackElements.clear(); m_stackHeight = 0; }
/// Resets any knowledge.
void reset() { resetStorage(); resetMemory(); resetStack(); }
///@todo the sequence numbers in two copies of this class should never be the same.
/// might be doable using two-dimensional sequence numbers, where the first value is incremented
/// for each copy
/// Retrieves the current equivalence class fo the given stack element (or generates a new
/// one if it does not exist yet).
Id stackElement(int _stackHeight, SourceLocation const& _location);
/// @returns the equivalence class id of the special initial stack element at the given height
/// (must not be positive).
Id initialStackElement(int _stackHeight, SourceLocation const& _location);
int stackHeight() const { return m_stackHeight; }
std::map<int, Id> const& stackElements() const { return m_stackElements; }
ExpressionClasses& expressionClasses() const { return *m_expressionClasses; }
private:
/// Assigns a new equivalence class to the next sequence number of the given stack element.
void setStackElement(int _stackHeight, Id _class);
/// Swaps the given stack elements in their next sequence number.
void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
/// Increments the sequence number, deletes all storage information that might be overwritten
/// and stores the new value at the given slot.
/// @returns the store operation, which might be invalid if storage was not modified
StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
/// Retrieves the current value at the given slot in storage or creates a new special sload class.
Id loadFromStorage(Id _slot, SourceLocation const& _location);
/// Increments the sequence number, deletes all memory information that might be overwritten
/// and stores the new value at the given slot.
/// @returns the store operation, which might be invalid if memory was not modified
StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
/// Retrieves the current value at the given slot in memory or creates a new special mload class.
Id loadFromMemory(Id _slot, SourceLocation const& _location);
/// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
Id applySha3(Id _start, Id _length, SourceLocation const& _location);
/// Current stack height, can be negative.
int m_stackHeight = 0;
/// Current stack layout, mapping stack height -> equivalence class
std::map<int, Id> m_stackElements;
/// Current sequence number, this is incremented with each modification to storage or memory.
unsigned m_sequenceNumber = 1;
/// Knowledge about storage content.
std::map<Id, Id> m_storageContent;
/// Knowledge about memory content. Keys are memory addresses, note that the values overlap
/// and are not contained here if they are not completely known.
std::map<Id, Id> m_memoryContent;
/// Keeps record of all sha3 hashes that are computed.
std::map<std::vector<Id>, Id> m_knownSha3Hashes;
/// Structure containing the classes of equivalent expressions.
std::shared_ptr<ExpressionClasses> m_expressionClasses;
};
}
}

6
test/libsolidity/SolidityOptimizer.cpp

@ -90,7 +90,8 @@ public:
for (AssemblyItem& item: input)
item.setLocation(SourceLocation(1, 3, make_shared<string>("")));
eth::CommonSubexpressionEliminator cse;
eth::KnownState state;
eth::CommonSubexpressionEliminator cse(state);
BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end());
AssemblyItems output = cse.getOptimizedItems();
@ -231,7 +232,8 @@ BOOST_AUTO_TEST_CASE(function_calls)
BOOST_AUTO_TEST_CASE(cse_intermediate_swap)
{
eth::CommonSubexpressionEliminator cse;
eth::KnownState state;
eth::CommonSubexpressionEliminator cse(state);
AssemblyItems input{
Instruction::SWAP1, Instruction::POP, Instruction::ADD, u256(0), Instruction::SWAP1,
Instruction::SLOAD, Instruction::SWAP1, u256(100), Instruction::EXP, Instruction::SWAP1,

Loading…
Cancel
Save