Browse Source

Merge branch 'develop' of github.com:ethereum/cpp-ethereum into develop

cl-refactor
Gav Wood 10 years ago
parent
commit
01bb472844
  1. 16
      libevmasm/Assembly.cpp
  2. 271
      libevmasm/CommonSubexpressionEliminator.cpp
  3. 63
      libevmasm/CommonSubexpressionEliminator.h
  4. 109
      libevmasm/ControlFlowGraph.cpp
  5. 32
      libevmasm/ControlFlowGraph.h
  6. 28
      libevmasm/ExpressionClasses.cpp
  7. 4
      libevmasm/ExpressionClasses.h
  8. 326
      libevmasm/KnownState.cpp
  9. 163
      libevmasm/KnownState.h
  10. 53
      libevmasm/SemanticInformation.cpp
  11. 8
      libevmasm/SemanticInformation.h
  12. 6
      libsolidity/AST.h
  13. 10
      libsolidity/DeclarationContainer.cpp
  14. 8
      libsolidity/DeclarationContainer.h
  15. 83
      libsolidity/NameAndTypeResolver.cpp
  16. 17
      libsolidity/NameAndTypeResolver.h
  17. 1
      libsolidity/Types.h
  18. 8
      libweb3jsonrpc/WebThreeStubServerBase.cpp
  19. 84
      test/libsolidity/SolidityEndToEndTest.cpp
  20. 61
      test/libsolidity/SolidityOptimizer.cpp

16
libevmasm/Assembly.cpp

@ -304,9 +304,6 @@ Assembly& Assembly::optimise(bool _enable)
{
if (!_enable)
return *this;
std::vector<pair<AssemblyItems, function<AssemblyItems(AssemblyItemsConstRef)>>> rules;
// jump to next instruction
rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].data() == m[2].data()) return {m[2]}; else return m.toVector(); }});
unsigned total = 0;
for (unsigned count = 1; count > 0; total += count)
@ -314,10 +311,17 @@ Assembly& Assembly::optimise(bool _enable)
copt << toString(*this);
count = 0;
//@todo CFG interface should be a generator, that returns an item and a pointer to a
// knownstate, which has to replace the current state if it is not null.
// Feed these items to the CSE, but also store them and replace the stored version
// if the items generated by the CSE are shorter. (or even use less gas?)
copt << "Performing control flow analysis...";
{
ControlFlowGraph cfg(m_items);
AssemblyItems optItems = cfg.optimisedItems();
AssemblyItems optItems;
for (BasicBlock const& block: cfg.optimisedBlocks())
copy(m_items.begin() + block.begin, m_items.begin() + block.end,
back_inserter(optItems));
if (optItems.size() < m_items.size())
{
copt << "Old size: " << m_items.size() << ", new size: " << optItems.size();
@ -329,7 +333,9 @@ Assembly& Assembly::optimise(bool _enable)
copt << "Performing common subexpression elimination...";
for (auto iter = m_items.begin(); iter != m_items.end();)
{
CommonSubexpressionEliminator eliminator;
//@todo use only a single state / expression classes instance.
KnownState state(make_shared<ExpressionClasses>());
CommonSubexpressionEliminator eliminator(state);
auto orig = iter;
iter = eliminator.feedItems(iter, m_items.end());
AssemblyItems optItems;

271
libevmasm/CommonSubexpressionEliminator.cpp

@ -37,18 +37,19 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
map<int, Id> initialStackContents;
map<int, Id> targetStackContents;
int minHeight = m_stackHeight + 1;
if (!m_stackElements.empty())
minHeight = min(minHeight, m_stackElements.begin()->first);
for (int height = minHeight; height <= 0; ++height)
initialStackContents[height] = initialStackElement(height, SourceLocation());
for (int height = minHeight; height <= m_stackHeight; ++height)
targetStackContents[height] = stackElement(height, SourceLocation());
int minHeight = m_state.stackHeight() + 1;
if (!m_state.stackElements().empty())
minHeight = min(minHeight, m_state.stackElements().begin()->first);
for (int height = minHeight; height <= m_initialState.stackHeight(); ++height)
initialStackContents[height] = m_initialState.stackElement(height, SourceLocation());
for (int height = minHeight; height <= m_state.stackHeight(); ++height)
targetStackContents[height] = m_state.stackElement(height, SourceLocation());
// Debug info:
//stream(cout, initialStackContents, targetStackContents);
AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode(
AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode(
m_initialState.stackHeight(),
initialStackContents,
targetStackContents
);
@ -57,103 +58,11 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
return items;
}
ostream& CommonSubexpressionEliminator::stream(
ostream& _out,
map<int, Id> _initialStack,
map<int, Id> _targetStack
) const
{
auto streamExpressionClass = [this](ostream& _out, Id _id)
{
auto const& expr = m_expressionClasses.representative(_id);
_out << " " << dec << _id << ": " << *expr.item;
if (expr.sequenceNumber)
_out << "@" << dec << expr.sequenceNumber;
_out << "(";
for (Id arg: expr.arguments)
_out << dec << arg << ",";
_out << ")" << endl;
};
_out << "Optimizer analysis:" << endl;
_out << "Final stack height: " << dec << m_stackHeight << endl;
_out << "Equivalence classes: " << endl;
for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass)
streamExpressionClass(_out, eqClass);
_out << "Initial stack: " << endl;
for (auto const& it: _initialStack)
{
_out << " " << dec << it.first << ": ";
streamExpressionClass(_out, it.second);
}
_out << "Target stack: " << endl;
for (auto const& it: _targetStack)
{
_out << " " << dec << it.first << ": ";
streamExpressionClass(_out, it.second);
}
return _out;
}
void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem)
{
if (_item.type() != Operation)
{
assertThrow(_item.deposit() == 1, InvalidDeposit, "");
setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem));
}
else
{
Instruction instruction = _item.instruction();
InstructionInfo info = instructionInfo(instruction);
if (SemanticInformation::isDupInstruction(_item))
setStackElement(
m_stackHeight + 1,
stackElement(
m_stackHeight - int(instruction) + int(Instruction::DUP1),
_item.getLocation()
)
);
else if (SemanticInformation::isSwapInstruction(_item))
swapStackElements(
m_stackHeight,
m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
_item.getLocation()
);
else if (instruction != Instruction::POP)
{
vector<Id> arguments(info.args);
for (int i = 0; i < info.args; ++i)
arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
if (_item.instruction() == Instruction::SSTORE)
storeInStorage(arguments[0], arguments[1], _item.getLocation());
else if (_item.instruction() == Instruction::SLOAD)
setStackElement(
m_stackHeight + _item.deposit(),
loadFromStorage(arguments[0], _item.getLocation())
);
else if (_item.instruction() == Instruction::MSTORE)
storeInMemory(arguments[0], arguments[1], _item.getLocation());
else if (_item.instruction() == Instruction::MLOAD)
setStackElement(
m_stackHeight + _item.deposit(),
loadFromMemory(arguments[0], _item.getLocation())
);
else if (_item.instruction() == Instruction::SHA3)
setStackElement(
m_stackHeight + _item.deposit(),
applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
);
else
setStackElement(
m_stackHeight + _item.deposit(),
m_expressionClasses.find(_item, arguments, _copyItem)
);
}
m_stackHeight += _item.deposit();
}
StoreOperation op = m_state.feedItem(_item, _copyItem);
if (op.isValid())
m_storeOperations.push_back(op);
}
void CommonSubexpressionEliminator::optimizeBreakingItem()
@ -164,20 +73,20 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
SourceLocation const& location = m_breakingItem->getLocation();
AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType();
Id condition = stackElement(m_stackHeight - 1, location);
Id zero = m_expressionClasses.find(u256(0));
if (m_expressionClasses.knownToBeDifferent(condition, zero))
Id condition = m_state.stackElement(m_state.stackHeight() - 1, location);
Id zero = m_state.expressionClasses().find(u256(0));
if (m_state.expressionClasses().knownToBeDifferent(condition, zero))
{
feedItem(AssemblyItem(Instruction::SWAP1, location), true);
feedItem(AssemblyItem(Instruction::POP, location), true);
AssemblyItem item(Instruction::JUMP, location);
item.setJumpType(jumpType);
m_breakingItem = m_expressionClasses.storeItem(item);
m_breakingItem = m_state.expressionClasses().storeItem(item);
return;
}
Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition});
if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero))
Id negatedCondition = m_state.expressionClasses().find(Instruction::ISZERO, {condition});
if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero))
{
AssemblyItem it(Instruction::POP, location);
feedItem(it, true);
@ -186,148 +95,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
}
}
void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class)
{
m_stackElements[_stackHeight] = _class;
}
void CommonSubexpressionEliminator::swapStackElements(
int _stackHeightA,
int _stackHeightB,
SourceLocation const& _location
)
{
assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
// ensure they are created
stackElement(_stackHeightA, _location);
stackElement(_stackHeightB, _location);
swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
}
ExpressionClasses::Id CommonSubexpressionEliminator::stackElement(
int _stackHeight,
SourceLocation const& _location
)
{
if (m_stackElements.count(_stackHeight))
return m_stackElements.at(_stackHeight);
// Stack element not found (not assigned yet), create new equivalence class.
return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
}
ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement(
int _stackHeight,
SourceLocation const& _location
)
{
assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested.");
assertThrow(_stackHeight > -16, StackTooDeepException, "");
// This is a special assembly item that refers to elements pre-existing on the initial stack.
return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location));
}
void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location)
{
if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
// do not execute the storage if we know that the value is already there
return;
m_sequenceNumber++;
decltype(m_storageContent) storageContents;
// Copy over all values (i.e. retain knowledge about them) where we know that this store
// operation will not destroy the knowledge. Specifically, we copy storage locations we know
// are different from _slot or locations where we know that the stored value is equal to _value.
for (auto const& storageItem: m_storageContent)
if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
storageContents.insert(storageItem);
m_storageContent = move(storageContents);
AssemblyItem item(Instruction::SSTORE, _location);
Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id));
m_storageContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
}
ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location)
{
if (m_storageContent.count(_slot))
return m_storageContent.at(_slot);
AssemblyItem item(Instruction::SLOAD, _location);
return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
}
void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
{
if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
// do not execute the store if we know that the value is already there
return;
m_sequenceNumber++;
decltype(m_memoryContent) memoryContents;
// copy over values at points where we know that they are different from _slot by at least 32
for (auto const& memoryItem: m_memoryContent)
if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot))
memoryContents.insert(memoryItem);
m_memoryContent = move(memoryContents);
AssemblyItem item(Instruction::MSTORE, _location);
Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
m_memoryContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
}
ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location)
{
if (m_memoryContent.count(_slot))
return m_memoryContent.at(_slot);
AssemblyItem item(Instruction::MLOAD, _location);
return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
}
CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3(
Id _start,
Id _length,
SourceLocation const& _location
)
{
AssemblyItem sha3Item(Instruction::SHA3, _location);
// Special logic if length is a short constant, otherwise we cannot tell.
u256 const* l = m_expressionClasses.knownConstant(_length);
// unknown or too large length
if (!l || *l > 128)
return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
vector<Id> arguments;
for (u256 i = 0; i < *l; i += 32)
{
Id slot = m_expressionClasses.find(
AssemblyItem(Instruction::ADD, _location),
{_start, m_expressionClasses.find(i)}
);
arguments.push_back(loadFromMemory(slot, _location));
}
if (m_knownSha3Hashes.count(arguments))
return m_knownSha3Hashes.at(arguments);
Id v;
// If all arguments are known constants, compute the sha3 here
if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); }))
{
bytes data;
for (Id a: arguments)
data += toBigEndian(*m_expressionClasses.knownConstant(a));
data.resize(size_t(*l));
v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location));
}
else
v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
return m_knownSha3Hashes[arguments] = v;
}
CSECodeGenerator::CSECodeGenerator(
ExpressionClasses& _expressionClasses,
vector<CSECodeGenerator::StoreOperation> const& _storeOperations
@ -339,10 +106,12 @@ CSECodeGenerator::CSECodeGenerator(
}
AssemblyItems CSECodeGenerator::generateCode(
int _initialStackHeight,
map<int, Id> const& _initialStack,
map<int, Id> const& _targetStackContents
)
{
m_stackHeight = _initialStackHeight;
m_stack = _initialStack;
for (auto const& item: m_stack)
if (!m_classPositions.count(item.second))

63
libevmasm/CommonSubexpressionEliminator.h

@ -32,6 +32,7 @@
#include <libdevcore/Exceptions.h>
#include <libevmasm/ExpressionClasses.h>
#include <libevmasm/SemanticInformation.h>
#include <libevmasm/KnownState.h>
namespace dev
{
@ -58,20 +59,9 @@ class CommonSubexpressionEliminator
{
public:
using Id = ExpressionClasses::Id;
struct StoreOperation
{
enum Target { Memory, Storage };
StoreOperation(
Target _target,
Id _slot,
unsigned _sequenceNumber,
Id _expression
): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
Target target;
Id slot;
unsigned sequenceNumber;
Id expression;
};
using StoreOperation = KnownState::StoreOperation;
CommonSubexpressionEliminator(KnownState const& _state): m_initialState(_state), m_state(_state) {}
/// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first
/// item that must be fed into a new instance of the eliminator.
@ -95,49 +85,11 @@ private:
/// Tries to optimize the item that breaks the basic block at the end.
void optimizeBreakingItem();
/// Simplifies the given item using
/// Assigns a new equivalence class to the next sequence number of the given stack element.
void setStackElement(int _stackHeight, Id _class);
/// Swaps the given stack elements in their next sequence number.
void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
/// Retrieves the current equivalence class fo the given stack element (or generates a new
/// one if it does not exist yet).
Id stackElement(int _stackHeight, SourceLocation const& _location);
/// @returns the equivalence class id of the special initial stack element at the given height
/// (must not be positive).
Id initialStackElement(int _stackHeight, SourceLocation const& _location);
/// Increments the sequence number, deletes all storage information that might be overwritten
/// and stores the new value at the given slot.
void storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
/// Retrieves the current value at the given slot in storage or creates a new special sload class.
Id loadFromStorage(Id _slot, SourceLocation const& _location);
/// Increments the sequence number, deletes all memory information that might be overwritten
/// and stores the new value at the given slot.
void storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
/// Retrieves the current value at the given slot in memory or creates a new special mload class.
Id loadFromMemory(Id _slot, SourceLocation const& _location);
/// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
Id applySha3(Id _start, Id _length, SourceLocation const& _location);
/// Current stack height, can be negative.
int m_stackHeight = 0;
/// Current stack layout, mapping stack height -> equivalence class
std::map<int, Id> m_stackElements;
/// Current sequence number, this is incremented with each modification to storage or memory.
unsigned m_sequenceNumber = 1;
/// Knowledge about storage content.
std::map<Id, Id> m_storageContent;
/// Knowledge about memory content. Keys are memory addresses, note that the values overlap
/// and are not contained here if they are not completely known.
std::map<Id, Id> m_memoryContent;
/// Keeps record of all sha3 hashes that are computed.
std::map<std::vector<Id>, Id> m_knownSha3Hashes;
KnownState m_initialState;
KnownState m_state;
/// Keeps information about which storage or memory slots were written to at which sequence
/// number with what instruction.
std::vector<StoreOperation> m_storeOperations;
/// Structure containing the classes of equivalent expressions.
ExpressionClasses m_expressionClasses;
/// The item that breaks the basic block, can be nullptr.
/// It is usually appended to the block but can be optimized in some cases.
@ -164,6 +116,7 @@ public:
/// @param _targetStackContents final contents of the stack, by stack height relative to initial
/// @note should only be called once on each object.
AssemblyItems generateCode(
int _initialStackHeight,
std::map<int, Id> const& _initialStack,
std::map<int, Id> const& _targetStackContents
);
@ -199,7 +152,7 @@ private:
AssemblyItems m_generatedItems;
/// Current height of the stack relative to the start.
int m_stackHeight = 0;
int m_stackHeight;
/// If (b, a) is in m_requests then b is needed to compute a.
std::multimap<Id, Id> m_neededBy;
/// Current content of the stack.

109
libevmasm/ControlFlowGraph.cpp

@ -23,9 +23,11 @@
#include <libevmasm/ControlFlowGraph.h>
#include <map>
#include <memory>
#include <libevmasm/Exceptions.h>
#include <libevmasm/AssemblyItem.h>
#include <libevmasm/SemanticInformation.h>
#include <libevmasm/KnownState.h>
using namespace std;
using namespace dev;
@ -36,16 +38,17 @@ BlockId::BlockId(u256 const& _id): m_id(_id)
assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large.");
}
AssemblyItems ControlFlowGraph::optimisedItems()
BasicBlocks ControlFlowGraph::optimisedBlocks()
{
if (m_items.empty())
return m_items;
return BasicBlocks();
findLargestTag();
splitBlocks();
resolveNextLinks();
removeUnusedBlocks();
setPrevLinks();
gatherKnowledge();
return rebuildCode();
}
@ -209,7 +212,78 @@ void ControlFlowGraph::setPrevLinks()
}
}
AssemblyItems ControlFlowGraph::rebuildCode()
void ControlFlowGraph::gatherKnowledge()
{
// @todo actually we know that memory is filled with zeros at the beginning,
// we could make use of that.
KnownStatePointer emptyState = make_shared<KnownState>();
ExpressionClasses& expr = emptyState->expressionClasses();
bool unknownJumpEncountered = false;
vector<pair<BlockId, KnownStatePointer>> workQueue({make_pair(BlockId::initial(), emptyState->copy())});
while (!workQueue.empty())
{
//@todo we might have to do something like incrementing the sequence number for each JUMPDEST
assertThrow(!!workQueue.back().first, OptimizerException, "");
BasicBlock& block = m_blocks.at(workQueue.back().first);
KnownStatePointer state = workQueue.back().second;
workQueue.pop_back();
if (block.startState)
{
state->reduceToCommonKnowledge(*block.startState);
if (*state == *block.startState)
continue;
}
block.startState = state->copy();
//@todo we might know the return address for the first pass, but not anymore for the second,
// -> store knowledge about tags as a union.
// Feed all items except for the final jump yet because it will erase the target tag.
unsigned pc = block.begin;
while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc)))
state->feedItem(m_items.at(pc++));
if (
block.endType == BasicBlock::EndType::JUMP ||
block.endType == BasicBlock::EndType::JUMPI
)
{
assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, "");
//@todo in the case of JUMPI, add knowledge about the condition to the state
// (for both values of the condition)
BlockId nextBlock = expressionClassToBlockId(
state->stackElement(state->stackHeight(), SourceLocation()),
expr
);
state->feedItem(m_items.at(pc++));
if (nextBlock)
workQueue.push_back(make_pair(nextBlock, state->copy()));
else if (!unknownJumpEncountered)
{
// We do not know where this jump goes, so we have to reset the states of all
// JUMPDESTs.
unknownJumpEncountered = true;
for (auto const& it: m_blocks)
if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag)
workQueue.push_back(make_pair(it.first, emptyState->copy()));
}
}
else if (block.begin <= pc && pc < block.end)
state->feedItem(m_items.at(pc++));
assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, "");
block.endState = state;
if (
block.endType == BasicBlock::EndType::HANDOVER ||
block.endType == BasicBlock::EndType::JUMPI
)
workQueue.push_back(make_pair(block.next, state->copy()));
}
}
BasicBlocks ControlFlowGraph::rebuildCode()
{
map<BlockId, unsigned> pushes;
for (auto& idAndBlock: m_blocks)
@ -220,7 +294,7 @@ AssemblyItems ControlFlowGraph::rebuildCode()
for (auto it: m_blocks)
blocksToAdd.insert(it.first);
set<BlockId> blocksAdded;
AssemblyItems code;
BasicBlocks blocks;
for (
BlockId blockId = BlockId::initial();
@ -233,23 +307,34 @@ AssemblyItems ControlFlowGraph::rebuildCode()
blockId = m_blocks.at(blockId).prev;
for (; blockId; blockId = m_blocks.at(blockId).next)
{
BasicBlock const& block = m_blocks.at(blockId);
BasicBlock& block = m_blocks.at(blockId);
blocksToAdd.erase(blockId);
blocksAdded.insert(blockId);
auto begin = m_items.begin() + block.begin;
auto end = m_items.begin() + block.end;
if (begin == end)
if (block.begin == block.end)
continue;
// If block starts with unused tag, skip it.
if (previousHandedOver && !pushes[blockId] && begin->type() == Tag)
++begin;
if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag)
++block.begin;
if (block.begin < block.end)
blocks.push_back(block);
previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER);
copy(begin, end, back_inserter(code));
}
}
return code;
return blocks;
}
BlockId ControlFlowGraph::expressionClassToBlockId(
ExpressionClasses::Id _id,
ExpressionClasses& _exprClasses
)
{
ExpressionClasses::Expression expr = _exprClasses.representative(_id);
if (expr.item && expr.item->type() == PushTag)
return BlockId(expr.item->data());
else
return BlockId::invalid();
}
BlockId ControlFlowGraph::generateNewId()

32
libevmasm/ControlFlowGraph.h

@ -24,16 +24,18 @@
#pragma once
#include <vector>
#include <memory>
#include <libdevcore/Common.h>
#include <libdevcore/Assertions.h>
#include <libevmasm/ExpressionClasses.h>
namespace dev
{
namespace eth
{
class AssemblyItem;
using AssemblyItems = std::vector<AssemblyItem>;
class KnownState;
using KnownStatePointer = std::shared_ptr<KnownState>;
/**
* Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special
@ -69,32 +71,46 @@ struct BasicBlock
unsigned end = 0;
/// Tags pushed inside this block, with multiplicity.
std::vector<BlockId> pushedTags;
/// ID of the block that always follows this one (either JUMP or flow into new block),
/// or BlockId::invalid() otherwise
/// ID of the block that always follows this one (either non-branching part of JUMPI or flow
/// into new block), or BlockId::invalid() otherwise
BlockId next = BlockId::invalid();
/// ID of the block that has to precede this one.
/// ID of the block that has to precede this one (because control flows into it).
BlockId prev = BlockId::invalid();
enum class EndType { JUMP, JUMPI, STOP, HANDOVER };
EndType endType = EndType::HANDOVER;
/// Knowledge about the state when this block is entered. Intersection of all possible ways
/// to enter this block.
KnownStatePointer startState;
/// Knowledge about the state at the end of this block.
KnownStatePointer endState;
};
using BasicBlocks = std::vector<BasicBlock>;
class ControlFlowGraph
{
public:
/// Initializes the control flow graph.
/// @a _items has to persist across the usage of this class.
ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {}
/// @returns the collection of optimised items, should be called only once.
AssemblyItems optimisedItems();
/// @returns vector of basic blocks in the order they should be used in the final code.
/// Should be called only once.
BasicBlocks optimisedBlocks();
private:
void findLargestTag();
void splitBlocks();
void resolveNextLinks();
void removeUnusedBlocks();
void gatherKnowledge();
void setPrevLinks();
AssemblyItems rebuildCode();
BasicBlocks rebuildCode();
/// @returns the corresponding BlockId if _id is a pushed jump tag,
/// and an invalid BlockId otherwise.
BlockId expressionClassToBlockId(ExpressionClasses::Id _id, ExpressionClasses& _exprClasses);
BlockId generateNewId();

28
libevmasm/ExpressionClasses.cpp

@ -37,6 +37,7 @@ using namespace dev::eth;
bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const
{
assertThrow(!!item && !!_other.item, OptimizerException, "");
auto type = item->type();
auto otherType = _other.item->type();
return std::tie(type, item->data(), arguments, sequenceNumber) <
@ -56,12 +57,15 @@ ExpressionClasses::Id ExpressionClasses::find(
exp.arguments = _arguments;
exp.sequenceNumber = _sequenceNumber;
if (SemanticInformation::isDeterministic(_item))
{
if (SemanticInformation::isCommutativeOperation(_item))
sort(exp.arguments.begin(), exp.arguments.end());
auto it = m_expressions.find(exp);
if (it != m_expressions.end())
return it->id;
}
if (_copyItem)
exp.item = storeItem(_item);
@ -122,10 +126,16 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const
{
Expression const& expr = representative(_id);
stringstream str;
str << dec << expr.id << ":" << *expr.item << "(";
str << dec << expr.id << ":";
if (expr.item)
{
str << *expr.item << "(";
for (Id arg: expr.arguments)
str << fullDAGToString(arg) << ",";
str << ")";
}
else
str << " UNIQUE";
return str.str();
}
@ -279,7 +289,11 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr,
{
static Rules rules;
if (_expr.item->type() != Operation)
if (
!_expr.item ||
_expr.item->type() != Operation ||
!SemanticInformation::isDeterministic(*_expr.item)
)
return -1;
for (auto const& rule: rules.rules())
@ -337,7 +351,7 @@ void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _
bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const
{
if (!matchesBaseItem(*_expr.item))
if (!matchesBaseItem(_expr.item))
return false;
if (m_matchGroup)
{
@ -387,13 +401,15 @@ string Pattern::toString() const
return s.str();
}
bool Pattern::matchesBaseItem(AssemblyItem const& _item) const
bool Pattern::matchesBaseItem(AssemblyItem const* _item) const
{
if (m_type == UndefinedItem)
return true;
if (m_type != _item.type())
if (!_item)
return false;
if (m_type != _item->type())
return false;
if (m_requireDataMatch && m_data != _item.data())
if (m_requireDataMatch && m_data != _item->data())
return false;
return true;
}

4
libevmasm/ExpressionClasses.h

@ -50,7 +50,7 @@ public:
struct Expression
{
Id id;
AssemblyItem const* item;
AssemblyItem const* item = nullptr;
Ids arguments;
unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions.
/// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber).
@ -149,7 +149,7 @@ public:
std::string toString() const;
private:
bool matchesBaseItem(AssemblyItem const& _item) const;
bool matchesBaseItem(AssemblyItem const* _item) const;
Expression const& matchGroupValue() const;
AssemblyItemType m_type;

326
libevmasm/KnownState.cpp

@ -0,0 +1,326 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file KnownState.cpp
* @author Christian <c@ethdev.com>
* @date 2015
* Contains knowledge about the state of the virtual machine at a specific instruction.
*/
#include "KnownState.h"
#include <functional>
#include <libdevcrypto/SHA3.h>
#include <libevmasm/AssemblyItem.h>
using namespace std;
using namespace dev;
using namespace dev::eth;
ostream& KnownState::stream(ostream& _out) const
{
auto streamExpressionClass = [this](ostream& _out, Id _id)
{
auto const& expr = m_expressionClasses->representative(_id);
_out << " " << dec << _id << ": ";
if (!expr.item)
_out << " no item";
else if (expr.item->type() == UndefinedItem)
_out << " unknown " << int(expr.item->data());
else
_out << *expr.item;
if (expr.sequenceNumber)
_out << "@" << dec << expr.sequenceNumber;
_out << "(";
for (Id arg: expr.arguments)
_out << dec << arg << ",";
_out << ")" << endl;
};
_out << "=== State ===" << endl;
_out << "Stack height: " << dec << m_stackHeight << endl;
_out << "Equivalence classes: " << endl;
for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass)
streamExpressionClass(_out, eqClass);
_out << "Stack: " << endl;
for (auto const& it: m_stackElements)
{
_out << " " << dec << it.first << ": ";
streamExpressionClass(_out, it.second);
}
_out << "Storage: " << endl;
for (auto const& it: m_storageContent)
{
_out << " ";
streamExpressionClass(_out, it.first);
_out << ": ";
streamExpressionClass(_out, it.second);
}
_out << "Memory: " << endl;
for (auto const& it: m_memoryContent)
{
_out << " ";
streamExpressionClass(_out, it.first);
_out << ": ";
streamExpressionClass(_out, it.second);
}
return _out;
}
KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem)
{
StoreOperation op;
if (_item.type() == Tag)
{
// can be ignored
}
else if (_item.type() != Operation)
{
assertThrow(_item.deposit() == 1, InvalidDeposit, "");
setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem));
}
else
{
Instruction instruction = _item.instruction();
InstructionInfo info = instructionInfo(instruction);
if (SemanticInformation::isDupInstruction(_item))
setStackElement(
m_stackHeight + 1,
stackElement(
m_stackHeight - int(instruction) + int(Instruction::DUP1),
_item.getLocation()
)
);
else if (SemanticInformation::isSwapInstruction(_item))
swapStackElements(
m_stackHeight,
m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
_item.getLocation()
);
else if (instruction != Instruction::POP)
{
vector<Id> arguments(info.args);
for (int i = 0; i < info.args; ++i)
arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
if (_item.instruction() == Instruction::SSTORE)
op = storeInStorage(arguments[0], arguments[1], _item.getLocation());
else if (_item.instruction() == Instruction::SLOAD)
setStackElement(
m_stackHeight + _item.deposit(),
loadFromStorage(arguments[0], _item.getLocation())
);
else if (_item.instruction() == Instruction::MSTORE)
op = storeInMemory(arguments[0], arguments[1], _item.getLocation());
else if (_item.instruction() == Instruction::MLOAD)
setStackElement(
m_stackHeight + _item.deposit(),
loadFromMemory(arguments[0], _item.getLocation())
);
else if (_item.instruction() == Instruction::SHA3)
setStackElement(
m_stackHeight + _item.deposit(),
applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
);
else
{
if (SemanticInformation::invalidatesMemory(_item.instruction()))
resetMemory();
if (SemanticInformation::invalidatesStorage(_item.instruction()))
resetStorage();
assertThrow(info.ret <= 1, InvalidDeposit, "");
if (info.ret == 1)
setStackElement(
m_stackHeight + _item.deposit(),
m_expressionClasses->find(_item, arguments, _copyItem)
);
}
}
m_stackElements.erase(
m_stackElements.upper_bound(m_stackHeight + _item.deposit()),
m_stackElements.end()
);
m_stackHeight += _item.deposit();
}
return op;
}
void KnownState::reduceToCommonKnowledge(KnownState const& /*_other*/)
{
//@todo
*this = KnownState(m_expressionClasses);
}
bool KnownState::operator==(const KnownState& _other) const
{
//@todo
return (
m_stackElements.empty() &&
_other.m_stackElements.empty() &&
m_storageContent.empty() &&
_other.m_storageContent.empty() &&
m_memoryContent.empty() &&
_other.m_memoryContent.empty()
);
}
ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
{
if (m_stackElements.count(_stackHeight))
return m_stackElements.at(_stackHeight);
// Stack element not found (not assigned yet), create new unknown equivalence class.
//@todo check that we do not infer incorrect equivalences when the stack is cleared partially
//in between.
return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
}
ExpressionClasses::Id KnownState::initialStackElement(
int _stackHeight,
SourceLocation const& _location
)
{
// This is a special assembly item that refers to elements pre-existing on the initial stack.
return m_expressionClasses->find(AssemblyItem(UndefinedItem, u256(_stackHeight), _location));
}
void KnownState::setStackElement(int _stackHeight, Id _class)
{
m_stackElements[_stackHeight] = _class;
}
void KnownState::swapStackElements(
int _stackHeightA,
int _stackHeightB,
SourceLocation const& _location
)
{
assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
// ensure they are created
stackElement(_stackHeightA, _location);
stackElement(_stackHeightB, _location);
swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
}
KnownState::StoreOperation KnownState::storeInStorage(
Id _slot,
Id _value,
SourceLocation const& _location)
{
if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
// do not execute the storage if we know that the value is already there
return StoreOperation();
m_sequenceNumber++;
decltype(m_storageContent) storageContents;
// Copy over all values (i.e. retain knowledge about them) where we know that this store
// operation will not destroy the knowledge. Specifically, we copy storage locations we know
// are different from _slot or locations where we know that the stored value is equal to _value.
for (auto const& storageItem: m_storageContent)
if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
storageContents.insert(storageItem);
m_storageContent = move(storageContents);
AssemblyItem item(Instruction::SSTORE, _location);
Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id);
m_storageContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
return operation;
}
ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location)
{
if (m_storageContent.count(_slot))
return m_storageContent.at(_slot);
AssemblyItem item(Instruction::SLOAD, _location);
return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
}
KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
{
if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
// do not execute the store if we know that the value is already there
return StoreOperation();
m_sequenceNumber++;
decltype(m_memoryContent) memoryContents;
// copy over values at points where we know that they are different from _slot by at least 32
for (auto const& memoryItem: m_memoryContent)
if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot))
memoryContents.insert(memoryItem);
m_memoryContent = move(memoryContents);
AssemblyItem item(Instruction::MSTORE, _location);
Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
m_memoryContent[_slot] = _value;
// increment a second time so that we get unique sequence numbers for writes
m_sequenceNumber++;
return operation;
}
ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location)
{
if (m_memoryContent.count(_slot))
return m_memoryContent.at(_slot);
AssemblyItem item(Instruction::MLOAD, _location);
return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
}
KnownState::Id KnownState::applySha3(
Id _start,
Id _length,
SourceLocation const& _location
)
{
AssemblyItem sha3Item(Instruction::SHA3, _location);
// Special logic if length is a short constant, otherwise we cannot tell.
u256 const* l = m_expressionClasses->knownConstant(_length);
// unknown or too large length
if (!l || *l > 128)
return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
vector<Id> arguments;
for (u256 i = 0; i < *l; i += 32)
{
Id slot = m_expressionClasses->find(
AssemblyItem(Instruction::ADD, _location),
{_start, m_expressionClasses->find(i)}
);
arguments.push_back(loadFromMemory(slot, _location));
}
if (m_knownSha3Hashes.count(arguments))
return m_knownSha3Hashes.at(arguments);
Id v;
// If all arguments are known constants, compute the sha3 here
if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); }))
{
bytes data;
for (Id a: arguments)
data += toBigEndian(*m_expressionClasses->knownConstant(a));
data.resize(size_t(*l));
v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location));
}
else
v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
return m_knownSha3Hashes[arguments] = v;
}

163
libevmasm/KnownState.h

@ -0,0 +1,163 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file KnownState.h
* @author Christian <c@ethdev.com>
* @date 2015
* Contains knowledge about the state of the virtual machine at a specific instruction.
*/
#pragma once
#include <vector>
#include <map>
#include <set>
#include <tuple>
#include <memory>
#include <ostream>
#include <libdevcore/CommonIO.h>
#include <libdevcore/Exceptions.h>
#include <libevmasm/ExpressionClasses.h>
#include <libevmasm/SemanticInformation.h>
namespace dev
{
namespace eth
{
class AssemblyItem;
using AssemblyItems = std::vector<AssemblyItem>;
/**
* Class to infer and store knowledge about the state of the virtual machine at a specific
* instruction.
*
* The general workings are that for each assembly item that is fed, an equivalence class is
* derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some
* arithmetic instructions are used to infer equivalences while these classes are determined.
*/
class KnownState
{
public:
using Id = ExpressionClasses::Id;
struct StoreOperation
{
enum Target { Invalid, Memory, Storage };
StoreOperation(): target(Invalid), sequenceNumber(-1) {}
StoreOperation(
Target _target,
Id _slot,
unsigned _sequenceNumber,
Id _expression
): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
bool isValid() const { return target != Invalid; }
Target target;
Id slot;
unsigned sequenceNumber;
Id expression;
};
explicit KnownState(
std::shared_ptr<ExpressionClasses> _expressionClasses = std::make_shared<ExpressionClasses>()
): m_expressionClasses(_expressionClasses)
{
}
/// Streams debugging information to @a _out.
std::ostream& stream(std::ostream& _out) const;
/// Feeds the item into the system for analysis.
/// @returns a possible store operation
StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false);
/// Resets any knowledge about storage.
void resetStorage() { m_storageContent.clear(); }
/// Resets any knowledge about storage.
void resetMemory() { m_memoryContent.clear(); }
/// Resets any knowledge about the current stack.
void resetStack() { m_stackElements.clear(); m_stackHeight = 0; }
/// Resets any knowledge.
void reset() { resetStorage(); resetMemory(); resetStack(); }
/// Manually increments the storage and memory sequence number.
void incrementSequenceNumber() { m_sequenceNumber += 2; }
/// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained.
/// If the stack heighht is different, the smaller one is used and the stack is compared
/// relatively.
void reduceToCommonKnowledge(KnownState const& _other);
/// @returns a shared pointer to a copy of this state.
std::shared_ptr<KnownState> copy() const { return std::make_shared<KnownState>(*this); }
/// @returns true if the knowledge about the state of both objects is (known to be) equal.
bool operator==(KnownState const& _other) const;
///@todo the sequence numbers in two copies of this class should never be the same.
/// might be doable using two-dimensional sequence numbers, where the first value is incremented
/// for each copy
/// Retrieves the current equivalence class fo the given stack element (or generates a new
/// one if it does not exist yet).
Id stackElement(int _stackHeight, SourceLocation const& _location);
/// @returns the equivalence class id of the special initial stack element at the given height.
Id initialStackElement(int _stackHeight, SourceLocation const& _location);
int stackHeight() const { return m_stackHeight; }
std::map<int, Id> const& stackElements() const { return m_stackElements; }
ExpressionClasses& expressionClasses() const { return *m_expressionClasses; }
private:
/// Assigns a new equivalence class to the next sequence number of the given stack element.
void setStackElement(int _stackHeight, Id _class);
/// Swaps the given stack elements in their next sequence number.
void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
/// Increments the sequence number, deletes all storage information that might be overwritten
/// and stores the new value at the given slot.
/// @returns the store operation, which might be invalid if storage was not modified
StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
/// Retrieves the current value at the given slot in storage or creates a new special sload class.
Id loadFromStorage(Id _slot, SourceLocation const& _location);
/// Increments the sequence number, deletes all memory information that might be overwritten
/// and stores the new value at the given slot.
/// @returns the store operation, which might be invalid if memory was not modified
StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
/// Retrieves the current value at the given slot in memory or creates a new special mload class.
Id loadFromMemory(Id _slot, SourceLocation const& _location);
/// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
Id applySha3(Id _start, Id _length, SourceLocation const& _location);
/// Current stack height, can be negative.
int m_stackHeight = 0;
/// Current stack layout, mapping stack height -> equivalence class
std::map<int, Id> m_stackElements;
/// Current sequence number, this is incremented with each modification to storage or memory.
unsigned m_sequenceNumber = 1;
/// Knowledge about storage content.
std::map<Id, Id> m_storageContent;
/// Knowledge about memory content. Keys are memory addresses, note that the values overlap
/// and are not contained here if they are not completely known.
std::map<Id, Id> m_memoryContent;
/// Keeps record of all sha3 hashes that are computed.
std::map<std::vector<Id>, Id> m_knownSha3Hashes;
/// Structure containing the classes of equivalent expressions.
std::shared_ptr<ExpressionClasses> m_expressionClasses;
};
}
}

53
libevmasm/SemanticInformation.cpp

@ -122,3 +122,56 @@ bool SemanticInformation::altersControlFlow(AssemblyItem const& _item)
return false;
}
}
bool SemanticInformation::isDeterministic(AssemblyItem const& _item)
{
if (_item.type() != Operation)
return true;
switch (_item.instruction())
{
case Instruction::CALL:
case Instruction::CALLCODE:
case Instruction::CREATE:
case Instruction::GAS:
case Instruction::PC:
case Instruction::MSIZE: // depends on previous writes and reads, not only on content
case Instruction::BALANCE: // depends on previous calls
case Instruction::EXTCODESIZE:
return false;
default:
return true;
}
}
bool SemanticInformation::invalidatesMemory(Instruction _instruction)
{
switch (_instruction)
{
case Instruction::CALLDATACOPY:
case Instruction::CODECOPY:
case Instruction::EXTCODECOPY:
case Instruction::MSTORE:
case Instruction::MSTORE8:
case Instruction::CALL:
case Instruction::CALLCODE:
return true;
default:
return false;
}
}
bool SemanticInformation::invalidatesStorage(Instruction _instruction)
{
switch (_instruction)
{
case Instruction::CALL:
case Instruction::CALLCODE:
case Instruction::CREATE:
case Instruction::SSTORE:
return true;
default:
return false;
}
}

8
libevmasm/SemanticInformation.h

@ -23,6 +23,7 @@
#pragma once
#include <libevmcore/Instruction.h>
namespace dev
{
@ -45,6 +46,13 @@ struct SemanticInformation
static bool isSwapInstruction(AssemblyItem const& _item);
static bool isJumpInstruction(AssemblyItem const& _item);
static bool altersControlFlow(AssemblyItem const& _item);
/// @returns false if the value put on the stack by _item depends on anything else than
/// the information in the current block header, memory, storage or stack.
static bool isDeterministic(AssemblyItem const& _item);
/// @returns true if the given instruction modifies memory.
static bool invalidatesMemory(Instruction _instruction);
/// @returns true if the given instruction modifies storage (even indirectly).
static bool invalidatesStorage(Instruction _instruction);
};
}

6
libsolidity/AST.h

@ -1220,7 +1220,7 @@ public:
/// Stores a set of possible declarations referenced by this identifier. Has to be resolved
/// providing argument types using overloadResolution before the referenced declaration
/// is accessed.
void setOverloadedDeclarations(std::set<Declaration const*> const& _declarations)
void setOverloadedDeclarations(std::vector<Declaration const*> const& _declarations)
{
m_overloadedDeclarations = _declarations;
}
@ -1237,8 +1237,8 @@ private:
/// Stores a reference to the current contract. This is needed because types of base contracts
/// change depending on the context.
ContractDefinition const* m_currentContract = nullptr;
/// A set of overloaded declarations, right now only FunctionDefinition has overloaded declarations.
std::set<Declaration const*> m_overloadedDeclarations;
/// A vector of overloaded declarations, right now only FunctionDefinition has overloaded declarations.
std::vector<Declaration const*> m_overloadedDeclarations;
};
/**

10
libsolidity/DeclarationContainer.cpp

@ -37,6 +37,7 @@ Declaration const* DeclarationContainer::conflictingDeclaration(Declaration cons
declarations += m_declarations.at(name);
if (m_invisibleDeclarations.count(name))
declarations += m_invisibleDeclarations.at(name);
if (dynamic_cast<FunctionDefinition const*>(&_declaration))
{
// check that all other declarations with the same name are functions
@ -66,14 +67,13 @@ bool DeclarationContainer::registerDeclaration(Declaration const& _declaration,
return false;
if (_invisible)
m_invisibleDeclarations[name].insert(&_declaration);
m_invisibleDeclarations[name].push_back(&_declaration);
else
m_declarations[name].insert(&_declaration);
m_declarations[name].push_back(&_declaration);
return true;
}
set<Declaration const*> DeclarationContainer::resolveName(ASTString const& _name, bool _recursive) const
std::vector<Declaration const*> DeclarationContainer::resolveName(ASTString const& _name, bool _recursive) const
{
solAssert(!_name.empty(), "Attempt to resolve empty name.");
auto result = m_declarations.find(_name);
@ -81,5 +81,5 @@ set<Declaration const*> DeclarationContainer::resolveName(ASTString const& _name
return result->second;
if (_recursive && m_enclosingContainer)
return m_enclosingContainer->resolveName(_name, true);
return set<Declaration const*>({});
return vector<Declaration const*>({});
}

8
libsolidity/DeclarationContainer.h

@ -48,17 +48,17 @@ public:
/// @param _update if true, replaces a potential declaration that is already present
/// @returns false if the name was already declared.
bool registerDeclaration(Declaration const& _declaration, bool _invisible = false, bool _update = false);
std::set<Declaration const*> resolveName(ASTString const& _name, bool _recursive = false) const;
std::vector<Declaration const*> resolveName(ASTString const& _name, bool _recursive = false) const;
Declaration const* getEnclosingDeclaration() const { return m_enclosingDeclaration; }
std::map<ASTString, std::set<Declaration const*>> const& getDeclarations() const { return m_declarations; }
std::map<ASTString, std::vector<Declaration const*>> const& getDeclarations() const { return m_declarations; }
/// @returns whether declaration is valid, and if not also returns previous declaration.
Declaration const* conflictingDeclaration(Declaration const& _declaration) const;
private:
Declaration const* m_enclosingDeclaration;
DeclarationContainer const* m_enclosingContainer;
std::map<ASTString, std::set<Declaration const*>> m_declarations;
std::map<ASTString, std::set<Declaration const*>> m_invisibleDeclarations;
std::map<ASTString, std::vector<Declaration const*>> m_declarations;
std::map<ASTString, std::vector<Declaration const*>> m_invisibleDeclarations;
};
}

83
libsolidity/NameAndTypeResolver.cpp

@ -53,9 +53,13 @@ void NameAndTypeResolver::resolveNamesAndTypes(ContractDefinition& _contract)
m_currentScope = &m_scopes[&_contract];
linearizeBaseContracts(_contract);
// we first import non-functions only as we do not yet know the argument types
for (ContractDefinition const* base: _contract.getLinearizedBaseContracts())
importInheritedScope(*base, false); // import non-functions
std::vector<ContractDefinition const*> properBases(
++_contract.getLinearizedBaseContracts().begin(),
_contract.getLinearizedBaseContracts().end()
);
for (ContractDefinition const* base: properBases)
importInheritedScope(*base);
for (ASTPointer<StructDefinition> const& structDef: _contract.getDefinedStructs())
ReferencesResolver resolver(*structDef, *this, &_contract, nullptr);
@ -80,8 +84,6 @@ void NameAndTypeResolver::resolveNamesAndTypes(ContractDefinition& _contract)
}
m_currentScope = &m_scopes[&_contract];
for (ContractDefinition const* base: _contract.getLinearizedBaseContracts())
importInheritedScope(*base, true); // import functions
// now resolve references inside the code
for (ASTPointer<ModifierDefinition> const& modifier: _contract.getFunctionModifiers())
@ -115,20 +117,55 @@ void NameAndTypeResolver::updateDeclaration(Declaration const& _declaration)
solAssert(_declaration.getScope() == nullptr, "Updated declaration outside global scope.");
}
set<Declaration const*> NameAndTypeResolver::resolveName(ASTString const& _name, Declaration const* _scope) const
vector<Declaration const*> NameAndTypeResolver::resolveName(ASTString const& _name, Declaration const* _scope) const
{
auto iterator = m_scopes.find(_scope);
if (iterator == end(m_scopes))
return set<Declaration const*>({});
return vector<Declaration const*>({});
return iterator->second.resolveName(_name, false);
}
set<Declaration const*> NameAndTypeResolver::getNameFromCurrentScope(ASTString const& _name, bool _recursive)
vector<Declaration const*> NameAndTypeResolver::getNameFromCurrentScope(ASTString const& _name, bool _recursive)
{
return m_currentScope->resolveName(_name, _recursive);
}
void NameAndTypeResolver::importInheritedScope(ContractDefinition const& _base, bool _importFunctions)
vector<Declaration const*> NameAndTypeResolver::cleanedDeclarations(
Identifier const& _identifier,
vector<Declaration const*> const& _declarations
)
{
solAssert(_declarations.size() > 1, "");
vector<Declaration const*> uniqueFunctions;
for (auto it = _declarations.begin(); it != _declarations.end(); ++it)
{
solAssert(*it, "");
// the declaration is functionDefinition while declarations > 1
FunctionDefinition const& functionDefinition = dynamic_cast<FunctionDefinition const&>(**it);
FunctionType functionType(functionDefinition);
for (auto parameter: functionType.getParameterTypes() + functionType.getReturnParameterTypes())
if (!parameter)
BOOST_THROW_EXCEPTION(
DeclarationError() <<
errinfo_sourceLocation(_identifier.getLocation()) <<
errinfo_comment("Function type can not be used in this context")
);
if (uniqueFunctions.end() == find_if(
uniqueFunctions.begin(),
uniqueFunctions.end(),
[&](Declaration const* d)
{
FunctionType newFunctionType(dynamic_cast<FunctionDefinition const&>(*d));
return functionType.hasEqualArgumentTypes(newFunctionType);
}
))
uniqueFunctions.push_back(*it);
}
return uniqueFunctions;
}
void NameAndTypeResolver::importInheritedScope(ContractDefinition const& _base)
{
auto iterator = m_scopes.find(&_base);
solAssert(iterator != end(m_scopes), "");
@ -136,31 +173,8 @@ void NameAndTypeResolver::importInheritedScope(ContractDefinition const& _base,
for (auto const& declaration: nameAndDeclaration.second)
// Import if it was declared in the base, is not the constructor and is visible in derived classes
if (declaration->getScope() == &_base && declaration->isVisibleInDerivedContracts())
{
auto function = dynamic_cast<FunctionDefinition const*>(declaration);
if ((function == nullptr) == _importFunctions)
continue;
if (!!function)
{
FunctionType functionType(*function);
// only import if a function with the same arguments does not exist yet
bool functionWithEqualArgumentsFound = false;
for (auto knownDeclaration: m_currentScope->resolveName(nameAndDeclaration.first))
{
auto knownFunction = dynamic_cast<FunctionDefinition const*>(knownDeclaration);
if (!knownFunction)
continue; // this is not legal, but will be caught later
if (!FunctionType(*knownFunction).hasEqualArgumentTypes(functionType))
continue;
functionWithEqualArgumentsFound = true;
break;
}
if (functionWithEqualArgumentsFound)
continue;
}
m_currentScope->registerDeclaration(*declaration);
}
}
void NameAndTypeResolver::linearizeBaseContracts(ContractDefinition& _contract) const
{
@ -465,10 +479,9 @@ bool ReferencesResolver::visit(Identifier& _identifier)
errinfo_comment("Undeclared identifier.")
);
else if (declarations.size() == 1)
_identifier.setReferencedDeclaration(**declarations.begin(), m_currentContract);
_identifier.setReferencedDeclaration(*declarations.front(), m_currentContract);
else
// Duplicate declaration will be checked in checkTypeRequirements()
_identifier.setOverloadedDeclarations(declarations);
_identifier.setOverloadedDeclarations(m_resolver.cleanedDeclarations(_identifier, declarations));
return false;
}

17
libsolidity/NameAndTypeResolver.h

@ -56,19 +56,24 @@ public:
/// Resolves the given @a _name inside the scope @a _scope. If @a _scope is omitted,
/// the global scope is used (i.e. the one containing only the contract).
/// @returns a pointer to the declaration on success or nullptr on failure.
std::set<Declaration const*> resolveName(ASTString const& _name, Declaration const* _scope = nullptr) const;
std::vector<Declaration const*> resolveName(ASTString const& _name, Declaration const* _scope = nullptr) const;
/// Resolves a name in the "current" scope. Should only be called during the initial
/// resolving phase.
std::set<Declaration const*> getNameFromCurrentScope(ASTString const& _name, bool _recursive = true);
std::vector<Declaration const*> getNameFromCurrentScope(ASTString const& _name, bool _recursive = true);
/// returns the vector of declarations without repetitions
static std::vector<Declaration const*> cleanedDeclarations(
Identifier const& _identifier,
std::vector<Declaration const*> const& _declarations
);
private:
void reset();
/// Either imports all non-function members or all function members declared directly in the
/// given contract (i.e. does not import inherited members) into the current scope if they are
///not present already.
void importInheritedScope(ContractDefinition const& _base, bool _importFunctions);
/// Imports all members declared directly in the given contract (i.e. does not import inherited members)
/// into the current scope if they are not present already.
void importInheritedScope(ContractDefinition const& _base);
/// Computes "C3-Linearization" of base contracts and stores it inside the contract.
void linearizeBaseContracts(ContractDefinition& _contract) const;

1
libsolidity/Types.h

@ -617,6 +617,7 @@ public:
/// @returns true if this function can take the given argument types (possibly
/// after implicit conversion).
bool canTakeArguments(TypePointers const& _arguments) const;
/// @returns true if the types of parameters are equal (does't check return parameter types)
bool hasEqualArgumentTypes(FunctionType const& _other) const;
Location const& getLocation() const { return m_location; }

8
libweb3jsonrpc/WebThreeStubServerBase.cpp

@ -531,9 +531,9 @@ string WebThreeStubServerBase::eth_signTransaction(Json::Value const& _json)
t.from = m_accounts->getDefaultTransactAccount();
if (t.creation)
ret = toJS(right160(sha3(rlpList(t.from, client()->countAt(t.from)))));;
if (!t.gasPrice)
if (t.gasPrice == UndefinedU256)
t.gasPrice = 10 * dev::eth::szabo; // TODO: should be determined by user somehow.
if (!t.gas)
if (t.gas == UndefinedU256)
t.gas = min<u256>(client()->gasLimitRemaining() / 5, client()->balanceAt(t.from) / t.gasPrice);
if (m_accounts->isRealAccount(t.from))
@ -582,9 +582,9 @@ string WebThreeStubServerBase::eth_call(Json::Value const& _json, string const&
t.from = m_accounts->getDefaultTransactAccount();
// if (!m_accounts->isRealAccount(t.from))
// return ret;
if (!t.gasPrice)
if (t.gasPrice == UndefinedU256)
t.gasPrice = 10 * dev::eth::szabo;
if (!t.gas)
if (t.gas == UndefinedU256)
t.gas = client()->gasLimitRemaining();
return toJS(client()->call(m_accounts->secretKey(t.from), t.value, t.to, t.data, t.gas, t.gasPrice, jsToBlockNumber(_blockNumber), FudgeFactor::Lenient).output);

84
test/libsolidity/SolidityEndToEndTest.cpp

@ -3910,6 +3910,90 @@ BOOST_AUTO_TEST_CASE(external_types_in_calls)
BOOST_CHECK(callContractFunction("nonexisting") == encodeArgs(u256(9)));
}
BOOST_AUTO_TEST_CASE(proper_order_of_overwriting_of_attributes)
{
// bug #1798
char const* sourceCode = R"(
contract init {
function isOk() returns (bool) { return false; }
bool public ok = false;
}
contract fix {
function isOk() returns (bool) { return true; }
bool public ok = true;
}
contract init_fix is init, fix {
function checkOk() returns (bool) { return ok; }
}
contract fix_init is fix, init {
function checkOk() returns (bool) { return ok; }
}
)";
compileAndRun(sourceCode, 0, "init_fix");
BOOST_CHECK(callContractFunction("isOk()") == encodeArgs(true));
BOOST_CHECK(callContractFunction("ok()") == encodeArgs(true));
compileAndRun(sourceCode, 0, "fix_init");
BOOST_CHECK(callContractFunction("isOk()") == encodeArgs(false));
BOOST_CHECK(callContractFunction("ok()") == encodeArgs(false));
}
BOOST_AUTO_TEST_CASE(proper_overwriting_accessor_by_function)
{
// bug #1798
char const* sourceCode = R"(
contract attribute {
bool ok = false;
}
contract func {
function ok() returns (bool) { return true; }
}
contract attr_func is attribute, func {
function checkOk() returns (bool) { return ok(); }
}
contract func_attr is func, attribute {
function checkOk() returns (bool) { return ok; }
}
)";
compileAndRun(sourceCode, 0, "attr_func");
BOOST_CHECK(callContractFunction("ok()") == encodeArgs(true));
compileAndRun(sourceCode, 0, "func_attr");
BOOST_CHECK(callContractFunction("checkOk()") == encodeArgs(false));
}
BOOST_AUTO_TEST_CASE(overwriting_inheritance)
{
// bug #1798
char const* sourceCode = R"(
contract A {
function ok() returns (uint) { return 1; }
}
contract B {
function ok() returns (uint) { return 2; }
}
contract C {
uint ok = 6;
}
contract AB is A, B {
function ok() returns (uint) { return 4; }
}
contract reversedE is C, AB {
function checkOk() returns (uint) { return ok(); }
}
contract E is AB, C {
function checkOk() returns (uint) { return ok; }
}
)";
compileAndRun(sourceCode, 0, "reversedE");
BOOST_CHECK(callContractFunction("checkOk()") == encodeArgs(4));
compileAndRun(sourceCode, 0, "E");
BOOST_CHECK(callContractFunction("checkOk()") == encodeArgs(6));
}
BOOST_AUTO_TEST_SUITE_END()
}

61
test/libsolidity/SolidityOptimizer.cpp

@ -83,14 +83,28 @@ public:
"\nOptimized: " + toHex(optimizedOutput));
}
AssemblyItems getCSE(AssemblyItems const& _input)
AssemblyItems addDummyLocations(AssemblyItems const& _input)
{
// add dummy locations to each item so that we can check that they are not deleted
AssemblyItems input = _input;
for (AssemblyItem& item: input)
item.setLocation(SourceLocation(1, 3, make_shared<string>("")));
return input;
}
eth::KnownState createInitialState(AssemblyItems const& _input)
{
eth::KnownState state;
for (auto const& item: addDummyLocations(_input))
state.feedItem(item);
return state;
}
AssemblyItems getCSE(AssemblyItems const& _input, eth::KnownState const& _state = eth::KnownState())
{
AssemblyItems input = addDummyLocations(_input);
eth::CommonSubexpressionEliminator cse;
eth::CommonSubexpressionEliminator cse(_state);
BOOST_REQUIRE(cse.feedItems(input.begin(), input.end()) == input.end());
AssemblyItems output = cse.getOptimizedItems();
@ -101,9 +115,13 @@ public:
return output;
}
void checkCSE(AssemblyItems const& _input, AssemblyItems const& _expectation)
void checkCSE(
AssemblyItems const& _input,
AssemblyItems const& _expectation,
KnownState const& _state = eth::KnownState()
)
{
AssemblyItems output = getCSE(_input);
AssemblyItems output = getCSE(_input, _state);
BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end());
}
@ -113,8 +131,12 @@ public:
// Running it four times should be enough for these tests.
for (unsigned i = 0; i < 4; ++i)
{
eth::ControlFlowGraph cfg(output);
output = cfg.optimisedItems();
ControlFlowGraph cfg(output);
AssemblyItems optItems;
for (BasicBlock const& block: cfg.optimisedBlocks())
copy(output.begin() + block.begin, output.begin() + block.end,
back_inserter(optItems));
output = move(optItems);
}
BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end());
}
@ -231,7 +253,8 @@ BOOST_AUTO_TEST_CASE(function_calls)
BOOST_AUTO_TEST_CASE(cse_intermediate_swap)
{
eth::CommonSubexpressionEliminator cse;
eth::KnownState state;
eth::CommonSubexpressionEliminator cse(state);
AssemblyItems input{
Instruction::SWAP1, Instruction::POP, Instruction::ADD, u256(0), Instruction::SWAP1,
Instruction::SLOAD, Instruction::SWAP1, u256(100), Instruction::EXP, Instruction::SWAP1,
@ -754,6 +777,30 @@ BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_noninterfering_store_in_between
BOOST_CHECK_EQUAL(1, count(output.begin(), output.end(), AssemblyItem(Instruction::SHA3)));
}
BOOST_AUTO_TEST_CASE(cse_with_initially_known_stack)
{
eth::KnownState state = createInitialState(AssemblyItems{
u256(0x12),
u256(0x20),
Instruction::ADD
});
AssemblyItems input{
u256(0x12 + 0x20)
};
checkCSE(input, AssemblyItems{Instruction::DUP1}, state);
}
BOOST_AUTO_TEST_CASE(cse_equality_on_initially_known_stack)
{
eth::KnownState state = createInitialState(AssemblyItems{Instruction::DUP1});
AssemblyItems input{
Instruction::EQ
};
AssemblyItems output = getCSE(input, state);
// check that it directly pushes 1 (true)
BOOST_CHECK(find(output.begin(), output.end(), AssemblyItem(u256(1))) != output.end());
}
BOOST_AUTO_TEST_CASE(control_flow_graph_remove_unused)
{
// remove parts of the code that are unused

Loading…
Cancel
Save