Browse Source

Control flow analysis.

cl-refactor
chriseth 10 years ago
parent
commit
ae366e7d05
  1. 87
      libevmcore/Assembly.cpp
  2. 260
      libevmcore/ControlFlowGraph.cpp
  3. 108
      libevmcore/ControlFlowGraph.h
  4. 19
      libevmcore/SemanticInformation.cpp
  5. 1
      libevmcore/SemanticInformation.h
  6. 80
      test/SolidityOptimizer.cpp

87
libevmcore/Assembly.cpp

@ -23,6 +23,7 @@
#include <fstream>
#include <libdevcore/Log.h>
#include <libevmcore/CommonSubexpressionEliminator.h>
#include <libevmcore/ControlFlowGraph.h>
using namespace std;
using namespace dev;
@ -197,6 +198,18 @@ Assembly& Assembly::optimise(bool _enable)
copt << *this;
count = 0;
copt << "Performing control flow analysis...";
{
ControlFlowGraph cfg(m_items);
AssemblyItems optItems = cfg.optimisedItems();
if (optItems.size() < m_items.size())
{
copt << "Old size: " << m_items.size() << ", new size: " << optItems.size();
m_items = move(optItems);
count++;
}
}
copt << "Performing common subexpression elimination...";
for (auto iter = m_items.begin(); iter != m_items.end();)
{
@ -225,80 +238,6 @@ Assembly& Assembly::optimise(bool _enable)
iter = m_items.erase(orig, iter);
}
}
for (unsigned i = 0; i < m_items.size(); ++i)
{
for (auto const& r: rules)
{
auto vr = AssemblyItemsConstRef(&m_items).cropped(i, r.first.size());
if (matches(vr, &r.first))
{
auto rw = r.second(vr);
if (rw.size() < vr.size())
{
copt << "Rule " << vr << " matches " << AssemblyItemsConstRef(&r.first) << " becomes...";
copt << AssemblyItemsConstRef(&rw) << "\n";
if (rw.size() > vr.size())
{
// create hole in the vector
unsigned sizeIncrease = rw.size() - vr.size();
m_items.resize(m_items.size() + sizeIncrease, AssemblyItem(UndefinedItem));
move_backward(m_items.begin() + i, m_items.end() - sizeIncrease, m_items.end());
}
else
m_items.erase(m_items.begin() + i + rw.size(), m_items.begin() + i + vr.size());
copy(rw.begin(), rw.end(), m_items.begin() + i);
count++;
copt << "Now:" << m_items;
}
}
}
if (m_items[i].type() == Operation && m_items[i].instruction() == Instruction::JUMP)
{
bool o = false;
while (m_items.size() > i + 1 && m_items[i + 1].type() != Tag)
{
m_items.erase(m_items.begin() + i + 1);
o = true;
}
if (o)
{
copt << "Jump with no tag. Now:\n" << m_items;
++count;
}
}
}
map<u256, unsigned> tags;
for (unsigned i = 0; i < m_items.size(); ++i)
if (m_items[i].type() == Tag)
tags.insert(make_pair(m_items[i].data(), i));
for (auto const& i: m_items)
if (i.type() == PushTag)
tags.erase(i.data());
if (!tags.empty())
{
auto t = *tags.begin();
unsigned i = t.second;
if (i && m_items[i - 1].type() == Operation && m_items[i - 1].instruction() == Instruction::JUMP)
while (i < m_items.size() && (m_items[i].type() != Tag || tags.count(m_items[i].data())))
{
if (m_items[i].type() == Tag && tags.count(m_items[i].data()))
tags.erase(m_items[i].data());
m_items.erase(m_items.begin() + i);
}
else
{
m_items.erase(m_items.begin() + i);
tags.erase(t.first);
}
copt << "Unused tag. Now:\n" << m_items;
++count;
}
}
copt << total << " optimisations done.";

260
libevmcore/ControlFlowGraph.cpp

@ -0,0 +1,260 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file ControlFlowGraph.cpp
* @author Christian <c@ethdev.com>
* @date 2015
* Control flow analysis for the optimizer.
*/
#include <libevmcore/ControlFlowGraph.h>
#include <map>
#include <libevmcore/Exceptions.h>
#include <libevmcore/AssemblyItem.h>
#include <libevmcore/SemanticInformation.h>
using namespace std;
using namespace dev;
using namespace dev::eth;
/// Creates a block ID from a tag number given as u256.
/// The number has to be smaller than the ID reserved for the initial block, which is
/// checked via assertThrow with OptimizerException.
BlockId::BlockId(u256 const& _id):
	m_id(_id)
{
	assertThrow(_id < initial().m_id, OptimizerException, "Tag number too large.");
}
/// Runs all analysis passes in order and returns the rebuilt item list.
/// An empty input is returned unchanged without running any pass.
AssemblyItems ControlFlowGraph::optimisedItems()
{
	if (!m_items.empty())
	{
		findLargestTag();
		splitBlocks();
		resolveNextLinks();
		removeUnusedBlocks();
		setPrevLinks();
		return rebuildCode();
	}
	return m_items;
}
void ControlFlowGraph::findLargestTag()
{
m_lastUsedId = 0;
for (auto const& item: m_items)
if (item.type() == Tag || item.type() == PushTag)
{
// Assert that it can be converted.
BlockId(item.data());
m_lastUsedId = max(unsigned(item.data()), m_lastUsedId);
}
}
void ControlFlowGraph::splitBlocks()
{
m_blocks.clear();
BlockId id = BlockId::initial();
m_blocks[id].begin = 0;
for (size_t index = 0; index < m_items.size(); ++index)
{
AssemblyItem const& item = m_items.at(index);
if (item.type() == Tag)
{
if (id)
m_blocks[id].end = index;
id = BlockId::invalid();
}
if (!id)
{
id = item.type() == Tag ? BlockId(item.data()) : generateNewId();
m_blocks[id].begin = index;
}
if (item.type() == PushTag)
m_blocks[id].pushedTags.push_back(BlockId(item.data()));
if (SemanticInformation::altersControlFlow(item))
{
m_blocks[id].end = index + 1;
if (item == Instruction::JUMP)
m_blocks[id].endType = BasicBlock::EndType::JUMP;
else if (item == Instruction::JUMPI)
m_blocks[id].endType = BasicBlock::EndType::JUMPI;
else
m_blocks[id].endType = BasicBlock::EndType::STOP;
id = BlockId::invalid();
}
}
if (id)
{
m_blocks[id].end = m_items.size();
if (m_blocks[id].endType == BasicBlock::EndType::HANDOVER)
m_blocks[id].endType = BasicBlock::EndType::STOP;
}
}
void ControlFlowGraph::resolveNextLinks()
{
map<unsigned, BlockId> blockByBeginPos;
for (auto const& idAndBlock: m_blocks)
if (idAndBlock.second.begin != idAndBlock.second.end)
blockByBeginPos[idAndBlock.second.begin] = idAndBlock.first;
for (auto& idAndBlock: m_blocks)
{
BasicBlock& block = idAndBlock.second;
switch (block.endType)
{
case BasicBlock::EndType::JUMPI:
case BasicBlock::EndType::HANDOVER:
assertThrow(
blockByBeginPos.count(block.end),
OptimizerException,
"Successor block not found."
);
block.next = blockByBeginPos.at(block.end);
break;
default:
break;
}
}
}
void ControlFlowGraph::removeUnusedBlocks()
{
vector<BlockId> blocksToProcess{BlockId::initial()};
set<BlockId> neededBlocks{BlockId::initial()};
while (!blocksToProcess.empty())
{
BasicBlock const& block = m_blocks.at(blocksToProcess.back());
blocksToProcess.pop_back();
for (BlockId tag: block.pushedTags)
if (!neededBlocks.count(tag))
{
neededBlocks.insert(tag);
blocksToProcess.push_back(tag);
}
if (block.next && !neededBlocks.count(block.next))
{
neededBlocks.insert(block.next);
blocksToProcess.push_back(block.next);
}
}
for (auto it = m_blocks.begin(); it != m_blocks.end();)
if (neededBlocks.count(it->first))
++it;
else
m_blocks.erase(it++);
}
void ControlFlowGraph::setPrevLinks()
{
for (auto& idAndBlock: m_blocks)
{
BasicBlock& block = idAndBlock.second;
switch (block.endType)
{
case BasicBlock::EndType::JUMPI:
case BasicBlock::EndType::HANDOVER:
assertThrow(
!m_blocks.at(block.next).prev,
OptimizerException,
"Successor already has predecessor."
);
m_blocks[block.next].prev = idAndBlock.first;
break;
default:
break;
}
}
// If block ends with jump to not yet linked block, link them removing the jump
for (auto& idAndBlock: m_blocks)
{
BlockId blockId = idAndBlock.first;
BasicBlock& block = idAndBlock.second;
if (block.endType != BasicBlock::EndType::JUMP || block.end - block.begin < 2)
continue;
AssemblyItem const& push = m_items.at(block.end - 2);
if (push.type() != PushTag)
continue;
BlockId nextId(push.data());
if (m_blocks.at(nextId).prev)
continue;
bool hasLoop = false;
for (BlockId id = nextId; id && !hasLoop; id = m_blocks.at(id).next)
hasLoop = (id == blockId);
if (hasLoop)
continue;
m_blocks[nextId].prev = blockId;
block.next = nextId;
block.end -= 2;
assertThrow(
!block.pushedTags.empty() && block.pushedTags.back() == nextId,
OptimizerException,
"Last pushed tag not at end of pushed list."
);
block.pushedTags.pop_back();
block.endType = BasicBlock::EndType::HANDOVER;
}
}
/// Assembles the new item list from the blocks remaining in m_blocks.
/// Starting with the initial block and then with the smallest ID not yet emitted, the
/// `prev` links are followed to the head of the chain and the whole chain is emitted along
/// its `next` links. A leading Tag is dropped if the block is entered by handover (or is
/// the first block emitted for the initial chain) and no remaining block pushes that tag.
/// @returns the rebuilt items.
AssemblyItems ControlFlowGraph::rebuildCode()
{
	// Number of times each tag is still pushed somewhere.
	map<BlockId, unsigned> pushes;
	for (auto const& idAndBlock: m_blocks)
		for (BlockId ref: idAndBlock.second.pushedTags)
			pushes[ref]++;

	// Iterate by reference: copying the map entries would copy every pushedTags vector.
	set<BlockId> blocksToAdd;
	for (auto const& idAndBlock: m_blocks)
		blocksToAdd.insert(idAndBlock.first);

	AssemblyItems code;
	for (
		BlockId blockId = BlockId::initial();
		blockId;
		blockId = blocksToAdd.empty() ? BlockId::invalid() : *blocksToAdd.begin()
	)
	{
		bool previousHandedOver = (blockId == BlockId::initial());
		// Rewind to the first block of the chain this block belongs to.
		while (m_blocks.at(blockId).prev)
			blockId = m_blocks.at(blockId).prev;
		for (; blockId; blockId = m_blocks.at(blockId).next)
		{
			BasicBlock const& block = m_blocks.at(blockId);
			blocksToAdd.erase(blockId);

			auto begin = m_items.begin() + block.begin;
			auto end = m_items.begin() + block.end;
			if (begin == end)
				continue;
			// If block starts with unused tag, skip it.
			// (count() is used so that the lookup does not insert into the map.)
			if (previousHandedOver && !pushes.count(blockId) && begin->type() == Tag)
				++begin;
			previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER);
			copy(begin, end, back_inserter(code));
		}
	}
	return code;
}
/// @returns a fresh block ID one above the largest ID used so far.
/// Asserts (OptimizerException) that the new ID is still below the reserved initial ID.
BlockId ControlFlowGraph::generateNewId()
{
	++m_lastUsedId;
	BlockId const freshId(m_lastUsedId);
	assertThrow(freshId < BlockId::initial(), OptimizerException, "Out of block IDs.");
	return freshId;
}

108
libevmcore/ControlFlowGraph.h

@ -0,0 +1,108 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file ControlFlowGraph.h
* @author Christian <c@ethdev.com>
* @date 2015
* Control flow analysis for the optimizer.
*/
#pragma once
#include <vector>
#include <libdevcore/Common.h>
#include <libdevcore/Assertions.h>
namespace dev
{
namespace eth
{
class AssemblyItem;
using AssemblyItems = std::vector<AssemblyItem>;
/**
 * Identifier for a block. It coincides with the tag number of an AssemblyItem, but two
 * additional values are reserved: one for the initial block and one for the invalid ID.
 */
class BlockId
{
public:
	/// Creates the invalid ID.
	BlockId(): m_id(invalid().m_id) {}
	explicit BlockId(unsigned _id): m_id(_id) {}
	explicit BlockId(u256 const& _id);

	/// @returns the ID reserved for the initial block.
	static BlockId initial() { return BlockId(-2); }
	/// @returns the ID that denotes "no block"; it converts to false.
	static BlockId invalid() { return BlockId(-1); }

	bool operator==(BlockId const& _other) const { return m_id == _other.m_id; }
	bool operator!=(BlockId const& _other) const { return !(*this == _other); }
	bool operator<(BlockId const& _other) const { return m_id < _other.m_id; }
	/// True for every ID except invalid().
	explicit operator bool() const { return m_id != invalid().m_id; }

private:
	unsigned m_id;
};
/**
 * Control flow block inside which the instruction counter is always incremented by one
 * (except for possibly the last instruction).
 */
struct BasicBlock
{
	/// The way a block passes on control at its end; HANDOVER is the default.
	enum class EndType { JUMP, JUMPI, STOP, HANDOVER };

	/// Start index into assembly item list.
	unsigned begin = 0;
	/// End index (excluded) into assembly item list.
	unsigned end = 0;
	/// Tags pushed inside this block, with multiplicity.
	std::vector<BlockId> pushedTags;
	/// ID of the block that always follows this one (either JUMP or flow into new block),
	/// or BlockId::invalid() otherwise
	BlockId next = BlockId::invalid();
	/// ID of the block that has to precede this one.
	BlockId prev = BlockId::invalid();
	EndType endType = EndType::HANDOVER;
};
/**
 * Splits a list of assembly items into basic blocks, removes the blocks that are not
 * reachable from the start and re-assembles the remaining ones.
 */
class ControlFlowGraph
{
public:
	/// Initializes the control flow graph.
	/// Only a reference to @a _items is stored, so it has to persist across the usage of
	/// this class.
	ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {}
	/// @returns the collection of optimised items, should be called only once.
	AssemblyItems optimisedItems();

private:
	/// Determines the largest tag number in use and stores it in m_lastUsedId.
	void findLargestTag();
	/// Fills m_blocks by cutting m_items at tags and control-flow altering items.
	void splitBlocks();
	/// Sets the `next` link of blocks that continue into the following block.
	void resolveNextLinks();
	/// Drops the blocks not reachable from the initial block.
	void removeUnusedBlocks();
	/// Sets the `prev` links and joins blocks connected by a direct jump.
	void setPrevLinks();
	/// @returns the item list assembled from the remaining blocks.
	AssemblyItems rebuildCode();

	/// @returns an ID that was not in use before.
	BlockId generateNewId();

	unsigned m_lastUsedId = 0;
	AssemblyItems const& m_items;
	std::map<BlockId, BasicBlock> m_blocks;
};
}
}

19
libevmcore/SemanticInformation.cpp

@ -103,3 +103,22 @@ bool SemanticInformation::isJumpInstruction(AssemblyItem const& _item)
{
return _item == AssemblyItem(Instruction::JUMP) || _item == AssemblyItem(Instruction::JUMPI);
}
/// @returns true iff @a _item is one of the operations JUMP, JUMPI, RETURN, SUICIDE or STOP,
/// i.e. execution does not (necessarily) continue at the following item.
bool SemanticInformation::altersControlFlow(AssemblyItem const& _item)
{
	if (_item.type() == Operation)
		switch (_item.instruction())
		{
		// note that CALL, CALLCODE and CREATE do not really alter the control flow, because we
		// continue on the next instruction (unless an exception happens which can always happen)
		case Instruction::JUMP:
		case Instruction::JUMPI:
		case Instruction::RETURN:
		case Instruction::SUICIDE:
		case Instruction::STOP:
			return true;
		default:
			break;
		}
	return false;
}

1
libevmcore/SemanticInformation.h

@ -44,6 +44,7 @@ struct SemanticInformation
static bool isDupInstruction(AssemblyItem const& _item);
static bool isSwapInstruction(AssemblyItem const& _item);
static bool isJumpInstruction(AssemblyItem const& _item);
static bool altersControlFlow(AssemblyItem const& _item);
};
}

80
test/SolidityOptimizer.cpp

@ -28,6 +28,7 @@
#include <boost/lexical_cast.hpp>
#include <test/solidityExecutionFramework.h>
#include <libevmcore/CommonSubexpressionEliminator.h>
#include <libevmcore/ControlFlowGraph.h>
#include <libevmcore/Assembly.h>
using namespace std;
@ -96,6 +97,18 @@ public:
BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end());
}
/// Runs the control flow optimiser repeatedly on @a _input and checks that the result
/// equals @a _expectation item by item.
void checkCFG(AssemblyItems const& _input, AssemblyItems const& _expectation)
{
	// Running it four times should be enough for these tests.
	unsigned const rounds = 4;
	AssemblyItems output = _input;
	for (unsigned round = 0; round < rounds; ++round)
	{
		eth::ControlFlowGraph cfg(output);
		output = cfg.optimisedItems();
	}
	BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end());
}
protected:
Address m_optimizedContract;
Address m_nonOptimizedContract;
@ -731,6 +744,73 @@ BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_noninterfering_store_in_between
BOOST_CHECK_EQUAL(1, count(output.begin(), output.end(), AssemblyItem(Instruction::SHA3)));
}
BOOST_AUTO_TEST_CASE(control_flow_graph_remove_unused)
{
	// The push between the jump and its target can never be executed and has to go.
	// Once the jump is replaced by a handover, nothing is left at all.
	AssemblyItems const items{
		AssemblyItem(PushTag, 1),
		Instruction::JUMP,
		u256(7),
		AssemblyItem(Tag, 1),
	};
	checkCFG(items, {});
}
BOOST_AUTO_TEST_CASE(control_flow_graph_remove_unused_loop)
{
	// Tags 1 and 2 only jump to each other and are not referenced from the reachable code,
	// so the whole loop has to be removed.
	AssemblyItems const items{
		AssemblyItem(PushTag, 3),
		Instruction::JUMP,
		AssemblyItem(Tag, 1),
		u256(7),
		AssemblyItem(PushTag, 2),
		Instruction::JUMP,
		AssemblyItem(Tag, 2),
		u256(8),
		AssemblyItem(PushTag, 1),
		Instruction::JUMP,
		AssemblyItem(Tag, 3),
		u256(11)
	};
	checkCFG(items, {u256(11)});
}
BOOST_AUTO_TEST_CASE(control_flow_graph_reconnect_single_jump_source)
{
	// move code that has only one unconditional jump source:
	// the execution order is start -> tag 1 -> tag 2 -> tag 3, so the pushes are expected
	// in the order 1, 3, 2, 4 with all jumps and tags gone.
	AssemblyItems const items{
		u256(1),
		AssemblyItem(PushTag, 1),
		Instruction::JUMP,
		AssemblyItem(Tag, 2),
		u256(2),
		AssemblyItem(PushTag, 3),
		Instruction::JUMP,
		AssemblyItem(Tag, 1),
		u256(3),
		AssemblyItem(PushTag, 2),
		Instruction::JUMP,
		AssemblyItem(Tag, 3),
		u256(4),
	};
	checkCFG(items, {u256(1), u256(3), u256(2), u256(4)});
}
BOOST_AUTO_TEST_CASE(control_flow_graph_do_not_remove_returned_to)
{
	// do not remove parts that are "returned to":
	// tag 1 is pushed at the very beginning and only reached by the bare JUMP in the block
	// of tag 2, so the block of tag 1 has to be kept although no "PushTag 1; JUMP" pair
	// exists. The destination therefore has to be defined as a Tag item (a second PushTag
	// would reference a block that does not exist).
	AssemblyItems input{
		AssemblyItem(PushTag, 1),
		AssemblyItem(PushTag, 2),
		Instruction::JUMP,
		AssemblyItem(Tag, 2),
		Instruction::JUMP,
		AssemblyItem(Tag, 1),
		u256(2)
	};
	checkCFG(input, {u256(2)});
}
BOOST_AUTO_TEST_SUITE_END()
}

Loading…
Cancel
Save