From ae366e7d051d8a8f94a9377c22ab606f35e73d97 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 2 Apr 2015 19:20:30 +0200 Subject: [PATCH] Control flow analysis. --- libevmcore/Assembly.cpp | 87 ++-------- libevmcore/ControlFlowGraph.cpp | 260 +++++++++++++++++++++++++++++ libevmcore/ControlFlowGraph.h | 108 ++++++++++++ libevmcore/SemanticInformation.cpp | 19 +++ libevmcore/SemanticInformation.h | 1 + test/SolidityOptimizer.cpp | 80 +++++++++ 6 files changed, 481 insertions(+), 74 deletions(-) create mode 100644 libevmcore/ControlFlowGraph.cpp create mode 100644 libevmcore/ControlFlowGraph.h diff --git a/libevmcore/Assembly.cpp b/libevmcore/Assembly.cpp index 904903761..bf4ea2145 100644 --- a/libevmcore/Assembly.cpp +++ b/libevmcore/Assembly.cpp @@ -23,6 +23,7 @@ #include #include #include +#include using namespace std; using namespace dev; @@ -197,6 +198,18 @@ Assembly& Assembly::optimise(bool _enable) copt << *this; count = 0; + copt << "Performing control flow analysis..."; + { + ControlFlowGraph cfg(m_items); + AssemblyItems optItems = cfg.optimisedItems(); + if (optItems.size() < m_items.size()) + { + copt << "Old size: " << m_items.size() << ", new size: " << optItems.size(); + m_items = move(optItems); + count++; + } + } + copt << "Performing common subexpression elimination..."; for (auto iter = m_items.begin(); iter != m_items.end();) { @@ -225,80 +238,6 @@ Assembly& Assembly::optimise(bool _enable) iter = m_items.erase(orig, iter); } } - - for (unsigned i = 0; i < m_items.size(); ++i) - { - for (auto const& r: rules) - { - auto vr = AssemblyItemsConstRef(&m_items).cropped(i, r.first.size()); - if (matches(vr, &r.first)) - { - auto rw = r.second(vr); - if (rw.size() < vr.size()) - { - copt << "Rule " << vr << " matches " << AssemblyItemsConstRef(&r.first) << " becomes..."; - copt << AssemblyItemsConstRef(&rw) << "\n"; - if (rw.size() > vr.size()) - { - // create hole in the vector - unsigned sizeIncrease = rw.size() - vr.size(); - 
m_items.resize(m_items.size() + sizeIncrease, AssemblyItem(UndefinedItem)); - move_backward(m_items.begin() + i, m_items.end() - sizeIncrease, m_items.end()); - } - else - m_items.erase(m_items.begin() + i + rw.size(), m_items.begin() + i + vr.size()); - - copy(rw.begin(), rw.end(), m_items.begin() + i); - - count++; - copt << "Now:" << m_items; - } - } - } - if (m_items[i].type() == Operation && m_items[i].instruction() == Instruction::JUMP) - { - bool o = false; - while (m_items.size() > i + 1 && m_items[i + 1].type() != Tag) - { - m_items.erase(m_items.begin() + i + 1); - o = true; - } - if (o) - { - copt << "Jump with no tag. Now:\n" << m_items; - ++count; - } - } - } - - map tags; - for (unsigned i = 0; i < m_items.size(); ++i) - if (m_items[i].type() == Tag) - tags.insert(make_pair(m_items[i].data(), i)); - - for (auto const& i: m_items) - if (i.type() == PushTag) - tags.erase(i.data()); - - if (!tags.empty()) - { - auto t = *tags.begin(); - unsigned i = t.second; - if (i && m_items[i - 1].type() == Operation && m_items[i - 1].instruction() == Instruction::JUMP) - while (i < m_items.size() && (m_items[i].type() != Tag || tags.count(m_items[i].data()))) - { - if (m_items[i].type() == Tag && tags.count(m_items[i].data())) - tags.erase(m_items[i].data()); - m_items.erase(m_items.begin() + i); - } - else - { - m_items.erase(m_items.begin() + i); - tags.erase(t.first); - } - copt << "Unused tag. Now:\n" << m_items; - ++count; - } } copt << total << " optimisations done."; diff --git a/libevmcore/ControlFlowGraph.cpp b/libevmcore/ControlFlowGraph.cpp new file mode 100644 index 000000000..ca20a8fb0 --- /dev/null +++ b/libevmcore/ControlFlowGraph.cpp @@ -0,0 +1,260 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ControlFlowGraph.cpp + * @author Christian + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#include +#include +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +BlockId::BlockId(u256 const& _id): m_id(_id) +{ + assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); +} + +AssemblyItems ControlFlowGraph::optimisedItems() +{ + if (m_items.empty()) + return m_items; + + findLargestTag(); + splitBlocks(); + resolveNextLinks(); + removeUnusedBlocks(); + setPrevLinks(); + + return rebuildCode(); +} + +void ControlFlowGraph::findLargestTag() +{ + m_lastUsedId = 0; + for (auto const& item: m_items) + if (item.type() == Tag || item.type() == PushTag) + { + // Constructing a BlockId asserts that the tag number is small enough to be converted. + BlockId(item.data()); + m_lastUsedId = max(unsigned(item.data()), m_lastUsedId); + } +} + +void ControlFlowGraph::splitBlocks() +{ + m_blocks.clear(); + BlockId id = BlockId::initial(); + m_blocks[id].begin = 0; + for (size_t index = 0; index < m_items.size(); ++index) + { + AssemblyItem const& item = m_items.at(index); + if (item.type() == Tag) + { + if (id) + m_blocks[id].end = index; + id = BlockId::invalid(); + } + if (!id) + { + id = item.type() == Tag ? 
BlockId(item.data()) : generateNewId(); + m_blocks[id].begin = index; + } + if (item.type() == PushTag) + m_blocks[id].pushedTags.push_back(BlockId(item.data())); + if (SemanticInformation::altersControlFlow(item)) + { + m_blocks[id].end = index + 1; + if (item == Instruction::JUMP) + m_blocks[id].endType = BasicBlock::EndType::JUMP; + else if (item == Instruction::JUMPI) + m_blocks[id].endType = BasicBlock::EndType::JUMPI; + else + m_blocks[id].endType = BasicBlock::EndType::STOP; + id = BlockId::invalid(); + } + } + if (id) + { + m_blocks[id].end = m_items.size(); + if (m_blocks[id].endType == BasicBlock::EndType::HANDOVER) + m_blocks[id].endType = BasicBlock::EndType::STOP; + } +} + +void ControlFlowGraph::resolveNextLinks() +{ + map blockByBeginPos; + for (auto const& idAndBlock: m_blocks) + if (idAndBlock.second.begin != idAndBlock.second.end) + blockByBeginPos[idAndBlock.second.begin] = idAndBlock.first; + + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + blockByBeginPos.count(block.end), + OptimizerException, + "Successor block not found." 
+ ); + block.next = blockByBeginPos.at(block.end); + break; + default: + break; + } + } +} + +void ControlFlowGraph::removeUnusedBlocks() +{ + vector blocksToProcess{BlockId::initial()}; + set neededBlocks{BlockId::initial()}; + while (!blocksToProcess.empty()) + { + BasicBlock const& block = m_blocks.at(blocksToProcess.back()); + blocksToProcess.pop_back(); + for (BlockId tag: block.pushedTags) + if (!neededBlocks.count(tag)) + { + neededBlocks.insert(tag); + blocksToProcess.push_back(tag); + } + if (block.next && !neededBlocks.count(block.next)) + { + neededBlocks.insert(block.next); + blocksToProcess.push_back(block.next); + } + } + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (neededBlocks.count(it->first)) + ++it; + else + m_blocks.erase(it++); +} + +void ControlFlowGraph::setPrevLinks() +{ + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + !m_blocks.at(block.next).prev, + OptimizerException, + "Successor already has predecessor." 
+ ); + m_blocks[block.next].prev = idAndBlock.first; + break; + default: + break; + } + } + // If a block ends with PushTag+JUMP to a block that has no predecessor yet (and linking would not form a cycle), link the two and remove the jump. + for (auto& idAndBlock: m_blocks) + { + BlockId blockId = idAndBlock.first; + BasicBlock& block = idAndBlock.second; + if (block.endType != BasicBlock::EndType::JUMP || block.end - block.begin < 2) + continue; + AssemblyItem const& push = m_items.at(block.end - 2); + if (push.type() != PushTag) + continue; + BlockId nextId(push.data()); + if (m_blocks.at(nextId).prev) + continue; + bool hasLoop = false; + for (BlockId id = nextId; id && !hasLoop; id = m_blocks.at(id).next) + hasLoop = (id == blockId); + if (hasLoop) + continue; + + m_blocks[nextId].prev = blockId; + block.next = nextId; + block.end -= 2; + assertThrow( + !block.pushedTags.empty() && block.pushedTags.back() == nextId, + OptimizerException, + "Last pushed tag not at end of pushed list." + ); + block.pushedTags.pop_back(); + block.endType = BasicBlock::EndType::HANDOVER; + } +} + +AssemblyItems ControlFlowGraph::rebuildCode() +{ + map pushes; + for (auto& idAndBlock: m_blocks) + for (BlockId ref: idAndBlock.second.pushedTags) + pushes[ref]++; + + set blocksToAdd; + for (auto it: m_blocks) + blocksToAdd.insert(it.first); + set blocksAdded; + AssemblyItems code; + + for ( + BlockId blockId = BlockId::initial(); + blockId; + blockId = blocksToAdd.empty() ? 
BlockId::invalid() : *blocksToAdd.begin() + ) + { + bool previousHandedOver = (blockId == BlockId::initial()); + while (m_blocks.at(blockId).prev) + blockId = m_blocks.at(blockId).prev; + for (; blockId; blockId = m_blocks.at(blockId).next) + { + BasicBlock const& block = m_blocks.at(blockId); + blocksToAdd.erase(blockId); + blocksAdded.insert(blockId); + + auto begin = m_items.begin() + block.begin; + auto end = m_items.begin() + block.end; + if (begin == end) + continue; + // If this block is entered by hand-over (or is the first block of the code) and starts with a tag that is never pushed, skip the tag. 
+ if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) + ++begin; + previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); + copy(begin, end, back_inserter(code)); + } + } + + return code; +} + +BlockId ControlFlowGraph::generateNewId() +{ + BlockId id = BlockId(++m_lastUsedId); + assertThrow(id < BlockId::initial(), OptimizerException, "Out of block IDs."); + return id; +} diff --git a/libevmcore/ControlFlowGraph.h b/libevmcore/ControlFlowGraph.h new file mode 100644 index 000000000..5d16df327 --- /dev/null +++ b/libevmcore/ControlFlowGraph.h @@ -0,0 +1,108 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ControlFlowGraph.h + * @author Christian + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#pragma once + +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; + +/** + * Identifier for a block, coincides with the tag number of an AssemblyItem but adds special + * IDs for the initial block and for the invalid (no block) case. 
+ */ +class BlockId +{ +public: + BlockId() { *this = invalid(); } + explicit BlockId(unsigned _id): m_id(_id) {} + explicit BlockId(u256 const& _id); + static BlockId initial() { return BlockId(-2); } + static BlockId invalid() { return BlockId(-1); } + + bool operator==(BlockId const& _other) const { return m_id == _other.m_id; } + bool operator!=(BlockId const& _other) const { return m_id != _other.m_id; } + bool operator<(BlockId const& _other) const { return m_id < _other.m_id; } + explicit operator bool() const { return *this != invalid(); } + +private: + unsigned m_id; +}; + +/** + * Control flow block inside which instruction counter is always incremented by one + * (except for possibly the last instruction). + */ +struct BasicBlock +{ + /// Start index into assembly item list. + unsigned begin = 0; + /// End index (excluded) into assembly item list. + unsigned end = 0; + /// Tags pushed inside this block, with multiplicity. + std::vector pushedTags; + /// ID of the block that always follows this one (either JUMP or flow into new block), + /// or BlockId::invalid() otherwise + BlockId next = BlockId::invalid(); + /// ID of the block that has to precede this one. + BlockId prev = BlockId::invalid(); + + enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; + EndType endType = EndType::HANDOVER; +}; + +class ControlFlowGraph +{ +public: + /// Initializes the control flow graph. + /// @a _items has to persist across the usage of this class. + ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {} + /// @returns the collection of optimised items, should be called only once. 
+ AssemblyItems optimisedItems(); + +private: + void findLargestTag(); + void splitBlocks(); + void resolveNextLinks(); + void removeUnusedBlocks(); + void setPrevLinks(); + AssemblyItems rebuildCode(); + + BlockId generateNewId(); + + unsigned m_lastUsedId = 0; + AssemblyItems const& m_items; + std::map m_blocks; +}; + + +} +} diff --git a/libevmcore/SemanticInformation.cpp b/libevmcore/SemanticInformation.cpp index 139c99ce2..3abb26dd9 100644 --- a/libevmcore/SemanticInformation.cpp +++ b/libevmcore/SemanticInformation.cpp @@ -103,3 +103,22 @@ bool SemanticInformation::isJumpInstruction(AssemblyItem const& _item) { return _item == AssemblyItem(Instruction::JUMP) || _item == AssemblyItem(Instruction::JUMPI); } + +bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + // Note that CALL, CALLCODE and CREATE do not really alter the control flow, because we + // continue with the next instruction (unless an exception happens, which is always possible). 
+ case Instruction::JUMP: + case Instruction::JUMPI: + case Instruction::RETURN: + case Instruction::SUICIDE: + case Instruction::STOP: + return true; + default: + return false; + } +} diff --git a/libevmcore/SemanticInformation.h b/libevmcore/SemanticInformation.h index 7497dc651..27aa6f1a4 100644 --- a/libevmcore/SemanticInformation.h +++ b/libevmcore/SemanticInformation.h @@ -44,6 +44,7 @@ struct SemanticInformation static bool isDupInstruction(AssemblyItem const& _item); static bool isSwapInstruction(AssemblyItem const& _item); static bool isJumpInstruction(AssemblyItem const& _item); + static bool altersControlFlow(AssemblyItem const& _item); }; } diff --git a/test/SolidityOptimizer.cpp b/test/SolidityOptimizer.cpp index f523847f1..450ae3c22 100644 --- a/test/SolidityOptimizer.cpp +++ b/test/SolidityOptimizer.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include using namespace std; @@ -96,6 +97,18 @@ 
public: BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end()); } + void checkCFG(AssemblyItems const& _input, AssemblyItems const& _expectation) + { + AssemblyItems output = _input; + // Running it four times should be enough for these tests. + for (unsigned i = 0; i < 4; ++i) + { + eth::ControlFlowGraph cfg(output); + output = cfg.optimisedItems(); + } + BOOST_CHECK_EQUAL_COLLECTIONS(_expectation.begin(), _expectation.end(), output.begin(), output.end()); + } + protected: Address m_optimizedContract; Address m_nonOptimizedContract; @@ -731,6 +744,73 @@ BOOST_AUTO_TEST_CASE(cse_sha3_twice_same_content_noninterfering_store_in_between BOOST_CHECK_EQUAL(1, count(output.begin(), output.end(), AssemblyItem(Instruction::SHA3))); } +BOOST_AUTO_TEST_CASE(control_flow_graph_remove_unused) +{ + // remove parts of the code that are unused + AssemblyItems input{ + AssemblyItem(PushTag, 1), + Instruction::JUMP, + u256(7), + AssemblyItem(Tag, 1), + }; + checkCFG(input, {}); +} + +BOOST_AUTO_TEST_CASE(control_flow_graph_remove_unused_loop) +{ + AssemblyItems input{ + AssemblyItem(PushTag, 3), + Instruction::JUMP, + AssemblyItem(Tag, 1), + u256(7), + AssemblyItem(PushTag, 2), + Instruction::JUMP, + AssemblyItem(Tag, 2), + u256(8), + AssemblyItem(PushTag, 1), + Instruction::JUMP, + AssemblyItem(Tag, 3), + u256(11) + }; + checkCFG(input, {u256(11)}); +} + +BOOST_AUTO_TEST_CASE(control_flow_graph_reconnect_single_jump_source) +{ + // move code that has only one unconditional jump source + AssemblyItems input{ + u256(1), + AssemblyItem(PushTag, 1), + Instruction::JUMP, + AssemblyItem(Tag, 2), + u256(2), + AssemblyItem(PushTag, 3), + Instruction::JUMP, + AssemblyItem(Tag, 1), + u256(3), + AssemblyItem(PushTag, 2), + Instruction::JUMP, + AssemblyItem(Tag, 3), + u256(4), + }; + checkCFG(input, {u256(1), u256(3), u256(2), u256(4)}); +} + +BOOST_AUTO_TEST_CASE(control_flow_graph_do_not_remove_returned_to) +{ + // do not remove parts that are 
"returned to" (tag 1 is only pushed, then reached via the JUMP at tag 2) + AssemblyItems input{ + AssemblyItem(PushTag, 1), + AssemblyItem(PushTag, 2), + Instruction::JUMP, + AssemblyItem(Tag, 2), + Instruction::JUMP, + AssemblyItem(Tag, 1), + u256(2) + }; + checkCFG(input, {u256(2)}); +} + BOOST_AUTO_TEST_SUITE_END() }