/* This file is part of cpp-ethereum. cpp-ethereum is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. cpp-ethereum is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. */ /** * @file ControlFlowGraph.cpp * @author Christian <c@ethdev.com> * @date 2015 * Control flow analysis for the optimizer. */ #include <libevmasm/ControlFlowGraph.h> #include <map> #include <memory> #include <algorithm> #include <libevmasm/Exceptions.h> #include <libevmasm/AssemblyItem.h> #include <libevmasm/SemanticInformation.h> #include <libevmasm/KnownState.h> using namespace std; using namespace dev; using namespace dev::eth; BlockId::BlockId(u256 const& _id): m_id(_id) { assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); } BasicBlocks ControlFlowGraph::optimisedBlocks() { if (m_items.empty()) return BasicBlocks(); findLargestTag(); splitBlocks(); resolveNextLinks(); removeUnusedBlocks(); setPrevLinks(); gatherKnowledge(); return rebuildCode(); } void ControlFlowGraph::findLargestTag() { m_lastUsedId = 0; for (auto const& item: m_items) if (item.type() == Tag || item.type() == PushTag) { // Assert that it can be converted. BlockId(item.data()); m_lastUsedId = max(unsigned(item.data()), m_lastUsedId); } } void ControlFlowGraph::splitBlocks() { m_blocks.clear(); BlockId id = BlockId::initial(); m_blocks[id].begin = 0; for (size_t index = 0; index < m_items.size(); ++index) { AssemblyItem const& item = m_items.at(index); if (item.type() == Tag) { if (id) m_blocks[id].end = index; id = BlockId::invalid(); } if (!id) { id = item.type() == Tag ? BlockId(item.data()) : generateNewId(); m_blocks[id].begin = index; } if (item.type() == PushTag) m_blocks[id].pushedTags.push_back(BlockId(item.data())); if (SemanticInformation::altersControlFlow(item)) { m_blocks[id].end = index + 1; if (item == Instruction::JUMP) m_blocks[id].endType = BasicBlock::EndType::JUMP; else if (item == Instruction::JUMPI) m_blocks[id].endType = BasicBlock::EndType::JUMPI; else m_blocks[id].endType = BasicBlock::EndType::STOP; id = BlockId::invalid(); } } if (id) { m_blocks[id].end = m_items.size(); if (m_blocks[id].endType == BasicBlock::EndType::HANDOVER) m_blocks[id].endType = BasicBlock::EndType::STOP; } } void ControlFlowGraph::resolveNextLinks() { map<unsigned, BlockId> blockByBeginPos; for (auto const& idAndBlock: m_blocks) if (idAndBlock.second.begin != idAndBlock.second.end) blockByBeginPos[idAndBlock.second.begin] = idAndBlock.first; for (auto& idAndBlock: m_blocks) { BasicBlock& block = idAndBlock.second; switch (block.endType) { case BasicBlock::EndType::JUMPI: case BasicBlock::EndType::HANDOVER: assertThrow( blockByBeginPos.count(block.end), OptimizerException, "Successor block not found." ); block.next = blockByBeginPos.at(block.end); break; default: break; } } } void ControlFlowGraph::removeUnusedBlocks() { vector<BlockId> blocksToProcess{BlockId::initial()}; set<BlockId> neededBlocks{BlockId::initial()}; while (!blocksToProcess.empty()) { BasicBlock const& block = m_blocks.at(blocksToProcess.back()); blocksToProcess.pop_back(); for (BlockId tag: block.pushedTags) if (!neededBlocks.count(tag) && m_blocks.count(tag)) { neededBlocks.insert(tag); blocksToProcess.push_back(tag); } if (block.next && !neededBlocks.count(block.next)) { neededBlocks.insert(block.next); blocksToProcess.push_back(block.next); } } for (auto it = m_blocks.begin(); it != m_blocks.end();) if (neededBlocks.count(it->first)) ++it; else m_blocks.erase(it++); } void ControlFlowGraph::setPrevLinks() { for (auto& idAndBlock: m_blocks) { BasicBlock& block = idAndBlock.second; switch (block.endType) { case BasicBlock::EndType::JUMPI: case BasicBlock::EndType::HANDOVER: assertThrow( !m_blocks.at(block.next).prev, OptimizerException, "Successor already has predecessor." ); m_blocks[block.next].prev = idAndBlock.first; break; default: break; } } // If block ends with jump to not yet linked block, link them removing the jump for (auto& idAndBlock: m_blocks) { BlockId blockId = idAndBlock.first; BasicBlock& block = idAndBlock.second; if (block.endType != BasicBlock::EndType::JUMP || block.end - block.begin < 2) continue; AssemblyItem const& push = m_items.at(block.end - 2); if (push.type() != PushTag) continue; BlockId nextId(push.data()); if (m_blocks.count(nextId) && m_blocks.at(nextId).prev) continue; bool hasLoop = false; for (BlockId id = nextId; id && m_blocks.count(id) && !hasLoop; id = m_blocks.at(id).next) hasLoop = (id == blockId); if (hasLoop || !m_blocks.count(nextId)) continue; m_blocks[nextId].prev = blockId; block.next = nextId; block.end -= 2; assertThrow( !block.pushedTags.empty() && block.pushedTags.back() == nextId, OptimizerException, "Last pushed tag not at end of pushed list." ); block.pushedTags.pop_back(); block.endType = BasicBlock::EndType::HANDOVER; } } void ControlFlowGraph::gatherKnowledge() { // @todo actually we know that memory is filled with zeros at the beginning, // we could make use of that. KnownStatePointer emptyState = make_shared<KnownState>(); bool unknownJumpEncountered = false; vector<pair<BlockId, KnownStatePointer>> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); while (!workQueue.empty()) { //@todo we might have to do something like incrementing the sequence number for each JUMPDEST assertThrow(!!workQueue.back().first, OptimizerException, ""); if (!m_blocks.count(workQueue.back().first)) { workQueue.pop_back(); continue; // too bad, we do not know the tag, probably an invalid jump } BasicBlock& block = m_blocks.at(workQueue.back().first); KnownStatePointer state = workQueue.back().second; workQueue.pop_back(); if (block.startState) { state->reduceToCommonKnowledge(*block.startState); if (*state == *block.startState) continue; } block.startState = state->copy(); // Feed all items except for the final jump yet because it will erase the target tag. unsigned pc = block.begin; while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc))) state->feedItem(m_items.at(pc++)); if ( block.endType == BasicBlock::EndType::JUMP || block.endType == BasicBlock::EndType::JUMPI ) { assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, ""); //@todo in the case of JUMPI, add knowledge about the condition to the state // (for both values of the condition) set<u256> tags = state->tagsInExpression( state->stackElement(state->stackHeight(), SourceLocation()) ); state->feedItem(m_items.at(pc++)); if (tags.empty()) { if (!unknownJumpEncountered) { // We do not know the target of this jump, so we have to reset the states of all // JUMPDESTs. unknownJumpEncountered = true; for (auto const& it: m_blocks) if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) workQueue.push_back(make_pair(it.first, emptyState->copy())); } } else for (auto tag: tags) workQueue.push_back(make_pair(BlockId(tag), state->copy())); } else if (block.begin <= pc && pc < block.end) state->feedItem(m_items.at(pc++)); assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, ""); block.endState = state; if ( block.endType == BasicBlock::EndType::HANDOVER || block.endType == BasicBlock::EndType::JUMPI ) workQueue.push_back(make_pair(block.next, state->copy())); } // Remove all blocks we never visited here. This might happen because a tag is pushed but // never used for a JUMP. // Note that this invalidates some contents of pushedTags for (auto it = m_blocks.begin(); it != m_blocks.end();) if (!it->second.startState) it = m_blocks.erase(it); else it++; } BasicBlocks ControlFlowGraph::rebuildCode() { map<BlockId, unsigned> pushes; for (auto& idAndBlock: m_blocks) for (BlockId ref: idAndBlock.second.pushedTags) if (m_blocks.count(ref)) pushes[ref]++; set<BlockId> blocksToAdd; for (auto it: m_blocks) blocksToAdd.insert(it.first); set<BlockId> blocksAdded; BasicBlocks blocks; for ( BlockId blockId = BlockId::initial(); blockId; blockId = blocksToAdd.empty() ? BlockId::invalid() : *blocksToAdd.begin() ) { bool previousHandedOver = (blockId == BlockId::initial()); while (m_blocks.at(blockId).prev) blockId = m_blocks.at(blockId).prev; for (; blockId; blockId = m_blocks.at(blockId).next) { BasicBlock& block = m_blocks.at(blockId); blocksToAdd.erase(blockId); blocksAdded.insert(blockId); if (block.begin == block.end) continue; // If block starts with unused tag, skip it. if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag) ++block.begin; if (block.begin < block.end) { blocks.push_back(block); blocks.back().startState->clearTagUnions(); blocks.back().endState->clearTagUnions(); } previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); } } return blocks; } BlockId ControlFlowGraph::generateNewId() { BlockId id = BlockId(++m_lastUsedId); assertThrow(id < BlockId::initial(), OptimizerException, "Out of block IDs."); return id; }