From 2b36803c969d8291dfdb54afffa2d07cdfe4b85f Mon Sep 17 00:00:00 2001 From: artur-zawlocki Date: Fri, 10 Oct 2014 10:01:26 +0100 Subject: [PATCH] Implementing JUMP/JUMPDEST (work in progress) --- evmcc/BasicBlock.cpp | 9 ++- evmcc/BasicBlock.h | 3 +- evmcc/Compiler.cpp | 179 ++++++++++++++++++++++++++++++++----------- evmcc/Compiler.h | 20 ++++- 4 files changed, 161 insertions(+), 50 deletions(-) diff --git a/evmcc/BasicBlock.cpp b/evmcc/BasicBlock.cpp index e5cdf386f..c7c98aca5 100644 --- a/evmcc/BasicBlock.cpp +++ b/evmcc/BasicBlock.cpp @@ -16,6 +16,13 @@ BasicBlock::BasicBlock(ProgramCounter _beginInstIdx, ProgramCounter _endInstIdx, m_stack(m_llvmBB) {} +BasicBlock::BasicBlock(std::string _name, llvm::Function* _mainFunc) : + m_beginInstIdx(0), + m_endInstIdx(0), + m_llvmBB(llvm::BasicBlock::Create(_mainFunc->getContext(), _name, _mainFunc)), + m_stack(m_llvmBB) +{} + void BasicBlock::Stack::push(llvm::Value* _value) { @@ -49,4 +56,4 @@ void BasicBlock::Stack::swap(size_t _index) std::swap(get(0), get(_index)); } -} \ No newline at end of file +} diff --git a/evmcc/BasicBlock.h b/evmcc/BasicBlock.h index 500ddb95b..3c47db328 100644 --- a/evmcc/BasicBlock.h +++ b/evmcc/BasicBlock.h @@ -50,6 +50,7 @@ public: static const char* NamePrefix; explicit BasicBlock(ProgramCounter _beginInstIdx, ProgramCounter _endInstIdx, llvm::Function* _mainFunc); + explicit BasicBlock(std::string _name, llvm::Function* _mainFunc); BasicBlock(const BasicBlock&) = delete; void operator=(const BasicBlock&) = delete; @@ -72,4 +73,4 @@ private: Stack m_stack; }; -} \ No newline at end of file +} diff --git a/evmcc/Compiler.cpp b/evmcc/Compiler.cpp index 880487705..02c391251 100644 --- a/evmcc/Compiler.cpp +++ b/evmcc/Compiler.cpp @@ -1,6 +1,8 @@ #include "Compiler.h" +#include + #include #include @@ -14,17 +16,19 @@ namespace evmcc struct { - llvm::Type* word8; + llvm::IntegerType* word8; llvm::Type* word8ptr; - llvm::Type* word256; + llvm::IntegerType* word256; llvm::Type* word256ptr; 
llvm::Type* word256arr; - llvm::Type* size; + llvm::IntegerType* size; llvm::Type* Void; llvm::Type* WordLowPrecision; } Types; Compiler::Compiler() + : m_finalBlock(nullptr) + , m_badJumpBlock(nullptr) { auto& context = llvm::getGlobalContext(); Types.word8 = llvm::Type::getInt8Ty(context); @@ -39,15 +43,31 @@ Compiler::Compiler() Types.WordLowPrecision = llvm::Type::getIntNTy(context, 64); } +namespace +{ + void validateSplitPoints(const dev::bytes& bytecode, std::set splitPoints) + { + + } +} + void Compiler::createBasicBlocks(const dev::bytes& bytecode) { - std::set splitPoints; // Sorted collections of instruction indecies where basic blocks start/end + std::set splitPoints; // Sorted collections of instruction indices where basic blocks start/end splitPoints.insert(0); // First basic block + std::map directJumpTargets; + std::vector indirectJumpTargets; + + boost::dynamic_bitset<> validJumpTargets(bytecode.size()); + for (auto curr = bytecode.cbegin(); curr != bytecode.cend(); ++curr) { using dev::eth::Instruction; + ProgramCounter currentPC = curr - bytecode.cbegin(); + validJumpTargets[currentPC] = 1; + auto inst = static_cast(*curr); switch (inst) { @@ -86,7 +106,7 @@ void Compiler::createBasicBlocks(const dev::bytes& bytecode) { auto numBytes = static_cast(inst) - static_cast(Instruction::PUSH1) + 1; auto next = curr + numBytes + 1; - if (next == bytecode.cend()) + if (next >= bytecode.cend()) break; auto nextInst = static_cast(*next); @@ -101,34 +121,30 @@ void Compiler::createBasicBlocks(const dev::bytes& bytecode) val |= *iter; } - // Create a block following the JUMP. - if (next + 1 < bytecode.cend()) - { - ProgramCounter nextPC = (next + 1 - bytecode.cbegin()); - splitPoints.insert(nextPC); - } - // Create a block for the JUMP target. 
ProgramCounter targetPC = val.convert_to(); + if (targetPC > bytecode.size()) + targetPC = bytecode.size(); splitPoints.insert(targetPC); ProgramCounter jumpPC = (next - bytecode.cbegin()); - jumpTargets[jumpPC] = targetPC; - - curr += 1; // skip over JUMP + directJumpTargets[jumpPC] = targetPC; } curr += numBytes; break; } - case Instruction::JUMP: - case Instruction::JUMPI: + case Instruction::JUMPDEST: { - std::cerr << "JUMP/JUMPI at " << (curr - bytecode.cbegin()) << " not preceded by PUSH\n"; - std::exit(1); + // A basic block starts here. + splitPoints.insert(currentPC); + indirectJumpTargets.push_back(currentPC); + break; } + case Instruction::JUMP: + case Instruction::JUMPI: case Instruction::RETURN: case Instruction::STOP: case Instruction::SUICIDE: @@ -136,8 +152,7 @@ void Compiler::createBasicBlocks(const dev::bytes& bytecode) // Create a basic block starting at the following instruction. if (curr + 1 < bytecode.cend()) { - ProgramCounter nextPC = (curr + 1 - bytecode.cbegin()); - splitPoints.insert(nextPC); + splitPoints.insert(currentPC + 1); } break; } @@ -147,14 +162,40 @@ void Compiler::createBasicBlocks(const dev::bytes& bytecode) } } - splitPoints.insert(bytecode.size()); // For final block - for (auto it = splitPoints.cbegin(); it != splitPoints.cend();) + for (auto it = splitPoints.cbegin(); it != splitPoints.cend() && *it < bytecode.size(); ++it) + { + if (! validJumpTargets[*it]) + { + std::cerr << "Jump to invalid PC " << *it << "\n"; + std::exit(1); + } + } + + for (auto it = splitPoints.cbegin(); it != splitPoints.cend() && *it < bytecode.size();) { auto beginInstIdx = *it; ++it; - auto endInstIdx = it != splitPoints.cend() ? *it : beginInstIdx; // For final block + auto endInstIdx = it != splitPoints.cend() ? 
*it : bytecode.size(); basicBlocks.emplace(std::piecewise_construct, std::forward_as_tuple(beginInstIdx), std::forward_as_tuple(beginInstIdx, endInstIdx, m_mainFunc)); } + + m_finalBlock = std::make_unique("FinalBlock", m_mainFunc); + m_badJumpBlock = std::make_unique("BadJumpBlock", m_mainFunc); + + for (auto it = directJumpTargets.cbegin(); it != directJumpTargets.cend(); ++it) + { + if (it->second >= bytecode.size()) + m_directJumpTargets[it->first] = m_finalBlock.get(); + else + m_directJumpTargets[it->first] = &basicBlocks.find(it->second)->second; + } + for (auto it = indirectJumpTargets.cbegin(); it != indirectJumpTargets.cend(); ++it) + { + if (*it >= bytecode.size()) + m_indirectJumpTargets.push_back(m_finalBlock.get()); + else + m_indirectJumpTargets.push_back(&basicBlocks.find(*it)->second); + } } std::unique_ptr Compiler::compile(const dev::bytes& bytecode) @@ -559,30 +600,69 @@ std::unique_ptr Compiler::compile(const dev::bytes& bytecode) } case Instruction::JUMP: + case Instruction::JUMPI: { - // The target address is computed at compile time, - // just pop it without looking... - stack.pop(); + // Generate direct jump iff: + // 1. this is not the first instruction in the block + // 2. m_directJumpTargets[currentPC] is defined (meaning that the previous instruction is a PUSH) + // Otherwise generate an indirect jump (a switch). + if (currentPC != basicBlock.begin()) + { + auto pairIter = m_directJumpTargets.find(currentPC); + if (pairIter != m_directJumpTargets.end()) + { + auto targetBlock = pairIter->second; + + // The target address is computed at compile time, + // just pop it without looking... 
+ stack.pop(); + + if (inst == Instruction::JUMP) + { + builder.CreateBr(targetBlock->llvm()); + } + else // JUMPI + { + auto top = stack.pop(); + auto zero = ConstantInt::get(Types.word256, 0); + auto cond = builder.CreateICmpNE(top, zero, "nonzero"); + + // Assume the basic blocks are properly ordered: + auto nextBBIter = basicBlockPairIt; + ++nextBBIter; + assert (nextBBIter != basicBlocks.end()); + auto& followBlock = nextBBIter->second; + builder.CreateCondBr(cond, targetBlock->llvm(), followBlock.llvm()); + } + break; + } + } + + if (inst == Instruction::JUMPI) + { + std::cerr << "Indirect JUMPI is not supported yet (at PC " + << currentPC << ")\n"; + std::exit(1); + } + + // Generate switch for indirect jump. + auto dest = stack.pop(); + auto switchInstr = builder.CreateSwitch(dest, m_badJumpBlock->llvm(), + m_indirectJumpTargets.size()); + for (auto it = m_indirectJumpTargets.cbegin(); it != m_indirectJumpTargets.cend(); ++it) + { + auto& bb = *it; + auto dest = ConstantInt::get(Types.word256, bb->begin()); + switchInstr->addCase(dest, bb->llvm()); + } - auto& targetBlock = basicBlocks.find(jumpTargets[currentPC])->second; - builder.CreateBr(targetBlock); break; } - case Instruction::JUMPI: + case Instruction::JUMPDEST: { - assert(currentPC + 1 < bytecode.size()); - - // The target address is computed at compile time, - // just pop it without looking... - stack.pop(); - - auto top = stack.pop(); - auto zero = ConstantInt::get(Types.word256, 0); - auto cond = builder.CreateICmpNE(top, zero, "nonzero"); - auto& targetBlock = basicBlocks.find(jumpTargets[currentPC])->second; - auto& followBlock = basicBlocks.find(currentPC + 1)->second; - builder.CreateCondBr(cond, targetBlock, followBlock); + // Extra asserts just in case. 
+ assert(currentPC == basicBlock.begin()); break; } @@ -754,17 +834,18 @@ std::unique_ptr Compiler::compile(const dev::bytes& bytecode) } } - } if (!builder.GetInsertBlock()->getTerminator()) // If block not terminated { - if (basicBlock.begin() == bytecode.size()) // Special final block + if (basicBlock.end() == bytecode.size()) { - builder.CreateRet(builder.getInt64(0)); + // Branch from the last regular block to the final block. + builder.CreateBr(m_finalBlock->llvm()); } else { + // Branch to the next block. auto iterCopy = basicBlockPairIt; ++iterCopy; auto& next = iterCopy->second; @@ -773,6 +854,14 @@ std::unique_ptr Compiler::compile(const dev::bytes& bytecode) } } + // Code for special blocks: + builder.SetInsertPoint(m_finalBlock->llvm()); + builder.CreateRet(builder.getInt64(0)); + + // TODO: throw an exception or something + builder.SetInsertPoint(m_badJumpBlock->llvm()); + builder.CreateRet(builder.getInt64(1)); + linkBasicBlocks(); return module; diff --git a/evmcc/Compiler.h b/evmcc/Compiler.h index 52a6bc9ac..f57d800a4 100644 --- a/evmcc/Compiler.h +++ b/evmcc/Compiler.h @@ -32,14 +32,28 @@ private: std::map basicBlocks; /** - * Maps a pc at which there is a JUMP or JUMPI to the target pc of the jump. + * Maps a pc at which there is a JUMP or JUMPI to the target block of the jump. */ - std::map jumpTargets; + std::map m_directJumpTargets; + + /** + * A list of possible blocks to which there may be indirect jumps. + */ + std::vector m_indirectJumpTargets; -private: /// Collection of basic blocks in program //std::vector m_basicBlocks; + /** + * Final block for normal (non-exceptional) execution. + */ + std::unique_ptr m_finalBlock; + + /** + * Default destination for indirect jumps. + */ + std::unique_ptr m_badJumpBlock; + /// Main program function llvm::Function* m_mainFunc = nullptr; };