From 52b7fb343ce5f058d72c20f48c222c4ae091afc9 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Mon, 26 May 2014 19:41:46 +0200 Subject: [PATCH] New Assembler. --- alethzero/MainWin.cpp | 82 +++------- liblll/Assembly.cpp | 175 ++++++++++++++++++++- liblll/Assembly.h | 54 ++++++- liblll/CodeFragment.cpp | 338 +++++++++++++--------------------------- liblll/CodeFragment.h | 51 +----- liblll/CodeLocation.cpp | 59 ------- liblll/CodeLocation.h | 54 ------- liblll/Compiler.cpp | 27 +++- liblll/Compiler.h | 3 +- lllc/main.cpp | 16 +- 10 files changed, 386 insertions(+), 473 deletions(-) delete mode 100644 liblll/CodeLocation.cpp delete mode 100644 liblll/CodeLocation.h diff --git a/alethzero/MainWin.cpp b/alethzero/MainWin.cpp index 2a9282861..e32e7c335 100644 --- a/alethzero/MainWin.cpp +++ b/alethzero/MainWin.cpp @@ -839,59 +839,10 @@ void Main::on_data_textChanged() { if (isCreation()) { - QString code = ui->data->toPlainText(); - bytes initBytes; - bytes bodyBytes; - auto init = code.indexOf("init:"); - auto body = code.indexOf("body:"); - if (body == -1) - body = code.indexOf("code:"); - - if (body == -1 && init == -1) - { - vector errors; - initBytes = compileLLL(code.toStdString(), &errors); - for (auto const& i: errors) - cwarn << i; - } - else - { - init = (init == -1 ? 0 : (init + 5)); - int initSize = (body == -1 ? code.size() : (body - init)); - body = (body == -1 ? code.size() : (body + 5)); - auto initCode = code.mid(init, initSize).trimmed(); - auto bodyCode = code.mid(body).trimmed(); - if (QRegExp("[^0-9a-fA-F]").indexIn(initCode) == -1) - initBytes = fromHex(initCode.toStdString()); - else - initBytes = compileSerpent(initCode.toStdString()); - if (QRegExp("[^0-9a-zA-Z]").indexIn(bodyCode) == -1) - bodyBytes = fromHex(bodyCode.toStdString()); - else - bodyBytes = compileSerpent(bodyCode.toStdString()); - } - - m_data.clear(); - if (initBytes.size()) - m_data = initBytes; - if (bodyBytes.size()) - { - eth::CodeFragment c(bodyBytes); - - unsigned s = bodyBytes.size(); - unsigned ss = c.appendPush(s); - unsigned p = m_data.size() + 4 + 2 + 1 + ss + 2 + 1; - c.appendPush(p); - c.appendPush(0); - c.appendInstruction(Instruction::CODECOPY); - c.appendPush(s); - c.appendPush(0); - c.appendInstruction(Instruction::RETURN); - while (c.size() < p) - c.appendInstruction(Instruction::STOP); - for (auto b: c.code()) - m_data.push_back(b); - } + vector errors; + m_data = compileLLL(ui->data->toPlainText().toStdString(), &errors); + for (auto const& i: errors) + cwarn << i; ui->code->setHtml("

Code

" + QString::fromStdString(disassemble(m_data)).toHtmlEscaped()); ui->gas->setMinimum((qint64)state().createGas(m_data.size(), 0)); @@ -1163,17 +1114,24 @@ void Main::initDebugger() for (unsigned i = 0; i <= m_currentExecution->ext().code.size(); ++i) { byte b = i < m_currentExecution->ext().code.size() ? m_currentExecution->ext().code[i] : 0; - QString s = c_instructionInfo.at((Instruction)b).name; - m_pcWarp[i] = dc->count(); - ostringstream out; - out << hex << setw(4) << setfill('0') << i; - if (b >= (byte)Instruction::PUSH1 && b <= (byte)Instruction::PUSH32) + try + { + QString s = c_instructionInfo.at((Instruction)b).name; + m_pcWarp[i] = dc->count(); + ostringstream out; + out << hex << setw(4) << setfill('0') << i; + if (b >= (byte)Instruction::PUSH1 && b <= (byte)Instruction::PUSH32) + { + unsigned bc = b - (byte)Instruction::PUSH1 + 1; + s = "PUSH 0x" + QString::fromStdString(toHex(bytesConstRef(&m_currentExecution->ext().code[i + 1], bc))); + i += bc; + } + dc->addItem(QString::fromStdString(out.str()) + " " + s); + } + catch (...) { - unsigned bc = b - (byte)Instruction::PUSH1 + 1; - s = "PUSH 0x" + QString::fromStdString(toHex(bytesConstRef(&m_currentExecution->ext().code[i + 1], bc))); - i += bc; + break; // probably hit data segment } - dc->addItem(QString::fromStdString(out.str()) + " " + s); } } diff --git a/liblll/Assembly.cpp b/liblll/Assembly.cpp index a84913762..1f53cfb5f 100644 --- a/liblll/Assembly.cpp +++ b/liblll/Assembly.cpp @@ -26,38 +26,201 @@ using namespace std; using namespace eth; +int AssemblyItem::deposit() const +{ + switch (m_type) + { + case Operation: + return c_instructionInfo.at((Instruction)(byte)m_data).ret - c_instructionInfo.at((Instruction)(byte)m_data).args; + case Push: case PushString: case PushTag: case PushData: + return 1; + case Tag: + return 0; + } + assert(false); +} + +unsigned Assembly::bytesRequired() const +{ + for (unsigned br = 1;; ++br) + { + unsigned ret = 1; + for (auto const& i: m_data) + ret += i.second.size(); + + for (AssemblyItem const& i: m_items) + switch (i.m_type) + { + case Operation: + ret++; + break; + case PushString: + ret += 33; + break; + case Push: + ret += 1 + max(1, eth::bytesRequired(i.m_data)); + break; + case PushTag: + case PushData: + ret += 1 + br; + case Tag:; + } + if (eth::bytesRequired(ret) <= br) + return ret; + } +} + void Assembly::append(Assembly const& _a) { for (AssemblyItem i: _a.m_items) { if (i.type() == Tag || i.type() == PushTag) i.m_data += m_usedTags; - m_items.push_back(i); + append(i); } + m_usedTags += _a.m_usedTags; for (auto const& i: _a.m_data) m_data.insert(i); + for (auto const& i: _a.m_strings) + m_strings.insert(i); + + assert(!_a.m_baseDeposit); + assert(!_a.m_totalDeposit); +} + +void Assembly::append(Assembly const& _a, int _deposit) +{ + if (_deposit > _a.m_deposit) + throw InvalidDeposit(); + else + { + append(_a); + while (_deposit++ < _a.m_deposit) + append(Instruction::POP); + } } ostream& Assembly::streamOut(ostream& _out) const { + _out << ".code:" << endl; for (AssemblyItem const& i: m_items) switch (i.m_type) { case Operation: - _out << c_instructionInfo.at((Instruction)(byte)i.m_data).name << endl; + _out << " " << c_instructionInfo.at((Instruction)(byte)i.m_data).name << endl; break; case Push: - _out << i.m_data << endl; + _out << " PUSH " << i.m_data << endl; + break; + case PushString: + _out << " PUSH \"" << m_strings.at((h256)i.m_data) << "\"" << endl; + break; + case PushTag: + _out << " PUSH [tag" << i.m_data << "]" << endl; + break; + case Tag: + _out << "tag" << i.m_data << ": " << endl; + break; + case PushData: + _out << " PUSH [" << h256(i.m_data).abridged() << "]" << endl; break; -/* case PushString: - _out << i.m_data << endl; - break;*/ } + + if (m_data.size()) + { + _out << ".data:" << endl; + for (auto const& i: m_data) + _out << " " << i.first.abridged() << ": " << toHex(i.second) << endl; + } return _out; } +AssemblyItem const& Assembly::append(AssemblyItem const& _i) +{ + m_deposit += _i.deposit(); + m_items.push_back(_i); + return back(); +} + bytes Assembly::assemble() const { bytes ret; + + unsigned totalBytes = bytesRequired(); + ret.reserve(totalBytes); + vector tagPos(m_usedTags); + map tagRef; + multimap dataRef; + unsigned bytesPerTag = eth::bytesRequired(totalBytes); + byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag; + + for (AssemblyItem const& i: m_items) + switch (i.m_type) + { + case Operation: + ret.push_back((byte)i.m_data); + break; + case PushString: + { + ret.push_back((byte)Instruction::PUSH32); + unsigned ii = 0; + for (auto j: m_strings.at((h256)i.m_data)) + if (++ii > 32) + break; + else + ret.push_back((byte)j); + while (ii++ < 32) + ret.push_back(0); + break; + } + case Push: + { + byte b = max(1, eth::bytesRequired(i.m_data)); + ret.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.resize(ret.size() + b); + bytesRef byr(&ret.back() + 1 - b, b); + toBigEndian(i.m_data, byr); + break; + } + case PushTag: + { + ret.push_back(tagPush); + tagRef[ret.size()] = (unsigned)i.m_data; + ret.resize(ret.size() + bytesPerTag); + break; + } + case PushData: + { + ret.push_back(tagPush); + dataRef.insert(make_pair((h256)i.m_data, ret.size())); + ret.resize(ret.size() + bytesPerTag); + break; + } + case Tag: + tagPos[(unsigned)i.m_data] = ret.size(); + break; + } + + for (auto const& i: tagRef) + { + bytesRef r(ret.data() + i.first, bytesPerTag); + toBigEndian(tagPos[i.second], r); + } + + if (m_data.size()) + { + ret.push_back(0); + for (auto const& i: m_data) + { + auto its = dataRef.equal_range(i.first); + for (auto it = its.first; it != its.second; ++it) + { + bytesRef r(ret.data() + it->second, bytesPerTag); + toBigEndian(ret.size(), r); + } + for (auto b: i.second) + ret.push_back(b); + } + } return ret; } diff --git a/liblll/Assembly.h b/liblll/Assembly.h index ca3beaf5b..86b37622e 100644 --- a/liblll/Assembly.h +++ b/liblll/Assembly.h @@ -22,6 +22,7 @@ #pragma once #include +#include #include #include #include "Exceptions.h" @@ -39,14 +40,16 @@ class AssemblyItem public: AssemblyItem(u256 _push): m_type(Push), m_data(_push) {} - AssemblyItem(AssemblyItemType _type, AssemblyItem const& _tag): m_type(_type), m_data(_tag.m_data) { assert(_type == PushTag); assert(_tag.m_type == Tag); } AssemblyItem(Instruction _i): m_type(Operation), m_data((byte)_i) {} AssemblyItem(AssemblyItemType _type, u256 _data): m_type(_type), m_data(_data) {} + AssemblyItem tag() const { assert(m_type == PushTag || m_type == Tag); return AssemblyItem(Tag, m_data); } + AssemblyItem pushTag() const { assert(m_type == PushTag || m_type == Tag); return AssemblyItem(PushTag, m_data); } + AssemblyItemType type() const { return m_type; } u256 data() const { return m_data; } - std::ostream& streamOut(std::ostream& _out) const; + int deposit() const; private: AssemblyItemType m_type; @@ -57,19 +60,58 @@ class Assembly { public: AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); } + AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); } AssemblyItem newData(bytes const& _data) { auto h = sha3(_data); m_data[h] = _data; return AssemblyItem(PushData, h); } - AssemblyItem newPushString(std::string const& _data) { auto b = asBytes(_data); auto h = sha3(b); m_data[h] = b; return AssemblyItem(PushString, h); } + AssemblyItem newPushString(std::string const& _data) { auto h = sha3(_data); m_strings[h] = _data; return AssemblyItem(PushString, h); } + + AssemblyItem append() { return append(newTag()); } + void append(Assembly const& _a); + void append(Assembly const& _a, int _deposit); + AssemblyItem const& append(AssemblyItem const& _i); + AssemblyItem const& append(std::string const& _data) { return append(newPushString(_data)); } + AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); } + + AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; } + AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMPI); return ret; } + + template Assembly& operator<<(T const& _d) { append(_d); return *this; } + + AssemblyItem const& back() { return m_items.back(); } + std::string backString() const { return m_items.back().m_type == PushString ? m_strings.at((h256)m_items.back().m_data) : std::string(); } - void append(AssemblyItem const& _i) { m_items.push_back(_i); } + void onePath() { assert(!m_totalDeposit && !m_baseDeposit); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; } + void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; } + void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; } + void ignored() { m_baseDeposit = m_deposit; } + void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; } + void popTo(int _deposit) { while (m_deposit > _deposit) append(Instruction::POP); } + + std::string out() const { std::stringstream ret; streamOut(ret); return ret.str(); } + int deposit() const { return m_deposit; } bytes assemble() const; - void append(Assembly const& _a); std::ostream& streamOut(std::ostream& _out) const; private: - u256 m_usedTags = 0; + void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) throw InvalidDeposit(); } + unsigned bytesRequired() const; + + unsigned m_usedTags = 0; std::vector m_items; std::map m_data; + std::map m_strings; + + int m_deposit = 0; + int m_baseDeposit = 0; + int m_totalDeposit = 0; }; +inline std::ostream& operator<<(std::ostream& _out, Assembly const& _a) +{ + _a.streamOut(_out); + return _out; +} + } diff --git a/liblll/CodeFragment.cpp b/liblll/CodeFragment.cpp index c4b7ae0bb..59ff729e7 100644 --- a/liblll/CodeFragment.cpp +++ b/liblll/CodeFragment.cpp @@ -34,100 +34,6 @@ namespace qi = boost::spirit::qi; namespace px = boost::phoenix; namespace sp = boost::spirit; -void CodeFragment::appendFragment(CodeFragment const& _f) -{ - m_locs.reserve(m_locs.size() + _f.m_locs.size()); - m_code.reserve(m_code.size() + _f.m_code.size()); - - unsigned os = m_code.size(); - - for (auto i: _f.m_code) - m_code.push_back(i); - - for (auto i: _f.m_locs) - { - CodeLocation(this, i + os).increase(os); - m_locs.push_back(i + os); - } - - for (auto i: _f.m_data) - m_data.insert(make_pair(i.first, i.second + os)); - - m_deposit += _f.m_deposit; -} - -CodeFragment CodeFragment::compile(string const& _src, CompilerState& _s) -{ - CodeFragment ret; - sp::utree o; - parseTreeLLL(_src, o); - if (!o.empty()) - ret = CodeFragment(o, _s); - _s.treesToKill.push_back(o); - return ret; -} - -void CodeFragment::consolidateData() -{ - m_code.push_back(0); - bytes ld; - for (auto const& i: m_data) - { - if (ld != i.first) - { - ld = i.first; - for (auto j: ld) - m_code.push_back(j); - } - CodeLocation(this, i.second).set(m_code.size() - ld.size()); - } - m_data.clear(); -} - -void CodeFragment::appendFragment(CodeFragment const& _f, unsigned _deposit) -{ - if ((int)_deposit > _f.m_deposit) - error(); - else - { - appendFragment(_f); - while (_deposit++ < (unsigned)_f.m_deposit) - appendInstruction(Instruction::POP); - } -} - -CodeLocation CodeFragment::appendPushLocation(unsigned _locationValue) -{ - m_code.push_back((byte)Instruction::PUSH4); - CodeLocation ret(this, m_code.size()); - m_locs.push_back(m_code.size()); - m_code.resize(m_code.size() + 4); - bytesRef r(&m_code[m_code.size() - 4], 4); - toBigEndian(_locationValue, r); - m_deposit++; - return ret; -} - -unsigned CodeFragment::appendPush(u256 _literalValue) -{ - unsigned br = max(1, bytesRequired(_literalValue)); - m_code.push_back((byte)Instruction::PUSH1 + br - 1); - m_code.resize(m_code.size() + br); - for (unsigned i = 0; i < br; ++i) - { - m_code[m_code.size() - 1 - i] = (byte)(_literalValue & 0xff); - _literalValue >>= 8; - } - m_deposit++; - return br + 1; -} - -void CodeFragment::appendInstruction(Instruction _i) -{ - m_code.push_back((byte)_i); - m_deposit += c_instructionInfo.at(_i).ret - c_instructionInfo.at(_i).args; -} - CodeFragment::CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowASM) { /* cdebug << "CodeFragment. Locals:"; @@ -151,12 +57,7 @@ CodeFragment::CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowAS { auto sr = _t.get, sp::utree_type::string_type>>(); string s(sr.begin(), sr.end()); - if (s.size() > 32) - error(); - h256 valHash; - memcpy(valHash.data(), s.data(), s.size()); - memset(valHash.data() + s.size(), 0, 32 - s.size()); - appendPush(valHash); + m_asm.append(s); break; } case sp::utree_type::symbol_type: @@ -164,21 +65,14 @@ CodeFragment::CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowAS auto sr = _t.get, sp::utree_type::symbol_type>>(); string s(sr.begin(), sr.end()); string us = boost::algorithm::to_upper_copy(s); - if (_allowASM) - { - if (c_instructions.count(us)) - { - auto it = c_instructions.find(us); - m_deposit = c_instructionInfo.at(it->second).ret - c_instructionInfo.at(it->second).args; - m_code.push_back((byte)it->second); - } - } + if (_allowASM && c_instructions.count(us)) + m_asm.append(c_instructions.at(us)); if (_s.defs.count(s)) - appendFragment(_s.defs.at(s)); + m_asm.append(_s.defs.at(s).m_asm); else if (_s.args.count(s)) - appendFragment(_s.args.at(s)); + m_asm.append(_s.args.at(s).m_asm); else if (_s.outers.count(s)) - appendFragment(_s.outers.at(s)); + m_asm.append(_s.outers.at(s).m_asm); else if (us.find_first_of("1234567890") != 0 && us.find_first_not_of("QWERTYUIOPASDFGHJKLZXCVBNM1234567890_") == string::npos) { auto it = _s.vars.find(s); @@ -187,7 +81,7 @@ CodeFragment::CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowAS bool ok; tie(it, ok) = _s.vars.insert(make_pair(s, _s.vars.size() * 32)); } - appendPush(it->second); + m_asm.append((u256)it->second); } else error(); @@ -199,44 +93,13 @@ CodeFragment::CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowAS bigint i = *_t.get(); if (i < 0 || i > bigint(u256(0) - 1)) error(); - appendPush((u256)i); + m_asm.append((u256)i); break; } default: break; } } -void CodeFragment::appendPushDataLocation(bytes const& _data) -{ - m_code.push_back((byte)Instruction::PUSH4); - m_data.insert(make_pair(_data, m_code.size())); - m_code.resize(m_code.size() + 4); - memset(&m_code.back() - 3, 0, 4); - m_deposit++; -} - -std::string CodeFragment::asPushedString() const -{ - string ret; - if (m_code.size()) - { - unsigned bc = m_code[0] - (byte)Instruction::PUSH1 + 1; - if (m_code[0] >= (byte)Instruction::PUSH1 && m_code[0] <= (byte)Instruction::PUSH32) - { - for (unsigned s = 0; s < bc && m_code[1 + s]; ++s) - ret.push_back(m_code[1 + s]); - return ret; - } - } - error(); - return ret; -} - -void CodeFragment::optimise() -{ -// map const&)>> pattern = { { "PUSH,PUSH,ADD", [](vector const& v) { return CodeFragment(appendPush(v[0] + v[1])); } } }; -} - void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) { if (_t.empty()) @@ -281,7 +144,7 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) int c = 0; for (auto const& i: _t) if (c++) - appendFragment(CodeFragment(i, _s, true)); + m_asm.append(CodeFragment(i, _s, true).m_asm); } else if (us == "INCLUDE") { @@ -299,9 +162,9 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) else if (i.which() == sp::utree_type::symbol_type) { auto sr = i.get, sp::utree_type::symbol_type>>(); - n = _s.getDef(string(sr.begin(), sr.end())).asPushedString(); + n = _s.getDef(string(sr.begin(), sr.end())).m_asm.backString(); } - appendFragment(CodeFragment::compile(asString(contents(n)), _s)); + m_asm.append(CodeFragment::compile(asString(contents(n)), _s).m_asm); } else if (us == "DEF") { @@ -323,7 +186,7 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) else if (i.which() == sp::utree_type::symbol_type) { auto sr = i.get, sp::utree_type::symbol_type>>(); - n = _s.getDef(string(sr.begin(), sr.end())).asPushedString(); + n = _s.getDef(string(sr.begin(), sr.end())).m_asm.backString(); } } else if (ii == 2) @@ -362,7 +225,7 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) if (ii == 1) { pos = CodeFragment(i, _s); - if (pos.m_deposit != 1) + if (pos.m_asm.deposit() != 1) error(); } else if (ii == 2 && !i.tag() && i.which() == sp::utree_type::string_type) @@ -396,11 +259,11 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) error(); ++ii; } - appendPush(data.size()); - appendInstruction(Instruction::DUP); - appendPushDataLocation(data); - appendFragment(pos, 1); - appendInstruction(Instruction::CODECOPY); + m_asm.append((u256)data.size()); + m_asm.append(Instruction::DUP); + m_asm.append(data); + m_asm.append(pos.m_asm, 1); + m_asm.append(Instruction::CODECOPY); } else nonStandard = false; @@ -427,7 +290,7 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) auto requireSize = [&](unsigned s) { if (code.size() != s) error(); }; auto requireMinSize = [&](unsigned s) { if (code.size() < s) error(); }; auto requireMaxSize = [&](unsigned s) { if (code.size() > s) error(); }; - auto requireDeposit = [&](unsigned i, int s) { if (code[i].m_deposit != s) error(); }; + auto requireDeposit = [&](unsigned i, int s) { if (code[i].m_asm.deposit() != s) error(); }; if (_s.macros.count(s) && _s.macros.at(s).args.size() == code.size()) { @@ -443,7 +306,7 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) requireDeposit(i, 1); cs.args[m.args[i]] = code[i]; } - appendFragment(CodeFragment(m.code, cs)); + m_asm.append(CodeFragment(m.code, cs).m_asm); for (auto const& i: cs.defs) _s.defs[i.first] = i.second; for (auto const& i: cs.macros) @@ -459,8 +322,8 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) requireMinSize(-ea); for (unsigned i = code.size(); i; --i) - appendFragment(code[i - 1], 1); - appendInstruction(it->second); + m_asm.append(code[i - 1].m_asm, 1); + m_asm.append(it->second); } else if (c_arith.count(us)) { @@ -469,10 +332,10 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) for (unsigned i = code.size(); i; --i) { requireDeposit(i - 1, 1); - appendFragment(code[i - 1], 1); + m_asm.append(code[i - 1].m_asm, 1); } for (unsigned i = 1; i < code.size(); ++i) - appendInstruction(it->second); + m_asm.append(it->second); } else if (c_binary.count(us)) { @@ -480,74 +343,75 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) requireSize(2); requireDeposit(0, 1); requireDeposit(1, 1); - appendFragment(code[1], 1); - appendFragment(code[0], 1); - appendInstruction(it->second.first); + m_asm.append(code[1].m_asm, 1); + m_asm.append(code[0].m_asm, 1); + m_asm.append(it->second.first); if (it->second.second) - appendInstruction(Instruction::NOT); + m_asm.append(Instruction::NOT); } else if (c_unary.count(us)) { auto it = c_unary.find(us); requireSize(1); requireDeposit(0, 1); - appendFragment(code[0], 1); - appendInstruction(it->second); + m_asm.append(code[0].m_asm, 1); + m_asm.append(it->second); } else if (us == "IF") { requireSize(3); requireDeposit(0, 1); - appendFragment(code[0]); - auto pos = appendJumpI(); - onePath(); - appendFragment(code[2]); - auto end = appendJump(); - otherPath(); - pos.anchor(); - appendFragment(code[1]); - donePaths(); - end.anchor(); + + m_asm.append(code[0].m_asm); + auto pos = m_asm.appendJumpI(); + m_asm.onePath(); + m_asm << code[2].m_asm; + auto end = m_asm.appendJump(); + m_asm.otherPath(); + m_asm << pos.tag() << code[1].m_asm << end.tag(); + m_asm.donePaths(); } else if (us == "WHEN" || us == "UNLESS") { requireSize(2); requireDeposit(0, 1); - appendFragment(code[0]); + + m_asm.append(code[0].m_asm); if (us == "WHEN") - appendInstruction(Instruction::NOT); - auto end = appendJumpI(); - onePath(); - otherPath(); - appendFragment(code[1], 0); - donePaths(); - end.anchor(); + m_asm.append(Instruction::NOT); + auto end = m_asm.appendJumpI(); + m_asm.onePath(); + m_asm.otherPath(); + m_asm << code[1].m_asm << end.tag(); + m_asm.donePaths(); } else if (us == "WHILE") { requireSize(2); requireDeposit(0, 1); - auto begin = CodeLocation(this); - appendFragment(code[0], 1); - appendInstruction(Instruction::NOT); - auto end = appendJumpI(); - appendFragment(code[1], 0); - appendJump(begin); - end.anchor(); + + auto begin = m_asm.append(); + m_asm.append(code[0].m_asm); + m_asm.append(Instruction::NOT); + auto end = m_asm.appendJumpI(); + m_asm.append(code[1].m_asm, 0); + m_asm.appendJump(begin); + m_asm << end.tag(); } else if (us == "FOR") { requireSize(4); requireDeposit(1, 1); - appendFragment(code[0], 0); - auto begin = CodeLocation(this); - appendFragment(code[1], 1); - appendInstruction(Instruction::NOT); - auto end = appendJumpI(); - appendFragment(code[3], 0); - appendFragment(code[2], 0); - appendJump(begin); - end.anchor(); + + m_asm.append(code[0].m_asm, 0); + auto begin = m_asm.append(); + m_asm.append(code[1].m_asm); + m_asm.append(Instruction::NOT); + auto end = m_asm.appendJumpI(); + m_asm.append(code[3].m_asm, 0); + m_asm.append(code[2].m_asm, 0); + m_asm.appendJump(begin); + m_asm << end.tag(); } else if (us == "LLL") { @@ -555,22 +419,22 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) requireMaxSize(3); requireDeposit(1, 1); - CodeLocation codeloc(this, m_code.size() + 6); bytes const& subcode = code[0].code(); - appendPush(subcode.size()); - appendInstruction(Instruction::DUP); + + m_asm.append((u256)subcode.size()); + m_asm.append(Instruction::DUP); if (code.size() == 3) { requireDeposit(2, 1); - appendFragment(code[2], 1); - appendInstruction(Instruction::LT); - appendInstruction(Instruction::NOT); - appendInstruction(Instruction::MUL); - appendInstruction(Instruction::DUP); + m_asm.append(code[2].m_asm, 1); + m_asm.append(Instruction::LT); + m_asm.append(Instruction::NOT); + m_asm.append(Instruction::MUL); + m_asm.append(Instruction::DUP); } - appendPushDataLocation(subcode); - appendFragment(code[1], 1); - appendInstruction(Instruction::CODECOPY); + m_asm.append(subcode); + m_asm.append(code[1].m_asm, 1); + m_asm.append(Instruction::CODECOPY); } else if (us == "&&" || us == "||") { @@ -578,53 +442,52 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) for (unsigned i = 0; i < code.size(); ++i) requireDeposit(i, 1); - vector ends; + auto end = m_asm.newTag(); if (code.size() > 1) { - appendPush(us == "||" ? 1 : 0); + m_asm.append((u256)(us == "||" ? 1 : 0)); for (unsigned i = 1; i < code.size(); ++i) { // Check if true - predicate - appendFragment(code[i - 1], 1); + m_asm.append(code[i - 1].m_asm, 1); if (us == "&&") - appendInstruction(Instruction::NOT); - ends.push_back(appendJumpI()); + m_asm.append(Instruction::NOT); + m_asm.appendJumpI(end); } - appendInstruction(Instruction::POP); + m_asm.append(Instruction::POP); } // Check if true - predicate - appendFragment(code.back(), 1); + m_asm.append(code.back().m_asm, 1); // At end now. - for (auto& i: ends) - i.anchor(); + m_asm.append(end); } else if (us == "~") { requireSize(1); requireDeposit(0, 1); - appendFragment(code[0], 1); - appendPush(1); - appendPush(0); - appendInstruction(Instruction::SUB); - appendInstruction(Instruction::SUB); + + m_asm.append(code[0].m_asm, 1); + m_asm.append((u256)1); + m_asm.append((u256)0); + m_asm.append(Instruction::SUB); + m_asm.append(Instruction::SUB); } else if (us == "SEQ") { unsigned ii = 0; for (auto const& i: code) if (++ii < code.size()) - appendFragment(i, 0); + m_asm.append(i.m_asm, 0); else - appendFragment(i); + m_asm.append(i.m_asm); } else if (us == "RAW") { for (auto const& i: code) - appendFragment(i); - while (m_deposit > 1) - appendInstruction(Instruction::POP); + m_asm.append(i.m_asm); + m_asm.popTo(1); } else if (us.find_first_of("1234567890") != 0 && us.find_first_not_of("QWERTYUIOPASDFGHJKLZXCVBNM1234567890_") == string::npos) { @@ -634,9 +497,20 @@ void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) bool ok; tie(it, ok) = _s.vars.insert(make_pair(s, _s.vars.size() * 32)); } - appendPush(it->second); + m_asm.append((u256)it->second); } else error(); } } + +CodeFragment CodeFragment::compile(string const& _src, CompilerState& _s) +{ + CodeFragment ret; + sp::utree o; + parseTreeLLL(_src, o); + if (!o.empty()) + ret = CodeFragment(o, _s); + _s.treesToKill.push_back(o); + return ret; +} diff --git a/liblll/CodeFragment.h b/liblll/CodeFragment.h index 9f312cda9..647604849 100644 --- a/liblll/CodeFragment.h +++ b/liblll/CodeFragment.h @@ -23,7 +23,7 @@ #include #include -#include "CodeLocation.h" +#include "Assembly.h" #include "Exceptions.h" namespace boost { namespace spirit { class utree; } } @@ -36,60 +36,23 @@ class CompilerState; class CodeFragment { - friend class CodeLocation; - public: + CodeFragment() {} CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowASM = false); - CodeFragment(bytes const& _c = bytes()): m_code(_c) {} static CodeFragment compile(std::string const& _src, CompilerState& _s); - /// Consolidates data and returns code. - bytes const& code() { optimise(); consolidateData(); return m_code; } - - unsigned appendPush(u256 _l); - void appendFragment(CodeFragment const& _f); - void appendFragment(CodeFragment const& _f, unsigned _i); - void appendInstruction(Instruction _i); - - CodeLocation appendPushLocation(unsigned _l = 0); - void appendPushLocation(CodeLocation _l) { assert(_l.m_f == this); appendPushLocation(_l.m_pos); } - void appendPushDataLocation(bytes const& _data); - - CodeLocation appendJump() { auto ret = appendPushLocation(0); appendInstruction(Instruction::JUMP); return ret; } - CodeLocation appendJumpI() { auto ret = appendPushLocation(0); appendInstruction(Instruction::JUMPI); return ret; } - CodeLocation appendJump(CodeLocation _l) { auto ret = appendPushLocation(_l.m_pos); appendInstruction(Instruction::JUMP); return ret; } - CodeLocation appendJumpI(CodeLocation _l) { auto ret = appendPushLocation(_l.m_pos); appendInstruction(Instruction::JUMPI); return ret; } - - void appendFile(std::string const& _fn); + /// Consolidates data and compiles code. + bytes code() const { return m_asm.assemble(); } - std::string asPushedString() const; - - void onePath() { assert(!m_totalDeposit && !m_baseDeposit); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; } - void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; } - void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; } - void ignored() { m_baseDeposit = m_deposit; } - void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; } - - bool operator==(CodeFragment const& _f) const { return _f.m_code == m_code && _f.m_data == m_data; } - bool operator!=(CodeFragment const& _f) const { return !operator==(_f); } - unsigned size() const { return m_code.size(); } - - void consolidateData(); - void optimise(); + /// Consolidates data and compiles code. + std::string assembly() const { return m_asm.out(); } private: template void error() const { throw T(); } void constructOperation(sp::utree const& _t, CompilerState& _s); - void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) error(); } - - int m_deposit = 0; - int m_baseDeposit = 0; - int m_totalDeposit = 0; - bytes m_code; - std::vector m_locs; - std::multimap m_data; + Assembly m_asm; }; static const CodeFragment NullCodeFragment; diff --git a/liblll/CodeLocation.cpp b/liblll/CodeLocation.cpp deleted file mode 100644 index 2c9ca2644..000000000 --- a/liblll/CodeLocation.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file CodeLocation.cpp - * @author Gav Wood - * @date 2014 - */ - -#include "CodeLocation.h" -#include "CodeFragment.h" -using namespace std; -using namespace eth; - -CodeLocation::CodeLocation(CodeFragment* _f) -{ - m_f = _f; - m_pos = _f->m_code.size(); -} - -unsigned CodeLocation::get() const -{ - assert(m_f->m_code[m_pos - 1] == (byte)Instruction::PUSH4); - bytesConstRef r(&m_f->m_code[m_pos], 4); - return fromBigEndian(r); -} - -void CodeLocation::set(unsigned _val) -{ - assert(m_f->m_code[m_pos - 1] == (byte)Instruction::PUSH4); - assert(!get()); - bytesRef r(&m_f->m_code[m_pos], 4); - toBigEndian(_val, r); -} - -void CodeLocation::anchor() -{ - set(m_f->m_code.size()); -} - -void CodeLocation::increase(unsigned _val) -{ - assert(m_f->m_code[m_pos - 1] == (byte)Instruction::PUSH4); - bytesRef r(&m_f->m_code[m_pos], 4); - toBigEndian(get() + _val, r); -} - diff --git a/liblll/CodeLocation.h b/liblll/CodeLocation.h deleted file mode 100644 index c8cf5ee87..000000000 --- a/liblll/CodeLocation.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file CodeLocation.h - * @author Gav Wood - * @date 2014 - */ - -#pragma once - -#include -#include -#include "Exceptions.h" - -namespace eth -{ - -class CodeFragment; - -class CodeLocation -{ - friend class CodeFragment; - -public: - CodeLocation(CodeFragment* _f); - CodeLocation(CodeFragment* _f, unsigned _p): m_f(_f), m_pos(_p) {} - - unsigned get() const; - void increase(unsigned _val); - void set(unsigned _val); - void set(CodeLocation _loc) { assert(_loc.m_f == m_f); set(_loc.m_pos); } - void anchor(); - - CodeLocation operator+(unsigned _i) const { return CodeLocation(m_f, m_pos + _i); } - -private: - CodeFragment* m_f; - unsigned m_pos; -}; - -} diff --git a/liblll/Compiler.cpp b/liblll/Compiler.cpp index afe84eb4b..777bb72d1 100644 --- a/liblll/Compiler.cpp +++ b/liblll/Compiler.cpp @@ -27,12 +27,12 @@ using namespace std; using namespace eth; -bytes eth::compileLLL(string const& _s, vector* _errors) +bytes eth::compileLLL(string const& _src, vector* _errors) { try { CompilerState cs; - bytes ret = CodeFragment::compile(_s, cs).code(); + bytes ret = CodeFragment::compile(_src, cs).code(); for (auto i: cs.treesToKill) killBigints(i); return ret; @@ -50,6 +50,29 @@ bytes eth::compileLLL(string const& _s, vector* _errors) return bytes(); } +std::string eth::compileLLLToAsm(std::string const& _src, std::vector* _errors) +{ + try + { + CompilerState cs; + string ret = CodeFragment::compile(_src, cs).assembly(); + for (auto i: cs.treesToKill) + killBigints(i); + return ret; + } + catch (Exception const& _e) + { + if (_errors) + _errors->push_back(_e.description()); + } + catch (std::exception) + { + if (_errors) + _errors->push_back("Parse error."); + } + return string(); +} + string eth::parseLLL(string const& _src) { sp::utree o; diff --git a/liblll/Compiler.h b/liblll/Compiler.h index e58e12bae..9dd5fc291 100644 --- a/liblll/Compiler.h +++ b/liblll/Compiler.h @@ -29,7 +29,8 @@ namespace eth { std::string parseLLL(std::string const& _src); -bytes compileLLL(std::string const& _s, std::vector* _errors = nullptr); +std::string compileLLLToAsm(std::string const& _src, std::vector* _errors = nullptr); +bytes compileLLL(std::string const& _src, std::vector* _errors = nullptr); } diff --git a/lllc/main.cpp b/lllc/main.cpp index edad99a0a..f9ef41975 100644 --- a/lllc/main.cpp +++ b/lllc/main.cpp @@ -36,7 +36,7 @@ void help() << "Options:" << endl << " -b,--binary Parse, compile and assemble; output byte code in binary." << endl << " -x,--hex Parse, compile and assemble; output byte code in hex." << endl -// << " -a,--assembly Only parse and compile; show assembly." << endl + << " -a,--assembly Only parse and compile; show assembly." << endl << " -t,--parse-tree Only parse; show parse tree." << endl << " -h,--help Show this help message and exit." << endl << " -V,--version Show the version and exit." << endl; @@ -51,7 +51,7 @@ void version() exit(0); } -enum Mode { Binary, Hex, ParseTree }; +enum Mode { Binary, Hex, Assembly, ParseTree }; int main(int argc, char** argv) { @@ -67,6 +67,8 @@ int main(int argc, char** argv) mode = Binary; else if (arg == "-x" || arg == "--hex") mode = Hex; + else if (arg == "-a" || arg == "--assembly") + mode = Assembly; else if (arg == "-t" || arg == "--parse-tree") mode = ParseTree; else if (arg == "-V" || arg == "--version") @@ -88,23 +90,23 @@ int main(int argc, char** argv) else src = asString(contents(infile)); + vector errors; if (src.empty()) cerr << "Empty file." << endl; else if (mode == Binary || mode == Hex) { - vector errors; auto bs = compileLLL(src, &errors); if (mode == Hex) cout << toHex(bs) << endl; else if (mode == Binary) cout.write((char const*)bs.data(), bs.size()); - for (auto const& i: errors) - cerr << i << endl; } else if (mode == ParseTree) - { cout << parseLLL(src) << endl; - } + else if (mode == Assembly) + cout << compileLLLToAsm(src, &errors) << endl; + for (auto const& i: errors) + cerr << i << endl; return 0; }