From 143e370bbf7518261424200f4b78e3aeb4853f1b Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Tue, 27 May 2014 18:51:10 +0200 Subject: [PATCH] Pinhole optimise working fairly well... --- alethzero/MainWin.cpp | 7 ++-- liblll/Assembly.cpp | 80 +++++++++++++++++++++++++++++++++---------- liblll/Assembly.h | 1 + liblll/CodeFragment.h | 7 ++-- liblll/Compiler.cpp | 14 +++++--- liblll/Compiler.h | 4 +-- lllc/main.cpp | 7 ++-- test/vm.cpp | 4 +-- 8 files changed, 91 insertions(+), 33 deletions(-) diff --git a/alethzero/MainWin.cpp b/alethzero/MainWin.cpp index 95b266aa4..34312043d 100644 --- a/alethzero/MainWin.cpp +++ b/alethzero/MainWin.cpp @@ -840,12 +840,13 @@ void Main::on_data_textChanged() if (isCreation()) { vector errors; - auto asmcode = eth::compileLLLToAsm(ui->data->toPlainText().toStdString()); - m_data = compileLLL(ui->data->toPlainText().toStdString(), &errors); + auto asmcode = eth::compileLLLToAsm(ui->data->toPlainText().toStdString(), false); + auto asmcodeopt = eth::compileLLLToAsm(ui->data->toPlainText().toStdString(), true); + m_data = compileLLL(ui->data->toPlainText().toStdString(), true, &errors); for (auto const& i: errors) cwarn << i; - ui->code->setHtml("

Code

" + QString::fromStdString(disassemble(m_data)).toHtmlEscaped() + "

" + QString::fromStdString(asmcode).toHtmlEscaped() + "
"); + ui->code->setHtml("

Opt

" + QString::fromStdString(asmcodeopt).toHtmlEscaped() + "

Pre

" + QString::fromStdString(asmcode).toHtmlEscaped() + "

Code

" + QString::fromStdString(disassemble(m_data)).toHtmlEscaped()); ui->gas->setMinimum((qint64)state().createGas(m_data.size(), 0)); if (!ui->gas->isEnabled()) ui->gas->setValue(m_backupGas); diff --git a/liblll/Assembly.cpp b/liblll/Assembly.cpp index 053de61a5..1d9cb4bcb 100644 --- a/liblll/Assembly.cpp +++ b/liblll/Assembly.cpp @@ -186,6 +186,9 @@ inline bool matches(AssemblyItemsConstRef _a, AssemblyItemsConstRef _b) return true; } +struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; }; +#define copt eth::LogOutputStream() + void Assembly::optimise() { map> c_simple = @@ -210,7 +213,7 @@ void Assembly::optimise() std::vector>> rules = { { { Push, Instruction::POP }, [](AssemblyItemsConstRef) -> AssemblyItems { return {}; } }, - { { Push, PushTag, Instruction::JUMPI }, [](AssemblyItemsConstRef m) -> AssemblyItems { return m[0].data() ? AssemblyItems({ m[1], Instruction::JUMP }) : AssemblyItems(); } }, + { { Push, PushTag, Instruction::JUMPI }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].data()) return { m[1], Instruction::JUMP }; else return {}; } }, }; for (auto const& i: c_simple) @@ -219,19 +222,16 @@ void Assembly::optimise() { rules.push_back({ { Push, Push, i.first }, [&](AssemblyItemsConstRef m) -> AssemblyItems { return { i.second(m[1].data(), m[0].data()) }; } }); rules.push_back({ { Push, i.first, Push, i.first }, [&](AssemblyItemsConstRef m) -> AssemblyItems { return { i.second(m[2].data(), m[0].data()), i.first }; } }); - rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [&](AssemblyItemsConstRef m) -> AssemblyItems - { - if (m[0].m_data == m[2].m_data) - return {}; - else - return m.toVector(); - }}); + rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [&](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].m_data == m[2].m_data) return {}; else return m.toVector(); }}); } + copt << *this; + unsigned total = 0; for (unsigned count = 1; count > 0; total += count) { count = 0; + map tags; for (unsigned i = 0; i < m_items.size(); ++i) { for (auto const& r: rules) @@ -242,23 +242,64 @@ void Assembly::optimise() auto rw = r.second(vr); if (rw.size() < vr.size()) { - cnote << vr << "matches" << AssemblyItemsConstRef(&r.first) << "becomes..."; + copt << vr << "matches" << AssemblyItemsConstRef(&r.first) << "becomes..."; for (unsigned j = 0; j < vr.size(); ++j) if (j < rw.size()) m_items[i + j] = rw[j]; else m_items.erase(m_items.begin() + i + rw.size()); - cnote << AssemblyItemsConstRef(&rw); + copt << AssemblyItemsConstRef(&rw); count++; + copt << "Now:\n" << m_items; } } } + if (m_items[i].type() == Operation && m_items[i].data() == (byte)Instruction::JUMP) + { + bool o = false; + while (m_items.size() > i + 1 && m_items[i + 1].type() != Tag) + { + m_items.erase(m_items.begin() + i + 1); + o = true; + } + if (o) + { + copt << "Jump with no tag. Now:\n" << m_items; + ++count; + } + } } - } - // TODO: find all unused tags, for all those that have an unconditional jump immediately before, remove code between the tag and the next used tag (removing unused tags from the todo along the way). + for (unsigned i = 0; i < m_items.size(); ++i) + if (m_items[i].type() == Tag) + tags.insert(make_pair(m_items[i].data(), i)); + + for (auto const& i: m_items) + if (i.type() == PushTag) + tags.erase(i.data()); - cnote << total << " optimisations done."; + if (tags.size()) + { + auto t = *tags.begin(); + unsigned i = t.second; + if (i && m_items[i - 1].type() == Operation && m_items[i - 1].data() == (byte)Instruction::JUMP) + while (i < m_items.size() && (m_items[i].type() != Tag || tags.count(m_items[i].data()))) + { + if (m_items[i].type() == Tag && tags.count(m_items[i].data())) + tags.erase(m_items[i].data()); + m_items.erase(m_items.begin() + i); + } + else + { + m_items.erase(m_items.begin() + i); + tags.erase(t.first); + } + copt << "Unused tag. Now:\n" << m_items; + ++count; + } + } + + copt << total << " optimisations done."; } bytes Assembly::assemble() const @@ -333,13 +374,16 @@ bytes Assembly::assemble() const for (auto const& i: m_data) { auto its = dataRef.equal_range(i.first); - for (auto it = its.first; it != its.second; ++it) + if (its.first != its.second) { - bytesRef r(ret.data() + it->second, bytesPerTag); - toBigEndian(ret.size(), r); + for (auto it = its.first; it != its.second; ++it) + { + bytesRef r(ret.data() + it->second, bytesPerTag); + toBigEndian(ret.size(), r); + } + for (auto b: i.second) + ret.push_back(b); } - for (auto b: i.second) - ret.push_back(b); } } return ret; diff --git a/liblll/Assembly.h b/liblll/Assembly.h index 2efff9437..a3bf998d6 100644 --- a/liblll/Assembly.h +++ b/liblll/Assembly.h @@ -62,6 +62,7 @@ typedef std::vector AssemblyItems; typedef vector_ref AssemblyItemsConstRef; std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i); +inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); } class Assembly { diff --git a/liblll/CodeFragment.h b/liblll/CodeFragment.h index 6935a111c..2c6f2cce6 100644 --- a/liblll/CodeFragment.h +++ b/liblll/CodeFragment.h @@ -43,10 +43,13 @@ public: static CodeFragment compile(std::string const& _src, CompilerState& _s); /// Consolidates data and compiles code. - bytes code() { m_asm.optimise(); return m_asm.assemble(); } + bytes code() const { return m_asm.assemble(); } /// Consolidates data and compiles code. - std::string assembly() { m_asm.optimise(); return m_asm.out(); } + std::string assembly() const { return m_asm.out(); } + + /// Optimise the code. Best do this just before calling code() or assembly(). + void optimise() { m_asm.optimise(); } private: template void error() const { throw T(); } diff --git a/liblll/Compiler.cpp b/liblll/Compiler.cpp index 777bb72d1..cd326341c 100644 --- a/liblll/Compiler.cpp +++ b/liblll/Compiler.cpp @@ -27,12 +27,15 @@ using namespace std; using namespace eth; -bytes eth::compileLLL(string const& _src, vector* _errors) +bytes eth::compileLLL(string const& _src, bool _opt, vector* _errors) { try { CompilerState cs; - bytes ret = CodeFragment::compile(_src, cs).code(); + auto f = CodeFragment::compile(_src, cs); + if (_opt) + f.optimise(); + bytes ret = f.code(); for (auto i: cs.treesToKill) killBigints(i); return ret; @@ -50,12 +53,15 @@ bytes eth::compileLLL(string const& _src, vector* _errors) return bytes(); } -std::string eth::compileLLLToAsm(std::string const& _src, std::vector* _errors) +std::string eth::compileLLLToAsm(std::string const& _src, bool _opt, std::vector* _errors) { try { CompilerState cs; - string ret = CodeFragment::compile(_src, cs).assembly(); + auto f = CodeFragment::compile(_src, cs); + if (_opt) + f.optimise(); + string ret = f.assembly(); for (auto i: cs.treesToKill) killBigints(i); return ret; diff --git a/liblll/Compiler.h b/liblll/Compiler.h index 9dd5fc291..395d79094 100644 --- a/liblll/Compiler.h +++ b/liblll/Compiler.h @@ -29,8 +29,8 @@ namespace eth { std::string parseLLL(std::string const& _src); -std::string compileLLLToAsm(std::string const& _src, std::vector* _errors = nullptr); -bytes compileLLL(std::string const& _src, std::vector* _errors = nullptr); +std::string compileLLLToAsm(std::string const& _src, bool _opt = true, std::vector* _errors = nullptr); +bytes compileLLL(std::string const& _src, bool _opt = true, std::vector* _errors = nullptr); } diff --git a/lllc/main.cpp b/lllc/main.cpp index f9ef41975..bfe51b35d 100644 --- a/lllc/main.cpp +++ b/lllc/main.cpp @@ -55,6 +55,7 @@ enum Mode { Binary, Hex, Assembly, ParseTree }; int main(int argc, char** argv) { + unsigned optimise = 1; string infile; Mode mode = Hex; @@ -71,6 +72,8 @@ int main(int argc, char** argv) mode = Assembly; else if (arg == "-t" || arg == "--parse-tree") mode = ParseTree; + else if ((arg == "-o" || arg == "--optimise") && argc > i + 1) + optimise = atoi(argv[++i]); else if (arg == "-V" || arg == "--version") version(); else @@ -95,7 +98,7 @@ int main(int argc, char** argv) cerr << "Empty file." << endl; else if (mode == Binary || mode == Hex) { - auto bs = compileLLL(src, &errors); + auto bs = compileLLL(src, optimise ? true : false, &errors); if (mode == Hex) cout << toHex(bs) << endl; else if (mode == Binary) @@ -104,7 +107,7 @@ int main(int argc, char** argv) else if (mode == ParseTree) cout << parseLLL(src) << endl; else if (mode == Assembly) - cout << compileLLLToAsm(src, &errors) << endl; + cout << compileLLLToAsm(src, optimise ? true : false, &errors) << endl; for (auto const& i: errors) cerr << i << endl; diff --git a/test/vm.cpp b/test/vm.cpp index 6c3d5b1b1..c2dc27910 100644 --- a/test/vm.cpp +++ b/test/vm.cpp @@ -163,7 +163,7 @@ public: thisTxCode.clear(); if (_o["code"].type() == str_type) - thisTxCode = compileLLL(_o["code"].get_str(), nullptr); + thisTxCode = compileLLL(_o["code"].get_str()); else for (auto const& j: _o["code"].get_array()) thisTxCode.push_back(toByte(j)); @@ -278,7 +278,7 @@ public: get<2>(a)[adr++] = toInt(k); } if (o["code"].type() == str_type) - get<3>(a) = compileLLL(o["code"].get_str(), nullptr); + get<3>(a) = compileLLL(o["code"].get_str()); else { get<3>(a).clear();