From ca2f4ed2740dc50b20ab125a687c7dd24999ee49 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Wed, 5 Nov 2014 17:39:44 +0000 Subject: [PATCH] Fix for State and Transaction. Serpent update. --- libethereum/Executive.cpp | 9 +- libethereum/State.cpp | 32 +- libethereum/Transaction.cpp | 2 +- libethereum/Transaction.h | 4 +- libserpent/bignum.cpp | 14 + libserpent/bignum.h | 12 +- libserpent/compiler.cpp | 212 +++++++-- libserpent/opcodes.h | 224 +++++----- libserpent/parser.cpp | 53 ++- libserpent/rewriter.cpp | 870 +++++++++++++++++++++++++++++++----- libserpent/tokenize.cpp | 2 +- libserpent/util.cpp | 22 + libserpent/util.h | 6 +- 13 files changed, 1145 insertions(+), 317 deletions(-) diff --git a/libethereum/Executive.cpp b/libethereum/Executive.cpp index e14401972..71fc12a8c 100644 --- a/libethereum/Executive.cpp +++ b/libethereum/Executive.cpp @@ -184,15 +184,10 @@ bool Executive::go(OnOpFunc const& _onOp) { return false; } - catch (OutOfGas const& /*_e*/) - { - clog(StateChat) << "Out of Gas! Reverting."; - revert = true; - } catch (VMException const& _e) { - clog(StateChat) << "VM Exception: " << diagnostic_information(_e); - m_endGas = m_vm->gas(); + clog(StateChat) << "Safe VM Exception: " << diagnostic_information(_e); + m_endGas = 0;//m_vm->gas(); revert = true; } catch (Exception const& _e) diff --git a/libethereum/State.cpp b/libethereum/State.cpp index f35d21946..4aa916632 100644 --- a/libethereum/State.cpp +++ b/libethereum/State.cpp @@ -1213,32 +1213,29 @@ bool State::call(Address _receiveAddress, Address _codeAddress, Address _senderA *o_sub += evm.sub; if (o_ms) o_ms->output = out.toBytes(); - } - catch (OutOfGas const& /*_e*/) - { - clog(StateChat) << "Out of Gas! Reverting."; - revert = true; + *_gas = vm.gas(); } catch (VMException const& _e) { - clog(StateChat) << "VM Exception: " << diagnostic_information(_e); + clog(StateChat) << "Safe VM Exception: " << diagnostic_information(_e); revert = true; + *_gas = 0; } catch (Exception const& _e) { - clog(StateChat) << "Exception in VM: " << diagnostic_information(_e); + cwarn << "Unexpected exception in VM: " << diagnostic_information(_e) << ". This is exceptionally bad."; + // TODO: use fallback known-safe VM. } catch (std::exception const& _e) { - clog(StateChat) << "std::exception in VM: " << _e.what(); + cwarn << "Unexpected exception in VM: " << _e.what() << ". This is exceptionally bad."; + // TODO: use fallback known-safe VM. } // Write state out only in the case of a non-excepted transaction. if (revert) evm.revert(); - *_gas = vm.gas(); - return !revert; } else @@ -1281,16 +1278,13 @@ h160 State::create(Address _sender, u256 _endowment, u256 _gasPrice, u256* _gas, o_ms->output = out.toBytes(); if (o_sub) *o_sub += evm.sub; - } - catch (OutOfGas const& /*_e*/) - { - clog(StateChat) << "Out of Gas! Reverting."; - revert = true; + *_gas = vm.gas(); } catch (VMException const& _e) { - clog(StateChat) << "VM Exception: " << diagnostic_information(_e); + clog(StateChat) << "Safe VM Exception: " << diagnostic_information(_e); revert = true; + *_gas = 0; } catch (Exception const& _e) { @@ -1317,8 +1311,6 @@ h160 State::create(Address _sender, u256 _endowment, u256 _gasPrice, u256* _gas, if (addressInUse(newAddress)) m_cache[newAddress].setCode(out); - *_gas = vm.gas(); - return newAddress; } @@ -1380,7 +1372,7 @@ std::ostream& dev::eth::operator<<(std::ostream& _out, State const& _s) stringstream contout; - if ((cache && cache->codeBearing()) || (!cache && r && !r[3].isEmpty())) + if ((cache && cache->codeBearing()) || (!cache && r && (h256)r[3] != EmptySHA3)) { std::map mem; std::set back; @@ -1409,7 +1401,7 @@ std::ostream& dev::eth::operator<<(std::ostream& _out, State const& _s) else contout << r[2].toHash(); if (cache && cache->isFreshCode()) - contout << " $" << cache->code(); + contout << " $" << toHex(cache->code()); else contout << " $" << (cache ? cache->codeHash() : r[3].toHash()); diff --git a/libethereum/Transaction.cpp b/libethereum/Transaction.cpp index cfbae92fc..d94a31425 100644 --- a/libethereum/Transaction.cpp +++ b/libethereum/Transaction.cpp @@ -47,7 +47,7 @@ Transaction::Transaction(bytesConstRef _rlpData, bool _checkSender) if (_checkSender) m_sender = sender(); } - catch (Exception & _e) + catch (Exception& _e) { _e << errinfo_name("invalid transaction format") << BadFieldError(field,toHex(rlp[field].data().toBytes())); throw; diff --git a/libethereum/Transaction.h b/libethereum/Transaction.h index 56cc1295e..490a2ac68 100644 --- a/libethereum/Transaction.h +++ b/libethereum/Transaction.h @@ -57,10 +57,10 @@ public: Transaction(u256 _value, u256 _gasPrice, u256 _gas, bytes const& _data): m_type(ContractCreation), m_value(_value), m_gasPrice(_gasPrice), m_gas(_gas), m_data(_data) {} /// Constructs a transaction from the given RLP. - Transaction(bytesConstRef _rlp, bool _checkSender = false); + explicit Transaction(bytesConstRef _rlp, bool _checkSender = false); /// Constructs a transaction from the given RLP. - Transaction(bytes const& _rlp, bool _checkSender = false): Transaction(&_rlp, _checkSender) {} + explicit Transaction(bytes const& _rlp, bool _checkSender = false): Transaction(&_rlp, _checkSender) {} /// Checks equality of transactions. diff --git a/libserpent/bignum.cpp b/libserpent/bignum.cpp index 877808ead..108b1eb04 100644 --- a/libserpent/bignum.cpp +++ b/libserpent/bignum.cpp @@ -48,6 +48,20 @@ std::string decimalMul(std::string a, std::string b) { return o; } +//Modexp +std::string decimalModExp(std::string b, std::string e, std::string m) { + if (e == "0") return "1"; + else if (e == "1") return b; + else if (decimalMod(e, "2") == "0") { + std::string o = decimalModExp(b, decimalDiv(e, "2"), m); + return decimalMod(decimalMul(o, o), m); + } + else { + std::string o = decimalModExp(b, decimalDiv(e, "2"), m); + return decimalMod(decimalMul(decimalMul(o, o), b), m); + } +} + //Is a greater than b? Flag allows equality bool decimalGt(std::string a, std::string b, bool eqAllowed) { if (a == b) return eqAllowed; diff --git a/libserpent/bignum.h b/libserpent/bignum.h index 6656fdaec..599365b6c 100644 --- a/libserpent/bignum.h +++ b/libserpent/bignum.h @@ -7,10 +7,16 @@ const std::string tt256 = "115792089237316195423570985008687907853269984665640564039457584007913129639936" ; -const std::string tt255 = -"57896044618658097711785492504343953926634992332820282019728792003956564819968" +const std::string tt256m1 = +"115792089237316195423570985008687907853269984665640564039457584007913129639935" ; +const std::string tt255 = +"57896044618658097711785492504343953926634992332820282019728792003956564819968"; + +const std::string tt176 = +"95780971304118053647396689196894323976171195136475136"; + std::string unsignedToDecimal(unsigned branch); std::string decimalAdd(std::string a, std::string b); @@ -23,6 +29,8 @@ std::string decimalDiv(std::string a, std::string b); std::string decimalMod(std::string a, std::string b); +std::string decimalModExp(std::string b, std::string e, std::string m); + bool decimalGt(std::string a, std::string b, bool eqAllowed=false); unsigned decimalToUnsigned(std::string a); diff --git a/libserpent/compiler.cpp b/libserpent/compiler.cpp index 251c7d9da..623ab3950 100644 --- a/libserpent/compiler.cpp +++ b/libserpent/compiler.cpp @@ -8,10 +8,18 @@ struct programAux { std::map vars; + int nextVarMem; bool allocUsed; bool calldataUsed; int step; int labelLength; + int functionCount; +}; + +struct programVerticalAux { + int height; + std::map dupvars; + std::map funvars; }; struct programData { @@ -25,6 +33,16 @@ programAux Aux() { o.allocUsed = false; o.calldataUsed = false; o.step = 0; + o.nextVarMem = 32; + o.functionCount = 0; + return o; +} + +programVerticalAux verticalAux() { + programVerticalAux o; + o.height = 0; + o.dupvars = std::map(); + o.funvars = std::map(); return o; } @@ -57,28 +75,28 @@ Node popwrap(Node node) { // Turns LLL tree into tree of code fragments programData opcodeify(Node node, programAux aux=Aux(), - int height=0, - std::map dupvars= - std::map()) { + programVerticalAux vaux=verticalAux()) { std::string symb = "_"+mkUniqueToken(); Metadata m = node.metadata; // Numbers if (node.type == TOKEN) { return pd(aux, nodeToNumeric(node), 1); } - else if (node.val == "ref" || node.val == "get" || node.val == "set") { + else if (node.val == "ref" || node.val == "get" || + node.val == "set" || node.val == "declare") { std::string varname = node.args[0].val; if (!aux.vars.count(varname)) { - aux.vars[varname] = unsignedToDecimal(aux.vars.size() * 32); + aux.vars[varname] = unsignedToDecimal(aux.nextVarMem); + aux.nextVarMem += 32; } if (varname == "'msg.data") aux.calldataUsed = true; // Set variable if (node.val == "set") { - programData sub = opcodeify(node.args[1], aux, height, dupvars); + programData sub = opcodeify(node.args[1], aux, vaux); if (!sub.outs) err("Value to set variable must have nonzero arity!", m); - if (dupvars.count(node.args[0].val)) { - int h = height - dupvars[node.args[0].val]; + if (vaux.dupvars.count(node.args[0].val)) { + int h = vaux.height - vaux.dupvars[node.args[0].val]; if (h > 16) err("Too deep for stack variable (max 16)", m); Node nodelist[] = { sub.code, @@ -96,8 +114,8 @@ programData opcodeify(Node node, } // Get variable else if (node.val == "get") { - if (dupvars.count(node.args[0].val)) { - int h = height - dupvars[node.args[0].val]; + if (vaux.dupvars.count(node.args[0].val)) { + int h = vaux.height - vaux.dupvars[node.args[0].val]; if (h > 16) err("Too deep for stack variable (max 16)", m); return pd(aux, token("DUP"+unsignedToDecimal(h)), 1); } @@ -106,36 +124,157 @@ programData opcodeify(Node node, return pd(aux, multiToken(nodelist, 2, m), 1); } // Refer variable - else { - if (dupvars.count(node.args[0].val)) + else if (node.val == "ref") { + if (vaux.dupvars.count(node.args[0].val)) err("Cannot ref stack variable!", m); return pd(aux, token(aux.vars[varname], m), 1); } + // Declare variable + else { + Node nodelist[] = { }; + return pd(aux, multiToken(nodelist, 0, m), 0); + } + } + // Define functions (TODO: eventually move to rewriter.cpp, keep + // compiler pure LLL) + if (node.val == "def") { + std::vector varNames; + std::vector varSizes; + bool useLt32 = false; + int totalSz = 0; + if (node.args.size() != 2) + err("Malformed def!", m); + // Collect the list of variable names and variable byte counts + for (unsigned i = 0; i < node.args[0].args.size(); i++) { + if (node.args[0].args[i].val == "kv") { + if (node.args[0].args[i].args.size() != 2) + err("Malformed def!", m); + varNames.push_back(node.args[0].args[i].args[0].val); + varSizes.push_back( + decimalToUnsigned(node.args[0].args[i].args[1].val)); + if (varSizes.back() > 32) + err("Max argument width: 32 bytes", m); + useLt32 = true; + } + else { + varNames.push_back(node.args[0].args[i].val); + varSizes.push_back(32); + } + aux.vars[varNames.back()] = unsignedToDecimal(aux.nextVarMem + 32 * i); + totalSz += varSizes.back(); + } + int functionCount = aux.functionCount; + int nextVarMem = aux.nextVarMem; + aux.nextVarMem += 32 * varNames.size(); + aux.functionCount += 1; + programData inner; + // If we're only using 32-byte variables, then great, just copy + // over the calldata! + if (!useLt32) { + programData sub = opcodeify(node.args[1], aux, vaux); + Node nodelist[] = { + token(unsignedToDecimal(totalSz), m), + token("1", m), + token(unsignedToDecimal(nextVarMem), m), + token("CALLDATACOPY", m), + sub.code + }; + inner = pd(sub.aux, multiToken(nodelist, 5, m), 0); + } + else { + std::vector innerList; + int cum = 1; + for (unsigned i = 0; i < varNames.size();) { + // If we get a series of 32-byte values, we calldatacopy them + if (varSizes[i] == 32) { + unsigned until = i+1; + while (until < varNames.size() && varSizes[until] == 32) + until += 1; + innerList.push_back(token(unsignedToDecimal((until - i) * 32), m)); + innerList.push_back(token(unsignedToDecimal(cum), m)); + innerList.push_back(token(unsignedToDecimal(nextVarMem + i * 32), m)); + innerList.push_back(token("CALLDATACOPY", m)); + cum += (until - i) * 32; + i = until; + } + // Otherwise, we do a clever trick to extract the value + else { + innerList.push_back(token(unsignedToDecimal(32 - varSizes[i]), m)); + innerList.push_back(token("256", m)); + innerList.push_back(token("EXP", m)); + innerList.push_back(token(unsignedToDecimal(cum), m)); + innerList.push_back(token("CALLDATALOAD", m)); + innerList.push_back(token("DIV", m)); + innerList.push_back(token(unsignedToDecimal(nextVarMem + i * 32), m)); + innerList.push_back(token("MSTORE", m)); + cum += varSizes[i]; + i += 1; + } + } + // If caller == origin, then it's from a tx, so unpack, otherwise + // plain copy + programData sub = opcodeify(node.args[1], aux, vaux); + Node ilnode = astnode("", innerList, m); + Node nodelist[] = { + token(unsignedToDecimal(32 * varNames.size()), m), + token("1", m), + token(unsignedToDecimal(nextVarMem), m), + token("CALLDATACOPY", m), + token("CALLER", m), + token("ORIGIN", m), + token("EQ", m), + token("ISZERO", m), + token("$maincode"+symb, m), + token("JUMPI", m), + ilnode, + token("~maincode"+symb, m), + token("JUMPDEST", m), + sub.code + }; + inner = pd(sub.aux, multiToken(nodelist, 14, m), 0); + } + // Check if the function call byte is the same + Node nodelist2[] = { + token("0", m), + token("CALLDATALOAD", m), + token("0", m), + token("BYTE", m), + token(unsignedToDecimal(functionCount), m), + token("EQ", m), + token("ISZERO", m), + token("$endcode"+symb, m), + token("JUMPI", m), + inner.code, + token("~endcode"+symb, m), + token("JUMPDEST", m), + }; + return pd(inner.aux, multiToken(nodelist2, 12, m), 0); } // Code blocks if (node.val == "lll" && node.args.size() == 2) { if (node.args[1].val != "0") aux.allocUsed = true; std::vector o; o.push_back(finalize(opcodeify(node.args[0]))); - programData sub = opcodeify(node.args[1], aux, height, dupvars); + programData sub = opcodeify(node.args[1], aux, vaux); Node code = astnode("____CODE", o, m); Node nodelist[] = { token("$begincode"+symb+".endcode"+symb, m), token("DUP1", m), token("$begincode"+symb, m), sub.code, token("CODECOPY", m), token("$endcode"+symb, m), token("JUMP", m), - token("~begincode"+symb, m), code, token("~endcode"+symb, m), - token("JUMPDEST", m) + token("~begincode"+symb, m), code, + token("~endcode"+symb, m), token("JUMPDEST", m) }; return pd(sub.aux, multiToken(nodelist, 11, m), 1); } // Stack variables if (node.val == "with") { - std::map dupvars2 = dupvars; - dupvars2[node.args[0].val] = height; - programData initial = opcodeify(node.args[1], aux, height, dupvars); + programData initial = opcodeify(node.args[1], aux, vaux); + programVerticalAux vaux2 = vaux; + vaux2.dupvars[node.args[0].val] = vaux.height; + vaux2.height += 1; if (!initial.outs) err("Initial variable value must have nonzero arity!", m); - programData sub = opcodeify(node.args[2], initial.aux, height + 1, dupvars2); + programData sub = opcodeify(node.args[2], initial.aux, vaux2); Node nodelist[] = { initial.code, sub.code @@ -151,7 +290,7 @@ programData opcodeify(Node node, std::vector children; int lastOut = 0; for (unsigned i = 0; i < node.args.size(); i++) { - programData sub = opcodeify(node.args[i], aux, height, dupvars); + programData sub = opcodeify(node.args[i], aux, vaux); aux = sub.aux; if (sub.outs == 1) { if (i < node.args.size() - 1) sub.code = popwrap(sub.code); @@ -163,8 +302,8 @@ programData opcodeify(Node node, } // 2-part conditional (if gets rewritten to unless in rewrites) else if (node.val == "unless" && node.args.size() == 2) { - programData cond = opcodeify(node.args[0], aux, height, dupvars); - programData action = opcodeify(node.args[1], cond.aux, height, dupvars); + programData cond = opcodeify(node.args[0], aux, vaux); + programData action = opcodeify(node.args[1], cond.aux, vaux); aux = action.aux; if (!cond.outs) err("Condition of if/unless statement has arity 0", m); if (action.outs) action.code = popwrap(action.code); @@ -178,9 +317,9 @@ programData opcodeify(Node node, } // 3-part conditional else if (node.val == "if" && node.args.size() == 3) { - programData ifd = opcodeify(node.args[0], aux, height, dupvars); - programData thend = opcodeify(node.args[1], ifd.aux, height, dupvars); - programData elsed = opcodeify(node.args[2], thend.aux, height, dupvars); + programData ifd = opcodeify(node.args[0], aux, vaux); + programData thend = opcodeify(node.args[1], ifd.aux, vaux); + programData elsed = opcodeify(node.args[2], thend.aux, vaux); aux = elsed.aux; if (!ifd.outs) err("Condition of if/unless statement has arity 0", m); @@ -191,7 +330,7 @@ programData opcodeify(Node node, if (elsed.outs > outs) elsed.code = popwrap(elsed.code); Node nodelist[] = { ifd.code, - token("NOT", m), + token("ISZERO", m), token("$else"+symb, m), token("JUMPI", m), thend.code, token("$endif"+symb, m), token("JUMP", m), @@ -203,8 +342,8 @@ programData opcodeify(Node node, } // While (rewritten to this in rewrites) else if (node.val == "until") { - programData cond = opcodeify(node.args[0], aux, height, dupvars); - programData action = opcodeify(node.args[1], cond.aux, height, dupvars); + programData cond = opcodeify(node.args[0], aux, vaux); + programData action = opcodeify(node.args[1], cond.aux, vaux); aux = action.aux; if (!cond.outs) err("Condition of while/until loop has arity 0", m); @@ -215,13 +354,13 @@ programData opcodeify(Node node, token("$end"+symb, m), token("JUMPI", m), action.code, token("$beg"+symb, m), token("JUMP", m), - token("~end"+symb, m), token("JUMPDEST", m) + token("~end"+symb, m), token("JUMPDEST", m), }; return pd(aux, multiToken(nodelist, 10, m)); } // Memory allocations else if (node.val == "alloc") { - programData bytez = opcodeify(node.args[0], aux, height, dupvars); + programData bytez = opcodeify(node.args[0], aux, vaux); aux = bytez.aux; if (!bytez.outs) err("Alloc input has arity 0", m); @@ -251,7 +390,9 @@ programData opcodeify(Node node, for (unsigned i = 0; i < node.args.size(); i++) { Metadata m2 = node.args[i].metadata; nodes.push_back(token("DUP1", m2)); - programData sub = opcodeify(node.args[i], aux, height + 2, dupvars); + programVerticalAux vaux2 = vaux; + vaux2.height += 2; + programData sub = opcodeify(node.args[i], aux, vaux2); if (!sub.outs) err("Array_lit item " + unsignedToDecimal(i) + " has zero arity", m2); aux = sub.aux; @@ -276,10 +417,9 @@ programData opcodeify(Node node, err("Invalid arity for "+node.val, m); } for (int i = node.args.size() - 1; i >= 0; i--) { - programData sub = opcodeify(node.args[i], - aux, - height - i - 1 + node.args.size(), - dupvars); + programVerticalAux vaux2 = vaux; + vaux2.height = vaux.height - i - 1 + node.args.size(); + programData sub = opcodeify(node.args[i], aux, vaux2); aux = sub.aux; if (!sub.outs) err("Input "+unsignedToDecimal(i)+" has arity 0", sub.code.metadata); @@ -305,7 +445,7 @@ Node finalize(programData c) { if ((c.aux.allocUsed || c.aux.calldataUsed) && c.aux.vars.size() > 0) { Node nodelist[] = { token("0", m), - token(unsignedToDecimal(c.aux.vars.size() * 32 - 1)), + token(unsignedToDecimal(c.aux.nextVarMem - 1)), token("MSTORE8", m) }; bottom.push_back(multiToken(nodelist, 3, m)); diff --git a/libserpent/opcodes.h b/libserpent/opcodes.h index a254ea0b2..a7bcc1af9 100644 --- a/libserpent/opcodes.h +++ b/libserpent/opcodes.h @@ -1,20 +1,3 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ - #ifndef ETHSERP_OPCODES #define ETHSERP_OPCODES @@ -24,128 +7,131 @@ #include class Mapping { -public: - Mapping(std::string Op, int Opcode, int In, int Out) { - op = Op; - opcode = Opcode; - in = In; - out = Out; - } - std::string op; - int opcode; - int in; - int out; + public: + Mapping(std::string Op, int Opcode, int In, int Out) { + op = Op; + opcode = Opcode; + in = In; + out = Out; + } + std::string op; + int opcode; + int in; + int out; }; Mapping mapping[] = { - Mapping("STOP", 0x00, 0, 0), - Mapping("ADD", 0x01, 2, 1), - Mapping("MUL", 0x02, 2, 1), - Mapping("SUB", 0x03, 2, 1), - Mapping("DIV", 0x04, 2, 1), - Mapping("SDIV", 0x05, 2, 1), - Mapping("MOD", 0x06, 2, 1), - Mapping("SMOD", 0x07, 2, 1), - Mapping("EXP", 0x08, 2, 1), - Mapping("NEG", 0x09, 1, 1), - Mapping("LT", 0x0a, 2, 1), - Mapping("GT", 0x0b, 2, 1), - Mapping("SLT", 0x0c, 2, 1), - Mapping("SGT", 0x0d, 2, 1), - Mapping("EQ", 0x0e, 2, 1), - Mapping("NOT", 0x0f, 1, 1), - Mapping("AND", 0x10, 2, 1), - Mapping("OR", 0x11, 2, 1), - Mapping("XOR", 0x12, 2, 1), - Mapping("BYTE", 0x13, 2, 1), - Mapping("ADDMOD", 0x14, 3, 1), - Mapping("MULMOD", 0x15, 3, 1), - Mapping("SIGNEXTEND", 0x16, 2, 1), - Mapping("SHA3", 0x20, 2, 1), - Mapping("ADDRESS", 0x30, 0, 1), - Mapping("BALANCE", 0x31, 1, 1), - Mapping("ORIGIN", 0x32, 0, 1), - Mapping("CALLER", 0x33, 0, 1), - Mapping("CALLVALUE", 0x34, 0, 1), - Mapping("CALLDATALOAD", 0x35, 1, 1), - Mapping("CALLDATASIZE", 0x36, 0, 1), - Mapping("CALLDATACOPY", 0x37, 3, 1), - Mapping("CODESIZE", 0x38, 0, 1), - Mapping("CODECOPY", 0x39, 3, 1), - Mapping("GASPRICE", 0x3a, 0, 1), - Mapping("PREVHASH", 0x40, 0, 1), - Mapping("COINBASE", 0x41, 0, 1), - Mapping("TIMESTAMP", 0x42, 0, 1), - Mapping("NUMBER", 0x43, 0, 1), - Mapping("DIFFICULTY", 0x44, 0, 1), - Mapping("GASLIMIT", 0x45, 0, 1), - Mapping("POP", 0x50, 1, 0), - Mapping("MLOAD", 0x53, 1, 1), - Mapping("MSTORE", 0x54, 2, 0), - Mapping("MSTORE8", 0x55, 2, 0), - Mapping("SLOAD", 0x56, 1, 1), - Mapping("SSTORE", 0x57, 2, 0), - Mapping("JUMP", 0x58, 1, 0), - Mapping("JUMPI", 0x59, 2, 0), - Mapping("PC", 0x5a, 0, 1), - Mapping("MSIZE", 0x5b, 0, 1), - Mapping("GAS", 0x5c, 0, 1), - Mapping("JUMPDEST", 0x5d, 0, 0), - Mapping("CREATE", 0xf0, 3, 1), - Mapping("CALL", 0xf1, 7, 1), - Mapping("RETURN", 0xf2, 2, 0), - Mapping("CALL_CODE", 0xf3, 7, 1), - Mapping("SUICIDE", 0xff, 1, 0), - Mapping("---END---", 0x00, 0, 0), + Mapping("STOP", 0x00, 0, 0), + Mapping("ADD", 0x01, 2, 1), + Mapping("MUL", 0x02, 2, 1), + Mapping("SUB", 0x03, 2, 1), + Mapping("DIV", 0x04, 2, 1), + Mapping("SDIV", 0x05, 2, 1), + Mapping("MOD", 0x06, 2, 1), + Mapping("SMOD", 0x07, 2, 1), + Mapping("ADDMOD", 0x08, 3, 1), + Mapping("MULMOD", 0x09, 3, 1), + Mapping("EXP", 0x0a, 2, 1), + Mapping("SIGNEXTEND", 0x0b, 2, 1), + Mapping("LT", 0x10, 2, 1), + Mapping("GT", 0x11, 2, 1), + Mapping("SLT", 0x12, 2, 1), + Mapping("SGT", 0x13, 2, 1), + Mapping("EQ", 0x14, 2, 1), + Mapping("ISZERO", 0x15, 1, 1), + Mapping("AND", 0x16, 2, 1), + Mapping("OR", 0x17, 2, 1), + Mapping("XOR", 0x18, 2, 1), + Mapping("NOT", 0x19, 1, 1), + Mapping("BYTE", 0x1a, 2, 1), + Mapping("ADDMOD", 0x14, 3, 1), + Mapping("MULMOD", 0x15, 3, 1), + Mapping("SIGNEXTEND", 0x16, 2, 1), + Mapping("SHA3", 0x20, 2, 1), + Mapping("ADDRESS", 0x30, 0, 1), + Mapping("BALANCE", 0x31, 1, 1), + Mapping("ORIGIN", 0x32, 0, 1), + Mapping("CALLER", 0x33, 0, 1), + Mapping("CALLVALUE", 0x34, 0, 1), + Mapping("CALLDATALOAD", 0x35, 1, 1), + Mapping("CALLDATASIZE", 0x36, 0, 1), + Mapping("CALLDATACOPY", 0x37, 3, 1), + Mapping("CODESIZE", 0x38, 0, 1), + Mapping("CODECOPY", 0x39, 3, 1), + Mapping("GASPRICE", 0x3a, 0, 1), + Mapping("PREVHASH", 0x40, 0, 1), + Mapping("COINBASE", 0x41, 0, 1), + Mapping("TIMESTAMP", 0x42, 0, 1), + Mapping("NUMBER", 0x43, 0, 1), + Mapping("DIFFICULTY", 0x44, 0, 1), + Mapping("GASLIMIT", 0x45, 0, 1), + Mapping("POP", 0x50, 1, 0), + Mapping("MLOAD", 0x51, 1, 1), + Mapping("MSTORE", 0x52, 2, 0), + Mapping("MSTORE8", 0x53, 2, 0), + Mapping("SLOAD", 0x54, 1, 1), + Mapping("SSTORE", 0x55, 2, 0), + Mapping("JUMP", 0x56, 1, 0), + Mapping("JUMPI", 0x57, 2, 0), + Mapping("PC", 0x58, 0, 1), + Mapping("MSIZE", 0x59, 0, 1), + Mapping("GAS", 0x5a, 0, 1), + Mapping("JUMPDEST", 0x5b, 0, 0), + Mapping("LOG0", 0xa0, 2, 0), + Mapping("LOG1", 0xa1, 3, 0), + Mapping("LOG2", 0xa2, 4, 0), + Mapping("LOG3", 0xa3, 5, 0), + Mapping("LOG4", 0xa4, 6, 0), + Mapping("CREATE", 0xf0, 3, 1), + Mapping("CALL", 0xf1, 7, 1), + Mapping("RETURN", 0xf2, 2, 0), + Mapping("CALL_CODE", 0xf3, 7, 1), + Mapping("SUICIDE", 0xff, 1, 0), + Mapping("---END---", 0x00, 0, 0), }; std::map > opcodes; std::map reverseOpcodes; // Fetches everything EXCEPT PUSH1..32 -std::pair > _opdata(std::string ops, int opi) -{ - if (!opcodes.size()) - { - int i = 0; - while (mapping[i].op != "---END---") - { - Mapping mi = mapping[i]; - opcodes[mi.op] = triple(mi.opcode, mi.in, mi.out); - i++; - } - for (i = 1; i <= 16; i++) - { - opcodes["DUP"+unsignedToDecimal(i)] = triple(0x7f + i, i, i+1); - opcodes["SWAP"+unsignedToDecimal(i)] = triple(0x8f + i, i+1, i+1); - } - for (std::map >::iterator it=opcodes.begin(); it != opcodes.end(); it++) - reverseOpcodes[(*it).second[0]] = (*it).first; - } - std::string op; - std::vector opdata; - op = reverseOpcodes.count(opi) ? reverseOpcodes[opi] : ""; - opdata = opcodes.count(ops) ? opcodes[ops] : triple(-1, -1, -1); - return std::pair >(op, opdata); +std::pair > _opdata(std::string ops, int opi) { + if (!opcodes.size()) { + int i = 0; + while (mapping[i].op != "---END---") { + Mapping mi = mapping[i]; + opcodes[mi.op] = triple(mi.opcode, mi.in, mi.out); + i++; + } + for (i = 1; i <= 16; i++) { + opcodes["DUP"+unsignedToDecimal(i)] = triple(0x7f + i, i, i+1); + opcodes["SWAP"+unsignedToDecimal(i)] = triple(0x8f + i, i+1, i+1); + } + for (std::map >::iterator it=opcodes.begin(); + it != opcodes.end(); + it++) { + reverseOpcodes[(*it).second[0]] = (*it).first; + } + } + std::string op; + std::vector opdata; + op = reverseOpcodes.count(opi) ? reverseOpcodes[opi] : ""; + opdata = opcodes.count(ops) ? opcodes[ops] : triple(-1, -1, -1); + return std::pair >(op, opdata); } -int opcode(std::string op) -{ +int opcode(std::string op) { return _opdata(op, -1).second[0]; } -int opinputs(std::string op) -{ +int opinputs(std::string op) { return _opdata(op, -1).second[1]; } -int opoutputs(std::string op) -{ +int opoutputs(std::string op) { return _opdata(op, -1).second[2]; } -std::string op(int opcode) -{ +std::string op(int opcode) { return _opdata("", opcode).first; } diff --git a/libserpent/parser.cpp b/libserpent/parser.cpp index 0460c974c..4ceb1d12d 100644 --- a/libserpent/parser.cpp +++ b/libserpent/parser.cpp @@ -9,20 +9,21 @@ // Extended BEDMAS precedence order int precedence(Node tok) { std::string v = tok.val; - if (v == "!" || v == "not") return 0; - else if (v=="^" || v == "**") return 1; - else if (v=="*" || v=="/" || v=="@/" || v=="%" || v=="@%") return 2; - else if (v=="+" || v=="-") return 3; - else if (v=="<" || v==">" || v=="<=" || v==">=") return 4; - else if (v=="@<" || v=="@>" || v=="@<=" || v=="@>=") return 4; - else if (v=="&" || v=="|" || v=="xor" || v=="==" || v == "!=") return 5; - else if (v=="&&" || v=="and") return 6; - else if (v=="||" || v=="or") return 7; - else if (v==":") return 8; + if (v == ".") return -1; + else if (v == "!" || v == "not") return 1; + else if (v=="^" || v == "**") return 2; + else if (v=="*" || v=="/" || v=="@/" || v=="%" || v=="@%") return 3; + else if (v=="+" || v=="-") return 4; + else if (v=="<" || v==">" || v=="<=" || v==">=") return 5; + else if (v=="@<" || v=="@>" || v=="@<=" || v=="@>=") return 5; + else if (v=="&" || v=="|" || v=="xor" || v=="==" || v == "!=") return 6; + else if (v=="&&" || v=="and") return 7; + else if (v=="||" || v=="or") return 8; + else if (v==":") return 9; else if (v=="=") return 10; else if (v=="+=" || v=="-=" || v=="*=" || v=="/=" || v=="%=") return 10; else if (v=="@/=" || v=="@%=") return 10; - else return -1; + else return 0; } // Token classification for shunting-yard purposes @@ -32,8 +33,9 @@ int toktype(Node tok) { if (v == "(" || v == "[" || v == "{") return LPAREN; else if (v == ")" || v == "]" || v == "}") return RPAREN; else if (v == ",") return COMMA; - else if (v == "!" || v == "not" || v == "neg") return UNARY_OP; - else if (precedence(tok) >= 0) return BINARY_OP; + else if (v == "!" || v == "~" || v == "not") return UNARY_OP; + else if (precedence(tok) > 0) return BINARY_OP; + else if (precedence(tok) < 0) return TOKEN_SPLITTER; if (tok.val[0] != '"' && tok.val[0] != '\'') { for (unsigned i = 0; i < tok.val.length(); i++) { if (chartype(tok.val[i]) == SYMB) { @@ -68,6 +70,10 @@ std::vector shuntingYard(std::vector tokens) { } // Left parens go on stack and output queue else if (toktyp == LPAREN) { + while (stack.size() && toktype(stack.back()) == TOKEN_SPLITTER) { + oq.push_back(stack.back()); + stack.pop_back(); + } if (prevtyp != ALPHANUM && prevtyp != RPAREN) { oq.push_back(token("id", tok.metadata)); } @@ -88,16 +94,26 @@ std::vector shuntingYard(std::vector tokens) { else if (toktyp == UNARY_OP) { stack.push_back(tok); } + // If token splitter, just push it to the stack + else if (toktyp == TOKEN_SPLITTER) { + while (stack.size() && toktype(stack.back()) == TOKEN_SPLITTER) { + oq.push_back(stack.back()); + stack.pop_back(); + } + stack.push_back(tok); + } // If binary op, keep popping from stack while higher bedmas precedence else if (toktyp == BINARY_OP) { if (tok.val == "-" && prevtyp != ALPHANUM && prevtyp != RPAREN) { - stack.push_back(token("neg", tok.metadata)); + stack.push_back(tok); + oq.push_back(token("0", tok.metadata)); } else { int prec = precedence(tok); while (stack.size() && (toktype(stack.back()) == BINARY_OP - || toktype(stack.back()) == UNARY_OP) + || toktype(stack.back()) == UNARY_OP + || toktype(stack.back()) == TOKEN_SPLITTER) && precedence(stack.back()) <= prec) { oq.push_back(stack.back()); stack.pop_back(); @@ -133,9 +149,9 @@ Node treefy(std::vector stream) { int typ = toktype(tok); // If unary, take node off end of oq and wrap it with the operator // If binary, do the same with two nodes - if (typ == UNARY_OP || typ == BINARY_OP) { + if (typ == UNARY_OP || typ == BINARY_OP || typ == TOKEN_SPLITTER) { std::vector args; - int rounds = (typ == BINARY_OP) ? 2 : 1; + int rounds = (typ == UNARY_OP) ? 1 : 2; for (int i = 0; i < rounds; i++) { if (oq.size() == 0) { err("Line malformed, not enough args for "+tok.val, @@ -245,7 +261,8 @@ int spaceCount(std::string s) { // Is this a command that takes an argument on the same line? bool bodied(std::string tok) { return tok == "if" || tok == "elif" || tok == "while" - || tok == "with" || tok == "def"; + || tok == "with" || tok == "def" || tok == "extern" + || tok == "data"; } // Is this a command that takes an argument as a child block? diff --git a/libserpent/rewriter.cpp b/libserpent/rewriter.cpp index bf6a73828..3042eeb45 100644 --- a/libserpent/rewriter.cpp +++ b/libserpent/rewriter.cpp @@ -11,16 +11,11 @@ std::string valid[][3] = { { "unless", "2", "2" }, { "while", "2", "2" }, { "until", "2", "2" }, - { "code", "1", "2" }, - { "init", "2", "2" }, - { "shared", "2", "3" }, { "alloc", "1", "1" }, { "array", "1", "1" }, - { "call", "2", "4" }, - { "call_code", "2", "4" }, + { "call", "2", tt256 }, + { "call_code", "2", tt256 }, { "create", "1", "4" }, - { "msg", "4", "6" }, - { "msg_stateless", "4", "6" }, { "getch", "2", "2" }, { "setch", "3", "3" }, { "sha3", "1", "2" }, @@ -30,6 +25,9 @@ std::string valid[][3] = { { "max", "2", "2" }, { "array_lit", "0", tt256 }, { "seq", "0", tt256 }, + { "log", "1", "6" }, + { "outer", "1", "1" }, + { "set", "2", "2" }, { "---END---", "", "" } //Keep this line at the end of the list }; @@ -68,7 +66,7 @@ std::string macros[][2] = { }, { "(!= $a $b)", - "(not (eq $a $b))" + "(iszero (eq $a $b))" }, { "(min a b)", @@ -87,7 +85,7 @@ std::string macros[][2] = { "$code" }, { - "(access msg.data $ind)", + "(access (. msg data) $ind)", "(calldataload (mul 32 $ind))" }, { @@ -100,22 +98,22 @@ std::string macros[][2] = { }, { "(while $cond $do)", - "(until (not $cond) $do)", + "(until (iszero $cond) $do)", }, { - "(while (not $cond) $do)", + "(while (iszero $cond) $do)", "(until $cond $do)", }, { "(if $cond $do)", - "(unless (not $cond) $do)", + "(unless (iszero $cond) $do)", }, { - "(if (not $cond) $do)", + "(if (iszero $cond) $do)", "(unless $cond $do)", }, { - "(access contract.storage $ind)", + "(access (. self storage) $ind)", "(sload $ind)" }, { @@ -123,7 +121,7 @@ std::string macros[][2] = { "(mload (add $var (mul 32 $ind)))" }, { - "(set (access contract.storage $ind) $val)", + "(set (access (. self storage) $ind) $val)", "(sstore $ind $val)" }, { @@ -140,11 +138,11 @@ std::string macros[][2] = { }, { "(send $to $value)", - "(call (sub (gas) 25) $to $value 0 0 0 0)" + "(~call (sub (gas) 25) $to $value 0 0 0 0)" }, { "(send $gas $to $value)", - "(call $gas $to $value 0 0 0 0)" + "(~call $gas $to $value 0 0 0 0)" }, { "(sha3 $x)", @@ -176,19 +174,19 @@ std::string macros[][2] = { }, { "(>= $x $y)", - "(not (slt $x $y))" + "(iszero (slt $x $y))" }, { "(<= $x $y)", - "(not (sgt $x $y))" + "(iszero (sgt $x $y))" }, { "(@>= $x $y)", - "(not (lt $x $y))" + "(iszero (lt $x $y))" }, { "(@<= $x $y)", - "(not (gt $x $y))" + "(iszero (gt $x $y))" }, { "(create $code)", @@ -198,68 +196,25 @@ std::string macros[][2] = { "(create $endowment $code)", "(with $1 (msize) (create $endowment (get $1) (lll (outer $code) (msize))))" }, - // Call and msg { - "(call $f $dataval)", - "(msg (sub (gas) 45) $f 0 $dataval)" + "(sha256 $x)", + "(seq (set $1 $x) (pop (~call 101 2 0 (ref $1) 32 (ref $2) 32)) (get $2))" }, { - "(call $f $inp $inpsz)", - "(msg (sub (gas) 25) $f 0 $inp $inpsz)" + "(sha256 $arr $sz)", + "(seq (pop (~call 101 2 0 $arr (mul 32 $sz) (ref $2) 32)) (get $2))" }, { - "(call $f $inp $inpsz $outsz)", - "(with $1 $outsz (with $2 (alloc (mul 32 (get $1))) (seq (call (sub (gas) (add 25 (get $1))) $f 0 $inp (mul 32 $inpsz) (get $2) (mul 32 (get $1))) (get $2))))" + "(ripemd160 $x)", + "(seq (set $1 $x) (pop (~call 101 3 0 (ref $1) 32 (ref $2) 32)) (get $2))" }, { - "(msg $gas $to $val $inp $inpsz)", - "(seq (call $gas $to $val $inp (mul 32 $inpsz) (ref $1) 32) (get $1))" + "(ripemd160 $arr $sz)", + "(seq (pop (~call 101 3 0 $arr (mul 32 $sz) (ref $2) 32)) (get $2))" }, { - "(msg $gas $to $val $dataval)", - "(seq (set $1 $dataval) (call $gas $to $val (ref $1) 32 (ref $2) 32) (get $2))" - }, - { - "(msg $gas $to $val $inp $inpsz $outsz)", - "(with $1 (mul 32 $outsz) (with $2 (alloc (get $1)) (seq (call $gas $to $val $inp (mul 32 $inpsz) (get $2) (get $1)) (get $2))))" - }, - // Call stateless and msg stateless - { - "(call_code $f $dataval)", - "(msg_code (sub (gas) 45) $f 0 $dataval)" - }, - { - "(call_code $f $inp $inpsz)", - "(msg_code (sub (gas) 25) $f 0 $inp $inpsz)" - }, - { - "(call_code $f $inp $inpsz $outsz)", - "(with $1 $outsz (with $2 (alloc (mul 32 (get $1))) (seq (call_code (sub (gas) (add 25 (get $1))) $f 0 $inp (mul 32 $inpsz) (get $2) (mul 32 (get $1))) (get $2))))" - }, - { - "(msg_code $gas $to $val $inp $inpsz)", - "(seq (call_code $gas $to $val $inp (mul 32 $inpsz) (ref $1) 32) (get $1))" - }, - { - "(msg_code $gas $to $val $dataval)", - "(seq (set $1 $dataval) (call_code $gas $to $val (ref $1) 32 (ref $2) 32) (get $2))" - }, - { - "(msg_code $gas $to $val $inp $inpsz $outsz)", - "(with $1 (mul 32 $outsz) (with $2 (alloc (get $1)) (call_code $gas $to $val $inp (mul 32 $inpsz) (get $2) (get $1)) (get $2)))" - }, - // Wrappers - { - "(outer (init $init $code))", - "(seq $init (~return 0 (lll $code 0)))" - }, - { - "(outer (shared $shared (init $init (code $code))))", - "(seq $shared $init (~return 0 (lll (seq $shared $code) 0)))" - }, - { - "(outer $code)", - "(~return 0 (lll $code 0))" + "(ecrecover $h $v $r $s)", + "(seq (declare $1) (declare $2) (declare $3) (declare $4) (set $1 $h) (set $2 $v) (set $3 $r) (set $4 $s) (pop (~call 101 1 0 (ref $1) 128 (ref $5) 32)) (get $5))" }, { "(seq (seq) $x)", @@ -277,20 +232,36 @@ std::string macros[][2] = { "(with (= $var $val) $cond)", "(with $var $val $cond)" }, - { "msg.datasize", "(div (calldatasize) 32)" }, - { "msg.sender", "(caller)" }, - { "msg.value", "(callvalue)" }, - { "tx.gasprice", "(gasprice)" }, - { "tx.origin", "(origin)" }, - { "tx.gas", "(gas)" }, - { "contract.balance", "(balance (address))" }, - { "contract.address", "(address)" }, - { "block.prevhash", "(prevhash)" }, - { "block.coinbase", "(coinbase)" }, - { "block.timestamp", "(timestamp)" }, - { "block.number", "(number)" }, - { "block.difficulty", "(difficulty)" }, - { "block.gaslimit", "(gaslimit)" }, + { + "(log $t1)", + "(~log1 $t1 0 0)" + }, + { + "(log $t1 $t2)", + "(~log2 $t1 $t2 0 0)" + }, + { + "(log $t1 $t2 $t3)", + "(~log3 $t1 $t2 $t3 0 0)" + }, + { + "(log $t1 $t2 $t3 $t4)", + "(~log4 $t1 $t2 $t3 $t4 0 0)" + }, + { "(. msg datasize)", "(div (calldatasize) 32)" }, + { "(. msg sender)", "(caller)" }, + { "(. msg value)", "(callvalue)" }, + { "(. tx gasprice)", "(gasprice)" }, + { "(. tx origin)", "(origin)" }, + { "(. tx gas)", "(gas)" }, + { "(. $x balance)", "(balance $x)" }, + { "self", "(address)" }, + { "(. block prevhash)", "(prevhash)" }, + { "(. block coinbase)", "(coinbase)" }, + { "(. block timestamp)", "(timestamp)" }, + { "(. block number)", "(number)" }, + { "(. block difficulty)", "(difficulty)" }, + { "(. block gaslimit)", "(gaslimit)" }, { "stop", "(stop)" }, { "---END---", "" } //Keep this line at the end of the list }; @@ -303,7 +274,9 @@ std::string synonyms[][2] = { { "|", "~or" }, { "&", "~and" }, { "elif", "if" }, - { "!", "not" }, + { "!", "iszero" }, + { "~", "~not" }, + { "not", "iszero" }, { "string", "alloc" }, { "+", "add" }, { "-", "sub" }, @@ -324,11 +297,50 @@ std::string synonyms[][2] = { { "---END---", "" } //Keep this line at the end of the list }; +std::string setters[][2] = { + { "+=", "+" }, + { "-=", "-" }, + { "*=", "*" }, + { "/=", "/" }, + { "%=", "%" }, + { "^=", "^" }, + { "!=", "!" }, + { "---END---", "" } //Keep this line at the end of the list +}; + +// Match result storing object struct matchResult { bool success; std::map map; }; +// Storage variable index storing object +struct svObj { + std::map offsets; + std::map indices; + std::map > coefficients; + std::map nonfinal; + std::string globalOffset; +}; + +// Preprocessing result storing object +class preprocessAux { + public: + preprocessAux() { + globalExterns = std::map(); + localExterns = std::map >(); + localExterns["self"] = std::map(); + } + std::map globalExterns; + std::map > localExterns; + svObj storageVars; +}; + +#define preprocessResult std::pair + +// Main pattern matching routine, for those patterns that can be expressed +// using our standard mini-language above +// // Returns two values. First, a boolean to determine whether the node matches // the pattern, second, if the node does match then a map mapping variables // in the pattern to nodes @@ -343,6 +355,7 @@ matchResult match(Node p, Node n) { } } else if (n.type==TOKEN || p.val!=n.val || p.args.size()!=n.args.size()) { + // do nothing } else { for (unsigned i = 0; i < p.args.size(); i++) { @@ -389,37 +402,585 @@ Node subst(Node pattern, } } -// array_lit transform +// Processes mutable array literals Node array_lit_transform(Node node) { + Metadata m = node.metadata; std::vector o1; - o1.push_back(token(unsignedToDecimal(node.args.size() * 32), node.metadata)); + o1.push_back(token(unsignedToDecimal(node.args.size() * 32), m)); std::vector o2; std::string symb = "_temp"+mkUniqueToken()+"_0"; - o2.push_back(token(symb, node.metadata)); - o2.push_back(astnode("alloc", o1, node.metadata)); + o2.push_back(token(symb, m)); + o2.push_back(astnode("alloc", o1, m)); std::vector o3; - o3.push_back(astnode("set", o2, node.metadata)); + o3.push_back(astnode("set", o2, m)); for (unsigned i = 0; i < node.args.size(); i++) { - // (mstore (add (get symb) i*32) v) std::vector o5; - o5.push_back(token(symb, node.metadata)); + o5.push_back(token(symb, m)); std::vector o6; - o6.push_back(astnode("get", o5, node.metadata)); - o6.push_back(token(unsignedToDecimal(i * 32), node.metadata)); + o6.push_back(astnode("get", o5, m)); + o6.push_back(token(unsignedToDecimal(i * 32), m)); std::vector o7; o7.push_back(astnode("add", o6)); o7.push_back(node.args[i]); - o3.push_back(astnode("mstore", o7, node.metadata)); + o3.push_back(astnode("mstore", o7, m)); } std::vector o8; - o8.push_back(token(symb, node.metadata)); + o8.push_back(token(symb, m)); o3.push_back(astnode("get", o8)); - return astnode("seq", o3, node.metadata); + return astnode("seq", o3, m); +} + +// Is the given node something of the form +// self.cow +// self.horse[0] +// self.a[6][7][self.storage[3]].chicken[9] +bool isNodeStorageVariable(Node node) { + std::vector nodez; + nodez.push_back(node); + while (1) { + if (nodez.back().type == TOKEN) return false; + if (nodez.back().args.size() == 0) return false; + if (nodez.back().val != "." && nodez.back().val != "access") + return false; + if (nodez.back().args[0].val == "self") return true; + nodez.push_back(nodez.back().args[0]); + } +} + +Node optimize(Node inp); + +Node apply_rules(preprocessResult pr); + +// Convert: +// self.cow -> ["cow"] +// self.horse[0] -> ["horse", "0"] +// self.a[6][7][self.storage[3]].chicken[9] -> +// ["6", "7", (sload 3), "chicken", "9"] +std::vector listfyStorageAccess(Node node) { + std::vector out; + std::vector nodez; + nodez.push_back(node); + while (1) { + if (nodez.back().type == TOKEN) { + out.push_back(token("--" + nodez.back().val, node.metadata)); + std::vector outrev; + for (int i = (signed)out.size() - 1; i >= 0; i--) { + outrev.push_back(out[i]); + } + return outrev; + } + if (nodez.back().val == ".") + nodez.back().args[1].val = "--" + nodez.back().args[1].val; + if (nodez.back().args.size() == 0) + err("Error parsing storage variable statement", node.metadata); + if (nodez.back().args.size() == 1) + out.push_back(token(tt256m1, node.metadata)); + else + out.push_back(nodez.back().args[1]); + nodez.push_back(nodez.back().args[0]); + } +} + +// Cool function for debug purposes (named cerrStringList to make +// all prints searchable via 'cerr') +void cerrStringList(std::vector s, std::string suffix="") { + for (unsigned i = 0; i < s.size(); i++) std::cerr << s[i] << " "; + std::cerr << suffix << "\n"; +} + +// Populate an svObj with the arguments needed to determine +// the storage position of a node +svObj getStorageVars(svObj pre, Node node, std::string prefix="", int index=0) { + Metadata m = node.metadata; + if (!pre.globalOffset.size()) pre.globalOffset = "0"; + std::vector h; + std::vector coefficients; + // Array accesses or atoms + if (node.val == "access" || node.type == TOKEN) { + std::string tot = "1"; + h = listfyStorageAccess(node); + coefficients.push_back("1"); + for (unsigned i = h.size() - 1; i >= 1; i--) { + // Array sizes must be constant or at least arithmetically + // evaluable at compile time + h[i] = optimize(apply_rules(preprocessResult( + h[i], preprocessAux()))); + if (!isNumberLike(h[i])) + err("Array size must be fixed value", m); + // Create a list of the coefficient associated with each + // array index + coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); + } + } + // Tuples + else { + int startc; + // Handle the (fun args...) case + if (node.val == "fun") { + startc = 1; + h = listfyStorageAccess(node.args[0]); + } + // Handle the ( args...) case, which + // the serpent parser produces when the function + // is a simple name and not a complex astnode + else { + startc = 0; + h = listfyStorageAccess(token(node.val, m)); + } + svObj sub = pre; + sub.globalOffset = "0"; + // Evaluate tuple elements recursively + for (unsigned i = startc; i < node.args.size(); i++) { + sub = getStorageVars(sub, + node.args[i], + prefix+h[0].val.substr(2)+".", + i-1); + } + coefficients.push_back(sub.globalOffset); + for (unsigned i = h.size() - 1; i >= 1; i--) { + // Array sizes must be constant or at least arithmetically + // evaluable at compile time + h[i] = optimize(apply_rules(preprocessResult( + h[i], preprocessAux()))); + if (!isNumberLike(h[i])) + err("Array size must be fixed value", m); + // Create a list of the coefficient associated with each + // array index + coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); + } + pre.offsets = sub.offsets; + pre.coefficients = sub.coefficients; + pre.nonfinal = sub.nonfinal; + pre.nonfinal[prefix+h[0].val.substr(2)] = true; + } + pre.coefficients[prefix+h[0].val.substr(2)] = coefficients; + pre.offsets[prefix+h[0].val.substr(2)] = pre.globalOffset; + pre.indices[prefix+h[0].val.substr(2)] = index; + if (decimalGt(tt176, coefficients.back())) + pre.globalOffset = decimalAdd(pre.globalOffset, coefficients.back()); + return pre; +} + +// Transform a node of the form (call to funid vars...) into +// a call + +#define psn std::pair + +Node call_transform(Node node, std::string op) { + Metadata m = node.metadata; + // We're gonna make lots of temporary variables, + // so set up a unique flag for them + std::string prefix = "_temp"+mkUniqueToken()+"_"; + // kwargs = map of special arguments + std::map kwargs; + kwargs["value"] = token("0", m); + kwargs["gas"] = parseLLL("(- (gas) 25)"); + std::vector args; + for (unsigned i = 0; i < node.args.size(); i++) { + if (node.args[i].val == "=" || node.args[i].val == "set") { + if (node.args[i].args.size() != 2) + err("Malformed set", m); + kwargs[node.args[i].args[0].val] = node.args[i].args[1]; + } + else args.push_back(node.args[i]); + } + if (args.size() < 2) err("Too few arguments for call!", m); + kwargs["to"] = args[0]; + kwargs["funid"] = args[1]; + std::vector inputs; + for (unsigned i = 2; i < args.size(); i++) { + inputs.push_back(args[i]); + } + std::vector with; + std::vector precompute; + std::vector post; + if (kwargs.count("data")) { + if (!kwargs.count("datasz")) err("Required param datasz", m); + // The strategy here is, we store the function ID byte at the index + // before the start of the byte, but then we store the value that was + // there before and reinstate it once the process is over + // store data: data array start + with.push_back(psn(prefix+"data", kwargs["data"])); + // store data: prior: data array - 32 + Node prior = astnode("sub", token(prefix+"data", m), token("32", m), m); + with.push_back(psn(prefix+"prior", prior)); + // store data: priormem: data array - 32 prior memory value + Node priormem = astnode("mload", token(prefix+"prior", m), m); + with.push_back(psn(prefix+"priormem", priormem)); + // post: reinstate prior mem at data array - 32 + post.push_back(astnode("mstore", + token(prefix+"prior", m), + token(prefix+"priormem", m), + m)); + // store data: datastart: data array - 1 + Node datastart = astnode("sub", + token(prefix+"data", m), + token("1", m), + m); + with.push_back(psn(prefix+"datastart", datastart)); + // push funid byte to datastart + precompute.push_back(astnode("mstore8", + token(prefix+"datastart", m), + kwargs["funid"], + m)); + // set data array start loc + kwargs["datain"] = token(prefix+"datastart", m); + kwargs["datainsz"] = astnode("add", + token("1", m), + astnode("mul", + token("32", m), + kwargs["datasz"], + m), + m); + } + else { + // Here, there is no data array, instead there are function arguments. + // This actually lets us be much more efficient with how we set things + // up. + // Pre-declare variables; relies on declared variables being sequential + precompute.push_back(astnode("declare", + token(prefix+"prebyte", m), + m)); + for (unsigned i = 0; i < inputs.size(); i++) { + precompute.push_back(astnode("declare", + token(prefix+unsignedToDecimal(i), m), + m)); + } + // Set up variables to store the function arguments, and store the + // function ID at the byte before the start + Node datastart = astnode("add", + token("31", m), + astnode("ref", + token(prefix+"prebyte", m), + m), + m); + precompute.push_back(astnode("mstore8", + datastart, + kwargs["funid"], + m)); + for (unsigned i = 0; i < inputs.size(); i++) { + precompute.push_back(astnode("set", + token(prefix+unsignedToDecimal(i), m), + inputs[i], + m)); + + } + kwargs["datain"] = datastart; + kwargs["datainsz"] = token(unsignedToDecimal(inputs.size()*32+1), m); + } + if (!kwargs.count("outsz")) { + kwargs["dataout"] = astnode("ref", token(prefix+"dataout", m), m); + kwargs["dataoutsz"] = token("32", node.metadata); + post.push_back(astnode("get", token(prefix+"dataout", m), m)); + } + else { + kwargs["dataout"] = kwargs["out"]; + kwargs["dataoutsz"] = kwargs["outsz"]; + post.push_back(astnode("ref", token(prefix+"dataout", m), m)); + } + // Set up main call + std::vector main; + for (unsigned i = 0; i < precompute.size(); i++) { + main.push_back(precompute[i]); + } + std::vector call; + call.push_back(kwargs["gas"]); + call.push_back(kwargs["to"]); + call.push_back(kwargs["value"]); + call.push_back(kwargs["datain"]); + call.push_back(kwargs["datainsz"]); + call.push_back(kwargs["dataout"]); + call.push_back(kwargs["dataoutsz"]); + main.push_back(astnode("pop", astnode("~"+op, call, m), m)); + for (unsigned i = 0; i < post.size(); i++) { + main.push_back(post[i]); + } + Node mainNode = astnode("seq", main, node.metadata); + // Add with variables + for (int i = with.size() - 1; i >= 0; i--) { + mainNode = astnode("with", + token(with[i].first, m), + with[i].second, + mainNode, + m); + } + return mainNode; +} + +// Preprocess input containing functions +// +// localExterns is a map of the form, eg, +// +// { x: { foo: 0, bar: 1, baz: 2 }, y: { qux: 0, foo: 1 } ... } +// +// Signifying that x.foo = 0, x.baz = 2, y.foo = 1, etc +// +// globalExterns is a one-level map, eg from above +// +// { foo: 1, bar: 1, baz: 2, qux: 0 } +// +// Note that globalExterns may be ambiguous +preprocessResult preprocess(Node inp) { + inp = inp.args[0]; + Metadata m = inp.metadata; + if (inp.val != "seq") { + std::vector args; + args.push_back(inp); + inp = astnode("seq", args, m); + } + std::vector empty; + Node init = astnode("seq", empty, m); + Node shared = astnode("seq", empty, m); + std::vector any; + std::vector functions; + preprocessAux out = preprocessAux(); + out.localExterns["self"] = std::map(); + int functionCount = 0; + int storageDataCount = 0; + for (unsigned i = 0; i < inp.args.size(); i++) { + Node obj = inp.args[i]; + // Functions + if (obj.val == "def") { + if (obj.args.size() == 0) + err("Empty def", m); + std::string funName = obj.args[0].val; + // Init, shared and any are special functions + if (funName == "init" || funName == "shared" || funName == "any") { + if (obj.args[0].args.size()) + err(funName+" cannot have arguments", m); + } + if (funName == "init") init = obj.args[1]; + else if (funName == "shared") shared = obj.args[1]; + else if (funName == "any") any.push_back(obj.args[1]); + else { + // Other functions + functions.push_back(obj); + out.localExterns["self"][obj.args[0].val] = functionCount; + functionCount++; + } + } + // Extern declarations + else if (obj.val == "extern") { + std::string externName = obj.args[0].args[0].val; + Node al = obj.args[0].args[1]; + if (!out.localExterns.count(externName)) + out.localExterns[externName] = std::map(); + for (unsigned i = 0; i < al.args.size(); i++) { + out.globalExterns[al.args[i].val] = i; + out.localExterns[externName][al.args[i].val] = i; + } + } + // Storage variables/structures + else if (obj.val == "data") { + out.storageVars = getStorageVars(out.storageVars, + obj.args[0], + "", + storageDataCount); + storageDataCount += 1; + } + else any.push_back(obj); + } + std::vector main; + if (shared.args.size()) main.push_back(shared); + if (init.args.size()) main.push_back(init); + + std::vector code; + if (shared.args.size()) code.push_back(shared); + for (unsigned i = 0; i < any.size(); i++) + code.push_back(any[i]); + for (unsigned i = 0; i < functions.size(); i++) + code.push_back(functions[i]); + main.push_back(astnode("~return", + token("0", m), + astnode("lll", + astnode("seq", code, m), + token("0", m), + m), + m)); + + + + return preprocessResult(astnode("seq", main, inp.metadata), out); +} + +// Transform ".(args...)" into +// (call args...) +Node dotTransform(Node node, preprocessAux aux) { + Metadata m = node.metadata; + Node pre = node.args[0].args[0]; + std::string post = node.args[0].args[1].val; + if (node.args[0].args[1].type == ASTNODE) + err("Function name must be static", m); + // Search for as=? and call=code keywords + std::string as = ""; + bool call_code = false; + for (unsigned i = 1; i < node.args.size(); i++) { + Node arg = node.args[i]; + if (arg.val == "=" || arg.val == "set") { + if (arg.args[0].val == "as") + as = arg.args[1].val; + if (arg.args[0].val == "call" && arg.args[1].val == "code") + call_code = true; + } + } + if (pre.val == "self") { + if (as.size()) err("Cannot use \"as\" when calling self!", m); + as = pre.val; + } + std::vector args; + args.push_back(pre); + // Determine the funId assuming the "as" keyword was used + if (as.size() > 0 && aux.localExterns.count(as)) { + if (!aux.localExterns[as].count(post)) + err("Invalid call: "+printSimple(pre)+"."+post, m); + std::string funid = unsignedToDecimal(aux.localExterns[as][post]); + args.push_back(token(funid, m)); + } + // Determine the funId otherwise + else if (!as.size()) { + if (!aux.globalExterns.count(post)) + err("Invalid call: "+printSimple(pre)+"."+post, m); + std::string key = unsignedToDecimal(aux.globalExterns[post]); + args.push_back(token(key, m)); + } + else err("Invalid call: "+printSimple(pre)+"."+post, m); + for (unsigned i = 1; i < node.args.size(); i++) + args.push_back(node.args[i]); + return astnode(call_code ? "call_code" : "call", args, m); } +// Transform an access of the form self.bob, self.users[5], etc into +// a storage access +// +// There exist two types of objects: finite objects, and infinite +// objects. Finite objects are packed optimally tightly into storage +// accesses; for example: +// +// data obj[100](a, b[2][4], c) +// +// obj[0].a -> 0 +// obj[0].b[0][0] -> 1 +// obj[0].b[1][3] -> 8 +// obj[45].c -> 459 +// +// Infinite objects are accessed by sha3([v1, v2, v3 ... ]), where +// the values are a list of array indices and keyword indices, for +// example: +// data obj[](a, b[2][4], c) +// data obj2[](a, b[][], c) +// +// obj[0].a -> sha3([0, 0, 0]) +// obj[5].b[1][3] -> sha3([0, 5, 1, 1, 3]) +// obj[45].c -> sha3([0, 45, 2]) +// obj2[0].a -> sha3([1, 0, 0]) +// obj2[5].b[1][3] -> sha3([1, 5, 1, 1, 3]) +// obj2[45].c -> sha3([1, 45, 2]) +Node storageTransform(Node node, preprocessAux aux, bool mapstyle=false) { + Metadata m = node.metadata; + // Get a list of all of the "access parameters" used in order + // eg. self.users[5].cow[4][m[2]][woof] -> + // [--self, --users, 5, --cow, 4, m[2], woof] + std::vector hlist = listfyStorageAccess(node); + // For infinite arrays, the terms array will just provide a list + // of indices. For finite arrays, it's a list of index*coefficient + std::vector terms; + std::string offset = "0"; + std::string prefix = ""; + std::string varPrefix = "_temp"+mkUniqueToken()+"_"; + int c = 0; + std::vector coefficients; + coefficients.push_back(""); + for (unsigned i = 1; i < hlist.size(); i++) { + // We pre-add the -- flag to parameter-like terms. For example, + // self.users[m] -> [--self, --users, m] + // self.users.m -> [--self, --users, --m] + if (hlist[i].val.substr(0, 2) == "--") { + prefix += hlist[i].val.substr(2) + "."; + std::string tempPrefix = prefix.substr(0, prefix.size()-1); + if (!aux.storageVars.offsets.count(tempPrefix)) + return node; + if (c < (signed)coefficients.size() - 1) + err("Too few array index lookups", m); + if (c > (signed)coefficients.size() - 1) + err("Too many array index lookups", m); + coefficients = aux.storageVars.coefficients[tempPrefix]; + // If the size of an object exceeds 2^176, we make it an infinite + // array + if (decimalGt(coefficients.back(), tt176) && !mapstyle) + return storageTransform(node, aux, true); + offset = decimalAdd(offset, aux.storageVars.offsets[tempPrefix]); + c = 0; + if (mapstyle) + terms.push_back(token(unsignedToDecimal( + aux.storageVars.indices[tempPrefix]))); + } + else if (mapstyle) { + terms.push_back(hlist[i]); + c += 1; + } + else { + if (c > (signed)coefficients.size() - 2) + err("Too many array index lookups", m); + terms.push_back( + astnode("mul", + hlist[i], + token(coefficients[coefficients.size() - 2 - c], m), + m)); + + c += 1; + } + } + if (aux.storageVars.nonfinal.count(prefix.substr(0, prefix.size()-1))) + err("Storage variable access not deep enough", m); + if (c < (signed)coefficients.size() - 1) { + err("Too few array index lookups", m); + } + if (c > (signed)coefficients.size() - 1) { + err("Too many array index lookups", m); + } + if (mapstyle) { + // We pre-declare variables, relying on the idea that sequentially + // declared variables are doing to appear beside each other in + // memory + std::vector main; + for (unsigned i = 0; i < terms.size(); i++) + main.push_back(astnode("declare", + token(varPrefix+unsignedToDecimal(i), m), + m)); + for (unsigned i = 0; i < terms.size(); i++) + main.push_back(astnode("set", + token(varPrefix+unsignedToDecimal(i), m), + terms[i], + m)); + main.push_back(astnode("ref", token(varPrefix+"0", m), m)); + Node sz = token(unsignedToDecimal(terms.size()), m); + return astnode("sload", + astnode("sha3", + astnode("seq", main, m), + sz, + m), + m); + } + else { + // We add up all the index*coefficients + Node out = token(offset, node.metadata); + for (unsigned i = 0; i < terms.size(); i++) { + std::vector temp; + temp.push_back(out); + temp.push_back(terms[i]); + out = astnode("add", temp, node.metadata); + } + std::vector temp2; + temp2.push_back(out); + return astnode("sload", temp2, node.metadata); + } +} + + // Recursively applies rewrite rules -Node apply_rules(Node node) { +Node apply_rules(preprocessResult pr) { + Node node = pr.first; // If the rewrite rules have not yet been parsed, parse them if (!nodeMacros.size()) { for (int i = 0; i < 9999; i++) { @@ -430,6 +991,32 @@ Node apply_rules(Node node) { nodeMacros.push_back(o); } } + // Assignment transformations + for (int i = 0; i < 9999; i++) { + if (setters[i][0] == "---END---") break; + if (node.val == setters[i][0]) { + node = astnode("=", + node.args[0], + astnode(setters[i][1], + node.args[0], + node.args[1], + node.metadata), + node.metadata); + } + } + // Special storage transformation + if (isNodeStorageVariable(node)) { + node = storageTransform(node, pr.second); + } + if (node.val == "=" && isNodeStorageVariable(node.args[0])) { + Node t = storageTransform(node.args[0], pr.second); + if (t.val == "sload") { + std::vector o; + o.push_back(t.args[0]); + o.push_back(node.args[1]); + node = astnode("sstore", o, node.metadata); + } + } // Main code unsigned pos = 0; std::string prefix = "_temp"+mkUniqueToken()+"_"; @@ -451,18 +1038,43 @@ Node apply_rules(Node node) { pos = 0; } } - // Array_lit special instruction + // Special transformations + if (node.val == "outer") { + pr = preprocess(node); + node = pr.first; + } if (node.val == "array_lit") node = array_lit_transform(node); + if (node.val == "fun" && node.args[0].val == ".") { + node = dotTransform(node, pr.second); + } + if (node.val == "call") + node = call_transform(node, "call"); + if (node.val == "call_code") + node = call_transform(node, "call_code"); if (node.type == ASTNODE) { unsigned i = 0; if (node.val == "set" || node.val == "ref" - || node.val == "get" || node.val == "with") { + || node.val == "get" || node.val == "with" + || node.val == "def" || node.val == "declare") { node.args[0].val = "'" + node.args[0].val; i = 1; } + if (node.val == "def") { + for (unsigned j = 0; j < node.args[0].args.size(); j++) { + if (node.args[0].args[j].val == ":") { + node.args[0].args[j].val = "kv"; + node.args[0].args[j].args[0].val = + "'" + node.args[0].args[j].args[0].val; + } + else { + node.args[0].args[j].val = "'" + node.args[0].args[j].val; + } + } + } for (; i < node.args.size(); i++) { - node.args[i] = apply_rules(node.args[i]); + node.args[i] = + apply_rules(preprocessResult(node.args[i], pr.second)); } } else if (node.type == TOKEN && !isNumberLike(node)) { @@ -479,6 +1091,7 @@ Node apply_rules(Node node) { return node; } +// Compile-time arithmetic calculations Node optimize(Node inp) { if (inp.type == TOKEN) { Node o = tryNumberize(inp); @@ -489,6 +1102,26 @@ Node optimize(Node inp) { for (unsigned i = 0; i < inp.args.size(); i++) { inp.args[i] = optimize(inp.args[i]); } + // Degenerate cases for add and mul + if (inp.args.size() == 2) { + if (inp.val == "add" && inp.args[0].type == TOKEN && + inp.args[0].val == "0") { + inp = inp.args[1]; + } + if (inp.val == "add" && inp.args[1].type == TOKEN && + inp.args[1].val == "0") { + inp = inp.args[0]; + } + if (inp.val == "mul" && inp.args[0].type == TOKEN && + inp.args[0].val == "1") { + inp = inp.args[1]; + } + if (inp.val == "mul" && inp.args[1].type == TOKEN && + inp.args[1].val == "1") { + inp = inp.args[0]; + } + } + // Arithmetic computation if (inp.args.size() == 2 && inp.args[0].type == TOKEN && inp.args[1].type == TOKEN) { @@ -519,6 +1152,9 @@ Node optimize(Node inp) { && decimalGt(tt255, inp.args[1].val)) { o = decimalMod(inp.args[0].val, inp.args[1].val); } + else if (inp.val == "exp") { + o = decimalModExp(inp.args[0].val, inp.args[1].val, tt256); + } if (o.length()) return token(o, inp.metadata); } return inp; @@ -529,10 +1165,11 @@ Node validate(Node inp) { int i = 0; while(valid[i][0] != "---END---") { if (inp.val == valid[i][0]) { - if (decimalGt(valid[i][1], unsignedToDecimal(inp.args.size()))) { + std::string sz = unsignedToDecimal(inp.args.size()); + if (decimalGt(valid[i][1], sz)) { err("Too few arguments for "+inp.val, inp.metadata); } - if (decimalGt(unsignedToDecimal(inp.args.size()), valid[i][2])) { + if (decimalGt(sz, valid[i][2])) { err("Too many arguments for "+inp.val, inp.metadata); } } @@ -543,18 +1180,31 @@ Node validate(Node inp) { return inp; } -Node preprocess(Node inp) { +Node postValidate(Node inp) { + if (inp.type == ASTNODE) { + if (inp.val == ".") + err("Invalid object member (ie. a foo.bar not mapped to anything)", + inp.metadata); + for (unsigned i = 0; i < inp.args.size(); i++) + postValidate(inp.args[i]); + } + return inp; +} + +Node outerWrap(Node inp) { std::vector args; args.push_back(inp); return astnode("outer", args, inp.metadata); } Node rewrite(Node inp) { - return optimize(apply_rules(validate(preprocess(inp)))); + return postValidate(optimize(apply_rules(preprocessResult( + validate(outerWrap(inp)), preprocessAux())))); } Node rewriteChunk(Node inp) { - return optimize(apply_rules(validate(inp))); + return postValidate(optimize(apply_rules(preprocessResult( + validate(inp), preprocessAux())))); } using namespace std; diff --git a/libserpent/tokenize.cpp b/libserpent/tokenize.cpp index a5d3f1c5b..c6a211593 100644 --- a/libserpent/tokenize.cpp +++ b/libserpent/tokenize.cpp @@ -13,7 +13,7 @@ int chartype(char c) { if (c >= '0' && c <= '9') return ALPHANUM; else if (c >= 'a' && c <= 'z') return ALPHANUM; else if (c >= 'A' && c <= 'Z') return ALPHANUM; - else if (std::string("~._$").find(c) != std::string::npos) return ALPHANUM; + else if (std::string("~_$").find(c) != std::string::npos) return ALPHANUM; else if (c == '\t' || c == ' ' || c == '\n') return SPACE; else if (std::string("()[]{}").find(c) != std::string::npos) return BRACK; else if (c == '"') return DQUOTE; diff --git a/libserpent/util.cpp b/libserpent/util.cpp index cc1394a21..39eeb20be 100644 --- a/libserpent/util.cpp +++ b/libserpent/util.cpp @@ -26,6 +26,28 @@ Node astnode(std::string val, std::vector args, Metadata met) { return o; } +//AST node constructors for a specific number of children +Node astnode(std::string val, Node a, Metadata met) { + std::vector args; + args.push_back(a); + return astnode(val, args, met); +} + +Node astnode(std::string val, Node a, Node b, Metadata met) { + std::vector args; + args.push_back(a); + args.push_back(b); + return astnode(val, args, met); +} + +Node astnode(std::string val, Node a, Node b, Node c, Metadata met) { + std::vector args; + args.push_back(a); + args.push_back(b); + args.push_back(c); + return astnode(val, args, met); +} + // Print token list std::string printTokens(std::vector tokens) { std::string s = ""; diff --git a/libserpent/util.h b/libserpent/util.h index 4fb19bb98..c0a2e9324 100644 --- a/libserpent/util.h +++ b/libserpent/util.h @@ -22,7 +22,8 @@ const int TOKEN = 0, COLON = 11, UNARY_OP = 12, BINARY_OP = 13, - COMPOUND = 14; + COMPOUND = 14, + TOKEN_SPLITTER = 15; // Stores metadata about each token class Metadata { @@ -48,6 +49,9 @@ struct Node { }; Node token(std::string val, Metadata met=Metadata()); Node astnode(std::string val, std::vector args, Metadata met=Metadata()); +Node astnode(std::string val, Node a, Metadata met=Metadata()); +Node astnode(std::string val, Node a, Node b, Metadata met=Metadata()); +Node astnode(std::string val, Node a, Node b, Node c, Metadata met=Metadata()); // Number of tokens in a tree int treeSize(Node prog);