diff --git a/libserpent/bignum.h b/libserpent/bignum.h index 599365b6c..99571acd2 100644 --- a/libserpent/bignum.h +++ b/libserpent/bignum.h @@ -35,4 +35,7 @@ bool decimalGt(std::string a, std::string b, bool eqAllowed=false); unsigned decimalToUnsigned(std::string a); +#define utd unsignedToDecimal +#define dtu decimalToUnsigned + #endif diff --git a/libserpent/compiler.cpp b/libserpent/compiler.cpp index 30628fbc9..a3e5b1c60 100644 --- a/libserpent/compiler.cpp +++ b/libserpent/compiler.cpp @@ -6,6 +6,7 @@ #include "bignum.h" #include "opcodes.h" +// Auxiliary data that is gathered while compiling struct programAux { std::map vars; int nextVarMem; @@ -13,15 +14,19 @@ struct programAux { bool calldataUsed; int step; int labelLength; - int functionCount; }; +// Auxiliary data that gets passed down vertically +// but not back up struct programVerticalAux { int height; + std::string innerScopeName; std::map dupvars; std::map funvars; + std::vector scopes; }; +// Compilation result struct programData { programAux aux; Node code; @@ -34,7 +39,6 @@ programAux Aux() { o.calldataUsed = false; o.step = 0; o.nextVarMem = 32; - o.functionCount = 0; return o; } @@ -43,6 +47,7 @@ programVerticalAux verticalAux() { o.height = 0; o.dupvars = std::map(); o.funvars = std::map(); + o.scopes = std::vector(); return o; } @@ -72,29 +77,58 @@ Node popwrap(Node node) { return multiToken(nodelist, 2, node.metadata); } +// Grabs variables +mss getVariables(Node node, mss cur=mss()) { + Metadata m = node.metadata; + // Tokens don't contain any variables + if (node.type == TOKEN) + return cur; + // Don't descend into call fragments + else if (node.val == "lll") + return getVariables(node.args[1], cur); + // At global scope get/set/ref also declare + else if (node.val == "get" || node.val == "set" || node.val == "ref") { + if (node.args[0].type != TOKEN) + err("Variable name must be simple token," + " not complex expression! 
" + printSimple(node.args[0]), m); + if (!cur.count(node.args[0].val)) { + cur[node.args[0].val] = utd(cur.size() * 32 + 32); + //std::cerr << node.args[0].val << " " << cur[node.args[0].val] << "\n"; + } + } + // Recursively process children + for (unsigned i = 0; i < node.args.size(); i++) { + cur = getVariables(node.args[i], cur); + } + return cur; +} + // Turns LLL tree into tree of code fragments programData opcodeify(Node node, programAux aux=Aux(), programVerticalAux vaux=verticalAux()) { std::string symb = "_"+mkUniqueToken(); Metadata m = node.metadata; + // Get variables + if (!aux.vars.size()) { + aux.vars = getVariables(node); + aux.nextVarMem = aux.vars.size() * 32 + 32; + } // Numbers if (node.type == TOKEN) { return pd(aux, nodeToNumeric(node), 1); } - else if (node.val == "ref" || node.val == "get" || - node.val == "set" || node.val == "declare") { + else if (node.val == "ref" || node.val == "get" || node.val == "set") { std::string varname = node.args[0].val; - if (!aux.vars.count(varname)) { - aux.vars[varname] = unsignedToDecimal(aux.nextVarMem); - aux.nextVarMem += 32; - } - if (varname == "'msg.data") aux.calldataUsed = true; + // Determine reference to variable + Node varNode = tkn(aux.vars[varname], m); + //std::cerr << varname << " " << printSimple(varNode) << "\n"; // Set variable if (node.val == "set") { programData sub = opcodeify(node.args[1], aux, vaux); if (!sub.outs) err("Value to set variable must have nonzero arity!", m); + // What if we are setting a stack variable? 
if (vaux.dupvars.count(node.args[0].val)) { int h = vaux.height - vaux.dupvars[node.args[0].val]; if (h > 16) err("Too deep for stack variable (max 16)", m); @@ -105,149 +139,65 @@ programData opcodeify(Node node, }; return pd(sub.aux, multiToken(nodelist, 3, m), 0); } - Node nodelist[] = { - sub.code, - token(sub.aux.vars[varname], m), - token("MSTORE", m), - }; - return pd(sub.aux, multiToken(nodelist, 3, m), 0); + // Setting a memory variable + else { + Node nodelist[] = { + sub.code, + varNode, + token("MSTORE", m), + }; + return pd(sub.aux, multiToken(nodelist, 3, m), 0); + } } // Get variable else if (node.val == "get") { - if (vaux.dupvars.count(node.args[0].val)) { + // Getting a stack variable + if (vaux.dupvars.count(node.args[0].val)) { int h = vaux.height - vaux.dupvars[node.args[0].val]; if (h > 16) err("Too deep for stack variable (max 16)", m); return pd(aux, token("DUP"+unsignedToDecimal(h)), 1); } - Node nodelist[] = - { token(aux.vars[varname], m), token("MLOAD", m) }; - return pd(aux, multiToken(nodelist, 2, m), 1); + // Getting a memory variable + else { + Node nodelist[] = + { varNode, token("MLOAD", m) }; + return pd(aux, multiToken(nodelist, 2, m), 1); + } } // Refer variable else if (node.val == "ref") { if (vaux.dupvars.count(node.args[0].val)) err("Cannot ref stack variable!", m); - return pd(aux, token(aux.vars[varname], m), 1); - } - // Declare variable - else { - return pd(aux, multiToken(nullptr, 0, m), 0); + return pd(aux, varNode, 1); } } - // Define functions (TODO: eventually move to rewriter.cpp, keep - // compiler pure LLL) - if (node.val == "def") { - std::vector varNames; - std::vector varSizes; - bool useLt32 = false; - int totalSz = 0; - if (node.args.size() != 2) - err("Malformed def!", m); - // Collect the list of variable names and variable byte counts - for (unsigned i = 0; i < node.args[0].args.size(); i++) { - if (node.args[0].args[i].val == "kv") { - if (node.args[0].args[i].args.size() != 2) - err("Malformed def!", 
m); - varNames.push_back(node.args[0].args[i].args[0].val); - varSizes.push_back( - decimalToUnsigned(node.args[0].args[i].args[1].val)); - if (varSizes.back() > 32) - err("Max argument width: 32 bytes", m); - useLt32 = true; + // Comments do nothing + else if (node.val == "comment") { + Node nodelist[] = { }; + return pd(aux, multiToken(nodelist, 0, m), 0); + } + // Custom operation sequence + // eg. (ops bytez id msize swap1 msize add 0 swap1 mstore) == alloc + if (node.val == "ops") { + std::vector subs2; + int depth = 0; + for (unsigned i = 0; i < node.args.size(); i++) { + std::string op = upperCase(node.args[i].val); + if (node.args[i].type == ASTNODE || opinputs(op) == -1) { + programVerticalAux vaux2 = vaux; + vaux2.height = vaux.height - i - 1 + node.args.size(); + programData sub = opcodeify(node.args[i], aux, vaux2); + aux = sub.aux; + depth += sub.outs; + subs2.push_back(sub.code); } else { - varNames.push_back(node.args[0].args[i].val); - varSizes.push_back(32); + subs2.push_back(token(op, m)); + depth += opoutputs(op) - opinputs(op); } - aux.vars[varNames.back()] = unsignedToDecimal(aux.nextVarMem + 32 * i); - totalSz += varSizes.back(); } - int functionCount = aux.functionCount; - int nextVarMem = aux.nextVarMem; - aux.nextVarMem += 32 * varNames.size(); - aux.functionCount += 1; - programData inner; - // If we're only using 32-byte variables, then great, just copy - // over the calldata! 
- if (!useLt32) { - programData sub = opcodeify(node.args[1], aux, vaux); - Node nodelist[] = { - token(unsignedToDecimal(totalSz), m), - token("1", m), - token(unsignedToDecimal(nextVarMem), m), - token("CALLDATACOPY", m), - sub.code - }; - inner = pd(sub.aux, multiToken(nodelist, 5, m), 0); - } - else { - std::vector innerList; - int cum = 1; - for (unsigned i = 0; i < varNames.size();) { - // If we get a series of 32-byte values, we calldatacopy them - if (varSizes[i] == 32) { - unsigned until = i+1; - while (until < varNames.size() && varSizes[until] == 32) - until += 1; - innerList.push_back(token(unsignedToDecimal((until - i) * 32), m)); - innerList.push_back(token(unsignedToDecimal(cum), m)); - innerList.push_back(token(unsignedToDecimal(nextVarMem + i * 32), m)); - innerList.push_back(token("CALLDATACOPY", m)); - cum += (until - i) * 32; - i = until; - } - // Otherwise, we do a clever trick to extract the value - else { - innerList.push_back(token(unsignedToDecimal(32 - varSizes[i]), m)); - innerList.push_back(token("256", m)); - innerList.push_back(token("EXP", m)); - innerList.push_back(token(unsignedToDecimal(cum), m)); - innerList.push_back(token("CALLDATALOAD", m)); - innerList.push_back(token("DIV", m)); - innerList.push_back(token(unsignedToDecimal(nextVarMem + i * 32), m)); - innerList.push_back(token("MSTORE", m)); - cum += varSizes[i]; - i += 1; - } - } - // If caller == origin, then it's from a tx, so unpack, otherwise - // plain copy - programData sub = opcodeify(node.args[1], aux, vaux); - Node ilnode = astnode("", innerList, m); - Node nodelist[] = { - token(unsignedToDecimal(32 * varNames.size()), m), - token("1", m), - token(unsignedToDecimal(nextVarMem), m), - token("CALLDATACOPY", m), - token("CALLER", m), - token("ORIGIN", m), - token("EQ", m), - token("ISZERO", m), - token("$maincode"+symb, m), - token("JUMPI", m), - ilnode, - token("~maincode"+symb, m), - token("JUMPDEST", m), - sub.code - }; - inner = pd(sub.aux, multiToken(nodelist, 
14, m), 0); - } - // Check if the function call byte is the same - Node nodelist2[] = { - token("0", m), - token("CALLDATALOAD", m), - token("0", m), - token("BYTE", m), - token(unsignedToDecimal(functionCount), m), - token("EQ", m), - token("ISZERO", m), - token("$endcode"+symb, m), - token("JUMPI", m), - inner.code, - token("~endcode"+symb, m), - token("JUMPDEST", m), - }; - return pd(inner.aux, multiToken(nodelist2, 12, m), 0); + if (depth < 0 || depth > 1) err("Stack depth mismatch", m); + return pd(aux, astnode("_", subs2, m), 0); } // Code blocks if (node.val == "lll" && node.args.size() == 2) { @@ -372,49 +322,14 @@ programData opcodeify(Node node, }; return pd(aux, multiToken(nodelist, 8, m), 1); } - // Array literals - else if (node.val == "array_lit") { - aux.allocUsed = true; - std::vector nodes; - if (!node.args.size()) { - nodes.push_back(token("MSIZE", m)); - return pd(aux, astnode("_", nodes, m)); - } - nodes.push_back(token("MSIZE", m)); - nodes.push_back(token("0", m)); - nodes.push_back(token("MSIZE", m)); - nodes.push_back(token(unsignedToDecimal(node.args.size() * 32 - 1), m)); - nodes.push_back(token("ADD", m)); - nodes.push_back(token("MSTORE8", m)); - for (unsigned i = 0; i < node.args.size(); i++) { - Metadata m2 = node.args[i].metadata; - nodes.push_back(token("DUP1", m2)); - programVerticalAux vaux2 = vaux; - vaux2.height += 2; - programData sub = opcodeify(node.args[i], aux, vaux2); - if (!sub.outs) - err("Array_lit item " + unsignedToDecimal(i) + " has zero arity", m2); - aux = sub.aux; - nodes.push_back(sub.code); - nodes.push_back(token("SWAP1", m2)); - if (i > 0) { - nodes.push_back(token(unsignedToDecimal(i * 32), m2)); - nodes.push_back(token("ADD", m2)); - } - nodes.push_back(token("MSTORE", m2)); - } - return pd(aux, astnode("_", nodes, m), 1); - } // All other functions/operators else { std::vector subs2; int depth = opinputs(upperCase(node.val)); - if (node.val != "debug") { - if (depth == -1) - err("Not a function or opcode: 
"+node.val, m); - if ((int)node.args.size() != depth) - err("Invalid arity for "+node.val, m); - } + if (depth == -1) + err("Not a function or opcode: "+node.val, m); + if ((int)node.args.size() != depth) + err("Invalid arity for "+node.val, m); for (int i = node.args.size() - 1; i >= 0; i--) { programVerticalAux vaux2 = vaux; vaux2.height = vaux.height - i - 1 + node.args.size(); @@ -424,13 +339,8 @@ programData opcodeify(Node node, err("Input "+unsignedToDecimal(i)+" has arity 0", sub.code.metadata); subs2.push_back(sub.code); } - if (node.val == "debug") { - subs2.push_back(token("DUP"+unsignedToDecimal(node.args.size()), m)); - for (int i = 0; i <= (int)node.args.size(); i++) - subs2.push_back(token("POP", m)); - } - else subs2.push_back(token(upperCase(node.val), m)); - int outdepth = node.val == "debug" ? 0 : opoutputs(upperCase(node.val)); + subs2.push_back(token(upperCase(node.val), m)); + int outdepth = opoutputs(upperCase(node.val)); return pd(aux, astnode("_", subs2, m), outdepth); } } @@ -449,15 +359,6 @@ Node finalize(programData c) { }; bottom.push_back(multiToken(nodelist, 3, m)); } - // If msg.data is being used as an array, then we need to copy it - if (c.aux.calldataUsed) { - Node nodelist[] = { - token("MSIZE", m), token("CALLDATASIZE", m), token("0", m), - token("MSIZE", m), token("CALLDATACOPY", m), - token(c.aux.vars["'msg.data"], m), token("MSTORE", m) - }; - bottom.push_back(multiToken(nodelist, 7, m)); - } // The actual code bottom.push_back(c.code); return astnode("_", bottom, m); diff --git a/libserpent/opcodes.h b/libserpent/opcodes.h index a7bcc1af9..41423c169 100644 --- a/libserpent/opcodes.h +++ b/libserpent/opcodes.h @@ -5,6 +5,7 @@ #include #include #include +#include "util.h" class Mapping { public: @@ -20,119 +21,25 @@ class Mapping { int out; }; -Mapping mapping[] = { - Mapping("STOP", 0x00, 0, 0), - Mapping("ADD", 0x01, 2, 1), - Mapping("MUL", 0x02, 2, 1), - Mapping("SUB", 0x03, 2, 1), - Mapping("DIV", 0x04, 2, 1), - 
Mapping("SDIV", 0x05, 2, 1), - Mapping("MOD", 0x06, 2, 1), - Mapping("SMOD", 0x07, 2, 1), - Mapping("ADDMOD", 0x08, 3, 1), - Mapping("MULMOD", 0x09, 3, 1), - Mapping("EXP", 0x0a, 2, 1), - Mapping("SIGNEXTEND", 0x0b, 2, 1), - Mapping("LT", 0x10, 2, 1), - Mapping("GT", 0x11, 2, 1), - Mapping("SLT", 0x12, 2, 1), - Mapping("SGT", 0x13, 2, 1), - Mapping("EQ", 0x14, 2, 1), - Mapping("ISZERO", 0x15, 1, 1), - Mapping("AND", 0x16, 2, 1), - Mapping("OR", 0x17, 2, 1), - Mapping("XOR", 0x18, 2, 1), - Mapping("NOT", 0x19, 1, 1), - Mapping("BYTE", 0x1a, 2, 1), - Mapping("ADDMOD", 0x14, 3, 1), - Mapping("MULMOD", 0x15, 3, 1), - Mapping("SIGNEXTEND", 0x16, 2, 1), - Mapping("SHA3", 0x20, 2, 1), - Mapping("ADDRESS", 0x30, 0, 1), - Mapping("BALANCE", 0x31, 1, 1), - Mapping("ORIGIN", 0x32, 0, 1), - Mapping("CALLER", 0x33, 0, 1), - Mapping("CALLVALUE", 0x34, 0, 1), - Mapping("CALLDATALOAD", 0x35, 1, 1), - Mapping("CALLDATASIZE", 0x36, 0, 1), - Mapping("CALLDATACOPY", 0x37, 3, 1), - Mapping("CODESIZE", 0x38, 0, 1), - Mapping("CODECOPY", 0x39, 3, 1), - Mapping("GASPRICE", 0x3a, 0, 1), - Mapping("PREVHASH", 0x40, 0, 1), - Mapping("COINBASE", 0x41, 0, 1), - Mapping("TIMESTAMP", 0x42, 0, 1), - Mapping("NUMBER", 0x43, 0, 1), - Mapping("DIFFICULTY", 0x44, 0, 1), - Mapping("GASLIMIT", 0x45, 0, 1), - Mapping("POP", 0x50, 1, 0), - Mapping("MLOAD", 0x51, 1, 1), - Mapping("MSTORE", 0x52, 2, 0), - Mapping("MSTORE8", 0x53, 2, 0), - Mapping("SLOAD", 0x54, 1, 1), - Mapping("SSTORE", 0x55, 2, 0), - Mapping("JUMP", 0x56, 1, 0), - Mapping("JUMPI", 0x57, 2, 0), - Mapping("PC", 0x58, 0, 1), - Mapping("MSIZE", 0x59, 0, 1), - Mapping("GAS", 0x5a, 0, 1), - Mapping("JUMPDEST", 0x5b, 0, 0), - Mapping("LOG0", 0xa0, 2, 0), - Mapping("LOG1", 0xa1, 3, 0), - Mapping("LOG2", 0xa2, 4, 0), - Mapping("LOG3", 0xa3, 5, 0), - Mapping("LOG4", 0xa4, 6, 0), - Mapping("CREATE", 0xf0, 3, 1), - Mapping("CALL", 0xf1, 7, 1), - Mapping("RETURN", 0xf2, 2, 0), - Mapping("CALL_CODE", 0xf3, 7, 1), - Mapping("SUICIDE", 0xff, 1, 0), - 
Mapping("---END---", 0x00, 0, 0), -}; +extern Mapping mapping[]; -std::map > opcodes; -std::map reverseOpcodes; +extern std::map > opcodes; +extern std::map reverseOpcodes; -// Fetches everything EXCEPT PUSH1..32 -std::pair > _opdata(std::string ops, int opi) { - if (!opcodes.size()) { - int i = 0; - while (mapping[i].op != "---END---") { - Mapping mi = mapping[i]; - opcodes[mi.op] = triple(mi.opcode, mi.in, mi.out); - i++; - } - for (i = 1; i <= 16; i++) { - opcodes["DUP"+unsignedToDecimal(i)] = triple(0x7f + i, i, i+1); - opcodes["SWAP"+unsignedToDecimal(i)] = triple(0x8f + i, i+1, i+1); - } - for (std::map >::iterator it=opcodes.begin(); - it != opcodes.end(); - it++) { - reverseOpcodes[(*it).second[0]] = (*it).first; - } - } - std::string op; - std::vector opdata; - op = reverseOpcodes.count(opi) ? reverseOpcodes[opi] : ""; - opdata = opcodes.count(ops) ? opcodes[ops] : triple(-1, -1, -1); - return std::pair >(op, opdata); -} +std::pair > _opdata(std::string ops, int opi); + +int opcode(std::string op); + +int opinputs(std::string op); + +int opoutputs(std::string op); -int opcode(std::string op) { - return _opdata(op, -1).second[0]; -} +std::string op(int opcode); -int opinputs(std::string op) { - return _opdata(op, -1).second[1]; -} +extern std::string lllSpecials[][3]; -int opoutputs(std::string op) { - return _opdata(op, -1).second[2]; -} +extern std::map > lllMap; -std::string op(int opcode) { - return _opdata("", opcode).first; -} +bool isValidLLLFunc(std::string f, int argc); #endif diff --git a/libserpent/parser.cpp b/libserpent/parser.cpp index 4ceb1d12d..2b9d73702 100644 --- a/libserpent/parser.cpp +++ b/libserpent/parser.cpp @@ -12,17 +12,15 @@ int precedence(Node tok) { if (v == ".") return -1; else if (v == "!" 
|| v == "not") return 1; else if (v=="^" || v == "**") return 2; - else if (v=="*" || v=="/" || v=="@/" || v=="%" || v=="@%") return 3; + else if (v=="*" || v=="/" || v=="%") return 3; else if (v=="+" || v=="-") return 4; else if (v=="<" || v==">" || v=="<=" || v==">=") return 5; - else if (v=="@<" || v=="@>" || v=="@<=" || v=="@>=") return 5; else if (v=="&" || v=="|" || v=="xor" || v=="==" || v == "!=") return 6; else if (v=="&&" || v=="and") return 7; else if (v=="||" || v=="or") return 8; - else if (v==":") return 9; else if (v=="=") return 10; else if (v=="+=" || v=="-=" || v=="*=" || v=="/=" || v=="%=") return 10; - else if (v=="@/=" || v=="@%=") return 10; + else if (v==":" || v == "::") return 11; else return 0; } @@ -223,8 +221,15 @@ Node treefy(std::vector stream) { filename = filename.substr(1, filename.length() - 2); if (!exists(root + filename)) err("File does not exist: "+root + filename, tok.metadata); - oq.back().args.pop_back(); - oq.back().args.push_back(parseSerpent(root + filename)); + if (v == "inset") { + oq.pop_back(); + oq.push_back(parseSerpent(root + filename)); + } + else { + oq.back().args.pop_back(); + oq.back().args.push_back( + asn("outer", parseSerpent(root + filename), tok.metadata)); + } } //Useful for debugging //for (int i = 0; i < oq.size(); i++) { @@ -237,7 +242,7 @@ Node treefy(std::vector stream) { err("Output blank", Metadata()); } else if (oq.size() > 1) { - err("Multiple expressions or unclosed bracket", oq[1].metadata); + return asn("multi", oq, oq[0].metadata); } return oq[0]; @@ -262,15 +267,9 @@ int spaceCount(std::string s) { bool bodied(std::string tok) { return tok == "if" || tok == "elif" || tok == "while" || tok == "with" || tok == "def" || tok == "extern" - || tok == "data"; -} - -// Is this a command that takes an argument as a child block? 
-bool childBlocked(std::string tok) { - return tok == "if" || tok == "elif" || tok == "else" - || tok == "code" || tok == "shared" || tok == "init" - || tok == "while" || tok == "repeat" || tok == "for" - || tok == "with" || tok == "def"; + || tok == "data" || tok == "assert" || tok == "return" + || tok == "fun" || tok == "scope" || tok == "macro" + || tok == "type"; } // Are the two commands meant to continue each other? @@ -278,10 +277,7 @@ bool bodiedContinued(std::string prev, std::string tok) { return (prev == "if" && tok == "elif") || (prev == "elif" && tok == "else") || (prev == "elif" && tok == "elif") - || (prev == "if" && tok == "else") - || (prev == "init" && tok == "code") - || (prev == "shared" && tok == "code") - || (prev == "shared" && tok == "init"); + || (prev == "if" && tok == "else"); } // Is a line of code empty? @@ -310,16 +306,17 @@ Node parseLines(std::vector lines, Metadata metadata, int sp) { } // Tokenize current line std::vector tokens = tokenize(main.substr(sp), metadata); - // Remove extraneous tokens, including if / elif + // Remove comments std::vector tokens2; for (unsigned j = 0; j < tokens.size(); j++) { if (tokens[j].val == "#" || tokens[j].val == "//") break; - if (j >= 1 || !bodied(tokens[j].val)) { - tokens2.push_back(tokens[j]); - } + tokens2.push_back(tokens[j]); } - if (tokens2.size() > 0 && tokens2.back().val == ":") + bool expectingChildBlock = false; + if (tokens2.size() > 0 && tokens2.back().val == ":") { tokens2.pop_back(); + expectingChildBlock = true; + } // Parse current line Node out = parseSerpentTokenStream(tokens2); // Parse child block @@ -343,14 +340,8 @@ Node parseLines(std::vector lines, Metadata metadata, int sp) { for (unsigned i = 0; i < childBlock.size(); i++) { if (childBlock[i].length() > 0) { cbe = false; break; } } - // Bring back if / elif into AST - if (bodied(tokens[0].val)) { - std::vector args; - args.push_back(out); - out = astnode(tokens[0].val, args, out.metadata); - } // Add child block to 
AST - if (childBlocked(tokens[0].val)) { + if (expectingChildBlock) { if (cbe) err("Expected indented child block!", out.metadata); out.type = ASTNODE; @@ -360,6 +351,37 @@ Node parseLines(std::vector lines, Metadata metadata, int sp) { } else if (!cbe) err("Did not expect indented child block!", out.metadata); + else if (out.args.size() && out.args[out.args.size() - 1].val == ":") { + Node n = out.args[out.args.size() - 1]; + out.args.pop_back(); + out.args.push_back(n.args[0]); + out.args.push_back(n.args[1]); + } + // Bring back if / elif into AST + if (bodied(tokens[0].val)) { + if (out.val != "multi") { + // token not being used in bodied form + } + else if (out.args[0].val == "id") + out = astnode(tokens[0].val, out.args[1].args, out.metadata); + else if (out.args[0].type == TOKEN) { + std::vector out2; + for (unsigned i = 1; i < out.args.size(); i++) + out2.push_back(out.args[i]); + out = astnode(tokens[0].val, out2, out.metadata); + } + else + out = astnode("fun", out.args, out.metadata); + } + // Multi not supported + if (out.val == "multi") + err("Multiple expressions or unclosed bracket", out.metadata); + // Convert top-level colon expressions into non-colon expressions; + // makes if statements and the like equivalent indented or not + //if (out.val == ":" && out.args[0].type == TOKEN) + // out = asn(out.args[0].val, out.args[1], out.metadata); + //if (bodied(tokens[0].val) && out.args[0].val == ":") + // out = asn(tokens[0].val, out.args[0].args); if (o.size() == 0 || o.back().type == TOKEN) { o.push_back(out); continue; diff --git a/libserpent/rewriter.cpp b/libserpent/rewriter.cpp index 443457acf..294c9a0b3 100644 --- a/libserpent/rewriter.cpp +++ b/libserpent/rewriter.cpp @@ -2,37 +2,25 @@ #include #include #include -#include #include "util.h" #include "lllparser.h" #include "bignum.h" +#include "optimize.h" +#include "rewriteutils.h" +#include "preprocess.h" +#include "functions.h" +#include "opcodes.h" -std::string valid[][3] = { - { "if", "2", 
"3" }, - { "unless", "2", "2" }, - { "while", "2", "2" }, - { "until", "2", "2" }, - { "alloc", "1", "1" }, - { "array", "1", "1" }, - { "call", "2", tt256 }, - { "call_code", "2", tt256 }, - { "create", "1", "4" }, - { "getch", "2", "2" }, - { "setch", "3", "3" }, - { "sha3", "1", "2" }, - { "return", "1", "2" }, - { "inset", "1", "1" }, - { "min", "2", "2" }, - { "max", "2", "2" }, - { "array_lit", "0", tt256 }, - { "seq", "0", tt256 }, - { "log", "1", "6" }, - { "outer", "1", "1" }, - { "set", "2", "2" }, - { "---END---", "", "" } //Keep this line at the end of the list -}; - +// Rewrite rules std::string macros[][2] = { + { + "(seq $x)", + "$x" + }, + { + "(seq (seq) $x)", + "$x" + }, { "(+= $a $b)", "(set $a (+ $a $b))" @@ -58,24 +46,28 @@ std::string macros[][2] = { "(set $a (^ $a $b))" }, { - "(@/= $a $b)", - "(set $a (@/ $a $b))" + "(!= $a $b)", + "(iszero (eq $a $b))" }, { - "(@%= $a $b)", - "(set $a (@% $a $b))" + "(assert $x)", + "(unless $x (stop))" }, { - "(!= $a $b)", - "(iszero (eq $a $b))" + "(min $a $b)", + "(with $1 $a (with $2 $b (if (lt $1 $2) $1 $2)))" + }, + { + "(max $a $b)", + "(with $1 $a (with $2 $b (if (lt $1 $2) $2 $1)))" }, { - "(min a b)", - "(with $1 a (with $2 b (if (lt $1 $2) $1 $2)))" + "(smin $a $b)", + "(with $1 $a (with $2 $b (if (slt $1 $2) $1 $2)))" }, { - "(max a b)", - "(with $1 a (with $2 b (if (lt $1 $2) $2 $1)))" + "(smax $a $b)", + "(with $1 $a (with $2 $b (if (slt $1 $2) $2 $1)))" }, { "(if $cond $do (else $else))", @@ -85,10 +77,6 @@ std::string macros[][2] = { "(code $code)", "$code" }, - { - "(access (. msg data) $ind)", - "(calldataload (mul 32 $ind))" - }, { "(slice $arr $pos)", "(add $arr (mul 32 $pos))", @@ -125,13 +113,17 @@ std::string macros[][2] = { "(set (access (. 
self storage) $ind) $val)", "(sstore $ind $val)" }, + { + "(set (sload $ind) $val)", + "(sstore $ind $val)" + }, { "(set (access $var $ind) $val)", "(mstore (add $var (mul 32 $ind)) $val)" }, { "(getch $var $ind)", - "(mod (mload (add $var $ind)) 256)" + "(mod (mload (sub (add $var $ind) 31)) 256)" }, { "(setch $var $ind $val)", @@ -149,6 +141,10 @@ std::string macros[][2] = { "(sha3 $x)", "(seq (set $1 $x) (~sha3 (ref $1) 32))" }, + { + "(sha3 $mstart (= chars $msize))", + "(~sha3 $mstart $msize)" + }, { "(sha3 $mstart $msize)", "(~sha3 $mstart (mul 32 $msize))" @@ -161,6 +157,10 @@ std::string macros[][2] = { "(return $x)", "(seq (set $1 $x) (~return (ref $1) 32))" }, + { + "(return $mstart (= chars $msize))", + "(~return $mstart $msize)" + }, { "(return $start $len)", "(~return $start (mul 32 $len))" @@ -171,7 +171,7 @@ std::string macros[][2] = { }, { "(|| $x $y)", - "(with $1 $x (if (get $1) (get $1) $y))" + "(with $1 $x (if $1 $1 $y))" }, { "(>= $x $y)", @@ -181,45 +181,41 @@ std::string macros[][2] = { "(<= $x $y)", "(iszero (sgt $x $y))" }, - { - "(@>= $x $y)", - "(iszero (lt $x $y))" - }, - { - "(@<= $x $y)", - "(iszero (gt $x $y))" - }, { "(create $code)", "(create 0 $code)" }, { "(create $endowment $code)", - "(with $1 (msize) (create $endowment (get $1) (lll (outer $code) (msize))))" + "(with $1 (msize) (create $endowment (get $1) (lll $code (msize))))" }, { "(sha256 $x)", - "(seq (set $1 $x) (pop (~call 101 2 0 (ref $1) 32 (ref $2) 32)) (get $2))" + "(with $1 (alloc 64) (seq (mstore (add (get $1) 32) $x) (pop (~call 101 2 0 (add (get $1) 32) 32 (get $1) 32)) (mload (get $1))))" + }, + { + "(sha256 $arr (= chars $sz))", + "(with $1 (alloc 32) (seq (pop (~call 101 2 0 $arr $sz (get $1) 32)) (mload (get $1))))" }, { "(sha256 $arr $sz)", - "(seq (pop (~call 101 2 0 $arr (mul 32 $sz) (ref $2) 32)) (get $2))" + "(with $1 (alloc 32) (seq (pop (~call 101 2 0 $arr (mul 32 $sz) (get $1) 32)) (mload (get $1))))" }, { "(ripemd160 $x)", - "(seq (set $1 $x) (pop 
(~call 101 3 0 (ref $1) 32 (ref $2) 32)) (get $2))" + "(with $1 (alloc 64) (seq (mstore (add (get $1) 32) $x) (pop (~call 101 3 0 (add (get $1) 32) 32 (get $1) 32)) (mload (get $1))))" }, { - "(ripemd160 $arr $sz)", - "(seq (pop (~call 101 3 0 $arr (mul 32 $sz) (ref $2) 32)) (get $2))" + "(ripemd160 $arr (= chars $sz))", + "(with $1 (alloc 32) (seq (pop (~call 101 3 0 $arr $sz (mload $1) 32)) (mload (get $1))))" }, { - "(ecrecover $h $v $r $s)", - "(seq (declare $1) (declare $2) (declare $3) (declare $4) (set $1 $h) (set $2 $v) (set $3 $r) (set $4 $s) (pop (~call 101 1 0 (ref $1) 128 (ref $5) 32)) (get $5))" + "(ripemd160 $arr $sz)", + "(with $1 (alloc 32) (seq (pop (~call 101 3 0 $arr (mul 32 $sz) (get $1) 32)) (mload (get $1))))" }, { - "(seq (seq) $x)", - "$x" + "(ecrecover $h $v $r $s)", + "(with $1 (alloc 160) (seq (mstore (get $1) $h) (mstore (add (get $1) 32) $v) (mstore (add (get $1) 64) $r) (mstore (add (get $1) 96) $s) (pop (~call 101 1 0 (get $1) 128 (add (get $1 128)) 32)) (mload (add (get $1) 128))))" }, { "(inset $x)", @@ -235,21 +231,64 @@ std::string macros[][2] = { }, { "(log $t1)", - "(~log1 $t1 0 0)" + "(~log1 0 0 $t1)" }, { "(log $t1 $t2)", - "(~log2 $t1 $t2 0 0)" + "(~log2 0 0 $t1 $t2)" }, { "(log $t1 $t2 $t3)", - "(~log3 $t1 $t2 $t3 0 0)" + "(~log3 0 0 $t1 $t2 $t3)" }, { "(log $t1 $t2 $t3 $t4)", - "(~log4 $t1 $t2 $t3 $t4 0 0)" + "(~log4 0 0 $t1 $t2 $t3 $t4)" + }, + { + "(logarr $a $sz)", + "(~log0 $a (mul 32 $sz))" + }, + { + "(logarr $a $sz $t1)", + "(~log1 $a (mul 32 $sz) $t1)" + }, + { + "(logarr $a $sz $t1 $t2)", + "(~log2 $a (mul 32 $sz) $t1 $t2)" + }, + { + "(logarr $a $sz $t1 $t2 $t3)", + "(~log3 $a (mul 32 $sz) $t1 $t2 $t3)" + }, + { + "(logarr $a $sz $t1 $t2 $t3 $t4)", + "(~log4 $a (mul 32 $sz) $t1 $t2 $t3 $t4)" + }, + { + "(save $loc $array (= chars $count))", + "(with $location (ref $loc) (with $c $count (with $end (div $c 32) (with $i 0 (seq (while (slt $i $end) (seq (sstore (add $i $location) (access $array $i)) (set $i (add $i 
1)))) (sstore (add $i $location) (~and (access $array $i) (sub 0 (exp 256 (sub 32 (mod $c 32)))))))))))" + }, + { + "(save $loc $array $count)", + "(with $location (ref $loc) (with $end $count (with $i 0 (while (slt $i $end) (seq (sstore (add $i $location) (access $array $i)) (set $i (add $i 1)))))))" + }, + { + "(load $loc (= chars $count))", + "(with $location (ref $loc) (with $c $count (with $a (alloc $c) (with $i 0 (seq (while (slt $i (div $c 32)) (seq (set (access $a $i) (sload (add $location $i))) (set $i (add $i 1)))) (set (access $a $i) (~and (sload (add $location $i)) (sub 0 (exp 256 (sub 32 (mod $c 32)))))) $a)))))" + }, + { + "(load $loc $count)", + "(with $location (ref $loc) (with $c $count (with $a (alloc $c) (with $i 0 (seq (while (slt $i $c) (seq (set (access $a $i) (sload (add $location $i))) (set $i (add $i 1)))) $a)))))" + }, + { + "(unsafe_mcopy $to $from $sz)", + "(with _sz $sz (with _from $from (with _to $to (seq (comment STARTING UNSAFE MCOPY) (with _i 0 (while (lt _i _sz) (seq (mstore (add $to _i) (mload (add _from _i))) (set _i (add _i 32)))))))))" + }, + { + "(mcopy $to $from $_sz)", + "(with _to $to (with _from $from (with _sz $sz (seq (comment STARTING MCOPY (with _i 0 (seq (while (lt (add _i 31) _sz) (seq (mstore (add _to _i) (mload (add _from _i))) (set _i (add _i 32)))) (with _mask (exp 256 (sub 32 (mod _sz 32))) (mstore (add $to _i) (add (mod (mload (add $to _i)) _mask) (and (mload (add $from _i)) (sub 0 _mask))))))))))))" }, - { "(. msg datasize)", "(div (calldatasize) 32)" }, { "(. msg sender)", "(caller)" }, { "(. msg value)", "(callvalue)" }, { "(. 
tx gasprice)", "(gasprice)" }, @@ -267,8 +306,7 @@ std::string macros[][2] = { { "---END---", "" } //Keep this line at the end of the list }; -std::vector > nodeMacros; - +// Token synonyms std::string synonyms[][2] = { { "or", "||" }, { "and", "&&" }, @@ -286,10 +324,6 @@ std::string synonyms[][2] = { { "^", "exp" }, { "**", "exp" }, { "%", "smod" }, - { "@/", "div" }, - { "@%", "mod" }, - { "@<", "lt" }, - { "@>", "gt" }, { "<", "slt" }, { ">", "sgt" }, { "=", "set" }, @@ -298,6 +332,10 @@ std::string synonyms[][2] = { { "---END---", "" } //Keep this line at the end of the list }; +std::map synonymMap; + +// Custom setters (need to be registered separately +// for use with managed storage) std::string setters[][2] = { { "+=", "+" }, { "-=", "-" }, @@ -305,550 +343,136 @@ std::string setters[][2] = { { "/=", "/" }, { "%=", "%" }, { "^=", "^" }, - { "!=", "!" }, { "---END---", "" } //Keep this line at the end of the list }; -// Match result storing object -struct matchResult { - bool success; - std::map map; -}; - -// Storage variable index storing object -struct svObj { - std::map offsets; - std::map indices; - std::map > coefficients; - std::map nonfinal; - std::string globalOffset; -}; - -// Preprocessing result storing object -class preprocessAux { - public: - preprocessAux() { - globalExterns = std::map(); - localExterns = std::map >(); - localExterns["self"] = std::map(); - } - std::map globalExterns; - std::map > localExterns; - svObj storageVars; -}; - -#define preprocessResult std::pair - -// Main pattern matching routine, for those patterns that can be expressed -// using our standard mini-language above -// -// Returns two values. 
First, a boolean to determine whether the node matches -// the pattern, second, if the node does match then a map mapping variables -// in the pattern to nodes -matchResult match(Node p, Node n) { - matchResult o; - o.success = false; - if (p.type == TOKEN) { - if (p.val == n.val && n.type == TOKEN) o.success = true; - else if (p.val[0] == '$') { - o.success = true; - o.map[p.val.substr(1)] = n; - } - } - else if (n.type==TOKEN || p.val!=n.val || p.args.size()!=n.args.size()) { - // do nothing - } - else { - for (unsigned i = 0; i < p.args.size(); i++) { - matchResult oPrime = match(p.args[i], n.args[i]); - if (!oPrime.success) { - o.success = false; - return o; - } - for (std::map::iterator it = oPrime.map.begin(); - it != oPrime.map.end(); - it++) { - o.map[(*it).first] = (*it).second; - } - } - o.success = true; - } - return o; -} - -// Fills in the pattern with a dictionary mapping variable names to -// nodes (these dicts are generated by match). Match and subst together -// create a full pattern-matching engine. 
-Node subst(Node pattern, - std::map dict, - std::string varflag, - Metadata metadata) { - if (pattern.type == TOKEN && pattern.val[0] == '$') { - if (dict.count(pattern.val.substr(1))) { - return dict[pattern.val.substr(1)]; - } - else { - return token(varflag + pattern.val.substr(1), metadata); - } - } - else if (pattern.type == TOKEN) { - return pattern; - } - else { - std::vector args; - for (unsigned i = 0; i < pattern.args.size(); i++) { - args.push_back(subst(pattern.args[i], dict, varflag, metadata)); - } - return astnode(pattern.val, args, metadata); - } -} +std::map setterMap; // Processes mutable array literals - Node array_lit_transform(Node node) { + std::string prefix = "_temp"+mkUniqueToken() + "_"; Metadata m = node.metadata; - std::vector o1; - o1.push_back(token(unsignedToDecimal(node.args.size() * 32), m)); - std::vector o2; - std::string symb = "_temp"+mkUniqueToken()+"_0"; - o2.push_back(token(symb, m)); - o2.push_back(astnode("alloc", o1, m)); - std::vector o3; - o3.push_back(astnode("set", o2, m)); + std::map d; + std::string o = "(seq (set $arr (alloc "+utd(node.args.size()*32)+"))"; for (unsigned i = 0; i < node.args.size(); i++) { - std::vector o5; - o5.push_back(token(symb, m)); - std::vector o6; - o6.push_back(astnode("get", o5, m)); - o6.push_back(token(unsignedToDecimal(i * 32), m)); - std::vector o7; - o7.push_back(astnode("add", o6)); - o7.push_back(node.args[i]); - o3.push_back(astnode("mstore", o7, m)); - } - std::vector o8; - o8.push_back(token(symb, m)); - o3.push_back(astnode("get", o8)); - return astnode("seq", o3, m); -} - -// Is the given node something of the form -// self.cow -// self.horse[0] -// self.a[6][7][self.storage[3]].chicken[9] -bool isNodeStorageVariable(Node node) { - std::vector nodez; - nodez.push_back(node); - while (1) { - if (nodez.back().type == TOKEN) return false; - if (nodez.back().args.size() == 0) return false; - if (nodez.back().val != "." 
&& nodez.back().val != "access") - return false; - if (nodez.back().args[0].val == "self") return true; - nodez.push_back(nodez.back().args[0]); + o += " (mstore (add (get $arr) "+utd(i * 32)+") $"+utd(i)+")"; + d[utd(i)] = node.args[i]; } + o += " (get $arr))"; + return subst(parseLLL(o), d, prefix, m); } -Node optimize(Node inp); - -Node apply_rules(preprocessResult pr); - -// Convert: -// self.cow -> ["cow"] -// self.horse[0] -> ["horse", "0"] -// self.a[6][7][self.storage[3]].chicken[9] -> -// ["6", "7", (sload 3), "chicken", "9"] -std::vector listfyStorageAccess(Node node) { - std::vector out; - std::vector nodez; - nodez.push_back(node); - while (1) { - if (nodez.back().type == TOKEN) { - out.push_back(token("--" + nodez.back().val, node.metadata)); - std::vector outrev; - for (int i = (signed)out.size() - 1; i >= 0; i--) { - outrev.push_back(out[i]); +// Processes long text literals +Node string_transform(Node node) { + Metadata m = node.metadata; + if (!node.args.size()) + err("Empty text!", m); + if (node.args[0].val.size() < 2 + || node.args[0].val[0] != '"' + || node.args[0].val[node.args[0].val.size() - 1] != '"') + err("Text contents don't look like a string!", m); + std::string bin = node.args[0].val.substr(1, node.args[0].val.size() - 2); + unsigned sz = bin.size(); + std::vector o; + for (unsigned i = 0; i < sz; i += 32) { + std::string t = binToNumeric(bin.substr(i, 32)); + if ((sz - i) < 32 && (sz - i) > 0) { + while ((sz - i) < 32) { + t = decimalMul(t, "256"); + i--; } - return outrev; + i = sz; } - if (nodez.back().val == ".") - nodez.back().args[1].val = "--" + nodez.back().args[1].val; - if (nodez.back().args.size() == 0) - err("Error parsing storage variable statement", node.metadata); - if (nodez.back().args.size() == 1) - out.push_back(token(tt256m1, node.metadata)); - else - out.push_back(nodez.back().args[1]); - nodez.push_back(nodez.back().args[0]); + o.push_back(token(t, node.metadata)); } + node = astnode("array_lit", o, 
node.metadata); + return array_lit_transform(node); } -// Cool function for debug purposes (named cerrStringList to make -// all prints searchable via 'cerr') -void cerrStringList(std::vector s, std::string suffix="") { - for (unsigned i = 0; i < s.size(); i++) std::cerr << s[i] << " "; - std::cerr << suffix << "\n"; -} -// Populate an svObj with the arguments needed to determine -// the storage position of a node -svObj getStorageVars(svObj pre, Node node, std::string prefix="", int index=0) { - Metadata m = node.metadata; - if (!pre.globalOffset.size()) pre.globalOffset = "0"; - std::vector h; - std::vector coefficients; - // Array accesses or atoms - if (node.val == "access" || node.type == TOKEN) { - std::string tot = "1"; - h = listfyStorageAccess(node); - coefficients.push_back("1"); - for (unsigned i = h.size() - 1; i >= 1; i--) { - // Array sizes must be constant or at least arithmetically - // evaluable at compile time - h[i] = optimize(apply_rules(preprocessResult( - h[i], preprocessAux()))); - if (!isNumberLike(h[i])) - err("Array size must be fixed value", m); - // Create a list of the coefficient associated with each - // array index - coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); - } - } - // Tuples - else { - int startc; - // Handle the (fun args...) case - if (node.val == "fun") { - startc = 1; - h = listfyStorageAccess(node.args[0]); - } - // Handle the ( args...) 
case, which - // the serpent parser produces when the function - // is a simple name and not a complex astnode - else { - startc = 0; - h = listfyStorageAccess(token(node.val, m)); - } - svObj sub = pre; - sub.globalOffset = "0"; - // Evaluate tuple elements recursively - for (unsigned i = startc; i < node.args.size(); i++) { - sub = getStorageVars(sub, - node.args[i], - prefix+h[0].val.substr(2)+".", - i-1); - } - coefficients.push_back(sub.globalOffset); - for (unsigned i = h.size() - 1; i >= 1; i--) { - // Array sizes must be constant or at least arithmetically - // evaluable at compile time - h[i] = optimize(apply_rules(preprocessResult( - h[i], preprocessAux()))); - if (!isNumberLike(h[i])) - err("Array size must be fixed value", m); - // Create a list of the coefficient associated with each - // array index - coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); - } - pre.offsets = sub.offsets; - pre.coefficients = sub.coefficients; - pre.nonfinal = sub.nonfinal; - pre.nonfinal[prefix+h[0].val.substr(2)] = true; - } - pre.coefficients[prefix+h[0].val.substr(2)] = coefficients; - pre.offsets[prefix+h[0].val.substr(2)] = pre.globalOffset; - pre.indices[prefix+h[0].val.substr(2)] = index; - if (decimalGt(tt176, coefficients.back())) - pre.globalOffset = decimalAdd(pre.globalOffset, coefficients.back()); - return pre; -} +Node apply_rules(preprocessResult pr); -// Transform a node of the form (call to funid vars...) 
into +// Transform ".(args...)" into // a call - -#define psn std::pair - -Node call_transform(Node node, std::string op) { +Node dotTransform(Node node, preprocessAux aux) { Metadata m = node.metadata; // We're gonna make lots of temporary variables, // so set up a unique flag for them std::string prefix = "_temp"+mkUniqueToken()+"_"; + // Check that the function name is a token + if (node.args[0].args[1].type == ASTNODE) + err("Function name must be static", m); + + Node dotOwner = node.args[0].args[0]; + std::string dotMember = node.args[0].args[1].val; // kwargs = map of special arguments std::map kwargs; kwargs["value"] = token("0", m); - kwargs["gas"] = parseLLL("(- (gas) 25)"); - std::vector args; - for (unsigned i = 0; i < node.args.size(); i++) { - if (node.args[i].val == "=" || node.args[i].val == "set") { - if (node.args[i].args.size() != 2) - err("Malformed set", m); - kwargs[node.args[i].args[0].val] = node.args[i].args[1]; - } - else args.push_back(node.args[i]); - } - if (args.size() < 2) err("Too few arguments for call!", m); - kwargs["to"] = args[0]; - kwargs["funid"] = args[1]; - std::vector inputs; - for (unsigned i = 2; i < args.size(); i++) { - inputs.push_back(args[i]); - } - std::vector with; - std::vector precompute; - std::vector post; - if (kwargs.count("data")) { - if (!kwargs.count("datasz")) err("Required param datasz", m); - // The strategy here is, we store the function ID byte at the index - // before the start of the byte, but then we store the value that was - // there before and reinstate it once the process is over - // store data: data array start - with.push_back(psn(prefix+"data", kwargs["data"])); - // store data: prior: data array - 32 - Node prior = astnode("sub", token(prefix+"data", m), token("32", m), m); - with.push_back(psn(prefix+"prior", prior)); - // store data: priormem: data array - 32 prior memory value - Node priormem = astnode("mload", token(prefix+"prior", m), m); - with.push_back(psn(prefix+"priormem", 
priormem)); - // post: reinstate prior mem at data array - 32 - post.push_back(astnode("mstore", - token(prefix+"prior", m), - token(prefix+"priormem", m), - m)); - // store data: datastart: data array - 1 - Node datastart = astnode("sub", - token(prefix+"data", m), - token("1", m), - m); - with.push_back(psn(prefix+"datastart", datastart)); - // push funid byte to datastart - precompute.push_back(astnode("mstore8", - token(prefix+"datastart", m), - kwargs["funid"], - m)); - // set data array start loc - kwargs["datain"] = token(prefix+"datastart", m); - kwargs["datainsz"] = astnode("add", - token("1", m), - astnode("mul", - token("32", m), - kwargs["datasz"], - m), - m); - } - else { - // Here, there is no data array, instead there are function arguments. - // This actually lets us be much more efficient with how we set things - // up. - // Pre-declare variables; relies on declared variables being sequential - precompute.push_back(astnode("declare", - token(prefix+"prebyte", m), - m)); - for (unsigned i = 0; i < inputs.size(); i++) { - precompute.push_back(astnode("declare", - token(prefix+unsignedToDecimal(i), m), - m)); - } - // Set up variables to store the function arguments, and store the - // function ID at the byte before the start - Node datastart = astnode("add", - token("31", m), - astnode("ref", - token(prefix+"prebyte", m), - m), - m); - precompute.push_back(astnode("mstore8", - datastart, - kwargs["funid"], - m)); - for (unsigned i = 0; i < inputs.size(); i++) { - precompute.push_back(astnode("set", - token(prefix+unsignedToDecimal(i), m), - inputs[i], - m)); - - } - kwargs["datain"] = datastart; - kwargs["datainsz"] = token(unsignedToDecimal(inputs.size()*32+1), m); - } - if (!kwargs.count("outsz")) { - kwargs["dataout"] = astnode("ref", token(prefix+"dataout", m), m); - kwargs["dataoutsz"] = token("32", node.metadata); - post.push_back(astnode("get", token(prefix+"dataout", m), m)); - } - else { - kwargs["dataout"] = kwargs["out"]; - 
kwargs["dataoutsz"] = kwargs["outsz"]; - post.push_back(astnode("ref", token(prefix+"dataout", m), m)); - } - // Set up main call - std::vector main; - for (unsigned i = 0; i < precompute.size(); i++) { - main.push_back(precompute[i]); - } - std::vector call; - call.push_back(kwargs["gas"]); - call.push_back(kwargs["to"]); - call.push_back(kwargs["value"]); - call.push_back(kwargs["datain"]); - call.push_back(kwargs["datainsz"]); - call.push_back(kwargs["dataout"]); - call.push_back(kwargs["dataoutsz"]); - main.push_back(astnode("pop", astnode("~"+op, call, m), m)); - for (unsigned i = 0; i < post.size(); i++) { - main.push_back(post[i]); - } - Node mainNode = astnode("seq", main, node.metadata); - // Add with variables - for (int i = with.size() - 1; i >= 0; i--) { - mainNode = astnode("with", - token(with[i].first, m), - with[i].second, - mainNode, - m); - } - return mainNode; -} - -// Preprocess input containing functions -// -// localExterns is a map of the form, eg, -// -// { x: { foo: 0, bar: 1, baz: 2 }, y: { qux: 0, foo: 1 } ... 
} -// -// Signifying that x.foo = 0, x.baz = 2, y.foo = 1, etc -// -// globalExterns is a one-level map, eg from above -// -// { foo: 1, bar: 1, baz: 2, qux: 0 } -// -// Note that globalExterns may be ambiguous -preprocessResult preprocess(Node inp) { - inp = inp.args[0]; - Metadata m = inp.metadata; - if (inp.val != "seq") { - std::vector args; - args.push_back(inp); - inp = astnode("seq", args, m); - } - std::vector empty; - Node init = astnode("seq", empty, m); - Node shared = astnode("seq", empty, m); - std::vector any; - std::vector functions; - preprocessAux out = preprocessAux(); - out.localExterns["self"] = std::map(); - int functionCount = 0; - int storageDataCount = 0; - for (unsigned i = 0; i < inp.args.size(); i++) { - Node obj = inp.args[i]; - // Functions - if (obj.val == "def") { - if (obj.args.size() == 0) - err("Empty def", m); - std::string funName = obj.args[0].val; - // Init, shared and any are special functions - if (funName == "init" || funName == "shared" || funName == "any") { - if (obj.args[0].args.size()) - err(funName+" cannot have arguments", m); - } - if (funName == "init") init = obj.args[1]; - else if (funName == "shared") shared = obj.args[1]; - else if (funName == "any") any.push_back(obj.args[1]); - else { - // Other functions - functions.push_back(obj); - out.localExterns["self"][obj.args[0].val] = functionCount; - functionCount++; - } - } - // Extern declarations - else if (obj.val == "extern") { - std::string externName = obj.args[0].args[0].val; - Node al = obj.args[0].args[1]; - if (!out.localExterns.count(externName)) - out.localExterns[externName] = std::map(); - for (unsigned i = 0; i < al.args.size(); i++) { - out.globalExterns[al.args[i].val] = i; - out.localExterns[externName][al.args[i].val] = i; - } - } - // Storage variables/structures - else if (obj.val == "data") { - out.storageVars = getStorageVars(out.storageVars, - obj.args[0], - "", - storageDataCount); - storageDataCount += 1; - } - else any.push_back(obj); - } 
- std::vector main; - if (shared.args.size()) main.push_back(shared); - if (init.args.size()) main.push_back(init); - - std::vector code; - if (shared.args.size()) code.push_back(shared); - for (unsigned i = 0; i < any.size(); i++) - code.push_back(any[i]); - for (unsigned i = 0; i < functions.size(); i++) - code.push_back(functions[i]); - main.push_back(astnode("~return", - token("0", m), - astnode("lll", - astnode("seq", code, m), - token("0", m), - m), - m)); - - - - return preprocessResult(astnode("seq", main, inp.metadata), out); -} - -// Transform ".(args...)" into -// (call args...) -Node dotTransform(Node node, preprocessAux aux) { - Metadata m = node.metadata; - Node pre = node.args[0].args[0]; - std::string post = node.args[0].args[1].val; - if (node.args[0].args[1].type == ASTNODE) - err("Function name must be static", m); - // Search for as=? and call=code keywords + kwargs["gas"] = subst(parseLLL("(- (gas) 25)"), msn(), prefix, m); + // Search for as=? and call=code keywords, and isolate the actual + // function arguments + std::vector fnargs; std::string as = ""; - bool call_code = false; + std::string op = "call"; for (unsigned i = 1; i < node.args.size(); i++) { - Node arg = node.args[i]; + fnargs.push_back(node.args[i]); + Node arg = fnargs.back(); if (arg.val == "=" || arg.val == "set") { if (arg.args[0].val == "as") as = arg.args[1].val; if (arg.args[0].val == "call" && arg.args[1].val == "code") - call_code = true; + op = "callcode"; + if (arg.args[0].val == "gas") + kwargs["gas"] = arg.args[1]; + if (arg.args[0].val == "value") + kwargs["value"] = arg.args[1]; + if (arg.args[0].val == "outsz") + kwargs["outsz"] = arg.args[1]; } } - if (pre.val == "self") { + if (dotOwner.val == "self") { if (as.size()) err("Cannot use \"as\" when calling self!", m); - as = pre.val; + as = dotOwner.val; } - std::vector args; - args.push_back(pre); - // Determine the funId assuming the "as" keyword was used + // Determine the funId and sig assuming the "as" 
keyword was used + int funId = 0; + std::string sig; if (as.size() > 0 && aux.localExterns.count(as)) { - if (!aux.localExterns[as].count(post)) - err("Invalid call: "+printSimple(pre)+"."+post, m); - std::string funid = unsignedToDecimal(aux.localExterns[as][post]); - args.push_back(token(funid, m)); + if (!aux.localExterns[as].count(dotMember)) + err("Invalid call: "+printSimple(dotOwner)+"."+dotMember, m); + funId = aux.localExterns[as][dotMember]; + sig = aux.localExternSigs[as][dotMember]; } - // Determine the funId otherwise + // Determine the funId and sig otherwise else if (!as.size()) { - if (!aux.globalExterns.count(post)) - err("Invalid call: "+printSimple(pre)+"."+post, m); - std::string key = unsignedToDecimal(aux.globalExterns[post]); - args.push_back(token(key, m)); + if (!aux.globalExterns.count(dotMember)) + err("Invalid call: "+printSimple(dotOwner)+"."+dotMember, m); + std::string key = unsignedToDecimal(aux.globalExterns[dotMember]); + funId = aux.globalExterns[dotMember]; + sig = aux.globalExternSigs[dotMember]; + } + else err("Invalid call: "+printSimple(dotOwner)+"."+dotMember, m); + // Pack arguments + kwargs["data"] = packArguments(fnargs, sig, funId, m); + kwargs["to"] = dotOwner; + Node main; + // Pack output + if (!kwargs.count("outsz")) { + main = parseLLL( + "(with _data $data (seq " + "(pop (~"+op+" $gas $to $value (access _data 0) (access _data 1) (ref $dataout) 32))" + "(get $dataout)))"); + } + else { + main = parseLLL( + "(with _data $data (with _outsz (mul 32 $outsz) (with _out (alloc _outsz) (seq " + "(pop (~"+op+" $gas $to $value (access _data 0) (access _data 1) _out _outsz))" + "(get _out)))))"); } - else err("Invalid call: "+printSimple(pre)+"."+post, m); - for (unsigned i = 1; i < node.args.size(); i++) - args.push_back(node.args[i]); - return astnode(call_code ? 
"call_code" : "call", args, m); + // Set up main call + + Node o = subst(main, kwargs, prefix, m); + return o; } // Transform an access of the form self.bob, self.users[5], etc into @@ -877,7 +501,8 @@ Node dotTransform(Node node, preprocessAux aux) { // obj2[0].a -> sha3([1, 0, 0]) // obj2[5].b[1][3] -> sha3([1, 5, 1, 1, 3]) // obj2[45].c -> sha3([1, 45, 2]) -Node storageTransform(Node node, preprocessAux aux, bool mapstyle=false) { +Node storageTransform(Node node, preprocessAux aux, + bool mapstyle=false, bool ref=false) { Metadata m = node.metadata; // Get a list of all of the "access parameters" used in order // eg. self.users[5].cow[4][m[2]][woof] -> @@ -909,7 +534,7 @@ Node storageTransform(Node node, preprocessAux aux, bool mapstyle=false) { // If the size of an object exceeds 2^176, we make it an infinite // array if (decimalGt(coefficients.back(), tt176) && !mapstyle) - return storageTransform(node, aux, true); + return storageTransform(node, aux, true, ref); offset = decimalAdd(offset, aux.storageVars.offsets[tempPrefix]); c = 0; if (mapstyle) @@ -940,28 +565,29 @@ Node storageTransform(Node node, preprocessAux aux, bool mapstyle=false) { if (c > (signed)coefficients.size() - 1) { err("Too many array index lookups", m); } + Node o; if (mapstyle) { - // We pre-declare variables, relying on the idea that sequentially - // declared variables are doing to appear beside each other in - // memory - std::vector main; + std::string t = "_temp_"+mkUniqueToken(); + std::vector sub; for (unsigned i = 0; i < terms.size(); i++) - main.push_back(astnode("declare", - token(varPrefix+unsignedToDecimal(i), m), - m)); - for (unsigned i = 0; i < terms.size(); i++) - main.push_back(astnode("set", - token(varPrefix+unsignedToDecimal(i), m), - terms[i], - m)); - main.push_back(astnode("ref", token(varPrefix+"0", m), m)); - Node sz = token(unsignedToDecimal(terms.size()), m); - return astnode("sload", - astnode("sha3", - astnode("seq", main, m), - sz, - m), - m); + 
sub.push_back(asn("mstore", + asn("add", + tkn(utd(i * 32), m), + asn("get", tkn(t+"pos", m), m), + m), + terms[i], + m)); + sub.push_back(tkn(t+"pos", m)); + Node main = asn("with", + tkn(t+"pos", m), + asn("alloc", tkn(utd(terms.size() * 32), m), m), + asn("seq", sub, m), + m); + Node sz = token(utd(terms.size() * 32), m); + o = astnode("~sha3", + main, + sz, + m); } else { // We add up all the index*coefficients @@ -972,42 +598,92 @@ Node storageTransform(Node node, preprocessAux aux, bool mapstyle=false) { temp.push_back(terms[i]); out = astnode("add", temp, node.metadata); } - std::vector temp2; - temp2.push_back(out); - return astnode("sload", temp2, node.metadata); + o = out; + } + if (ref) return o; + else return astnode("sload", o, node.metadata); +} + +// Basic rewrite rule execution +std::pair rulesTransform(Node node, rewriteRuleSet macros) { + std::string prefix = "_temp_"+mkUniqueToken(); + bool changed = false; + if (!macros.ruleLists.count(node.val)) + return std::pair(node, false); + std::vector rules = macros.ruleLists[node.val]; + for (unsigned pos = 0; pos < rules.size(); pos++) { + rewriteRule macro = rules[pos]; + matchResult mr = match(macro.pattern, node); + if (mr.success) { + node = subst(macro.substitution, mr.map, prefix, node.metadata); + std::pair o = rulesTransform(node, macros); + o.second = true; + return o; + } + } + return std::pair(node, changed); +} + +std::pair synonymTransform(Node node) { + bool changed = false; + if (node.type == ASTNODE && synonymMap.count(node.val)) { + node.val = synonymMap[node.val]; + changed = true; } + return std::pair(node, changed); } +rewriteRuleSet nodeMacros; +rewriteRuleSet setterMacros; -// Recursively applies rewrite rules -Node apply_rules(preprocessResult pr) { +bool dontDescend(std::string s) { + return s == "macro" || s == "comment" || s == "outer"; +} + +// Recursively applies any set of rewrite rules +std::pair apply_rules_iter(preprocessResult pr, rewriteRuleSet rules) { + bool changed 
= false; Node node = pr.first; - // If the rewrite rules have not yet been parsed, parse them - if (!nodeMacros.size()) { - for (int i = 0; i < 9999; i++) { - std::vector o; - if (macros[i][0] == "---END---") break; - o.push_back(parseLLL(macros[i][0])); - o.push_back(parseLLL(macros[i][1])); - nodeMacros.push_back(o); + if (dontDescend(node.val)) + return std::pair(node, false); + std::pair o = rulesTransform(node, rules); + node = o.first; + changed = changed || o.second; + if (node.type == ASTNODE) { + for (unsigned i = 0; i < node.args.size(); i++) { + std::pair r = + apply_rules_iter(preprocessResult(node.args[i], pr.second), rules); + node.args[i] = r.first; + changed = changed || r.second; } } - // Assignment transformations - for (int i = 0; i < 9999; i++) { - if (setters[i][0] == "---END---") break; - if (node.val == setters[i][0]) { - node = astnode("=", - node.args[0], - astnode(setters[i][1], - node.args[0], - node.args[1], - node.metadata), - node.metadata); - } + return std::pair(node, changed); +} + +// Recursively applies rewrite rules and other primary transformations +std::pair mainTransform(preprocessResult pr) { + bool changed = false; + Node node = pr.first; + + // Anything inside "outer" should be treated as a separate program + // and thus recursively compiled in its entirety + if (node.val == "outer") { + node = apply_rules(preprocess(node.args[0])); + changed = true; } + + // Don't descend into comments, macros and inner scopes + if (dontDescend(node.val)) + return std::pair(node, changed); + // Special storage transformation if (isNodeStorageVariable(node)) { node = storageTransform(node, pr.second); + changed = true; + } + if (node.val == "ref" && isNodeStorageVariable(node.args[0])) { + node = storageTransform(node.args[0], pr.second, false, true); + changed = true; } if (node.val == "=" && isNodeStorageVariable(node.args[0])) { Node t = storageTransform(node.args[0], pr.second); @@ -1017,195 +693,213 @@ Node apply_rules(preprocessResult 
pr) { o.push_back(node.args[1]); node = astnode("sstore", o, node.metadata); } + changed = true; } // Main code - unsigned pos = 0; - std::string prefix = "_temp"+mkUniqueToken()+"_"; - while(1) { - if (synonyms[pos][0] == "---END---") { - break; - } - else if (node.type == ASTNODE && node.val == synonyms[pos][0]) { - node.val = synonyms[pos][1]; - } - pos++; - } - for (pos = 0; pos < nodeMacros.size(); pos++) { - Node pattern = nodeMacros[pos][0]; - matchResult mr = match(pattern, node); - if (mr.success) { - Node pattern2 = nodeMacros[pos][1]; - node = subst(pattern2, mr.map, prefix, node.metadata); - pos = 0; - } - } + std::pair pnb = synonymTransform(node); + node = pnb.first; + changed = changed || pnb.second; + // std::cerr << priority << " " << macros.size() << "\n"; + std::pair pnc = rulesTransform(node, nodeMacros); + node = pnc.first; + changed = changed || pnc.second; + + // Special transformations - if (node.val == "outer") { - pr = preprocess(node); - node = pr.first; - } - if (node.val == "array_lit") + if (node.val == "array_lit") { node = array_lit_transform(node); + changed = true; + } if (node.val == "fun" && node.args[0].val == ".") { node = dotTransform(node, pr.second); + changed = true; + } + if (node.val == "text") { + node = string_transform(node); + changed = true; } - if (node.val == "call") - node = call_transform(node, "call"); - if (node.val == "call_code") - node = call_transform(node, "call_code"); if (node.type == ASTNODE) { unsigned i = 0; + // Arg 0 of all of these is a variable, so should not be changed if (node.val == "set" || node.val == "ref" - || node.val == "get" || node.val == "with" - || node.val == "def" || node.val == "declare") { - node.args[0].val = "'" + node.args[0].val; + || node.val == "get" || node.val == "with") { + if (node.args[0].type == TOKEN && + node.args[0].val.size() > 0 && node.args[0].val[0] != '\'') { + node.args[0].val = "'" + node.args[0].val; + changed = true; + } i = 1; } - if (node.val == "def") { 
- for (unsigned j = 0; j < node.args[0].args.size(); j++) { - if (node.args[0].args[j].val == ":") { - node.args[0].args[j].val = "kv"; - node.args[0].args[j].args[0].val = - "'" + node.args[0].args[j].args[0].val; - } - else { - node.args[0].args[j].val = "'" + node.args[0].args[j].val; - } - } + // Convert arglen(x) to '_len_x + else if (node.val == "arglen") { + node.val = "get"; + node.args[0].val = "'_len_" + node.args[0].val; + i = 1; + changed = true; } + // Recursively process children for (; i < node.args.size(); i++) { - node.args[i] = - apply_rules(preprocessResult(node.args[i], pr.second)); + std::pair r = + mainTransform(preprocessResult(node.args[i], pr.second)); + node.args[i] = r.first; + changed = changed || r.second; } } + // Add leading ' to variable names, and wrap them inside get else if (node.type == TOKEN && !isNumberLike(node)) { - node.val = "'" + node.val; - std::vector args; - args.push_back(node); - node = astnode("get", args, node.metadata); + if (node.val.size() && node.val[0] != '\'' && node.val[0] != '$') { + Node n = astnode("get", tkn("'"+node.val), node.metadata); + node = n; + changed = true; + } } - // This allows people to use ~x as a way of having functions with the same - // name and arity as macros; the idea is that ~x is a "final" form, and - // should not be remacroed, but it is converted back at the end - if (node.type == ASTNODE && node.val[0] == '~') - node.val = node.val.substr(1); - return node; + // Convert all numbers to normalized form + else if (node.type == TOKEN && isNumberLike(node) && !isDecimal(node.val)) { + node.val = strToNumeric(node.val); + changed = true; + } + return std::pair(node, changed); } -// Compile-time arithmetic calculations -Node optimize(Node inp) { - if (inp.type == TOKEN) { - Node o = tryNumberize(inp); - if (decimalGt(o.val, tt256, true)) - err("Value too large (exceeds 32 bytes or 2^256)", inp.metadata); - return o; +// Do some preprocessing to convert all of our macro lists into 
compiled +// forms that can then be reused +void parseMacros() { + for (int i = 0; i < 9999; i++) { + std::vector o; + if (macros[i][0] == "---END---") break; + nodeMacros.addRule(rewriteRule( + parseLLL(macros[i][0]), + parseLLL(macros[i][1]) + )); } - for (unsigned i = 0; i < inp.args.size(); i++) { - inp.args[i] = optimize(inp.args[i]); + for (int i = 0; i < 9999; i++) { + std::vector o; + if (setters[i][0] == "---END---") break; + setterMacros.addRule(rewriteRule( + asn(setters[i][0], tkn("$x"), tkn("$y")), + asn("=", tkn("$x"), asn(setters[i][1], tkn("$x"), tkn("$y"))) + )); } - // Degenerate cases for add and mul - if (inp.args.size() == 2) { - if (inp.val == "add" && inp.args[0].type == TOKEN && - inp.args[0].val == "0") { - inp = inp.args[1]; - } - if (inp.val == "add" && inp.args[1].type == TOKEN && - inp.args[1].val == "0") { - inp = inp.args[0]; - } - if (inp.val == "mul" && inp.args[0].type == TOKEN && - inp.args[0].val == "1") { - inp = inp.args[1]; - } - if (inp.val == "mul" && inp.args[1].type == TOKEN && - inp.args[1].val == "1") { - inp = inp.args[0]; - } + for (int i = 0; i < 9999; i++) { + if (synonyms[i][0] == "---END---") break; + synonymMap[synonyms[i][0]] = synonyms[i][1]; + } +} + +Node apply_rules(preprocessResult pr) { + // If the rewrite rules have not yet been parsed, parse them + if (!nodeMacros.ruleLists.size()) parseMacros(); + // Iterate over macros by priority list + std::map::iterator it; + std::pair r; + for(it=pr.second.customMacros.begin(); + it != pr.second.customMacros.end(); it++) { + while (1) { + // std::cerr << "STARTING ARI CYCLE: " << (*it).first <<"\n"; + // std::cerr << printAST(pr.first) << "\n"; + r = apply_rules_iter(pr, (*it).second); + pr.first = r.first; + if (!r.second) break; + } + } + // Apply setter macros + while (1) { + r = apply_rules_iter(pr, setterMacros); + pr.first = r.first; + if (!r.second) break; } - // Arithmetic computation - if (inp.args.size() == 2 - && inp.args[0].type == TOKEN - && 
inp.args[1].type == TOKEN) { - std::string o; - if (inp.val == "add") { - o = decimalMod(decimalAdd(inp.args[0].val, inp.args[1].val), tt256); - } - else if (inp.val == "sub") { - if (decimalGt(inp.args[0].val, inp.args[1].val, true)) - o = decimalSub(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "mul") { - o = decimalMod(decimalMul(inp.args[0].val, inp.args[1].val), tt256); - } - else if (inp.val == "div" && inp.args[1].val != "0") { - o = decimalDiv(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "sdiv" && inp.args[1].val != "0" - && decimalGt(tt255, inp.args[0].val) - && decimalGt(tt255, inp.args[1].val)) { - o = decimalDiv(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "mod" && inp.args[1].val != "0") { - o = decimalMod(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "smod" && inp.args[1].val != "0" - && decimalGt(tt255, inp.args[0].val) - && decimalGt(tt255, inp.args[1].val)) { - o = decimalMod(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "exp") { - o = decimalModExp(inp.args[0].val, inp.args[1].val, tt256); - } - if (o.length()) return token(o, inp.metadata); + // Apply all other macros + while (1) { + r = mainTransform(pr); + pr.first = r.first; + if (!r.second) break; } - return inp; + return r.first; } +// Pre-validation Node validate(Node inp) { + Metadata m = inp.metadata; if (inp.type == ASTNODE) { int i = 0; - while(valid[i][0] != "---END---") { - if (inp.val == valid[i][0]) { + while(validFunctions[i][0] != "---END---") { + if (inp.val == validFunctions[i][0]) { std::string sz = unsignedToDecimal(inp.args.size()); - if (decimalGt(valid[i][1], sz)) { + if (decimalGt(validFunctions[i][1], sz)) { err("Too few arguments for "+inp.val, inp.metadata); } - if (decimalGt(sz, valid[i][2])) { + if (decimalGt(sz, validFunctions[i][2])) { err("Too many arguments for "+inp.val, inp.metadata); } } i++; } + } + else if (inp.type == TOKEN) { + if (!inp.val.size()) err("??? 
empty token", m); + if (inp.val[0] == '_') err("Variables cannot start with _", m); } for (unsigned i = 0; i < inp.args.size(); i++) validate(inp.args[i]); return inp; } Node postValidate(Node inp) { + // This allows people to use ~x as a way of having functions with the same + // name and arity as macros; the idea is that ~x is a "final" form, and + // should not be remacroed, but it is converted back at the end + if (inp.val.size() > 0 && inp.val[0] == '~') { + inp.val = inp.val.substr(1); + } if (inp.type == ASTNODE) { if (inp.val == ".") err("Invalid object member (ie. a foo.bar not mapped to anything)", inp.metadata); - for (unsigned i = 0; i < inp.args.size(); i++) - postValidate(inp.args[i]); + else if (opcode(inp.val) >= 0) { + if ((signed)inp.args.size() < opinputs(inp.val)) + err("Too few arguments for "+inp.val, inp.metadata); + if ((signed)inp.args.size() > opinputs(inp.val)) + err("Too many arguments for "+inp.val, inp.metadata); + } + else if (isValidLLLFunc(inp.val, inp.args.size())) { + // do nothing + } + else err ("Invalid argument count or LLL function: "+printSimple(inp), inp.metadata); + for (unsigned i = 0; i < inp.args.size(); i++) { + inp.args[i] = postValidate(inp.args[i]); + } } return inp; } -Node outerWrap(Node inp) { - std::vector args; - args.push_back(inp); - return astnode("outer", args, inp.metadata); + +Node rewriteChunk(Node inp) { + return postValidate(optimize(apply_rules( + preprocessResult( + validate(inp), preprocessAux())))); } -Node rewrite(Node inp) { - return postValidate(optimize(apply_rules(preprocessResult( - validate(outerWrap(inp)), preprocessAux())))); +// Flatten nested sequence into flat sequence +Node flattenSeq(Node inp) { + std::vector o; + if (inp.val == "seq" && inp.type == ASTNODE) { + for (unsigned i = 0; i < inp.args.size(); i++) { + if (inp.args[i].val == "seq" && inp.args[i].type == ASTNODE) + o = extend(o, flattenSeq(inp.args[i]).args); + else + o.push_back(flattenSeq(inp.args[i])); + } + } + else if 
(inp.type == ASTNODE) { + for (unsigned i = 0; i < inp.args.size(); i++) { + o.push_back(flattenSeq(inp.args[i])); + } + } + else return inp; + return asn(inp.val, o, inp.metadata); } -Node rewriteChunk(Node inp) { - return postValidate(optimize(apply_rules(preprocessResult( - validate(inp), preprocessAux())))); +Node rewrite(Node inp) { + return postValidate(optimize(apply_rules(preprocess(flattenSeq(inp))))); } using namespace std; diff --git a/libserpent/tokenize.cpp b/libserpent/tokenize.cpp index c6a211593..b60cc8a44 100644 --- a/libserpent/tokenize.cpp +++ b/libserpent/tokenize.cpp @@ -13,8 +13,8 @@ int chartype(char c) { if (c >= '0' && c <= '9') return ALPHANUM; else if (c >= 'a' && c <= 'z') return ALPHANUM; else if (c >= 'A' && c <= 'Z') return ALPHANUM; - else if (std::string("~_$").find(c) != std::string::npos) return ALPHANUM; - else if (c == '\t' || c == ' ' || c == '\n') return SPACE; + else if (std::string("~_$@").find(c) != std::string::npos) return ALPHANUM; + else if (c == '\t' || c == ' ' || c == '\n' || c == '\r') return SPACE; else if (std::string("()[]{}").find(c) != std::string::npos) return BRACK; else if (c == '"') return DQUOTE; else if (c == '\'') return SQUOTE; diff --git a/libserpent/util.cpp b/libserpent/util.cpp index fbce5e8b5..5e83c0e41 100644 --- a/libserpent/util.cpp +++ b/libserpent/util.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include "util.h" #include "bignum.h" #include @@ -28,6 +27,11 @@ Node astnode(std::string val, std::vector args, Metadata met) { } //AST node constructors for a specific number of children +Node astnode(std::string val, Metadata met) { + std::vector args; + return astnode(val, args, met); +} + Node astnode(std::string val, Node a, Metadata met) { std::vector args; args.push_back(a); @@ -49,6 +53,16 @@ Node astnode(std::string val, Node a, Node b, Node c, Metadata met) { return astnode(val, args, met); } +Node astnode(std::string val, Node a, Node b, Node c, Node d, Metadata met) { + 
std::vector args; + args.push_back(a); + args.push_back(b); + args.push_back(c); + args.push_back(d); + return astnode(val, args, met); +} + + // Print token list std::string printTokens(std::vector tokens) { std::string s = ""; @@ -146,6 +160,15 @@ std::string indentLines(std::string inp) { return joinLines(lines); } +// Binary to hexadecimal +std::string binToNumeric(std::string inp) { + std::string o = "0"; + for (unsigned i = 0; i < inp.length(); i++) { + o = decimalAdd(decimalMul(o,"256"), unsignedToDecimal((unsigned char)inp[i])); + } + return o; +} + // Converts string to simple numeric format std::string strToNumeric(std::string inp) { std::string o = "0"; @@ -154,7 +177,7 @@ std::string strToNumeric(std::string inp) { } else if ((inp[0] == '"' && inp[inp.length()-1] == '"') || (inp[0] == '\'' && inp[inp.length()-1] == '\'')) { - for (unsigned i = 1; i < inp.length() - 1; i++) { + for (unsigned i = 1; i < inp.length() - 1; i++) { o = decimalAdd(decimalMul(o,"256"), unsignedToDecimal((unsigned char)inp[i])); } } @@ -181,6 +204,14 @@ bool isNumberLike(Node node) { return strToNumeric(node.val) != ""; } +// Is the number decimal? 
+bool isDecimal(std::string inp) { + for (unsigned i = 0; i < inp.length(); i++) { + if (inp[i] < '0' || inp[i] > '9') return false; + } + return true; +} + //Normalizes number representations Node nodeToNumeric(Node node) { std::string o = strToNumeric(node.val); @@ -246,6 +277,14 @@ void err(std::string errtext, Metadata met) { throw(err); } +//Report warning +void warn(std::string errtext, Metadata met) { + std::string err = "Warning (file \"" + met.file + "\", line " + + unsignedToDecimal(met.ln + 1) + ", char " + unsignedToDecimal(met.ch) + + "): " + errtext; + std::cerr << err << "\n"; +} + //Bin to hex std::string binToHex(std::string inp) { std::string o = ""; @@ -280,7 +319,15 @@ std::string upperCase(std::string inp) { //Three-int vector std::vector triple(int a, int b, int c) { - std::vector o; - o.push_back(a); o.push_back(b); o.push_back(c); - return o; + std::vector v; + v.push_back(a); + v.push_back(b); + v.push_back(c); + return v; +} + +//Extend node vector +std::vector extend(std::vector a, std::vector b) { + for (unsigned i = 0; i < b.size(); i++) a.push_back(b[i]); + return a; } diff --git a/libserpent/util.h b/libserpent/util.h index c0a2e9324..e25712d0f 100644 --- a/libserpent/util.h +++ b/libserpent/util.h @@ -28,30 +28,36 @@ const int TOKEN = 0, // Stores metadata about each token class Metadata { public: - Metadata(std::string File="main", int Ln=0, int Ch=0) { + Metadata(std::string File="main", int Ln=-1, int Ch=-1) { file = File; ln = Ln; ch = Ch; + fixed = false; } std::string file; int ln; int ch; + bool fixed; }; std::string mkUniqueToken(); // type can be TOKEN or ASTNODE -struct Node { - int type; - std::string val; - std::vector args; - Metadata metadata; +class Node { + public: + int type; + std::string val; + std::vector args; + Metadata metadata; }; Node token(std::string val, Metadata met=Metadata()); Node astnode(std::string val, std::vector args, Metadata met=Metadata()); +Node astnode(std::string val, Metadata 
met=Metadata()); Node astnode(std::string val, Node a, Metadata met=Metadata()); Node astnode(std::string val, Node a, Node b, Metadata met=Metadata()); Node astnode(std::string val, Node a, Node b, Node c, Metadata met=Metadata()); +Node astnode(std::string val, Node a, Node b, + Node c, Node d, Metadata met=Metadata()); // Number of tokens in a tree int treeSize(Node prog); @@ -74,6 +80,9 @@ std::string joinLines(std::vector lines); // Indent all lines by 4 spaces std::string indentLines(std::string inp); +// Converts binary to simple numeric format +std::string binToNumeric(std::string inp); + // Converts string to simple numeric format std::string strToNumeric(std::string inp); @@ -98,6 +107,9 @@ bool exists(std::string fileName); //Report error void err(std::string errtext, Metadata met); +//Report warning +void warn(std::string errtext, Metadata met); + //Bin to hex std::string binToHex(std::string inp); @@ -110,4 +122,16 @@ std::string upperCase(std::string inp); //Three-int vector std::vector triple(int a, int b, int c); +//Extend node vector +std::vector extend(std::vector a, std::vector b); + +// Is the number decimal? 
+bool isDecimal(std::string inp); + +#define asn astnode +#define tkn token +#define msi std::map +#define msn std::map +#define mss std::map + #endif diff --git a/libsolidity/CompilerStack.cpp b/libsolidity/CompilerStack.cpp index 9fdc88baa..79716fdec 100644 --- a/libsolidity/CompilerStack.cpp +++ b/libsolidity/CompilerStack.cpp @@ -36,13 +36,12 @@ namespace dev namespace solidity { -void CompilerStack::addSource(string const& _name, string const& _content) +bool CompilerStack::addSource(string const& _name, string const& _content) { - if (m_sources.count(_name)) - BOOST_THROW_EXCEPTION(CompilerError() << errinfo_comment("Source by given name already exists.")); - + bool existed = m_sources.count(_name); reset(true); m_sources[_name].scanner = make_shared(CharStream(_content), _name); + return existed; } void CompilerStack::setSource(string const& _sourceCode) @@ -181,6 +180,11 @@ SourceUnit const& CompilerStack::getAST(string const& _sourceName) const return *getSource(_sourceName).ast; } +ContractDefinition const& CompilerStack::getContractDefinition(string const& _contractName) const +{ + return *getContract(_contractName).contract; +} + bytes CompilerStack::staticCompile(std::string const& _sourceCode, bool _optimize) { CompilerStack stack; diff --git a/libsolidity/CompilerStack.h b/libsolidity/CompilerStack.h index 5ad6f0a60..358c8fb77 100644 --- a/libsolidity/CompilerStack.h +++ b/libsolidity/CompilerStack.h @@ -57,7 +57,8 @@ public: CompilerStack(): m_parseSuccessful(false) {} /// Adds a source object (e.g. file) to the parser. After this, parse has to be called again. - void addSource(std::string const& _name, std::string const& _content); + /// @returns true if a source object by the name already existed and was replaced. 
+ bool addSource(std::string const& _name, std::string const& _content); void setSource(std::string const& _sourceCode); /// Parses all source units that were added void parse(); @@ -86,9 +87,13 @@ public: /// Can be one of 3 types defined at @c DocumentationType std::string const& getJsonDocumentation(std::string const& _contractName, DocumentationType _type) const; - /// Returns the previously used scanner, useful for counting lines during error reporting. + /// @returns the previously used scanner, useful for counting lines during error reporting. Scanner const& getScanner(std::string const& _sourceName = "") const; + /// @returns the parsed source unit with the supplied name. SourceUnit const& getAST(std::string const& _sourceName = "") const; + /// @returns the parsed contract with the supplied name. Throws an exception if the contract + /// does not exist. + ContractDefinition const& getContractDefinition(std::string const& _contractName) const; /// Compile the given @a _sourceCode to bytecode. If a scanner is provided, it is used for /// scanning the source code - this is useful for printing exception information. diff --git a/sc/cmdline.cpp b/sc/cmdline.cpp index b44d2538c..a5fed37d6 100644 --- a/sc/cmdline.cpp +++ b/sc/cmdline.cpp @@ -10,6 +10,19 @@ int main(int argv, char** argc) { std::cerr << "Must provide a command and arguments! 
Try parse, rewrite, compile, assemble\n"; return 0; } + if (argv == 2 && (std::string(argc[1]) == "--help" || std::string(argc[1]) == "-h" )) { + std::cout << argc[1] << "\n"; + + std::cout << "serpent command input\n"; + std::cout << "where input -s for from stdin, a file, or interpreted as serpent code if does not exist as file."; + std::cout << "where command: \n"; + std::cout << " parse: Just parses and returns s-expression code.\n"; + std::cout << " rewrite: Parse, use rewrite rules print s-expressions of result.\n"; + std::cout << " compile: Return resulting compiled EVM code in hex.\n"; + std::cout << " assemble: Return result from step before compilation.\n"; + return 0; + } + std::string flag = ""; std::string command = argc[1]; std::string input;