diff --git a/libserpent/functions.cpp b/libserpent/functions.cpp new file mode 100644 index 000000000..78e12e84a --- /dev/null +++ b/libserpent/functions.cpp @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" +#include "optimize.h" +#include "rewriteutils.h" +#include "preprocess.h" +#include "functions.h" + +std::string getSignature(std::vector args) { + std::string o; + for (unsigned i = 0; i < args.size(); i++) { + if (args[i].val == ":" && args[i].args[1].val == "s") + o += "s"; + else if (args[i].val == ":" && args[i].args[1].val == "a") + o += "a"; + else + o += "i"; + } + return o; +} + +// Convert a list of arguments into a node containing a +// < datastart, datasz > pair + +Node packArguments(std::vector args, std::string sig, + int funId, Metadata m) { + // Plain old 32 byte arguments + std::vector nargs; + // Variable-sized arguments + std::vector vargs; + // Variable sizes + std::vector sizes; + // Is a variable an array? 
+ std::vector isArray; + // Fill up above three argument lists + int argCount = 0; + for (unsigned i = 0; i < args.size(); i++) { + Metadata m = args[i].metadata; + if (args[i].val == "=") { + // do nothing + } + else { + // Determine the correct argument type + char argType; + if (sig.size() > 0) { + if (argCount >= (signed)sig.size()) + err("Too many args", m); + argType = sig[argCount]; + } + else argType = 'i'; + // Integer (also usable for short strings) + if (argType == 'i') { + if (args[i].val == ":") + err("Function asks for int, provided string or array", m); + nargs.push_back(args[i]); + } + // Long string + else if (argType == 's') { + if (args[i].val != ":") + err("Must specify string length", m); + vargs.push_back(args[i].args[0]); + sizes.push_back(args[i].args[1]); + isArray.push_back(false); + } + // Array + else if (argType == 'a') { + if (args[i].val != ":") + err("Must specify array length", m); + vargs.push_back(args[i].args[0]); + sizes.push_back(args[i].args[1]); + isArray.push_back(true); + } + else err("Invalid arg type in signature", m); + argCount++; + } + } + int static_arg_size = 1 + (vargs.size() + nargs.size()) * 32; + // Start off by saving the size variables and calculating the total + msn kwargs; + kwargs["funid"] = tkn(utd(funId), m); + std::string pattern = + "(with _sztot "+utd(static_arg_size)+" " + " (with _sizes (alloc "+utd(sizes.size() * 32)+") " + " (seq "; + for (unsigned i = 0; i < sizes.size(); i++) { + std::string sizeIncrement = + isArray[i] ? 
"(mul 32 _x)" : "_x"; + pattern += + "(with _x $sz"+utd(i)+"(seq " + " (mstore (add _sizes "+utd(i * 32)+") _x) " + " (set _sztot (add _sztot "+sizeIncrement+" )))) "; + kwargs["sz"+utd(i)] = sizes[i]; + } + // Allocate memory, and set first data byte + pattern += + "(with _datastart (alloc (add _sztot 32)) (seq " + " (mstore8 _datastart $funid) "; + // Copy over size variables + for (unsigned i = 0; i < sizes.size(); i++) { + int v = 1 + i * 32; + pattern += + " (mstore " + " (add _datastart "+utd(v)+") " + " (mload (add _sizes "+utd(v-1)+"))) "; + } + // Store normal arguments + for (unsigned i = 0; i < nargs.size(); i++) { + int v = 1 + (i + sizes.size()) * 32; + pattern += + " (mstore (add _datastart "+utd(v)+") $"+utd(i)+") "; + kwargs[utd(i)] = nargs[i]; + } + // Loop through variable-sized arguments, store them + pattern += + " (with _pos (add _datastart "+utd(static_arg_size)+") (seq"; + for (unsigned i = 0; i < vargs.size(); i++) { + std::string copySize = + isArray[i] ? "(mul 32 (mload (add _sizes "+utd(i * 32)+")))" + : "(mload (add _sizes "+utd(i * 32)+"))"; + pattern += + " (unsafe_mcopy _pos $vl"+utd(i)+" "+copySize+") " + " (set _pos (add _pos "+copySize+")) "; + kwargs["vl"+utd(i)] = vargs[i]; + } + // Return a 2-item array containing the start and size + pattern += " (array_lit _datastart _sztot))))))))"; + std::string prefix = "_temp_"+mkUniqueToken(); + // Fill in pattern, return triple + return subst(parseLLL(pattern), kwargs, prefix, m); +} + +// Create a node for argument unpacking +Node unpackArguments(std::vector vars, Metadata m) { + std::vector varNames; + std::vector longVarNames; + std::vector longVarIsArray; + // Fill in variable and long variable names, as well as which + // long variables are arrays and which are strings + for (unsigned i = 0; i < vars.size(); i++) { + if (vars[i].val == ":") { + if (vars[i].args.size() != 2) + err("Malformed def!", m); + longVarNames.push_back(vars[i].args[0].val); + std::string tag = 
vars[i].args[1].val; + if (tag == "s") + longVarIsArray.push_back(false); + else if (tag == "a") + longVarIsArray.push_back(true); + else + err("Function value can only be string or array", m); + } + else { + varNames.push_back(vars[i].val); + } + } + std::vector sub; + if (!varNames.size() && !longVarNames.size()) { + // do nothing if we have no arguments + } + else { + std::vector varNodes; + for (unsigned i = 0; i < longVarNames.size(); i++) + varNodes.push_back(token(longVarNames[i], m)); + for (unsigned i = 0; i < varNames.size(); i++) + varNodes.push_back(token(varNames[i], m)); + // Copy over variable lengths and short variables + for (unsigned i = 0; i < varNodes.size(); i++) { + int pos = 1 + i * 32; + std::string prefix = (i < longVarNames.size()) ? "_len_" : ""; + sub.push_back(asn("untyped", asn("set", + token(prefix+varNodes[i].val, m), + asn("calldataload", tkn(utd(pos), m), m), + m))); + } + // Copy over long variables + if (longVarNames.size() > 0) { + std::vector sub2; + int pos = varNodes.size() * 32 + 1; + Node tot = tkn("_tot", m); + for (unsigned i = 0; i < longVarNames.size(); i++) { + Node var = tkn(longVarNames[i], m); + Node varlen = longVarIsArray[i] + ? 
asn("mul", tkn("32", m), tkn("_len_"+longVarNames[i], m)) + : tkn("_len_"+longVarNames[i], m); + sub2.push_back(asn("untyped", + asn("set", var, asn("alloc", varlen)))); + sub2.push_back(asn("calldatacopy", var, tot, varlen)); + sub2.push_back(asn("set", tot, asn("add", tot, varlen))); + } + std::string prefix = "_temp_"+mkUniqueToken(); + sub.push_back(subst( + astnode("with", tot, tkn(utd(pos), m), asn("seq", sub2)), + msn(), + prefix, + m)); + } + } + return asn("seq", sub, m); +} diff --git a/libserpent/functions.h b/libserpent/functions.h new file mode 100644 index 000000000..68a1c69ce --- /dev/null +++ b/libserpent/functions.h @@ -0,0 +1,39 @@ +#ifndef ETHSERP_FUNCTIONS +#define ETHSERP_FUNCTIONS + +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" +#include "optimize.h" +#include "rewriteutils.h" +#include "preprocess.h" + + +class argPack { + public: + argPack(Node a, Node b, Node c) { + pre = a; + datastart = b; + datasz = c; + } + Node pre; + Node datastart; + Node datasz; +}; + +// Get a signature from a function +std::string getSignature(std::vector args); + +// Convert a list of arguments into a node +// triple, given the signature of a function +Node packArguments(std::vector args, std::string sig, + int funId, Metadata m); + +// Create a node for argument unpacking +Node unpackArguments(std::vector vars, Metadata m); + +#endif diff --git a/libserpent/opcodes.cpp b/libserpent/opcodes.cpp new file mode 100644 index 000000000..b24144e46 --- /dev/null +++ b/libserpent/opcodes.cpp @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include "opcodes.h" +#include "util.h" +#include "bignum.h" + +Mapping mapping[] = { + Mapping("STOP", 0x00, 0, 0), + Mapping("ADD", 0x01, 2, 1), + Mapping("MUL", 0x02, 2, 1), + Mapping("SUB", 0x03, 2, 1), + Mapping("DIV", 0x04, 2, 1), + Mapping("SDIV", 0x05, 2, 1), + Mapping("MOD", 0x06, 2, 1), + Mapping("SMOD", 0x07, 2, 1), + Mapping("ADDMOD", 0x08, 3, 1), + 
Mapping("MULMOD", 0x09, 3, 1), + Mapping("EXP", 0x0a, 2, 1), + Mapping("SIGNEXTEND", 0x0b, 2, 1), + Mapping("LT", 0x10, 2, 1), + Mapping("GT", 0x11, 2, 1), + Mapping("SLT", 0x12, 2, 1), + Mapping("SGT", 0x13, 2, 1), + Mapping("EQ", 0x14, 2, 1), + Mapping("ISZERO", 0x15, 1, 1), + Mapping("AND", 0x16, 2, 1), + Mapping("OR", 0x17, 2, 1), + Mapping("XOR", 0x18, 2, 1), + Mapping("NOT", 0x19, 1, 1), + Mapping("BYTE", 0x1a, 2, 1), + Mapping("SHA3", 0x20, 2, 1), + Mapping("ADDRESS", 0x30, 0, 1), + Mapping("BALANCE", 0x31, 1, 1), + Mapping("ORIGIN", 0x32, 0, 1), + Mapping("CALLER", 0x33, 0, 1), + Mapping("CALLVALUE", 0x34, 0, 1), + Mapping("CALLDATALOAD", 0x35, 1, 1), + Mapping("CALLDATASIZE", 0x36, 0, 1), + Mapping("CALLDATACOPY", 0x37, 3, 0), + Mapping("CODESIZE", 0x38, 0, 1), + Mapping("CODECOPY", 0x39, 3, 0), + Mapping("GASPRICE", 0x3a, 0, 1), + Mapping("EXTCODESIZE", 0x3b, 1, 1), + Mapping("EXTCODECOPY", 0x3c, 4, 0), + Mapping("PREVHASH", 0x40, 0, 1), + Mapping("COINBASE", 0x41, 0, 1), + Mapping("TIMESTAMP", 0x42, 0, 1), + Mapping("NUMBER", 0x43, 0, 1), + Mapping("DIFFICULTY", 0x44, 0, 1), + Mapping("GASLIMIT", 0x45, 0, 1), + Mapping("POP", 0x50, 1, 0), + Mapping("MLOAD", 0x51, 1, 1), + Mapping("MSTORE", 0x52, 2, 0), + Mapping("MSTORE8", 0x53, 2, 0), + Mapping("SLOAD", 0x54, 1, 1), + Mapping("SSTORE", 0x55, 2, 0), + Mapping("JUMP", 0x56, 1, 0), + Mapping("JUMPI", 0x57, 2, 0), + Mapping("PC", 0x58, 0, 1), + Mapping("MSIZE", 0x59, 0, 1), + Mapping("GAS", 0x5a, 0, 1), + Mapping("JUMPDEST", 0x5b, 0, 0), + Mapping("LOG0", 0xa0, 2, 0), + Mapping("LOG1", 0xa1, 3, 0), + Mapping("LOG2", 0xa2, 4, 0), + Mapping("LOG3", 0xa3, 5, 0), + Mapping("LOG4", 0xa4, 6, 0), + Mapping("CREATE", 0xf0, 3, 1), + Mapping("CALL", 0xf1, 7, 1), + Mapping("CALLCODE", 0xf2, 7, 1), + Mapping("RETURN", 0xf3, 2, 0), + Mapping("SUICIDE", 0xff, 1, 0), + Mapping("---END---", 0x00, 0, 0), +}; + +std::map > opcodes; +std::map reverseOpcodes; + +// Fetches everything EXCEPT PUSH1..32 +std::pair > 
_opdata(std::string ops, int opi) { + if (!opcodes.size()) { + int i = 0; + while (mapping[i].op != "---END---") { + Mapping mi = mapping[i]; + opcodes[mi.op] = triple(mi.opcode, mi.in, mi.out); + i++; + } + for (i = 1; i <= 16; i++) { + opcodes["DUP"+unsignedToDecimal(i)] = triple(0x7f + i, i, i+1); + opcodes["SWAP"+unsignedToDecimal(i)] = triple(0x8f + i, i+1, i+1); + } + for (std::map >::iterator it=opcodes.begin(); + it != opcodes.end(); + it++) { + reverseOpcodes[(*it).second[0]] = (*it).first; + } + } + ops = upperCase(ops); + std::string op; + std::vector opdata; + op = reverseOpcodes.count(opi) ? reverseOpcodes[opi] : ""; + opdata = opcodes.count(ops) ? opcodes[ops] : triple(-1, -1, -1); + return std::pair >(op, opdata); +} + +int opcode(std::string op) { + return _opdata(op, -1).second[0]; +} + +int opinputs(std::string op) { + return _opdata(op, -1).second[1]; +} + +int opoutputs(std::string op) { + return _opdata(op, -1).second[2]; +} + +std::string op(int opcode) { + return _opdata("", opcode).first; +} + +std::string lllSpecials[][3] = { + { "ref", "1", "1" }, + { "get", "1", "1" }, + { "set", "2", "2" }, + { "with", "3", "3" }, + { "comment", "0", "2147483647" }, + { "ops", "0", "2147483647" }, + { "lll", "2", "2" }, + { "seq", "0", "2147483647" }, + { "if", "3", "3" }, + { "unless", "2", "2" }, + { "until", "2", "2" }, + { "alloc", "1", "1" }, + { "---END---", "0", "0" }, +}; + +std::map > lllMap; + +// Is a function name one of the valid functions above? 
+bool isValidLLLFunc(std::string f, int argc) { + if (lllMap.size() == 0) { + for (int i = 0; ; i++) { + if (lllSpecials[i][0] == "---END---") break; + lllMap[lllSpecials[i][0]] = std::pair( + dtu(lllSpecials[i][1]), dtu(lllSpecials[i][2])); + } + } + return lllMap.count(f) + && argc >= lllMap[f].first + && argc <= lllMap[f].second; +} diff --git a/libserpent/optimize.cpp b/libserpent/optimize.cpp new file mode 100644 index 000000000..e689fcb69 --- /dev/null +++ b/libserpent/optimize.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" + +// Compile-time arithmetic calculations +Node optimize(Node inp) { + if (inp.type == TOKEN) { + Node o = tryNumberize(inp); + if (decimalGt(o.val, tt256, true)) + err("Value too large (exceeds 32 bytes or 2^256)", inp.metadata); + return o; + } + for (unsigned i = 0; i < inp.args.size(); i++) { + inp.args[i] = optimize(inp.args[i]); + } + // Arithmetic-specific transform + if (inp.val == "+") inp.val = "add"; + if (inp.val == "*") inp.val = "mul"; + if (inp.val == "-") inp.val = "sub"; + if (inp.val == "/") inp.val = "sdiv"; + if (inp.val == "^") inp.val = "exp"; + if (inp.val == "**") inp.val = "exp"; + if (inp.val == "%") inp.val = "smod"; + // Degenerate cases for add and mul + if (inp.args.size() == 2) { + if (inp.val == "add" && inp.args[0].type == TOKEN && + inp.args[0].val == "0") { + Node x = inp.args[1]; + inp = x; + } + if (inp.val == "add" && inp.args[1].type == TOKEN && + inp.args[1].val == "0") { + Node x = inp.args[0]; + inp = x; + } + if (inp.val == "mul" && inp.args[0].type == TOKEN && + inp.args[0].val == "1") { + Node x = inp.args[1]; + inp = x; + } + if (inp.val == "mul" && inp.args[1].type == TOKEN && + inp.args[1].val == "1") { + Node x = inp.args[0]; + inp = x; + } + } + // Arithmetic computation + if (inp.args.size() == 2 + && inp.args[0].type == TOKEN + && inp.args[1].type == TOKEN) { + std::string o; + if (inp.val == "add") { + o = 
decimalMod(decimalAdd(inp.args[0].val, inp.args[1].val), tt256); + } + else if (inp.val == "sub") { + if (decimalGt(inp.args[0].val, inp.args[1].val, true)) + o = decimalSub(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "mul") { + o = decimalMod(decimalMul(inp.args[0].val, inp.args[1].val), tt256); + } + else if (inp.val == "div" && inp.args[1].val != "0") { + o = decimalDiv(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "sdiv" && inp.args[1].val != "0" + && decimalGt(tt255, inp.args[0].val) + && decimalGt(tt255, inp.args[1].val)) { + o = decimalDiv(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "mod" && inp.args[1].val != "0") { + o = decimalMod(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "smod" && inp.args[1].val != "0" + && decimalGt(tt255, inp.args[0].val) + && decimalGt(tt255, inp.args[1].val)) { + o = decimalMod(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "exp") { + o = decimalModExp(inp.args[0].val, inp.args[1].val, tt256); + } + if (o.length()) return token(o, inp.metadata); + } + return inp; +} + +// Is a node degenerate (ie. trivial to calculate) ? +bool isDegenerate(Node n) { + return optimize(n).type == TOKEN; +} + +// Is a node purely arithmetic? +bool isPureArithmetic(Node n) { + return isNumberLike(optimize(n)); +} diff --git a/libserpent/optimize.h b/libserpent/optimize.h new file mode 100644 index 000000000..06ea3bba1 --- /dev/null +++ b/libserpent/optimize.h @@ -0,0 +1,19 @@ +#ifndef ETHSERP_OPTIMIZER +#define ETHSERP_OPTIMIZER + +#include +#include +#include +#include +#include "util.h" + +// Compile-time arithmetic calculations +Node optimize(Node inp); + +// Is a node degenerate (ie. trivial to calculate) ? +bool isDegenerate(Node n); + +// Is a node purely arithmetic? 
+bool isPureArithmetic(Node n); + +#endif diff --git a/libserpent/preprocess.cpp b/libserpent/preprocess.cpp new file mode 100644 index 000000000..2df149945 --- /dev/null +++ b/libserpent/preprocess.cpp @@ -0,0 +1,327 @@ +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" +#include "rewriteutils.h" +#include "optimize.h" +#include "preprocess.h" +#include "functions.h" +#include "opcodes.h" + +// Convert a function of the form (def (f x y z) (do stuff)) into +// (if (first byte of ABI is correct) (seq (setup x y z) (do stuff))) +Node convFunction(Node node, int functionCount) { + std::string prefix = "_temp"+mkUniqueToken()+"_"; + Metadata m = node.metadata; + + if (node.args.size() != 2) + err("Malformed def!", m); + // Collect the list of variable names and variable byte counts + Node unpack = unpackArguments(node.args[0].args, m); + // And the actual code + Node body = node.args[1]; + // Main LLL-based function body + return astnode("if", + astnode("eq", + astnode("get", token("__funid", m), m), + token(unsignedToDecimal(functionCount), m), + m), + astnode("seq", unpack, body, m)); +} + +// Populate an svObj with the arguments needed to determine +// the storage position of a node +svObj getStorageVars(svObj pre, Node node, std::string prefix, + int index) { + Metadata m = node.metadata; + if (!pre.globalOffset.size()) pre.globalOffset = "0"; + std::vector h; + std::vector coefficients; + // Array accesses or atoms + if (node.val == "access" || node.type == TOKEN) { + std::string tot = "1"; + h = listfyStorageAccess(node); + coefficients.push_back("1"); + for (unsigned i = h.size() - 1; i >= 1; i--) { + // Array sizes must be constant or at least arithmetically + // evaluable at compile time + if (!isPureArithmetic(h[i])) + err("Array size must be fixed value", m); + // Create a list of the coefficient associated with each + // array index + coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); + } 
+ } + // Tuples + else { + int startc; + // Handle the (fun args...) case + if (node.val == "fun") { + startc = 1; + h = listfyStorageAccess(node.args[0]); + } + // Handle the ( args...) case, which + // the serpent parser produces when the function + // is a simple name and not a complex astnode + else { + startc = 0; + h = listfyStorageAccess(token(node.val, m)); + } + svObj sub = pre; + sub.globalOffset = "0"; + // Evaluate tuple elements recursively + for (unsigned i = startc; i < node.args.size(); i++) { + sub = getStorageVars(sub, + node.args[i], + prefix+h[0].val.substr(2)+".", + i-startc); + } + coefficients.push_back(sub.globalOffset); + for (unsigned i = h.size() - 1; i >= 1; i--) { + // Array sizes must be constant or at least arithmetically + // evaluable at compile time + if (!isPureArithmetic(h[i])) + err("Array size must be fixed value", m); + // Create a list of the coefficient associated with each + // array index + coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); + } + pre.offsets = sub.offsets; + pre.coefficients = sub.coefficients; + pre.nonfinal = sub.nonfinal; + pre.nonfinal[prefix+h[0].val.substr(2)] = true; + } + pre.coefficients[prefix+h[0].val.substr(2)] = coefficients; + pre.offsets[prefix+h[0].val.substr(2)] = pre.globalOffset; + pre.indices[prefix+h[0].val.substr(2)] = index; + if (decimalGt(tt176, coefficients.back())) + pre.globalOffset = decimalAdd(pre.globalOffset, coefficients.back()); + return pre; +} + +// Preprocess input containing functions +// +// localExterns is a map of the form, eg, +// +// { x: { foo: 0, bar: 1, baz: 2 }, y: { qux: 0, foo: 1 } ... } +// +// localExternSigs is a map of the form, eg, +// +// { x : { foo: iii, bar: iis, baz: ia }, y: { qux: i, foo: as } ... 
} +// +// Signifying that x.foo = 0, x.baz = 2, y.foo = 1, etc +// and that x.foo has three integers as arguments, x.bar has two +// integers and a variable-length string, and baz has an integer +// and an array +// +// globalExterns is a one-level map, eg from above +// +// { foo: 1, bar: 1, baz: 2, qux: 0 } +// +// globalExternSigs is a one-level map, eg from above +// +// { foo: as, bar: iis, baz: ia, qux: i} +// +// Note that globalExterns and globalExternSigs may be ambiguous +// Also, a null signature implies an infinite tail of integers +preprocessResult preprocessInit(Node inp) { + Metadata m = inp.metadata; + if (inp.val != "seq") + inp = astnode("seq", inp, m); + std::vector empty = std::vector(); + Node init = astnode("seq", empty, m); + Node shared = astnode("seq", empty, m); + std::vector any; + std::vector functions; + preprocessAux out = preprocessAux(); + out.localExterns["self"] = std::map(); + int functionCount = 0; + int storageDataCount = 0; + for (unsigned i = 0; i < inp.args.size(); i++) { + Node obj = inp.args[i]; + // Functions + if (obj.val == "def") { + if (obj.args.size() == 0) + err("Empty def", m); + std::string funName = obj.args[0].val; + // Init, shared and any are special functions + if (funName == "init" || funName == "shared" || funName == "any") { + if (obj.args[0].args.size()) + err(funName+" cannot have arguments", m); + } + if (funName == "init") init = obj.args[1]; + else if (funName == "shared") shared = obj.args[1]; + else if (funName == "any") any.push_back(obj.args[1]); + else { + // Other functions + functions.push_back(convFunction(obj, functionCount)); + out.localExterns["self"][obj.args[0].val] = functionCount; + out.localExternSigs["self"][obj.args[0].val] + = getSignature(obj.args[0].args); + functionCount++; + } + } + // Extern declarations + else if (obj.val == "extern") { + std::string externName = obj.args[0].val; + Node al = obj.args[1]; + if (!out.localExterns.count(externName)) + out.localExterns[externName] 
= std::map();
+            // Register every declared function both under the extern's own
+            // name and in the global tables, using its position in the
+            // list as the function id
+            for (unsigned i = 0; i < al.args.size(); i++) {
+                // "name:sig" entry: explicit signature string
+                if (al.args[i].val == ":") {
+                    std::string v = al.args[i].args[0].val;
+                    std::string sig = al.args[i].args[1].val;
+                    out.globalExterns[v] = i;
+                    out.globalExternSigs[v] = sig;
+                    out.localExterns[externName][v] = i;
+                    out.localExternSigs[externName][v] = sig;
+                }
+                // Bare name: empty signature
+                else {
+                    std::string v = al.args[i].val;
+                    out.globalExterns[v] = i;
+                    out.globalExternSigs[v] = "";
+                    out.localExterns[externName][v] = i;
+                    out.localExternSigs[externName][v] = "";
+                }
+            }
+        }
+        // Custom macros
+        else if (obj.val == "macro" || (obj.val == "fun" && obj.args[0].val == "macro")) {
+            // Rules for valid macros:
+            //
+            // There are only five categories of valid macros:
+            //
+            // 1. a macro where the outer function is something
+            //    which is NOT an existing valid function/extern/datum
+            // 2. a macro of the form set(c(x), d) where c must NOT
+            //    be an existing valid function/extern/datum
+            // 3. something of the form access(c(x)), where c must NOT
+            //    be an existing valid function/extern/datum
+            // 4. something of the form set(access(c(x)), d) where c must
+            //    NOT be an existing valid function/extern/datum
+            // 5. something of the form with(c(x), d, e) where c must
+            //    NOT be an existing valid function/extern/datum
+            bool valid = false;
+            Node pattern;
+            Node substitution;
+            int priority;
+            // Priority not set: default zero
+            if (obj.val == "macro") {
+                pattern = obj.args[0];
+                substitution = obj.args[1];
+                priority = 0;
+            }
+            // Specified priority
+            else {
+                pattern = obj.args[1];
+                substitution = obj.args[2];
+                if (obj.args[0].args.size())
+                    priority = dtu(obj.args[0].args[0].val);
+                else
+                    priority = 0;
+            }
+            // Category 1: outer function is not an opcode or builtin
+            if (opcode(pattern.val) < 0 && !isValidFunctionName(pattern.val))
+                valid = true;
+            // Category 2: set(c(x), d)
+            if (pattern.val == "set" &&
+                  opcode(pattern.args[0].val) < 0 &&
+                  !isValidFunctionName(pattern.args[0].val))
+                valid = true;
+            // Category 3: access(c(x)). This check needs its own
+            // "valid = true"; without it the next if becomes its body and
+            // neither category 3 nor category 4 can ever match.
+            if (pattern.val == "access" &&
+                  opcode(pattern.args[0].val) < 0 &&
+                  !isValidFunctionName(pattern.args[0].val))
+                valid = true;
+            // Category 4: set(access(c(x)), d)
+            if (pattern.val == "set" &&
+                  pattern.args[0].val == "access" &&
+                  opcode(pattern.args[0].args[0].val) < 0 &&
+                  !isValidFunctionName(pattern.args[0].args[0].val))
+                valid = true;
+            // Category 5: with(c(x), d, e)
+            if (pattern.val == "with" &&
+                  opcode(pattern.args[0].val) < 0 &&
+                  !isValidFunctionName(pattern.args[0].val))
+                valid = true;
+            // Accepted macros are stored as rewrite rules grouped by
+            // priority; rejected ones only produce a warning
+            if (valid) {
+                if (!out.customMacros.count(priority))
+                    out.customMacros[priority] = rewriteRuleSet();
+                out.customMacros[priority].addRule
+                    (rewriteRule(pattern, substitution));
+            }
+            else warn("Macro does not fit valid template: "+printSimple(pattern), m);
+        }
+        // Variable types
+        else if (obj.val == "type") {
+            std::string typeName = obj.args[0].val;
+            std::vector vars = obj.args[1].args;
+            for (unsigned i = 0; i < vars.size(); i++)
+                out.types[vars[i].val] = typeName;
+        }
+        // Storage variables/structures
+        else if (obj.val == "data") {
+            out.storageVars = getStorageVars(out.storageVars,
+                                             obj.args[0],
+                                             "",
+                                             storageDataCount);
+            storageDataCount += 1;
+        }
+        // Anything else is ordinary top-level code
+        else any.push_back(obj);
+    }
+    // Set up top-level AST structure
+    std::vector main;
+    if (shared.args.size()) main.push_back(shared);
+    if (init.args.size())
main.push_back(init); + + std::vector code; + if (shared.args.size()) code.push_back(shared); + for (unsigned i = 0; i < any.size(); i++) + code.push_back(any[i]); + for (unsigned i = 0; i < functions.size(); i++) + code.push_back(functions[i]); + Node codeNode; + if (functions.size() > 0) { + codeNode = astnode("with", + token("__funid", m), + astnode("byte", + token("0", m), + astnode("calldataload", token("0", m), m), + m), + astnode("seq", code, m), + m); + } + else codeNode = astnode("seq", code, m); + main.push_back(astnode("~return", + token("0", m), + astnode("lll", + codeNode, + token("0", m), + m), + m)); + + + Node result; + if (main.size() == 1) result = main[0]; + else result = astnode("seq", main, inp.metadata); + return preprocessResult(result, out); +} + +preprocessResult processTypes (preprocessResult pr) { + preprocessAux aux = pr.second; + Node node = pr.first; + if (node.type == TOKEN && aux.types.count(node.val)) + node = asn(aux.types[node.val], node, node.metadata); + else if (node.val == "untyped") + return preprocessResult(node.args[0], aux); + else if (node.val == "outer") + return preprocessResult(node, aux); + else { + for (unsigned i = 0; i < node.args.size(); i++) { + node.args[i] = + processTypes(preprocessResult(node.args[i], aux)).first; + } + } + return preprocessResult(node, aux); +} + +preprocessResult preprocess(Node n) { + return processTypes(preprocessInit(n)); +} diff --git a/libserpent/preprocess.h b/libserpent/preprocess.h new file mode 100644 index 000000000..321fb8527 --- /dev/null +++ b/libserpent/preprocess.h @@ -0,0 +1,50 @@ +#ifndef ETHSERP_PREPROCESSOR +#define ETHSERP_PREPROCESSOR + +#include +#include +#include +#include +#include "util.h" +#include "rewriteutils.h" + +// Storage variable index storing object +struct svObj { + std::map offsets; + std::map indices; + std::map > coefficients; + std::map nonfinal; + std::string globalOffset; +}; + + + +// Preprocessing result storing object +class preprocessAux { + 
public: + preprocessAux() { + globalExterns = std::map(); + localExterns = std::map >(); + localExterns["self"] = std::map(); + } + std::map globalExterns; + std::map globalExternSigs; + std::map > localExterns; + std::map > localExternSigs; + std::map customMacros; + std::map types; + svObj storageVars; +}; + +#define preprocessResult std::pair + +// Populate an svObj with the arguments needed to determine +// the storage position of a node +svObj getStorageVars(svObj pre, Node node, std::string prefix="", + int index=0); + +// Preprocess a function (see cpp for details) +preprocessResult preprocess(Node inp); + + +#endif diff --git a/libserpent/rewriteutils.cpp b/libserpent/rewriteutils.cpp new file mode 100644 index 000000000..0d810bdbc --- /dev/null +++ b/libserpent/rewriteutils.cpp @@ -0,0 +1,211 @@ +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" +#include "rewriteutils.h" +#include "optimize.h" + +// Valid functions and their min and max argument counts +std::string validFunctions[][3] = { + { "if", "2", "3" }, + { "unless", "2", "2" }, + { "while", "2", "2" }, + { "until", "2", "2" }, + { "alloc", "1", "1" }, + { "array", "1", "1" }, + { "call", "2", tt256 }, + { "callcode", "2", tt256 }, + { "create", "1", "4" }, + { "getch", "2", "2" }, + { "setch", "3", "3" }, + { "sha3", "1", "2" }, + { "return", "1", "2" }, + { "inset", "1", "1" }, + { "min", "2", "2" }, + { "max", "2", "2" }, + { "array_lit", "0", tt256 }, + { "seq", "0", tt256 }, + { "log", "1", "6" }, + { "outer", "1", "1" }, + { "set", "2", "2" }, + { "get", "1", "1" }, + { "ref", "1", "1" }, + { "declare", "1", tt256 }, + { "with", "3", "3" }, + { "outer", "1", "1" }, + { "mcopy", "3", "3" }, + { "unsafe_mcopy", "3", "3" }, + { "save", "3", "3" }, + { "load", "2", "2" }, + { "---END---", "", "" } //Keep this line at the end of the list +}; + +std::map vfMap; + +// Is a function name one of the valid functions above? 
+bool isValidFunctionName(std::string f) {
+    // Fill the lookup map from validFunctions on first use; the
+    // "---END---" row terminates the list
+    if (vfMap.size() == 0) {
+        for (int i = 0; ; i++) {
+            if (validFunctions[i][0] == "---END---") break;
+            vfMap[validFunctions[i][0]] = true;
+        }
+    }
+    return vfMap.count(f);
+}
+
+// Cool function for debug purposes (named cerrStringList to make
+// all prints searchable via 'cerr')
+void cerrStringList(std::vector s, std::string suffix) {
+    for (unsigned i = 0; i < s.size(); i++) std::cerr << s[i] << " ";
+    std::cerr << suffix << "\n";
+}
+
+// Convert:
+// self.cow -> ["cow"]
+// self.horse[0] -> ["horse", "0"]
+// self.a[6][7][self.storage[3]].chicken[9] ->
+//     ["6", "7", (sload 3), "chicken", "9"]
+//
+// Walks down the args[0] chain of the node, collecting args[1] of every
+// level (or a tt256m1 token when a level has only one argument), and
+// finishes at the innermost token. Member names after "." and the
+// innermost token are prefixed with "--". The collected list is returned
+// reversed, ie. innermost element first.
+std::vector listfyStorageAccess(Node node) {
+    std::vector out;
+    std::vector nodez;
+    nodez.push_back(node);
+    while (1) {
+        if (nodez.back().type == TOKEN) {
+            out.push_back(token("--" + nodez.back().val, node.metadata));
+            std::vector outrev;
+            for (int i = (signed)out.size() - 1; i >= 0; i--) {
+                outrev.push_back(out[i]);
+            }
+            return outrev;
+        }
+        // Validate the argument count before touching args[0] / args[1]
+        // (assumes err() does not return - TODO confirm)
+        if (nodez.back().args.size() == 0)
+            err("Error parsing storage variable statement", node.metadata);
+        if (nodez.back().val == ".") {
+            // A member access needs both an object and a member name
+            if (nodez.back().args.size() < 2)
+                err("Error parsing storage variable statement", node.metadata);
+            nodez.back().args[1].val = "--" + nodez.back().args[1].val;
+        }
+        if (nodez.back().args.size() == 1)
+            out.push_back(token(tt256m1, node.metadata));
+        else
+            out.push_back(nodez.back().args[1]);
+        nodez.push_back(nodez.back().args[0]);
+    }
+}
+
+// Is the given node something of the form
+// self.cow
+// self.horse[0]
+// self.a[6][7][self.storage[3]].chicken[9]
+bool isNodeStorageVariable(Node node) {
+    std::vector nodez;
+    nodez.push_back(node);
+    while (1) {
+        if (nodez.back().type == TOKEN) return false;
+        if (nodez.back().args.size() == 0) return false;
+        if (nodez.back().val != "."
&& nodez.back().val != "access") + return false; + if (nodez.back().args[0].val == "self") return true; + nodez.push_back(nodez.back().args[0]); + } +} + +// Main pattern matching routine, for those patterns that can be expressed +// using our standard mini-language above +// +// Returns two values. First, a boolean to determine whether the node matches +// the pattern, second, if the node does match then a map mapping variables +// in the pattern to nodes +matchResult match(Node p, Node n) { + matchResult o; + o.success = false; + if (p.type == TOKEN) { + if (p.val == n.val && n.type == TOKEN) o.success = true; + else if (p.val[0] == '$' || p.val[0] == '@') { + o.success = true; + o.map[p.val.substr(1)] = n; + } + } + else if (n.type==TOKEN || p.val!=n.val || p.args.size()!=n.args.size()) { + // do nothing + } + else { + for (unsigned i = 0; i < p.args.size(); i++) { + matchResult oPrime = match(p.args[i], n.args[i]); + if (!oPrime.success) { + o.success = false; + return o; + } + for (std::map::iterator it = oPrime.map.begin(); + it != oPrime.map.end(); + it++) { + o.map[(*it).first] = (*it).second; + } + } + o.success = true; + } + return o; +} + + +// Fills in the pattern with a dictionary mapping variable names to +// nodes (these dicts are generated by match). Match and subst together +// create a full pattern-matching engine. 
+Node subst(Node pattern, + std::map dict, + std::string varflag, + Metadata m) { + // Swap out patterns at the token level + if (pattern.metadata.ln == -1) + pattern.metadata = m; + if (pattern.type == TOKEN && + pattern.val[0] == '$') { + if (dict.count(pattern.val.substr(1))) { + return dict[pattern.val.substr(1)]; + } + else { + return token(varflag + pattern.val.substr(1), m); + } + } + // Other tokens are untouched + else if (pattern.type == TOKEN) { + return pattern; + } + // Substitute recursively for ASTs + else { + std::vector args; + for (unsigned i = 0; i < pattern.args.size(); i++) { + args.push_back(subst(pattern.args[i], dict, varflag, m)); + } + return asn(pattern.val, args, m); + } +} + +// Transforms a sequence containing two-argument with statements +// into a statement containing those statements in nested form +Node withTransform (Node source) { + Node o = token("--"); + Metadata m = source.metadata; + std::vector args; + for (int i = source.args.size() - 1; i >= 0; i--) { + Node a = source.args[i]; + if (a.val == "with" && a.args.size() == 2) { + std::vector flipargs; + for (int j = args.size() - 1; j >= 0; j--) + flipargs.push_back(args[i]); + if (o.val != "--") + flipargs.push_back(o); + o = asn("with", a.args[0], a.args[1], asn("seq", flipargs, m), m); + args = std::vector(); + } + else { + args.push_back(a); + } + } + std::vector flipargs; + for (int j = args.size() - 1; j >= 0; j--) + flipargs.push_back(args[j]); + if (o.val != "--") + flipargs.push_back(o); + return asn("seq", flipargs, m); +} diff --git a/libserpent/rewriteutils.h b/libserpent/rewriteutils.h new file mode 100644 index 000000000..3a9a837ad --- /dev/null +++ b/libserpent/rewriteutils.h @@ -0,0 +1,76 @@ +#ifndef ETHSERP_REWRITEUTILS +#define ETHSERP_REWRITEUTILS + +#include +#include +#include +#include +#include "util.h" + +// Valid functions and their min and max argument counts +extern std::string validFunctions[][3]; + +extern std::map vfMap; + +bool 
isValidFunctionName(std::string f); + +// Converts deep array access into ordered list of the arguments +// along the descent +std::vector listfyStorageAccess(Node node); + +// Cool function for debug purposes (named cerrStringList to make +// all prints searchable via 'cerr') +void cerrStringList(std::vector s, std::string suffix=""); + +// Is the given node something of the form +// self.cow +// self.horse[0] +// self.a[6][7][self.storage[3]].chicken[9] +bool isNodeStorageVariable(Node node); + +// Applies rewrite rules adding without wrapper +Node rewriteChunk(Node inp); + +// Match result storing object +struct matchResult { + bool success; + std::map map; +}; + +// Match node to pattern +matchResult match(Node p, Node n); + +// Substitute node using pattern +Node subst(Node pattern, + std::map dict, + std::string varflag, + Metadata m); + +Node withTransform(Node source); + +class rewriteRule { + public: + rewriteRule(Node p, Node s) { + pattern = p; + substitution = s; + } + Node pattern; + Node substitution; +}; + +class rewriteRuleSet { + public: + rewriteRuleSet() { + ruleLists = std::map >(); + } + void addRule(rewriteRule r) { + if (!ruleLists.count(r.pattern.val)) + ruleLists[r.pattern.val] = std::vector(); + ruleLists[r.pattern.val].push_back(r); + } + std::map > ruleLists; +}; + + + +#endif