diff --git a/CMakeLists.txt b/CMakeLists.txt index 1293b4427..5763c9b85 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,6 +165,8 @@ if (WIN32) set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") set(CPACK_PACKAGE_VERSION "0.7") set(CPACK_GENERATOR "NSIS") + # seems to be not working + # set(CPACK_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/alethzero/alethzero.bmp") # our stuff set(CPACK_COMPONENT_ALETHZERO_GROUP "Applications") @@ -178,6 +180,8 @@ if (WIN32) set(CPACK_NSIS_URL_INFO_ABOUT "https://github.com/ethereum/cpp-ethereum") set(CPACK_NSIS_CONTACT "ethereum.org") set(CPACK_NSIS_MODIFY_PATH ON) + set(CPACK_NSIS_MUI_ICON "${CMAKE_CURRENT_SOURCE_DIR}/alethzero/alethzero.ico") + set(CPACK_NSIS_MUI_UNIICON "${CMAKE_CURRENT_SOURCE_DIR}/alethzero/alethzero.ico") include(CPack) endif (WIN32) diff --git a/alethzero/CMakeLists.txt b/alethzero/CMakeLists.txt index 39c02d6be..8c8a37a42 100644 --- a/alethzero/CMakeLists.txt +++ b/alethzero/CMakeLists.txt @@ -27,6 +27,7 @@ endif () eth_add_executable(${EXECUTABLE} ICON alethzero UI_RESOURCES alethzero.icns Main.ui + WIN_RESOURCES alethzero.rc ) add_dependencies(${EXECUTABLE} BuildInfo.h) diff --git a/alethzero/alethzero.ico b/alethzero/alethzero.ico new file mode 100644 index 000000000..acee27751 Binary files /dev/null and b/alethzero/alethzero.ico differ diff --git a/alethzero/alethzero.rc b/alethzero/alethzero.rc new file mode 100644 index 000000000..29c778bd4 --- /dev/null +++ b/alethzero/alethzero.rc @@ -0,0 +1 @@ +APP_ICON ICON DISCARDABLE "alethzero.ico" \ No newline at end of file diff --git a/cmake/EthExecutableHelper.cmake b/cmake/EthExecutableHelper.cmake index 0ec1e10d4..88e2a3e16 100644 --- a/cmake/EthExecutableHelper.cmake +++ b/cmake/EthExecutableHelper.cmake @@ -18,7 +18,7 @@ macro(eth_add_executable EXECUTABLE) set (extra_macro_args ${ARGN}) set (options) set (one_value_args ICON) - set (multi_value_args UI_RESOURCES) + set (multi_value_args UI_RESOURCES WIN_RESOURCES) 
cmake_parse_arguments (ETH_ADD_EXECUTABLE "${options}" "${one_value_args}" "${multi_value_args}" "${extra_macro_args}") if (APPLE) @@ -38,7 +38,7 @@ macro(eth_add_executable EXECUTABLE) set_source_files_properties(${MACOSX_BUNDLE_ICON_FILE}.icns PROPERTIES MACOSX_PACKAGE_LOCATION Resources) else () - add_executable(${EXECUTABLE} ${ETH_ADD_EXECUTABLE_UI_RESOURCES} ${SRC_LIST} ${HEADERS}) + add_executable(${EXECUTABLE} ${ETH_ADD_EXECUTABLE_UI_RESOURCES} ${ETH_ADD_EXECUTABLE_WIN_RESOURCES} ${SRC_LIST} ${HEADERS}) endif() endmacro() @@ -60,7 +60,11 @@ macro(eth_install_executable EXECUTABLE) cmake_parse_arguments (ETH_INSTALL_EXECUTABLE "${options}" "${one_value_args}" "${multi_value_args}" "${extra_macro_args}") if (ETH_INSTALL_EXECUTABLE_QMLDIR) - set(eth_qml_dir "-qmldir=${ETH_INSTALL_EXECUTABLE_QMLDIR}") + if (APPLE) + set(eth_qml_dir "-qmldir=${ETH_INSTALL_EXECUTABLE_QMLDIR}") + elseif (WIN32) + set(eth_qml_dir --qmldir ${ETH_INSTALL_EXECUTABLE_QMLDIR}) + endif() message(STATUS "${EXECUTABLE} qmldir: ${eth_qml_dir}") endif() @@ -68,7 +72,8 @@ macro(eth_install_executable EXECUTABLE) # First have qt5 install plugins and frameworks add_custom_command(TARGET ${EXECUTABLE} POST_BUILD COMMAND ${MACDEPLOYQT_APP} ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${EXECUTABLE}.app ${eth_qml_dir} - WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) + WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} + ) # This tool and next will inspect linked libraries in order to determine which dependencies are required if (${CMAKE_CFG_INTDIR} STREQUAL ".") @@ -108,6 +113,13 @@ macro(eth_install_executable EXECUTABLE) $/platforms ) + # ugly way, improve that + add_custom_command(TARGET ${EXECUTABLE} POST_BUILD + COMMAND cmake -E copy_directory + "${ETH_DEPENDENCY_INSTALL_DIR}/qml" + $ + ) + install( FILES ${DLLS} DESTINATION bin COMPONENT ${EXECUTABLE} @@ -118,6 +130,14 @@ macro(eth_install_executable EXECUTABLE) COMPONENT ${EXECUTABLE} ) + file (GLOB QMLS 
${ETH_DEPENDENCY_INSTALL_DIR}/qml/*) + foreach(QML ${QMLS}) + install( DIRECTORY ${QML} + DESTINATION bin + COMPONENT ${EXECUTABLE} + ) + endforeach() + install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin COMPONENT ${EXECUTABLE} diff --git a/libethereum/Transaction.cpp b/libethereum/Transaction.cpp index 0c1173134..24c56930e 100644 --- a/libethereum/Transaction.cpp +++ b/libethereum/Transaction.cpp @@ -43,9 +43,9 @@ Transaction::Transaction(bytesConstRef _rlpData, bool _checkSender) m_receiveAddress = rlp[field = 3].toHash
(); m_value = rlp[field = 4].toInt(); m_data = rlp[field = 5].toBytes(); + byte v = rlp[field = 6].toInt() - 27; h256 r = rlp[field = 7].toInt(); h256 s = rlp[field = 8].toInt(); - byte v = rlp[field = 6].toInt() - 27; m_vrs = SignatureStruct{ r, s, v }; if (_checkSender) m_sender = sender(); @@ -74,7 +74,7 @@ Address Transaction::sender() const { if (!m_sender) { - auto p = recover(*(Signature const*)&m_vrs, sha3(WithoutSignature)); + auto p = recover(m_vrs, sha3(WithoutSignature)); if (!p) BOOST_THROW_EXCEPTION(InvalidSignature()); m_sender = right160(dev::sha3(bytesConstRef(p.data(), sizeof(p)))); diff --git a/libevm/VM.h b/libevm/VM.h index 3eb330fcd..1f3fc17d5 100644 --- a/libevm/VM.h +++ b/libevm/VM.h @@ -217,6 +217,8 @@ inline bytesConstRef VM::go(ExtVMFace& _ext, OnOpFunc const& _onOp, uint64_t _st require(7); runGas = (bigint)c_callGas + m_stack[m_stack.size() - 1]; newTempSize = std::max(memNeed(m_stack[m_stack.size() - 6], m_stack[m_stack.size() - 7]), memNeed(m_stack[m_stack.size() - 4], m_stack[m_stack.size() - 5])); + if (_ext.depth == 1024) + BOOST_THROW_EXCEPTION(OutOfGas()); break; case Instruction::CREATE: @@ -226,6 +228,8 @@ inline bytesConstRef VM::go(ExtVMFace& _ext, OnOpFunc const& _onOp, uint64_t _st u256 inSize = m_stack[m_stack.size() - 3]; newTempSize = (bigint)inOff + inSize; runGas = c_createGas; + if (_ext.depth == 1024) + BOOST_THROW_EXCEPTION(OutOfGas()); break; } case Instruction::EXP: @@ -566,6 +570,7 @@ inline bytesConstRef VM::go(ExtVMFace& _ext, OnOpFunc const& _onOp, uint64_t _st break; default: // this is unreachable, but if someone introduces a bug in the future, he may get here. 
+ assert(false); BOOST_THROW_EXCEPTION(InvalidOpcode() << errinfo_comment("CALLDATACOPY, CODECOPY or EXTCODECOPY instruction requested.")); break; } @@ -795,8 +800,6 @@ inline bytesConstRef VM::go(ExtVMFace& _ext, OnOpFunc const& _onOp, uint64_t _st if (_ext.balance(_ext.myAddress) >= endowment) { - if (_ext.depth == 1024) - BOOST_THROW_EXCEPTION(OutOfGas()); _ext.subBalance(endowment); m_stack.push_back((u160)_ext.create(endowment, m_gas, bytesConstRef(m_temp.data() + initOff, initSize), _onOp)); } @@ -825,8 +828,6 @@ inline bytesConstRef VM::go(ExtVMFace& _ext, OnOpFunc const& _onOp, uint64_t _st if (_ext.balance(_ext.myAddress) >= value) { - if (_ext.depth == 1024) - BOOST_THROW_EXCEPTION(OutOfGas()); _ext.subBalance(value); m_stack.push_back(_ext.call(inst == Instruction::CALL ? receiveAddress : _ext.myAddress, value, bytesConstRef(m_temp.data() + inOff, inSize), gas, bytesRef(m_temp.data() + outOff, outSize), _onOp, {}, receiveAddress)); } diff --git a/libserpent/bignum.h b/libserpent/bignum.h index 599365b6c..99571acd2 100644 --- a/libserpent/bignum.h +++ b/libserpent/bignum.h @@ -35,4 +35,7 @@ bool decimalGt(std::string a, std::string b, bool eqAllowed=false); unsigned decimalToUnsigned(std::string a); +#define utd unsignedToDecimal +#define dtu decimalToUnsigned + #endif diff --git a/libserpent/compiler.cpp b/libserpent/compiler.cpp index 30628fbc9..a3e5b1c60 100644 --- a/libserpent/compiler.cpp +++ b/libserpent/compiler.cpp @@ -6,6 +6,7 @@ #include "bignum.h" #include "opcodes.h" +// Auxiliary data that is gathered while compiling struct programAux { std::map vars; int nextVarMem; @@ -13,15 +14,19 @@ struct programAux { bool calldataUsed; int step; int labelLength; - int functionCount; }; +// Auxiliary data that gets passed down vertically +// but not back up struct programVerticalAux { int height; + std::string innerScopeName; std::map dupvars; std::map funvars; + std::vector scopes; }; +// Compilation result struct programData { programAux aux; Node 
code; @@ -34,7 +39,6 @@ programAux Aux() { o.calldataUsed = false; o.step = 0; o.nextVarMem = 32; - o.functionCount = 0; return o; } @@ -43,6 +47,7 @@ programVerticalAux verticalAux() { o.height = 0; o.dupvars = std::map(); o.funvars = std::map(); + o.scopes = std::vector(); return o; } @@ -72,29 +77,58 @@ Node popwrap(Node node) { return multiToken(nodelist, 2, node.metadata); } +// Grabs variables +mss getVariables(Node node, mss cur=mss()) { + Metadata m = node.metadata; + // Tokens don't contain any variables + if (node.type == TOKEN) + return cur; + // Don't descend into call fragments + else if (node.val == "lll") + return getVariables(node.args[1], cur); + // At global scope get/set/ref also declare + else if (node.val == "get" || node.val == "set" || node.val == "ref") { + if (node.args[0].type != TOKEN) + err("Variable name must be simple token," + " not complex expression! " + printSimple(node.args[0]), m); + if (!cur.count(node.args[0].val)) { + cur[node.args[0].val] = utd(cur.size() * 32 + 32); + //std::cerr << node.args[0].val << " " << cur[node.args[0].val] << "\n"; + } + } + // Recursively process children + for (unsigned i = 0; i < node.args.size(); i++) { + cur = getVariables(node.args[i], cur); + } + return cur; +} + // Turns LLL tree into tree of code fragments programData opcodeify(Node node, programAux aux=Aux(), programVerticalAux vaux=verticalAux()) { std::string symb = "_"+mkUniqueToken(); Metadata m = node.metadata; + // Get variables + if (!aux.vars.size()) { + aux.vars = getVariables(node); + aux.nextVarMem = aux.vars.size() * 32 + 32; + } // Numbers if (node.type == TOKEN) { return pd(aux, nodeToNumeric(node), 1); } - else if (node.val == "ref" || node.val == "get" || - node.val == "set" || node.val == "declare") { + else if (node.val == "ref" || node.val == "get" || node.val == "set") { std::string varname = node.args[0].val; - if (!aux.vars.count(varname)) { - aux.vars[varname] = unsignedToDecimal(aux.nextVarMem); - aux.nextVarMem += 
32; - } - if (varname == "'msg.data") aux.calldataUsed = true; + // Determine reference to variable + Node varNode = tkn(aux.vars[varname], m); + //std::cerr << varname << " " << printSimple(varNode) << "\n"; // Set variable if (node.val == "set") { programData sub = opcodeify(node.args[1], aux, vaux); if (!sub.outs) err("Value to set variable must have nonzero arity!", m); + // What if we are setting a stack variable? if (vaux.dupvars.count(node.args[0].val)) { int h = vaux.height - vaux.dupvars[node.args[0].val]; if (h > 16) err("Too deep for stack variable (max 16)", m); @@ -105,149 +139,65 @@ programData opcodeify(Node node, }; return pd(sub.aux, multiToken(nodelist, 3, m), 0); } - Node nodelist[] = { - sub.code, - token(sub.aux.vars[varname], m), - token("MSTORE", m), - }; - return pd(sub.aux, multiToken(nodelist, 3, m), 0); + // Setting a memory variable + else { + Node nodelist[] = { + sub.code, + varNode, + token("MSTORE", m), + }; + return pd(sub.aux, multiToken(nodelist, 3, m), 0); + } } // Get variable else if (node.val == "get") { - if (vaux.dupvars.count(node.args[0].val)) { + // Getting a stack variable + if (vaux.dupvars.count(node.args[0].val)) { int h = vaux.height - vaux.dupvars[node.args[0].val]; if (h > 16) err("Too deep for stack variable (max 16)", m); return pd(aux, token("DUP"+unsignedToDecimal(h)), 1); } - Node nodelist[] = - { token(aux.vars[varname], m), token("MLOAD", m) }; - return pd(aux, multiToken(nodelist, 2, m), 1); + // Getting a memory variable + else { + Node nodelist[] = + { varNode, token("MLOAD", m) }; + return pd(aux, multiToken(nodelist, 2, m), 1); + } } // Refer variable else if (node.val == "ref") { if (vaux.dupvars.count(node.args[0].val)) err("Cannot ref stack variable!", m); - return pd(aux, token(aux.vars[varname], m), 1); - } - // Declare variable - else { - return pd(aux, multiToken(nullptr, 0, m), 0); + return pd(aux, varNode, 1); } } - // Define functions (TODO: eventually move to rewriter.cpp, keep - // compiler 
pure LLL) - if (node.val == "def") { - std::vector varNames; - std::vector varSizes; - bool useLt32 = false; - int totalSz = 0; - if (node.args.size() != 2) - err("Malformed def!", m); - // Collect the list of variable names and variable byte counts - for (unsigned i = 0; i < node.args[0].args.size(); i++) { - if (node.args[0].args[i].val == "kv") { - if (node.args[0].args[i].args.size() != 2) - err("Malformed def!", m); - varNames.push_back(node.args[0].args[i].args[0].val); - varSizes.push_back( - decimalToUnsigned(node.args[0].args[i].args[1].val)); - if (varSizes.back() > 32) - err("Max argument width: 32 bytes", m); - useLt32 = true; + // Comments do nothing + else if (node.val == "comment") { + Node nodelist[] = { }; + return pd(aux, multiToken(nodelist, 0, m), 0); + } + // Custom operation sequence + // eg. (ops bytez id msize swap1 msize add 0 swap1 mstore) == alloc + if (node.val == "ops") { + std::vector subs2; + int depth = 0; + for (unsigned i = 0; i < node.args.size(); i++) { + std::string op = upperCase(node.args[i].val); + if (node.args[i].type == ASTNODE || opinputs(op) == -1) { + programVerticalAux vaux2 = vaux; + vaux2.height = vaux.height - i - 1 + node.args.size(); + programData sub = opcodeify(node.args[i], aux, vaux2); + aux = sub.aux; + depth += sub.outs; + subs2.push_back(sub.code); } else { - varNames.push_back(node.args[0].args[i].val); - varSizes.push_back(32); + subs2.push_back(token(op, m)); + depth += opoutputs(op) - opinputs(op); } - aux.vars[varNames.back()] = unsignedToDecimal(aux.nextVarMem + 32 * i); - totalSz += varSizes.back(); } - int functionCount = aux.functionCount; - int nextVarMem = aux.nextVarMem; - aux.nextVarMem += 32 * varNames.size(); - aux.functionCount += 1; - programData inner; - // If we're only using 32-byte variables, then great, just copy - // over the calldata! 
- if (!useLt32) { - programData sub = opcodeify(node.args[1], aux, vaux); - Node nodelist[] = { - token(unsignedToDecimal(totalSz), m), - token("1", m), - token(unsignedToDecimal(nextVarMem), m), - token("CALLDATACOPY", m), - sub.code - }; - inner = pd(sub.aux, multiToken(nodelist, 5, m), 0); - } - else { - std::vector innerList; - int cum = 1; - for (unsigned i = 0; i < varNames.size();) { - // If we get a series of 32-byte values, we calldatacopy them - if (varSizes[i] == 32) { - unsigned until = i+1; - while (until < varNames.size() && varSizes[until] == 32) - until += 1; - innerList.push_back(token(unsignedToDecimal((until - i) * 32), m)); - innerList.push_back(token(unsignedToDecimal(cum), m)); - innerList.push_back(token(unsignedToDecimal(nextVarMem + i * 32), m)); - innerList.push_back(token("CALLDATACOPY", m)); - cum += (until - i) * 32; - i = until; - } - // Otherwise, we do a clever trick to extract the value - else { - innerList.push_back(token(unsignedToDecimal(32 - varSizes[i]), m)); - innerList.push_back(token("256", m)); - innerList.push_back(token("EXP", m)); - innerList.push_back(token(unsignedToDecimal(cum), m)); - innerList.push_back(token("CALLDATALOAD", m)); - innerList.push_back(token("DIV", m)); - innerList.push_back(token(unsignedToDecimal(nextVarMem + i * 32), m)); - innerList.push_back(token("MSTORE", m)); - cum += varSizes[i]; - i += 1; - } - } - // If caller == origin, then it's from a tx, so unpack, otherwise - // plain copy - programData sub = opcodeify(node.args[1], aux, vaux); - Node ilnode = astnode("", innerList, m); - Node nodelist[] = { - token(unsignedToDecimal(32 * varNames.size()), m), - token("1", m), - token(unsignedToDecimal(nextVarMem), m), - token("CALLDATACOPY", m), - token("CALLER", m), - token("ORIGIN", m), - token("EQ", m), - token("ISZERO", m), - token("$maincode"+symb, m), - token("JUMPI", m), - ilnode, - token("~maincode"+symb, m), - token("JUMPDEST", m), - sub.code - }; - inner = pd(sub.aux, multiToken(nodelist, 
14, m), 0); - } - // Check if the function call byte is the same - Node nodelist2[] = { - token("0", m), - token("CALLDATALOAD", m), - token("0", m), - token("BYTE", m), - token(unsignedToDecimal(functionCount), m), - token("EQ", m), - token("ISZERO", m), - token("$endcode"+symb, m), - token("JUMPI", m), - inner.code, - token("~endcode"+symb, m), - token("JUMPDEST", m), - }; - return pd(inner.aux, multiToken(nodelist2, 12, m), 0); + if (depth < 0 || depth > 1) err("Stack depth mismatch", m); + return pd(aux, astnode("_", subs2, m), 0); } // Code blocks if (node.val == "lll" && node.args.size() == 2) { @@ -372,49 +322,14 @@ programData opcodeify(Node node, }; return pd(aux, multiToken(nodelist, 8, m), 1); } - // Array literals - else if (node.val == "array_lit") { - aux.allocUsed = true; - std::vector nodes; - if (!node.args.size()) { - nodes.push_back(token("MSIZE", m)); - return pd(aux, astnode("_", nodes, m)); - } - nodes.push_back(token("MSIZE", m)); - nodes.push_back(token("0", m)); - nodes.push_back(token("MSIZE", m)); - nodes.push_back(token(unsignedToDecimal(node.args.size() * 32 - 1), m)); - nodes.push_back(token("ADD", m)); - nodes.push_back(token("MSTORE8", m)); - for (unsigned i = 0; i < node.args.size(); i++) { - Metadata m2 = node.args[i].metadata; - nodes.push_back(token("DUP1", m2)); - programVerticalAux vaux2 = vaux; - vaux2.height += 2; - programData sub = opcodeify(node.args[i], aux, vaux2); - if (!sub.outs) - err("Array_lit item " + unsignedToDecimal(i) + " has zero arity", m2); - aux = sub.aux; - nodes.push_back(sub.code); - nodes.push_back(token("SWAP1", m2)); - if (i > 0) { - nodes.push_back(token(unsignedToDecimal(i * 32), m2)); - nodes.push_back(token("ADD", m2)); - } - nodes.push_back(token("MSTORE", m2)); - } - return pd(aux, astnode("_", nodes, m), 1); - } // All other functions/operators else { std::vector subs2; int depth = opinputs(upperCase(node.val)); - if (node.val != "debug") { - if (depth == -1) - err("Not a function or opcode: 
"+node.val, m); - if ((int)node.args.size() != depth) - err("Invalid arity for "+node.val, m); - } + if (depth == -1) + err("Not a function or opcode: "+node.val, m); + if ((int)node.args.size() != depth) + err("Invalid arity for "+node.val, m); for (int i = node.args.size() - 1; i >= 0; i--) { programVerticalAux vaux2 = vaux; vaux2.height = vaux.height - i - 1 + node.args.size(); @@ -424,13 +339,8 @@ programData opcodeify(Node node, err("Input "+unsignedToDecimal(i)+" has arity 0", sub.code.metadata); subs2.push_back(sub.code); } - if (node.val == "debug") { - subs2.push_back(token("DUP"+unsignedToDecimal(node.args.size()), m)); - for (int i = 0; i <= (int)node.args.size(); i++) - subs2.push_back(token("POP", m)); - } - else subs2.push_back(token(upperCase(node.val), m)); - int outdepth = node.val == "debug" ? 0 : opoutputs(upperCase(node.val)); + subs2.push_back(token(upperCase(node.val), m)); + int outdepth = opoutputs(upperCase(node.val)); return pd(aux, astnode("_", subs2, m), outdepth); } } @@ -449,15 +359,6 @@ Node finalize(programData c) { }; bottom.push_back(multiToken(nodelist, 3, m)); } - // If msg.data is being used as an array, then we need to copy it - if (c.aux.calldataUsed) { - Node nodelist[] = { - token("MSIZE", m), token("CALLDATASIZE", m), token("0", m), - token("MSIZE", m), token("CALLDATACOPY", m), - token(c.aux.vars["'msg.data"], m), token("MSTORE", m) - }; - bottom.push_back(multiToken(nodelist, 7, m)); - } // The actual code bottom.push_back(c.code); return astnode("_", bottom, m); diff --git a/libserpent/functions.cpp b/libserpent/functions.cpp new file mode 100644 index 000000000..78e12e84a --- /dev/null +++ b/libserpent/functions.cpp @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" +#include "optimize.h" +#include "rewriteutils.h" +#include "preprocess.h" +#include "functions.h" + +std::string getSignature(std::vector args) { + std::string o; + for (unsigned i = 0; 
i < args.size(); i++) { + if (args[i].val == ":" && args[i].args[1].val == "s") + o += "s"; + else if (args[i].val == ":" && args[i].args[1].val == "a") + o += "a"; + else + o += "i"; + } + return o; +} + +// Convert a list of arguments into a node containing a +// < datastart, datasz > pair + +Node packArguments(std::vector args, std::string sig, + int funId, Metadata m) { + // Plain old 32 byte arguments + std::vector nargs; + // Variable-sized arguments + std::vector vargs; + // Variable sizes + std::vector sizes; + // Is a variable an array? + std::vector isArray; + // Fill up above three argument lists + int argCount = 0; + for (unsigned i = 0; i < args.size(); i++) { + Metadata m = args[i].metadata; + if (args[i].val == "=") { + // do nothing + } + else { + // Determine the correct argument type + char argType; + if (sig.size() > 0) { + if (argCount >= (signed)sig.size()) + err("Too many args", m); + argType = sig[argCount]; + } + else argType = 'i'; + // Integer (also usable for short strings) + if (argType == 'i') { + if (args[i].val == ":") + err("Function asks for int, provided string or array", m); + nargs.push_back(args[i]); + } + // Long string + else if (argType == 's') { + if (args[i].val != ":") + err("Must specify string length", m); + vargs.push_back(args[i].args[0]); + sizes.push_back(args[i].args[1]); + isArray.push_back(false); + } + // Array + else if (argType == 'a') { + if (args[i].val != ":") + err("Must specify array length", m); + vargs.push_back(args[i].args[0]); + sizes.push_back(args[i].args[1]); + isArray.push_back(true); + } + else err("Invalid arg type in signature", m); + argCount++; + } + } + int static_arg_size = 1 + (vargs.size() + nargs.size()) * 32; + // Start off by saving the size variables and calculating the total + msn kwargs; + kwargs["funid"] = tkn(utd(funId), m); + std::string pattern = + "(with _sztot "+utd(static_arg_size)+" " + " (with _sizes (alloc "+utd(sizes.size() * 32)+") " + " (seq "; + for (unsigned i = 0; i 
< sizes.size(); i++) { + std::string sizeIncrement = + isArray[i] ? "(mul 32 _x)" : "_x"; + pattern += + "(with _x $sz"+utd(i)+"(seq " + " (mstore (add _sizes "+utd(i * 32)+") _x) " + " (set _sztot (add _sztot "+sizeIncrement+" )))) "; + kwargs["sz"+utd(i)] = sizes[i]; + } + // Allocate memory, and set first data byte + pattern += + "(with _datastart (alloc (add _sztot 32)) (seq " + " (mstore8 _datastart $funid) "; + // Copy over size variables + for (unsigned i = 0; i < sizes.size(); i++) { + int v = 1 + i * 32; + pattern += + " (mstore " + " (add _datastart "+utd(v)+") " + " (mload (add _sizes "+utd(v-1)+"))) "; + } + // Store normal arguments + for (unsigned i = 0; i < nargs.size(); i++) { + int v = 1 + (i + sizes.size()) * 32; + pattern += + " (mstore (add _datastart "+utd(v)+") $"+utd(i)+") "; + kwargs[utd(i)] = nargs[i]; + } + // Loop through variable-sized arguments, store them + pattern += + " (with _pos (add _datastart "+utd(static_arg_size)+") (seq"; + for (unsigned i = 0; i < vargs.size(); i++) { + std::string copySize = + isArray[i] ? 
"(mul 32 (mload (add _sizes "+utd(i * 32)+")))" + : "(mload (add _sizes "+utd(i * 32)+"))"; + pattern += + " (unsafe_mcopy _pos $vl"+utd(i)+" "+copySize+") " + " (set _pos (add _pos "+copySize+")) "; + kwargs["vl"+utd(i)] = vargs[i]; + } + // Return a 2-item array containing the start and size + pattern += " (array_lit _datastart _sztot))))))))"; + std::string prefix = "_temp_"+mkUniqueToken(); + // Fill in pattern, return triple + return subst(parseLLL(pattern), kwargs, prefix, m); +} + +// Create a node for argument unpacking +Node unpackArguments(std::vector vars, Metadata m) { + std::vector varNames; + std::vector longVarNames; + std::vector longVarIsArray; + // Fill in variable and long variable names, as well as which + // long variables are arrays and which are strings + for (unsigned i = 0; i < vars.size(); i++) { + if (vars[i].val == ":") { + if (vars[i].args.size() != 2) + err("Malformed def!", m); + longVarNames.push_back(vars[i].args[0].val); + std::string tag = vars[i].args[1].val; + if (tag == "s") + longVarIsArray.push_back(false); + else if (tag == "a") + longVarIsArray.push_back(true); + else + err("Function value can only be string or array", m); + } + else { + varNames.push_back(vars[i].val); + } + } + std::vector sub; + if (!varNames.size() && !longVarNames.size()) { + // do nothing if we have no arguments + } + else { + std::vector varNodes; + for (unsigned i = 0; i < longVarNames.size(); i++) + varNodes.push_back(token(longVarNames[i], m)); + for (unsigned i = 0; i < varNames.size(); i++) + varNodes.push_back(token(varNames[i], m)); + // Copy over variable lengths and short variables + for (unsigned i = 0; i < varNodes.size(); i++) { + int pos = 1 + i * 32; + std::string prefix = (i < longVarNames.size()) ? 
"_len_" : ""; + sub.push_back(asn("untyped", asn("set", + token(prefix+varNodes[i].val, m), + asn("calldataload", tkn(utd(pos), m), m), + m))); + } + // Copy over long variables + if (longVarNames.size() > 0) { + std::vector sub2; + int pos = varNodes.size() * 32 + 1; + Node tot = tkn("_tot", m); + for (unsigned i = 0; i < longVarNames.size(); i++) { + Node var = tkn(longVarNames[i], m); + Node varlen = longVarIsArray[i] + ? asn("mul", tkn("32", m), tkn("_len_"+longVarNames[i], m)) + : tkn("_len_"+longVarNames[i], m); + sub2.push_back(asn("untyped", + asn("set", var, asn("alloc", varlen)))); + sub2.push_back(asn("calldatacopy", var, tot, varlen)); + sub2.push_back(asn("set", tot, asn("add", tot, varlen))); + } + std::string prefix = "_temp_"+mkUniqueToken(); + sub.push_back(subst( + astnode("with", tot, tkn(utd(pos), m), asn("seq", sub2)), + msn(), + prefix, + m)); + } + } + return asn("seq", sub, m); +} diff --git a/libserpent/functions.h b/libserpent/functions.h new file mode 100644 index 000000000..68a1c69ce --- /dev/null +++ b/libserpent/functions.h @@ -0,0 +1,39 @@ +#ifndef ETHSERP_FUNCTIONS +#define ETHSERP_FUNCTIONS + +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" +#include "optimize.h" +#include "rewriteutils.h" +#include "preprocess.h" + + +class argPack { + public: + argPack(Node a, Node b, Node c) { + pre = a; + datastart = b; + datasz = c; + } + Node pre; + Node datastart; + Node datasz; +}; + +// Get a signature from a function +std::string getSignature(std::vector args); + +// Convert a list of arguments into a node +// triple, given the signature of a function +Node packArguments(std::vector args, std::string sig, + int funId, Metadata m); + +// Create a node for argument unpacking +Node unpackArguments(std::vector vars, Metadata m); + +#endif diff --git a/libserpent/opcodes.cpp b/libserpent/opcodes.cpp new file mode 100644 index 000000000..b24144e46 --- /dev/null +++ b/libserpent/opcodes.cpp 
@@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include "opcodes.h" +#include "util.h" +#include "bignum.h" + +Mapping mapping[] = { + Mapping("STOP", 0x00, 0, 0), + Mapping("ADD", 0x01, 2, 1), + Mapping("MUL", 0x02, 2, 1), + Mapping("SUB", 0x03, 2, 1), + Mapping("DIV", 0x04, 2, 1), + Mapping("SDIV", 0x05, 2, 1), + Mapping("MOD", 0x06, 2, 1), + Mapping("SMOD", 0x07, 2, 1), + Mapping("ADDMOD", 0x08, 3, 1), + Mapping("MULMOD", 0x09, 3, 1), + Mapping("EXP", 0x0a, 2, 1), + Mapping("SIGNEXTEND", 0x0b, 2, 1), + Mapping("LT", 0x10, 2, 1), + Mapping("GT", 0x11, 2, 1), + Mapping("SLT", 0x12, 2, 1), + Mapping("SGT", 0x13, 2, 1), + Mapping("EQ", 0x14, 2, 1), + Mapping("ISZERO", 0x15, 1, 1), + Mapping("AND", 0x16, 2, 1), + Mapping("OR", 0x17, 2, 1), + Mapping("XOR", 0x18, 2, 1), + Mapping("NOT", 0x19, 1, 1), + Mapping("BYTE", 0x1a, 2, 1), + Mapping("SHA3", 0x20, 2, 1), + Mapping("ADDRESS", 0x30, 0, 1), + Mapping("BALANCE", 0x31, 1, 1), + Mapping("ORIGIN", 0x32, 0, 1), + Mapping("CALLER", 0x33, 0, 1), + Mapping("CALLVALUE", 0x34, 0, 1), + Mapping("CALLDATALOAD", 0x35, 1, 1), + Mapping("CALLDATASIZE", 0x36, 0, 1), + Mapping("CALLDATACOPY", 0x37, 3, 0), + Mapping("CODESIZE", 0x38, 0, 1), + Mapping("CODECOPY", 0x39, 3, 0), + Mapping("GASPRICE", 0x3a, 0, 1), + Mapping("EXTCODESIZE", 0x3b, 1, 1), + Mapping("EXTCODECOPY", 0x3c, 4, 0), + Mapping("PREVHASH", 0x40, 0, 1), + Mapping("COINBASE", 0x41, 0, 1), + Mapping("TIMESTAMP", 0x42, 0, 1), + Mapping("NUMBER", 0x43, 0, 1), + Mapping("DIFFICULTY", 0x44, 0, 1), + Mapping("GASLIMIT", 0x45, 0, 1), + Mapping("POP", 0x50, 1, 0), + Mapping("MLOAD", 0x51, 1, 1), + Mapping("MSTORE", 0x52, 2, 0), + Mapping("MSTORE8", 0x53, 2, 0), + Mapping("SLOAD", 0x54, 1, 1), + Mapping("SSTORE", 0x55, 2, 0), + Mapping("JUMP", 0x56, 1, 0), + Mapping("JUMPI", 0x57, 2, 0), + Mapping("PC", 0x58, 0, 1), + Mapping("MSIZE", 0x59, 0, 1), + Mapping("GAS", 0x5a, 0, 1), + Mapping("JUMPDEST", 0x5b, 0, 0), + Mapping("LOG0", 0xa0, 2, 0), + Mapping("LOG1", 0xa1, 
3, 0), + Mapping("LOG2", 0xa2, 4, 0), + Mapping("LOG3", 0xa3, 5, 0), + Mapping("LOG4", 0xa4, 6, 0), + Mapping("CREATE", 0xf0, 3, 1), + Mapping("CALL", 0xf1, 7, 1), + Mapping("CALLCODE", 0xf2, 7, 1), + Mapping("RETURN", 0xf3, 2, 0), + Mapping("SUICIDE", 0xff, 1, 0), + Mapping("---END---", 0x00, 0, 0), +}; + +std::map > opcodes; +std::map reverseOpcodes; + +// Fetches everything EXCEPT PUSH1..32 +std::pair > _opdata(std::string ops, int opi) { + if (!opcodes.size()) { + int i = 0; + while (mapping[i].op != "---END---") { + Mapping mi = mapping[i]; + opcodes[mi.op] = triple(mi.opcode, mi.in, mi.out); + i++; + } + for (i = 1; i <= 16; i++) { + opcodes["DUP"+unsignedToDecimal(i)] = triple(0x7f + i, i, i+1); + opcodes["SWAP"+unsignedToDecimal(i)] = triple(0x8f + i, i+1, i+1); + } + for (std::map >::iterator it=opcodes.begin(); + it != opcodes.end(); + it++) { + reverseOpcodes[(*it).second[0]] = (*it).first; + } + } + ops = upperCase(ops); + std::string op; + std::vector opdata; + op = reverseOpcodes.count(opi) ? reverseOpcodes[opi] : ""; + opdata = opcodes.count(ops) ? opcodes[ops] : triple(-1, -1, -1); + return std::pair >(op, opdata); +} + +int opcode(std::string op) { + return _opdata(op, -1).second[0]; +} + +int opinputs(std::string op) { + return _opdata(op, -1).second[1]; +} + +int opoutputs(std::string op) { + return _opdata(op, -1).second[2]; +} + +std::string op(int opcode) { + return _opdata("", opcode).first; +} + +std::string lllSpecials[][3] = { + { "ref", "1", "1" }, + { "get", "1", "1" }, + { "set", "2", "2" }, + { "with", "3", "3" }, + { "comment", "0", "2147483647" }, + { "ops", "0", "2147483647" }, + { "lll", "2", "2" }, + { "seq", "0", "2147483647" }, + { "if", "3", "3" }, + { "unless", "2", "2" }, + { "until", "2", "2" }, + { "alloc", "1", "1" }, + { "---END---", "0", "0" }, +}; + +std::map > lllMap; + +// Is a function name one of the valid functions above? 
+bool isValidLLLFunc(std::string f, int argc) { + if (lllMap.size() == 0) { + for (int i = 0; ; i++) { + if (lllSpecials[i][0] == "---END---") break; + lllMap[lllSpecials[i][0]] = std::pair( + dtu(lllSpecials[i][1]), dtu(lllSpecials[i][2])); + } + } + return lllMap.count(f) + && argc >= lllMap[f].first + && argc <= lllMap[f].second; +} diff --git a/libserpent/opcodes.h b/libserpent/opcodes.h index a7bcc1af9..41423c169 100644 --- a/libserpent/opcodes.h +++ b/libserpent/opcodes.h @@ -5,6 +5,7 @@ #include #include #include +#include "util.h" class Mapping { public: @@ -20,119 +21,25 @@ class Mapping { int out; }; -Mapping mapping[] = { - Mapping("STOP", 0x00, 0, 0), - Mapping("ADD", 0x01, 2, 1), - Mapping("MUL", 0x02, 2, 1), - Mapping("SUB", 0x03, 2, 1), - Mapping("DIV", 0x04, 2, 1), - Mapping("SDIV", 0x05, 2, 1), - Mapping("MOD", 0x06, 2, 1), - Mapping("SMOD", 0x07, 2, 1), - Mapping("ADDMOD", 0x08, 3, 1), - Mapping("MULMOD", 0x09, 3, 1), - Mapping("EXP", 0x0a, 2, 1), - Mapping("SIGNEXTEND", 0x0b, 2, 1), - Mapping("LT", 0x10, 2, 1), - Mapping("GT", 0x11, 2, 1), - Mapping("SLT", 0x12, 2, 1), - Mapping("SGT", 0x13, 2, 1), - Mapping("EQ", 0x14, 2, 1), - Mapping("ISZERO", 0x15, 1, 1), - Mapping("AND", 0x16, 2, 1), - Mapping("OR", 0x17, 2, 1), - Mapping("XOR", 0x18, 2, 1), - Mapping("NOT", 0x19, 1, 1), - Mapping("BYTE", 0x1a, 2, 1), - Mapping("ADDMOD", 0x14, 3, 1), - Mapping("MULMOD", 0x15, 3, 1), - Mapping("SIGNEXTEND", 0x16, 2, 1), - Mapping("SHA3", 0x20, 2, 1), - Mapping("ADDRESS", 0x30, 0, 1), - Mapping("BALANCE", 0x31, 1, 1), - Mapping("ORIGIN", 0x32, 0, 1), - Mapping("CALLER", 0x33, 0, 1), - Mapping("CALLVALUE", 0x34, 0, 1), - Mapping("CALLDATALOAD", 0x35, 1, 1), - Mapping("CALLDATASIZE", 0x36, 0, 1), - Mapping("CALLDATACOPY", 0x37, 3, 1), - Mapping("CODESIZE", 0x38, 0, 1), - Mapping("CODECOPY", 0x39, 3, 1), - Mapping("GASPRICE", 0x3a, 0, 1), - Mapping("PREVHASH", 0x40, 0, 1), - Mapping("COINBASE", 0x41, 0, 1), - Mapping("TIMESTAMP", 0x42, 0, 1), - Mapping("NUMBER", 
0x43, 0, 1), - Mapping("DIFFICULTY", 0x44, 0, 1), - Mapping("GASLIMIT", 0x45, 0, 1), - Mapping("POP", 0x50, 1, 0), - Mapping("MLOAD", 0x51, 1, 1), - Mapping("MSTORE", 0x52, 2, 0), - Mapping("MSTORE8", 0x53, 2, 0), - Mapping("SLOAD", 0x54, 1, 1), - Mapping("SSTORE", 0x55, 2, 0), - Mapping("JUMP", 0x56, 1, 0), - Mapping("JUMPI", 0x57, 2, 0), - Mapping("PC", 0x58, 0, 1), - Mapping("MSIZE", 0x59, 0, 1), - Mapping("GAS", 0x5a, 0, 1), - Mapping("JUMPDEST", 0x5b, 0, 0), - Mapping("LOG0", 0xa0, 2, 0), - Mapping("LOG1", 0xa1, 3, 0), - Mapping("LOG2", 0xa2, 4, 0), - Mapping("LOG3", 0xa3, 5, 0), - Mapping("LOG4", 0xa4, 6, 0), - Mapping("CREATE", 0xf0, 3, 1), - Mapping("CALL", 0xf1, 7, 1), - Mapping("RETURN", 0xf2, 2, 0), - Mapping("CALL_CODE", 0xf3, 7, 1), - Mapping("SUICIDE", 0xff, 1, 0), - Mapping("---END---", 0x00, 0, 0), -}; +extern Mapping mapping[]; -std::map > opcodes; -std::map reverseOpcodes; +extern std::map > opcodes; +extern std::map reverseOpcodes; -// Fetches everything EXCEPT PUSH1..32 -std::pair > _opdata(std::string ops, int opi) { - if (!opcodes.size()) { - int i = 0; - while (mapping[i].op != "---END---") { - Mapping mi = mapping[i]; - opcodes[mi.op] = triple(mi.opcode, mi.in, mi.out); - i++; - } - for (i = 1; i <= 16; i++) { - opcodes["DUP"+unsignedToDecimal(i)] = triple(0x7f + i, i, i+1); - opcodes["SWAP"+unsignedToDecimal(i)] = triple(0x8f + i, i+1, i+1); - } - for (std::map >::iterator it=opcodes.begin(); - it != opcodes.end(); - it++) { - reverseOpcodes[(*it).second[0]] = (*it).first; - } - } - std::string op; - std::vector opdata; - op = reverseOpcodes.count(opi) ? reverseOpcodes[opi] : ""; - opdata = opcodes.count(ops) ? 
opcodes[ops] : triple(-1, -1, -1); - return std::pair >(op, opdata); -} +std::pair > _opdata(std::string ops, int opi); + +int opcode(std::string op); + +int opinputs(std::string op); + +int opoutputs(std::string op); -int opcode(std::string op) { - return _opdata(op, -1).second[0]; -} +std::string op(int opcode); -int opinputs(std::string op) { - return _opdata(op, -1).second[1]; -} +extern std::string lllSpecials[][3]; -int opoutputs(std::string op) { - return _opdata(op, -1).second[2]; -} +extern std::map > lllMap; -std::string op(int opcode) { - return _opdata("", opcode).first; -} +bool isValidLLLFunc(std::string f, int argc); #endif diff --git a/libserpent/optimize.cpp b/libserpent/optimize.cpp new file mode 100644 index 000000000..e689fcb69 --- /dev/null +++ b/libserpent/optimize.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" + +// Compile-time arithmetic calculations +Node optimize(Node inp) { + if (inp.type == TOKEN) { + Node o = tryNumberize(inp); + if (decimalGt(o.val, tt256, true)) + err("Value too large (exceeds 32 bytes or 2^256)", inp.metadata); + return o; + } + for (unsigned i = 0; i < inp.args.size(); i++) { + inp.args[i] = optimize(inp.args[i]); + } + // Arithmetic-specific transform + if (inp.val == "+") inp.val = "add"; + if (inp.val == "*") inp.val = "mul"; + if (inp.val == "-") inp.val = "sub"; + if (inp.val == "/") inp.val = "sdiv"; + if (inp.val == "^") inp.val = "exp"; + if (inp.val == "**") inp.val = "exp"; + if (inp.val == "%") inp.val = "smod"; + // Degenerate cases for add and mul + if (inp.args.size() == 2) { + if (inp.val == "add" && inp.args[0].type == TOKEN && + inp.args[0].val == "0") { + Node x = inp.args[1]; + inp = x; + } + if (inp.val == "add" && inp.args[1].type == TOKEN && + inp.args[1].val == "0") { + Node x = inp.args[0]; + inp = x; + } + if (inp.val == "mul" && inp.args[0].type == TOKEN && + inp.args[0].val == "1") { + Node x = inp.args[1]; + 
inp = x; + } + if (inp.val == "mul" && inp.args[1].type == TOKEN && + inp.args[1].val == "1") { + Node x = inp.args[0]; + inp = x; + } + } + // Arithmetic computation + if (inp.args.size() == 2 + && inp.args[0].type == TOKEN + && inp.args[1].type == TOKEN) { + std::string o; + if (inp.val == "add") { + o = decimalMod(decimalAdd(inp.args[0].val, inp.args[1].val), tt256); + } + else if (inp.val == "sub") { + if (decimalGt(inp.args[0].val, inp.args[1].val, true)) + o = decimalSub(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "mul") { + o = decimalMod(decimalMul(inp.args[0].val, inp.args[1].val), tt256); + } + else if (inp.val == "div" && inp.args[1].val != "0") { + o = decimalDiv(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "sdiv" && inp.args[1].val != "0" + && decimalGt(tt255, inp.args[0].val) + && decimalGt(tt255, inp.args[1].val)) { + o = decimalDiv(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "mod" && inp.args[1].val != "0") { + o = decimalMod(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "smod" && inp.args[1].val != "0" + && decimalGt(tt255, inp.args[0].val) + && decimalGt(tt255, inp.args[1].val)) { + o = decimalMod(inp.args[0].val, inp.args[1].val); + } + else if (inp.val == "exp") { + o = decimalModExp(inp.args[0].val, inp.args[1].val, tt256); + } + if (o.length()) return token(o, inp.metadata); + } + return inp; +} + +// Is a node degenerate (ie. trivial to calculate) ? +bool isDegenerate(Node n) { + return optimize(n).type == TOKEN; +} + +// Is a node purely arithmetic? 
+bool isPureArithmetic(Node n) { + return isNumberLike(optimize(n)); +} diff --git a/libserpent/optimize.h b/libserpent/optimize.h new file mode 100644 index 000000000..06ea3bba1 --- /dev/null +++ b/libserpent/optimize.h @@ -0,0 +1,19 @@ +#ifndef ETHSERP_OPTIMIZER +#define ETHSERP_OPTIMIZER + +#include +#include +#include +#include +#include "util.h" + +// Compile-time arithmetic calculations +Node optimize(Node inp); + +// Is a node degenerate (ie. trivial to calculate) ? +bool isDegenerate(Node n); + +// Is a node purely arithmetic? +bool isPureArithmetic(Node n); + +#endif diff --git a/libserpent/parser.cpp b/libserpent/parser.cpp index 4ceb1d12d..2b9d73702 100644 --- a/libserpent/parser.cpp +++ b/libserpent/parser.cpp @@ -12,17 +12,15 @@ int precedence(Node tok) { if (v == ".") return -1; else if (v == "!" || v == "not") return 1; else if (v=="^" || v == "**") return 2; - else if (v=="*" || v=="/" || v=="@/" || v=="%" || v=="@%") return 3; + else if (v=="*" || v=="/" || v=="%") return 3; else if (v=="+" || v=="-") return 4; else if (v=="<" || v==">" || v=="<=" || v==">=") return 5; - else if (v=="@<" || v=="@>" || v=="@<=" || v=="@>=") return 5; else if (v=="&" || v=="|" || v=="xor" || v=="==" || v == "!=") return 6; else if (v=="&&" || v=="and") return 7; else if (v=="||" || v=="or") return 8; - else if (v==":") return 9; else if (v=="=") return 10; else if (v=="+=" || v=="-=" || v=="*=" || v=="/=" || v=="%=") return 10; - else if (v=="@/=" || v=="@%=") return 10; + else if (v==":" || v == "::") return 11; else return 0; } @@ -223,8 +221,15 @@ Node treefy(std::vector stream) { filename = filename.substr(1, filename.length() - 2); if (!exists(root + filename)) err("File does not exist: "+root + filename, tok.metadata); - oq.back().args.pop_back(); - oq.back().args.push_back(parseSerpent(root + filename)); + if (v == "inset") { + oq.pop_back(); + oq.push_back(parseSerpent(root + filename)); + } + else { + oq.back().args.pop_back(); + oq.back().args.push_back( + 
asn("outer", parseSerpent(root + filename), tok.metadata)); + } } //Useful for debugging //for (int i = 0; i < oq.size(); i++) { @@ -237,7 +242,7 @@ Node treefy(std::vector stream) { err("Output blank", Metadata()); } else if (oq.size() > 1) { - err("Multiple expressions or unclosed bracket", oq[1].metadata); + return asn("multi", oq, oq[0].metadata); } return oq[0]; @@ -262,15 +267,9 @@ int spaceCount(std::string s) { bool bodied(std::string tok) { return tok == "if" || tok == "elif" || tok == "while" || tok == "with" || tok == "def" || tok == "extern" - || tok == "data"; -} - -// Is this a command that takes an argument as a child block? -bool childBlocked(std::string tok) { - return tok == "if" || tok == "elif" || tok == "else" - || tok == "code" || tok == "shared" || tok == "init" - || tok == "while" || tok == "repeat" || tok == "for" - || tok == "with" || tok == "def"; + || tok == "data" || tok == "assert" || tok == "return" + || tok == "fun" || tok == "scope" || tok == "macro" + || tok == "type"; } // Are the two commands meant to continue each other? @@ -278,10 +277,7 @@ bool bodiedContinued(std::string prev, std::string tok) { return (prev == "if" && tok == "elif") || (prev == "elif" && tok == "else") || (prev == "elif" && tok == "elif") - || (prev == "if" && tok == "else") - || (prev == "init" && tok == "code") - || (prev == "shared" && tok == "code") - || (prev == "shared" && tok == "init"); + || (prev == "if" && tok == "else"); } // Is a line of code empty? 
@@ -310,16 +306,17 @@ Node parseLines(std::vector lines, Metadata metadata, int sp) { } // Tokenize current line std::vector tokens = tokenize(main.substr(sp), metadata); - // Remove extraneous tokens, including if / elif + // Remove comments std::vector tokens2; for (unsigned j = 0; j < tokens.size(); j++) { if (tokens[j].val == "#" || tokens[j].val == "//") break; - if (j >= 1 || !bodied(tokens[j].val)) { - tokens2.push_back(tokens[j]); - } + tokens2.push_back(tokens[j]); } - if (tokens2.size() > 0 && tokens2.back().val == ":") + bool expectingChildBlock = false; + if (tokens2.size() > 0 && tokens2.back().val == ":") { tokens2.pop_back(); + expectingChildBlock = true; + } // Parse current line Node out = parseSerpentTokenStream(tokens2); // Parse child block @@ -343,14 +340,8 @@ Node parseLines(std::vector lines, Metadata metadata, int sp) { for (unsigned i = 0; i < childBlock.size(); i++) { if (childBlock[i].length() > 0) { cbe = false; break; } } - // Bring back if / elif into AST - if (bodied(tokens[0].val)) { - std::vector args; - args.push_back(out); - out = astnode(tokens[0].val, args, out.metadata); - } // Add child block to AST - if (childBlocked(tokens[0].val)) { + if (expectingChildBlock) { if (cbe) err("Expected indented child block!", out.metadata); out.type = ASTNODE; @@ -360,6 +351,37 @@ Node parseLines(std::vector lines, Metadata metadata, int sp) { } else if (!cbe) err("Did not expect indented child block!", out.metadata); + else if (out.args.size() && out.args[out.args.size() - 1].val == ":") { + Node n = out.args[out.args.size() - 1]; + out.args.pop_back(); + out.args.push_back(n.args[0]); + out.args.push_back(n.args[1]); + } + // Bring back if / elif into AST + if (bodied(tokens[0].val)) { + if (out.val != "multi") { + // token not being used in bodied form + } + else if (out.args[0].val == "id") + out = astnode(tokens[0].val, out.args[1].args, out.metadata); + else if (out.args[0].type == TOKEN) { + std::vector out2; + for (unsigned i = 1; i < 
out.args.size(); i++) + out2.push_back(out.args[i]); + out = astnode(tokens[0].val, out2, out.metadata); + } + else + out = astnode("fun", out.args, out.metadata); + } + // Multi not supported + if (out.val == "multi") + err("Multiple expressions or unclosed bracket", out.metadata); + // Convert top-level colon expressions into non-colon expressions; + // makes if statements and the like equivalent indented or not + //if (out.val == ":" && out.args[0].type == TOKEN) + // out = asn(out.args[0].val, out.args[1], out.metadata); + //if (bodied(tokens[0].val) && out.args[0].val == ":") + // out = asn(tokens[0].val, out.args[0].args); if (o.size() == 0 || o.back().type == TOKEN) { o.push_back(out); continue; diff --git a/libserpent/preprocess.cpp b/libserpent/preprocess.cpp new file mode 100644 index 000000000..2df149945 --- /dev/null +++ b/libserpent/preprocess.cpp @@ -0,0 +1,327 @@ +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" +#include "rewriteutils.h" +#include "optimize.h" +#include "preprocess.h" +#include "functions.h" +#include "opcodes.h" + +// Convert a function of the form (def (f x y z) (do stuff)) into +// (if (first byte of ABI is correct) (seq (setup x y z) (do stuff))) +Node convFunction(Node node, int functionCount) { + std::string prefix = "_temp"+mkUniqueToken()+"_"; + Metadata m = node.metadata; + + if (node.args.size() != 2) + err("Malformed def!", m); + // Collect the list of variable names and variable byte counts + Node unpack = unpackArguments(node.args[0].args, m); + // And the actual code + Node body = node.args[1]; + // Main LLL-based function body + return astnode("if", + astnode("eq", + astnode("get", token("__funid", m), m), + token(unsignedToDecimal(functionCount), m), + m), + astnode("seq", unpack, body, m)); +} + +// Populate an svObj with the arguments needed to determine +// the storage position of a node +svObj getStorageVars(svObj pre, Node node, std::string prefix, + int 
index) { + Metadata m = node.metadata; + if (!pre.globalOffset.size()) pre.globalOffset = "0"; + std::vector h; + std::vector coefficients; + // Array accesses or atoms + if (node.val == "access" || node.type == TOKEN) { + std::string tot = "1"; + h = listfyStorageAccess(node); + coefficients.push_back("1"); + for (unsigned i = h.size() - 1; i >= 1; i--) { + // Array sizes must be constant or at least arithmetically + // evaluable at compile time + if (!isPureArithmetic(h[i])) + err("Array size must be fixed value", m); + // Create a list of the coefficient associated with each + // array index + coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); + } + } + // Tuples + else { + int startc; + // Handle the (fun args...) case + if (node.val == "fun") { + startc = 1; + h = listfyStorageAccess(node.args[0]); + } + // Handle the ( args...) case, which + // the serpent parser produces when the function + // is a simple name and not a complex astnode + else { + startc = 0; + h = listfyStorageAccess(token(node.val, m)); + } + svObj sub = pre; + sub.globalOffset = "0"; + // Evaluate tuple elements recursively + for (unsigned i = startc; i < node.args.size(); i++) { + sub = getStorageVars(sub, + node.args[i], + prefix+h[0].val.substr(2)+".", + i-startc); + } + coefficients.push_back(sub.globalOffset); + for (unsigned i = h.size() - 1; i >= 1; i--) { + // Array sizes must be constant or at least arithmetically + // evaluable at compile time + if (!isPureArithmetic(h[i])) + err("Array size must be fixed value", m); + // Create a list of the coefficient associated with each + // array index + coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); + } + pre.offsets = sub.offsets; + pre.coefficients = sub.coefficients; + pre.nonfinal = sub.nonfinal; + pre.nonfinal[prefix+h[0].val.substr(2)] = true; + } + pre.coefficients[prefix+h[0].val.substr(2)] = coefficients; + pre.offsets[prefix+h[0].val.substr(2)] = pre.globalOffset; + 
pre.indices[prefix+h[0].val.substr(2)] = index; + if (decimalGt(tt176, coefficients.back())) + pre.globalOffset = decimalAdd(pre.globalOffset, coefficients.back()); + return pre; +} + +// Preprocess input containing functions +// +// localExterns is a map of the form, eg, +// +// { x: { foo: 0, bar: 1, baz: 2 }, y: { qux: 0, foo: 1 } ... } +// +// localExternSigs is a map of the form, eg, +// +// { x : { foo: iii, bar: iis, baz: ia }, y: { qux: i, foo: as } ... } +// +// Signifying that x.foo = 0, x.baz = 2, y.foo = 1, etc +// and that x.foo has three integers as arguments, x.bar has two +// integers and a variable-length string, and baz has an integer +// and an array +// +// globalExterns is a one-level map, eg from above +// +// { foo: 1, bar: 1, baz: 2, qux: 0 } +// +// globalExternSigs is a one-level map, eg from above +// +// { foo: as, bar: iis, baz: ia, qux: i} +// +// Note that globalExterns and globalExternSigs may be ambiguous +// Also, a null signature implies an infinite tail of integers +preprocessResult preprocessInit(Node inp) { + Metadata m = inp.metadata; + if (inp.val != "seq") + inp = astnode("seq", inp, m); + std::vector empty = std::vector(); + Node init = astnode("seq", empty, m); + Node shared = astnode("seq", empty, m); + std::vector any; + std::vector functions; + preprocessAux out = preprocessAux(); + out.localExterns["self"] = std::map(); + int functionCount = 0; + int storageDataCount = 0; + for (unsigned i = 0; i < inp.args.size(); i++) { + Node obj = inp.args[i]; + // Functions + if (obj.val == "def") { + if (obj.args.size() == 0) + err("Empty def", m); + std::string funName = obj.args[0].val; + // Init, shared and any are special functions + if (funName == "init" || funName == "shared" || funName == "any") { + if (obj.args[0].args.size()) + err(funName+" cannot have arguments", m); + } + if (funName == "init") init = obj.args[1]; + else if (funName == "shared") shared = obj.args[1]; + else if (funName == "any") 
any.push_back(obj.args[1]); + else { + // Other functions + functions.push_back(convFunction(obj, functionCount)); + out.localExterns["self"][obj.args[0].val] = functionCount; + out.localExternSigs["self"][obj.args[0].val] + = getSignature(obj.args[0].args); + functionCount++; + } + } + // Extern declarations + else if (obj.val == "extern") { + std::string externName = obj.args[0].val; + Node al = obj.args[1]; + if (!out.localExterns.count(externName)) + out.localExterns[externName] = std::map(); + for (unsigned i = 0; i < al.args.size(); i++) { + if (al.args[i].val == ":") { + std::string v = al.args[i].args[0].val; + std::string sig = al.args[i].args[1].val; + out.globalExterns[v] = i; + out.globalExternSigs[v] = sig; + out.localExterns[externName][v] = i; + out.localExternSigs[externName][v] = sig; + } + else { + std::string v = al.args[i].val; + out.globalExterns[v] = i; + out.globalExternSigs[v] = ""; + out.localExterns[externName][v] = i; + out.localExternSigs[externName][v] = ""; + } + } + } + // Custom macros + else if (obj.val == "macro" || (obj.val == "fun" && obj.args[0].val == "macro")) { + // Rules for valid macros: + // + // There are only four categories of valid macros: + // + // 1. a macro where the outer function is something + // which is NOT an existing valid function/extern/datum + // 2. a macro of the form set(c(x), d) where c must NOT + // be an existing valid function/extern/datum + // 3. something of the form access(c(x)), where c must NOT + // be an existing valid function/extern/datum + // 4. something of the form set(access(c(x)), d) where c must + // NOT be an existing valid function/extern/datum + // 5. 
something of the form with(c(x), d, e) where c must + // NOT be an existing valid function/extern/datum + bool valid = false; + Node pattern; + Node substitution; + int priority; + // Priority not set: default zero + if (obj.val == "macro") { + pattern = obj.args[0]; + substitution = obj.args[1]; + priority = 0; + } + // Specified priority + else { + pattern = obj.args[1]; + substitution = obj.args[2]; + if (obj.args[0].args.size()) + priority = dtu(obj.args[0].args[0].val); + else + priority = 0; + } + if (opcode(pattern.val) < 0 && !isValidFunctionName(pattern.val)) + valid = true; + if (pattern.val == "set" && + opcode(pattern.args[0].val) < 0 && + !isValidFunctionName(pattern.args[0].val)) + valid = true; + if (pattern.val == "access" && + opcode(pattern.args[0].val) < 0 && + !isValidFunctionName(pattern.args[0].val)) + if (pattern.val == "set" && + pattern.args[0].val == "access" && + opcode(pattern.args[0].args[0].val) < 0 && + !isValidFunctionName(pattern.args[0].args[0].val)) + valid = true; + if (pattern.val == "with" && + opcode(pattern.args[0].val) < 0 && + !isValidFunctionName(pattern.args[0].val)) + valid = true; + if (valid) { + if (!out.customMacros.count(priority)) + out.customMacros[priority] = rewriteRuleSet(); + out.customMacros[priority].addRule + (rewriteRule(pattern, substitution)); + } + else warn("Macro does not fit valid template: "+printSimple(pattern), m); + } + // Variable types + else if (obj.val == "type") { + std::string typeName = obj.args[0].val; + std::vector vars = obj.args[1].args; + for (unsigned i = 0; i < vars.size(); i++) + out.types[vars[i].val] = typeName; + } + // Storage variables/structures + else if (obj.val == "data") { + out.storageVars = getStorageVars(out.storageVars, + obj.args[0], + "", + storageDataCount); + storageDataCount += 1; + } + else any.push_back(obj); + } + // Set up top-level AST structure + std::vector main; + if (shared.args.size()) main.push_back(shared); + if (init.args.size()) 
main.push_back(init); + + std::vector code; + if (shared.args.size()) code.push_back(shared); + for (unsigned i = 0; i < any.size(); i++) + code.push_back(any[i]); + for (unsigned i = 0; i < functions.size(); i++) + code.push_back(functions[i]); + Node codeNode; + if (functions.size() > 0) { + codeNode = astnode("with", + token("__funid", m), + astnode("byte", + token("0", m), + astnode("calldataload", token("0", m), m), + m), + astnode("seq", code, m), + m); + } + else codeNode = astnode("seq", code, m); + main.push_back(astnode("~return", + token("0", m), + astnode("lll", + codeNode, + token("0", m), + m), + m)); + + + Node result; + if (main.size() == 1) result = main[0]; + else result = astnode("seq", main, inp.metadata); + return preprocessResult(result, out); +} + +preprocessResult processTypes (preprocessResult pr) { + preprocessAux aux = pr.second; + Node node = pr.first; + if (node.type == TOKEN && aux.types.count(node.val)) + node = asn(aux.types[node.val], node, node.metadata); + else if (node.val == "untyped") + return preprocessResult(node.args[0], aux); + else if (node.val == "outer") + return preprocessResult(node, aux); + else { + for (unsigned i = 0; i < node.args.size(); i++) { + node.args[i] = + processTypes(preprocessResult(node.args[i], aux)).first; + } + } + return preprocessResult(node, aux); +} + +preprocessResult preprocess(Node n) { + return processTypes(preprocessInit(n)); +} diff --git a/libserpent/preprocess.h b/libserpent/preprocess.h new file mode 100644 index 000000000..321fb8527 --- /dev/null +++ b/libserpent/preprocess.h @@ -0,0 +1,50 @@ +#ifndef ETHSERP_PREPROCESSOR +#define ETHSERP_PREPROCESSOR + +#include +#include +#include +#include +#include "util.h" +#include "rewriteutils.h" + +// Storage variable index storing object +struct svObj { + std::map offsets; + std::map indices; + std::map > coefficients; + std::map nonfinal; + std::string globalOffset; +}; + + + +// Preprocessing result storing object +class preprocessAux { + 
public: + preprocessAux() { + globalExterns = std::map(); + localExterns = std::map >(); + localExterns["self"] = std::map(); + } + std::map globalExterns; + std::map globalExternSigs; + std::map > localExterns; + std::map > localExternSigs; + std::map customMacros; + std::map types; + svObj storageVars; +}; + +#define preprocessResult std::pair + +// Populate an svObj with the arguments needed to determine +// the storage position of a node +svObj getStorageVars(svObj pre, Node node, std::string prefix="", + int index=0); + +// Preprocess a function (see cpp for details) +preprocessResult preprocess(Node inp); + + +#endif diff --git a/libserpent/rewriter.cpp b/libserpent/rewriter.cpp index 443457acf..294c9a0b3 100644 --- a/libserpent/rewriter.cpp +++ b/libserpent/rewriter.cpp @@ -2,37 +2,25 @@ #include #include #include -#include #include "util.h" #include "lllparser.h" #include "bignum.h" +#include "optimize.h" +#include "rewriteutils.h" +#include "preprocess.h" +#include "functions.h" +#include "opcodes.h" -std::string valid[][3] = { - { "if", "2", "3" }, - { "unless", "2", "2" }, - { "while", "2", "2" }, - { "until", "2", "2" }, - { "alloc", "1", "1" }, - { "array", "1", "1" }, - { "call", "2", tt256 }, - { "call_code", "2", tt256 }, - { "create", "1", "4" }, - { "getch", "2", "2" }, - { "setch", "3", "3" }, - { "sha3", "1", "2" }, - { "return", "1", "2" }, - { "inset", "1", "1" }, - { "min", "2", "2" }, - { "max", "2", "2" }, - { "array_lit", "0", tt256 }, - { "seq", "0", tt256 }, - { "log", "1", "6" }, - { "outer", "1", "1" }, - { "set", "2", "2" }, - { "---END---", "", "" } //Keep this line at the end of the list -}; - +// Rewrite rules std::string macros[][2] = { + { + "(seq $x)", + "$x" + }, + { + "(seq (seq) $x)", + "$x" + }, { "(+= $a $b)", "(set $a (+ $a $b))" @@ -58,24 +46,28 @@ std::string macros[][2] = { "(set $a (^ $a $b))" }, { - "(@/= $a $b)", - "(set $a (@/ $a $b))" + "(!= $a $b)", + "(iszero (eq $a $b))" }, { - "(@%= $a $b)", - "(set $a (@% $a 
$b))" + "(assert $x)", + "(unless $x (stop))" }, { - "(!= $a $b)", - "(iszero (eq $a $b))" + "(min $a $b)", + "(with $1 $a (with $2 $b (if (lt $1 $2) $1 $2)))" + }, + { + "(max $a $b)", + "(with $1 $a (with $2 $b (if (lt $1 $2) $2 $1)))" }, { - "(min a b)", - "(with $1 a (with $2 b (if (lt $1 $2) $1 $2)))" + "(smin $a $b)", + "(with $1 $a (with $2 $b (if (slt $1 $2) $1 $2)))" }, { - "(max a b)", - "(with $1 a (with $2 b (if (lt $1 $2) $2 $1)))" + "(smax $a $b)", + "(with $1 $a (with $2 $b (if (slt $1 $2) $2 $1)))" }, { "(if $cond $do (else $else))", @@ -85,10 +77,6 @@ std::string macros[][2] = { "(code $code)", "$code" }, - { - "(access (. msg data) $ind)", - "(calldataload (mul 32 $ind))" - }, { "(slice $arr $pos)", "(add $arr (mul 32 $pos))", @@ -125,13 +113,17 @@ std::string macros[][2] = { "(set (access (. self storage) $ind) $val)", "(sstore $ind $val)" }, + { + "(set (sload $ind) $val)", + "(sstore $ind $val)" + }, { "(set (access $var $ind) $val)", "(mstore (add $var (mul 32 $ind)) $val)" }, { "(getch $var $ind)", - "(mod (mload (add $var $ind)) 256)" + "(mod (mload (sub (add $var $ind) 31)) 256)" }, { "(setch $var $ind $val)", @@ -149,6 +141,10 @@ std::string macros[][2] = { "(sha3 $x)", "(seq (set $1 $x) (~sha3 (ref $1) 32))" }, + { + "(sha3 $mstart (= chars $msize))", + "(~sha3 $mstart $msize)" + }, { "(sha3 $mstart $msize)", "(~sha3 $mstart (mul 32 $msize))" @@ -161,6 +157,10 @@ std::string macros[][2] = { "(return $x)", "(seq (set $1 $x) (~return (ref $1) 32))" }, + { + "(return $mstart (= chars $msize))", + "(~return $mstart $msize)" + }, { "(return $start $len)", "(~return $start (mul 32 $len))" @@ -171,7 +171,7 @@ std::string macros[][2] = { }, { "(|| $x $y)", - "(with $1 $x (if (get $1) (get $1) $y))" + "(with $1 $x (if $1 $1 $y))" }, { "(>= $x $y)", @@ -181,45 +181,41 @@ std::string macros[][2] = { "(<= $x $y)", "(iszero (sgt $x $y))" }, - { - "(@>= $x $y)", - "(iszero (lt $x $y))" - }, - { - "(@<= $x $y)", - "(iszero (gt $x $y))" - }, { "(create 
$code)", "(create 0 $code)" }, { "(create $endowment $code)", - "(with $1 (msize) (create $endowment (get $1) (lll (outer $code) (msize))))" + "(with $1 (msize) (create $endowment (get $1) (lll $code (msize))))" }, { "(sha256 $x)", - "(seq (set $1 $x) (pop (~call 101 2 0 (ref $1) 32 (ref $2) 32)) (get $2))" + "(with $1 (alloc 64) (seq (mstore (add (get $1) 32) $x) (pop (~call 101 2 0 (add (get $1) 32) 32 (get $1) 32)) (mload (get $1))))" + }, + { + "(sha256 $arr (= chars $sz))", + "(with $1 (alloc 32) (seq (pop (~call 101 2 0 $arr $sz (get $1) 32)) (mload (get $1))))" }, { "(sha256 $arr $sz)", - "(seq (pop (~call 101 2 0 $arr (mul 32 $sz) (ref $2) 32)) (get $2))" + "(with $1 (alloc 32) (seq (pop (~call 101 2 0 $arr (mul 32 $sz) (get $1) 32)) (mload (get $1))))" }, { "(ripemd160 $x)", - "(seq (set $1 $x) (pop (~call 101 3 0 (ref $1) 32 (ref $2) 32)) (get $2))" + "(with $1 (alloc 64) (seq (mstore (add (get $1) 32) $x) (pop (~call 101 3 0 (add (get $1) 32) 32 (get $1) 32)) (mload (get $1))))" }, { - "(ripemd160 $arr $sz)", - "(seq (pop (~call 101 3 0 $arr (mul 32 $sz) (ref $2) 32)) (get $2))" + "(ripemd160 $arr (= chars $sz))", + "(with $1 (alloc 32) (seq (pop (~call 101 3 0 $arr $sz (get $1) 32)) (mload (get $1))))" }, { - "(ecrecover $h $v $r $s)", - "(seq (declare $1) (declare $2) (declare $3) (declare $4) (set $1 $h) (set $2 $v) (set $3 $r) (set $4 $s) (pop (~call 101 1 0 (ref $1) 128 (ref $5) 32)) (get $5))" + "(ripemd160 $arr $sz)", + "(with $1 (alloc 32) (seq (pop (~call 101 3 0 $arr (mul 32 $sz) (get $1) 32)) (mload (get $1))))" }, { - "(seq (seq) $x)", - "$x" + "(ecrecover $h $v $r $s)", + "(with $1 (alloc 160) (seq (mstore (get $1) $h) (mstore (add (get $1) 32) $v) (mstore (add (get $1) 64) $r) (mstore (add (get $1) 96) $s) (pop (~call 101 1 0 (get $1) 128 (add (get $1) 128) 32)) (mload (add (get $1) 128))))" }, { "(inset $x)", @@ -235,21 +231,64 @@ std::string macros[][2] = { }, { "(log $t1)", - "(~log1 $t1 0 0)" + "(~log1 0 0 $t1)" }, { "(log $t1 $t2)",
- "(~log2 $t1 $t2 0 0)" + "(~log2 0 0 $t1 $t2)" }, { "(log $t1 $t2 $t3)", - "(~log3 $t1 $t2 $t3 0 0)" + "(~log3 0 0 $t1 $t2 $t3)" }, { "(log $t1 $t2 $t3 $t4)", - "(~log4 $t1 $t2 $t3 $t4 0 0)" + "(~log4 0 0 $t1 $t2 $t3 $t4)" + }, + { + "(logarr $a $sz)", + "(~log0 $a (mul 32 $sz))" + }, + { + "(logarr $a $sz $t1)", + "(~log1 $a (mul 32 $sz) $t1)" + }, + { + "(logarr $a $sz $t1 $t2)", + "(~log2 $a (mul 32 $sz) $t1 $t2)" + }, + { + "(logarr $a $sz $t1 $t2 $t3)", + "(~log3 $a (mul 32 $sz) $t1 $t2 $t3)" + }, + { + "(logarr $a $sz $t1 $t2 $t3 $t4)", + "(~log4 $a (mul 32 $sz) $t1 $t2 $t3 $t4)" + }, + { + "(save $loc $array (= chars $count))", + "(with $location (ref $loc) (with $c $count (with $end (div $c 32) (with $i 0 (seq (while (slt $i $end) (seq (sstore (add $i $location) (access $array $i)) (set $i (add $i 1)))) (sstore (add $i $location) (~and (access $array $i) (sub 0 (exp 256 (sub 32 (mod $c 32)))))))))))" + }, + { + "(save $loc $array $count)", + "(with $location (ref $loc) (with $end $count (with $i 0 (while (slt $i $end) (seq (sstore (add $i $location) (access $array $i)) (set $i (add $i 1)))))))" + }, + { + "(load $loc (= chars $count))", + "(with $location (ref $loc) (with $c $count (with $a (alloc $c) (with $i 0 (seq (while (slt $i (div $c 32)) (seq (set (access $a $i) (sload (add $location $i))) (set $i (add $i 1)))) (set (access $a $i) (~and (sload (add $location $i)) (sub 0 (exp 256 (sub 32 (mod $c 32)))))) $a)))))" + }, + { + "(load $loc $count)", + "(with $location (ref $loc) (with $c $count (with $a (alloc $c) (with $i 0 (seq (while (slt $i $c) (seq (set (access $a $i) (sload (add $location $i))) (set $i (add $i 1)))) $a)))))" + }, + { + "(unsafe_mcopy $to $from $sz)", + "(with _sz $sz (with _from $from (with _to $to (seq (comment STARTING UNSAFE MCOPY) (with _i 0 (while (lt _i _sz) (seq (mstore (add _to _i) (mload (add _from _i))) (set _i (add _i 32)))))))))" + }, + { + "(mcopy $to $from $sz)", + "(with _to $to (with _from $from (with _sz $sz (seq
(comment STARTING MCOPY) (with _i 0 (seq (while (lt (add _i 31) _sz) (seq (mstore (add _to _i) (mload (add _from _i))) (set _i (add _i 32)))) (with _mask (exp 256 (sub 32 (mod _sz 32))) (mstore (add _to _i) (add (mod (mload (add _to _i)) _mask) (and (mload (add _from _i)) (sub 0 _mask)))))))))))" }, - { "(. msg datasize)", "(div (calldatasize) 32)" }, { "(. msg sender)", "(caller)" }, { "(. msg value)", "(callvalue)" }, { "(. tx gasprice)", "(gasprice)" }, @@ -267,8 +306,7 @@ std::string macros[][2] = { { "---END---", "" } //Keep this line at the end of the list }; -std::vector > nodeMacros; - +// Token synonyms std::string synonyms[][2] = { { "or", "||" }, { "and", "&&" }, @@ -286,10 +324,6 @@ std::string synonyms[][2] = { { "^", "exp" }, { "**", "exp" }, { "%", "smod" }, - { "@/", "div" }, - { "@%", "mod" }, - { "@<", "lt" }, - { "@>", "gt" }, { "<", "slt" }, { ">", "sgt" }, { "=", "set" }, @@ -298,6 +332,10 @@ std::string synonyms[][2] = { { "---END---", "" } //Keep this line at the end of the list }; +std::map synonymMap; + +// Custom setters (need to be registered separately +// for use with managed storage) std::string setters[][2] = { { "+=", "+" }, { "-=", "-" }, @@ -305,550 +343,136 @@ std::string setters[][2] = { { "/=", "/" }, { "%=", "%" }, { "^=", "^" }, - { "!=", "!"
}, { "---END---", "" } //Keep this line at the end of the list }; -// Match result storing object -struct matchResult { - bool success; - std::map map; -}; - -// Storage variable index storing object -struct svObj { - std::map offsets; - std::map indices; - std::map > coefficients; - std::map nonfinal; - std::string globalOffset; -}; - -// Preprocessing result storing object -class preprocessAux { - public: - preprocessAux() { - globalExterns = std::map(); - localExterns = std::map >(); - localExterns["self"] = std::map(); - } - std::map globalExterns; - std::map > localExterns; - svObj storageVars; -}; - -#define preprocessResult std::pair - -// Main pattern matching routine, for those patterns that can be expressed -// using our standard mini-language above -// -// Returns two values. First, a boolean to determine whether the node matches -// the pattern, second, if the node does match then a map mapping variables -// in the pattern to nodes -matchResult match(Node p, Node n) { - matchResult o; - o.success = false; - if (p.type == TOKEN) { - if (p.val == n.val && n.type == TOKEN) o.success = true; - else if (p.val[0] == '$') { - o.success = true; - o.map[p.val.substr(1)] = n; - } - } - else if (n.type==TOKEN || p.val!=n.val || p.args.size()!=n.args.size()) { - // do nothing - } - else { - for (unsigned i = 0; i < p.args.size(); i++) { - matchResult oPrime = match(p.args[i], n.args[i]); - if (!oPrime.success) { - o.success = false; - return o; - } - for (std::map::iterator it = oPrime.map.begin(); - it != oPrime.map.end(); - it++) { - o.map[(*it).first] = (*it).second; - } - } - o.success = true; - } - return o; -} - -// Fills in the pattern with a dictionary mapping variable names to -// nodes (these dicts are generated by match). Match and subst together -// create a full pattern-matching engine. 
-Node subst(Node pattern, - std::map dict, - std::string varflag, - Metadata metadata) { - if (pattern.type == TOKEN && pattern.val[0] == '$') { - if (dict.count(pattern.val.substr(1))) { - return dict[pattern.val.substr(1)]; - } - else { - return token(varflag + pattern.val.substr(1), metadata); - } - } - else if (pattern.type == TOKEN) { - return pattern; - } - else { - std::vector args; - for (unsigned i = 0; i < pattern.args.size(); i++) { - args.push_back(subst(pattern.args[i], dict, varflag, metadata)); - } - return astnode(pattern.val, args, metadata); - } -} +std::map setterMap; // Processes mutable array literals - Node array_lit_transform(Node node) { + std::string prefix = "_temp"+mkUniqueToken() + "_"; Metadata m = node.metadata; - std::vector o1; - o1.push_back(token(unsignedToDecimal(node.args.size() * 32), m)); - std::vector o2; - std::string symb = "_temp"+mkUniqueToken()+"_0"; - o2.push_back(token(symb, m)); - o2.push_back(astnode("alloc", o1, m)); - std::vector o3; - o3.push_back(astnode("set", o2, m)); + std::map d; + std::string o = "(seq (set $arr (alloc "+utd(node.args.size()*32)+"))"; for (unsigned i = 0; i < node.args.size(); i++) { - std::vector o5; - o5.push_back(token(symb, m)); - std::vector o6; - o6.push_back(astnode("get", o5, m)); - o6.push_back(token(unsignedToDecimal(i * 32), m)); - std::vector o7; - o7.push_back(astnode("add", o6)); - o7.push_back(node.args[i]); - o3.push_back(astnode("mstore", o7, m)); - } - std::vector o8; - o8.push_back(token(symb, m)); - o3.push_back(astnode("get", o8)); - return astnode("seq", o3, m); -} - -// Is the given node something of the form -// self.cow -// self.horse[0] -// self.a[6][7][self.storage[3]].chicken[9] -bool isNodeStorageVariable(Node node) { - std::vector nodez; - nodez.push_back(node); - while (1) { - if (nodez.back().type == TOKEN) return false; - if (nodez.back().args.size() == 0) return false; - if (nodez.back().val != "." 
&& nodez.back().val != "access") - return false; - if (nodez.back().args[0].val == "self") return true; - nodez.push_back(nodez.back().args[0]); + o += " (mstore (add (get $arr) "+utd(i * 32)+") $"+utd(i)+")"; + d[utd(i)] = node.args[i]; } + o += " (get $arr))"; + return subst(parseLLL(o), d, prefix, m); } -Node optimize(Node inp); - -Node apply_rules(preprocessResult pr); - -// Convert: -// self.cow -> ["cow"] -// self.horse[0] -> ["horse", "0"] -// self.a[6][7][self.storage[3]].chicken[9] -> -// ["6", "7", (sload 3), "chicken", "9"] -std::vector listfyStorageAccess(Node node) { - std::vector out; - std::vector nodez; - nodez.push_back(node); - while (1) { - if (nodez.back().type == TOKEN) { - out.push_back(token("--" + nodez.back().val, node.metadata)); - std::vector outrev; - for (int i = (signed)out.size() - 1; i >= 0; i--) { - outrev.push_back(out[i]); +// Processes long text literals +Node string_transform(Node node) { + Metadata m = node.metadata; + if (!node.args.size()) + err("Empty text!", m); + if (node.args[0].val.size() < 2 + || node.args[0].val[0] != '"' + || node.args[0].val[node.args[0].val.size() - 1] != '"') + err("Text contents don't look like a string!", m); + std::string bin = node.args[0].val.substr(1, node.args[0].val.size() - 2); + unsigned sz = bin.size(); + std::vector o; + for (unsigned i = 0; i < sz; i += 32) { + std::string t = binToNumeric(bin.substr(i, 32)); + if ((sz - i) < 32 && (sz - i) > 0) { + while ((sz - i) < 32) { + t = decimalMul(t, "256"); + i--; } - return outrev; + i = sz; } - if (nodez.back().val == ".") - nodez.back().args[1].val = "--" + nodez.back().args[1].val; - if (nodez.back().args.size() == 0) - err("Error parsing storage variable statement", node.metadata); - if (nodez.back().args.size() == 1) - out.push_back(token(tt256m1, node.metadata)); - else - out.push_back(nodez.back().args[1]); - nodez.push_back(nodez.back().args[0]); + o.push_back(token(t, node.metadata)); } + node = astnode("array_lit", o, 
node.metadata); + return array_lit_transform(node); } -// Cool function for debug purposes (named cerrStringList to make -// all prints searchable via 'cerr') -void cerrStringList(std::vector s, std::string suffix="") { - for (unsigned i = 0; i < s.size(); i++) std::cerr << s[i] << " "; - std::cerr << suffix << "\n"; -} -// Populate an svObj with the arguments needed to determine -// the storage position of a node -svObj getStorageVars(svObj pre, Node node, std::string prefix="", int index=0) { - Metadata m = node.metadata; - if (!pre.globalOffset.size()) pre.globalOffset = "0"; - std::vector h; - std::vector coefficients; - // Array accesses or atoms - if (node.val == "access" || node.type == TOKEN) { - std::string tot = "1"; - h = listfyStorageAccess(node); - coefficients.push_back("1"); - for (unsigned i = h.size() - 1; i >= 1; i--) { - // Array sizes must be constant or at least arithmetically - // evaluable at compile time - h[i] = optimize(apply_rules(preprocessResult( - h[i], preprocessAux()))); - if (!isNumberLike(h[i])) - err("Array size must be fixed value", m); - // Create a list of the coefficient associated with each - // array index - coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); - } - } - // Tuples - else { - int startc; - // Handle the (fun args...) case - if (node.val == "fun") { - startc = 1; - h = listfyStorageAccess(node.args[0]); - } - // Handle the ( args...) 
case, which - // the serpent parser produces when the function - // is a simple name and not a complex astnode - else { - startc = 0; - h = listfyStorageAccess(token(node.val, m)); - } - svObj sub = pre; - sub.globalOffset = "0"; - // Evaluate tuple elements recursively - for (unsigned i = startc; i < node.args.size(); i++) { - sub = getStorageVars(sub, - node.args[i], - prefix+h[0].val.substr(2)+".", - i-1); - } - coefficients.push_back(sub.globalOffset); - for (unsigned i = h.size() - 1; i >= 1; i--) { - // Array sizes must be constant or at least arithmetically - // evaluable at compile time - h[i] = optimize(apply_rules(preprocessResult( - h[i], preprocessAux()))); - if (!isNumberLike(h[i])) - err("Array size must be fixed value", m); - // Create a list of the coefficient associated with each - // array index - coefficients.push_back(decimalMul(coefficients.back(), h[i].val)); - } - pre.offsets = sub.offsets; - pre.coefficients = sub.coefficients; - pre.nonfinal = sub.nonfinal; - pre.nonfinal[prefix+h[0].val.substr(2)] = true; - } - pre.coefficients[prefix+h[0].val.substr(2)] = coefficients; - pre.offsets[prefix+h[0].val.substr(2)] = pre.globalOffset; - pre.indices[prefix+h[0].val.substr(2)] = index; - if (decimalGt(tt176, coefficients.back())) - pre.globalOffset = decimalAdd(pre.globalOffset, coefficients.back()); - return pre; -} +Node apply_rules(preprocessResult pr); -// Transform a node of the form (call to funid vars...) 
into +// Transform ".(args...)" into // a call - -#define psn std::pair - -Node call_transform(Node node, std::string op) { +Node dotTransform(Node node, preprocessAux aux) { Metadata m = node.metadata; // We're gonna make lots of temporary variables, // so set up a unique flag for them std::string prefix = "_temp"+mkUniqueToken()+"_"; + // Check that the function name is a token + if (node.args[0].args[1].type == ASTNODE) + err("Function name must be static", m); + + Node dotOwner = node.args[0].args[0]; + std::string dotMember = node.args[0].args[1].val; // kwargs = map of special arguments std::map kwargs; kwargs["value"] = token("0", m); - kwargs["gas"] = parseLLL("(- (gas) 25)"); - std::vector args; - for (unsigned i = 0; i < node.args.size(); i++) { - if (node.args[i].val == "=" || node.args[i].val == "set") { - if (node.args[i].args.size() != 2) - err("Malformed set", m); - kwargs[node.args[i].args[0].val] = node.args[i].args[1]; - } - else args.push_back(node.args[i]); - } - if (args.size() < 2) err("Too few arguments for call!", m); - kwargs["to"] = args[0]; - kwargs["funid"] = args[1]; - std::vector inputs; - for (unsigned i = 2; i < args.size(); i++) { - inputs.push_back(args[i]); - } - std::vector with; - std::vector precompute; - std::vector post; - if (kwargs.count("data")) { - if (!kwargs.count("datasz")) err("Required param datasz", m); - // The strategy here is, we store the function ID byte at the index - // before the start of the byte, but then we store the value that was - // there before and reinstate it once the process is over - // store data: data array start - with.push_back(psn(prefix+"data", kwargs["data"])); - // store data: prior: data array - 32 - Node prior = astnode("sub", token(prefix+"data", m), token("32", m), m); - with.push_back(psn(prefix+"prior", prior)); - // store data: priormem: data array - 32 prior memory value - Node priormem = astnode("mload", token(prefix+"prior", m), m); - with.push_back(psn(prefix+"priormem", 
priormem)); - // post: reinstate prior mem at data array - 32 - post.push_back(astnode("mstore", - token(prefix+"prior", m), - token(prefix+"priormem", m), - m)); - // store data: datastart: data array - 1 - Node datastart = astnode("sub", - token(prefix+"data", m), - token("1", m), - m); - with.push_back(psn(prefix+"datastart", datastart)); - // push funid byte to datastart - precompute.push_back(astnode("mstore8", - token(prefix+"datastart", m), - kwargs["funid"], - m)); - // set data array start loc - kwargs["datain"] = token(prefix+"datastart", m); - kwargs["datainsz"] = astnode("add", - token("1", m), - astnode("mul", - token("32", m), - kwargs["datasz"], - m), - m); - } - else { - // Here, there is no data array, instead there are function arguments. - // This actually lets us be much more efficient with how we set things - // up. - // Pre-declare variables; relies on declared variables being sequential - precompute.push_back(astnode("declare", - token(prefix+"prebyte", m), - m)); - for (unsigned i = 0; i < inputs.size(); i++) { - precompute.push_back(astnode("declare", - token(prefix+unsignedToDecimal(i), m), - m)); - } - // Set up variables to store the function arguments, and store the - // function ID at the byte before the start - Node datastart = astnode("add", - token("31", m), - astnode("ref", - token(prefix+"prebyte", m), - m), - m); - precompute.push_back(astnode("mstore8", - datastart, - kwargs["funid"], - m)); - for (unsigned i = 0; i < inputs.size(); i++) { - precompute.push_back(astnode("set", - token(prefix+unsignedToDecimal(i), m), - inputs[i], - m)); - - } - kwargs["datain"] = datastart; - kwargs["datainsz"] = token(unsignedToDecimal(inputs.size()*32+1), m); - } - if (!kwargs.count("outsz")) { - kwargs["dataout"] = astnode("ref", token(prefix+"dataout", m), m); - kwargs["dataoutsz"] = token("32", node.metadata); - post.push_back(astnode("get", token(prefix+"dataout", m), m)); - } - else { - kwargs["dataout"] = kwargs["out"]; - 
kwargs["dataoutsz"] = kwargs["outsz"]; - post.push_back(astnode("ref", token(prefix+"dataout", m), m)); - } - // Set up main call - std::vector main; - for (unsigned i = 0; i < precompute.size(); i++) { - main.push_back(precompute[i]); - } - std::vector call; - call.push_back(kwargs["gas"]); - call.push_back(kwargs["to"]); - call.push_back(kwargs["value"]); - call.push_back(kwargs["datain"]); - call.push_back(kwargs["datainsz"]); - call.push_back(kwargs["dataout"]); - call.push_back(kwargs["dataoutsz"]); - main.push_back(astnode("pop", astnode("~"+op, call, m), m)); - for (unsigned i = 0; i < post.size(); i++) { - main.push_back(post[i]); - } - Node mainNode = astnode("seq", main, node.metadata); - // Add with variables - for (int i = with.size() - 1; i >= 0; i--) { - mainNode = astnode("with", - token(with[i].first, m), - with[i].second, - mainNode, - m); - } - return mainNode; -} - -// Preprocess input containing functions -// -// localExterns is a map of the form, eg, -// -// { x: { foo: 0, bar: 1, baz: 2 }, y: { qux: 0, foo: 1 } ... 
} -// -// Signifying that x.foo = 0, x.baz = 2, y.foo = 1, etc -// -// globalExterns is a one-level map, eg from above -// -// { foo: 1, bar: 1, baz: 2, qux: 0 } -// -// Note that globalExterns may be ambiguous -preprocessResult preprocess(Node inp) { - inp = inp.args[0]; - Metadata m = inp.metadata; - if (inp.val != "seq") { - std::vector args; - args.push_back(inp); - inp = astnode("seq", args, m); - } - std::vector empty; - Node init = astnode("seq", empty, m); - Node shared = astnode("seq", empty, m); - std::vector any; - std::vector functions; - preprocessAux out = preprocessAux(); - out.localExterns["self"] = std::map(); - int functionCount = 0; - int storageDataCount = 0; - for (unsigned i = 0; i < inp.args.size(); i++) { - Node obj = inp.args[i]; - // Functions - if (obj.val == "def") { - if (obj.args.size() == 0) - err("Empty def", m); - std::string funName = obj.args[0].val; - // Init, shared and any are special functions - if (funName == "init" || funName == "shared" || funName == "any") { - if (obj.args[0].args.size()) - err(funName+" cannot have arguments", m); - } - if (funName == "init") init = obj.args[1]; - else if (funName == "shared") shared = obj.args[1]; - else if (funName == "any") any.push_back(obj.args[1]); - else { - // Other functions - functions.push_back(obj); - out.localExterns["self"][obj.args[0].val] = functionCount; - functionCount++; - } - } - // Extern declarations - else if (obj.val == "extern") { - std::string externName = obj.args[0].args[0].val; - Node al = obj.args[0].args[1]; - if (!out.localExterns.count(externName)) - out.localExterns[externName] = std::map(); - for (unsigned i = 0; i < al.args.size(); i++) { - out.globalExterns[al.args[i].val] = i; - out.localExterns[externName][al.args[i].val] = i; - } - } - // Storage variables/structures - else if (obj.val == "data") { - out.storageVars = getStorageVars(out.storageVars, - obj.args[0], - "", - storageDataCount); - storageDataCount += 1; - } - else any.push_back(obj); - } 
- std::vector main; - if (shared.args.size()) main.push_back(shared); - if (init.args.size()) main.push_back(init); - - std::vector code; - if (shared.args.size()) code.push_back(shared); - for (unsigned i = 0; i < any.size(); i++) - code.push_back(any[i]); - for (unsigned i = 0; i < functions.size(); i++) - code.push_back(functions[i]); - main.push_back(astnode("~return", - token("0", m), - astnode("lll", - astnode("seq", code, m), - token("0", m), - m), - m)); - - - - return preprocessResult(astnode("seq", main, inp.metadata), out); -} - -// Transform ".(args...)" into -// (call args...) -Node dotTransform(Node node, preprocessAux aux) { - Metadata m = node.metadata; - Node pre = node.args[0].args[0]; - std::string post = node.args[0].args[1].val; - if (node.args[0].args[1].type == ASTNODE) - err("Function name must be static", m); - // Search for as=? and call=code keywords + kwargs["gas"] = subst(parseLLL("(- (gas) 25)"), msn(), prefix, m); + // Search for as=? and call=code keywords, and isolate the actual + // function arguments + std::vector fnargs; std::string as = ""; - bool call_code = false; + std::string op = "call"; for (unsigned i = 1; i < node.args.size(); i++) { - Node arg = node.args[i]; + fnargs.push_back(node.args[i]); + Node arg = fnargs.back(); if (arg.val == "=" || arg.val == "set") { if (arg.args[0].val == "as") as = arg.args[1].val; if (arg.args[0].val == "call" && arg.args[1].val == "code") - call_code = true; + op = "callcode"; + if (arg.args[0].val == "gas") + kwargs["gas"] = arg.args[1]; + if (arg.args[0].val == "value") + kwargs["value"] = arg.args[1]; + if (arg.args[0].val == "outsz") + kwargs["outsz"] = arg.args[1]; } } - if (pre.val == "self") { + if (dotOwner.val == "self") { if (as.size()) err("Cannot use \"as\" when calling self!", m); - as = pre.val; + as = dotOwner.val; } - std::vector args; - args.push_back(pre); - // Determine the funId assuming the "as" keyword was used + // Determine the funId and sig assuming the "as" 
keyword was used + int funId = 0; + std::string sig; if (as.size() > 0 && aux.localExterns.count(as)) { - if (!aux.localExterns[as].count(post)) - err("Invalid call: "+printSimple(pre)+"."+post, m); - std::string funid = unsignedToDecimal(aux.localExterns[as][post]); - args.push_back(token(funid, m)); + if (!aux.localExterns[as].count(dotMember)) + err("Invalid call: "+printSimple(dotOwner)+"."+dotMember, m); + funId = aux.localExterns[as][dotMember]; + sig = aux.localExternSigs[as][dotMember]; } - // Determine the funId otherwise + // Determine the funId and sig otherwise else if (!as.size()) { - if (!aux.globalExterns.count(post)) - err("Invalid call: "+printSimple(pre)+"."+post, m); - std::string key = unsignedToDecimal(aux.globalExterns[post]); - args.push_back(token(key, m)); + if (!aux.globalExterns.count(dotMember)) + err("Invalid call: "+printSimple(dotOwner)+"."+dotMember, m); + std::string key = unsignedToDecimal(aux.globalExterns[dotMember]); + funId = aux.globalExterns[dotMember]; + sig = aux.globalExternSigs[dotMember]; + } + else err("Invalid call: "+printSimple(dotOwner)+"."+dotMember, m); + // Pack arguments + kwargs["data"] = packArguments(fnargs, sig, funId, m); + kwargs["to"] = dotOwner; + Node main; + // Pack output + if (!kwargs.count("outsz")) { + main = parseLLL( + "(with _data $data (seq " + "(pop (~"+op+" $gas $to $value (access _data 0) (access _data 1) (ref $dataout) 32))" + "(get $dataout)))"); + } + else { + main = parseLLL( + "(with _data $data (with _outsz (mul 32 $outsz) (with _out (alloc _outsz) (seq " + "(pop (~"+op+" $gas $to $value (access _data 0) (access _data 1) _out _outsz))" + "(get _out)))))"); } - else err("Invalid call: "+printSimple(pre)+"."+post, m); - for (unsigned i = 1; i < node.args.size(); i++) - args.push_back(node.args[i]); - return astnode(call_code ? 
"call_code" : "call", args, m); + // Set up main call + + Node o = subst(main, kwargs, prefix, m); + return o; } // Transform an access of the form self.bob, self.users[5], etc into @@ -877,7 +501,8 @@ Node dotTransform(Node node, preprocessAux aux) { // obj2[0].a -> sha3([1, 0, 0]) // obj2[5].b[1][3] -> sha3([1, 5, 1, 1, 3]) // obj2[45].c -> sha3([1, 45, 2]) -Node storageTransform(Node node, preprocessAux aux, bool mapstyle=false) { +Node storageTransform(Node node, preprocessAux aux, + bool mapstyle=false, bool ref=false) { Metadata m = node.metadata; // Get a list of all of the "access parameters" used in order // eg. self.users[5].cow[4][m[2]][woof] -> @@ -909,7 +534,7 @@ Node storageTransform(Node node, preprocessAux aux, bool mapstyle=false) { // If the size of an object exceeds 2^176, we make it an infinite // array if (decimalGt(coefficients.back(), tt176) && !mapstyle) - return storageTransform(node, aux, true); + return storageTransform(node, aux, true, ref); offset = decimalAdd(offset, aux.storageVars.offsets[tempPrefix]); c = 0; if (mapstyle) @@ -940,28 +565,29 @@ Node storageTransform(Node node, preprocessAux aux, bool mapstyle=false) { if (c > (signed)coefficients.size() - 1) { err("Too many array index lookups", m); } + Node o; if (mapstyle) { - // We pre-declare variables, relying on the idea that sequentially - // declared variables are doing to appear beside each other in - // memory - std::vector main; + std::string t = "_temp_"+mkUniqueToken(); + std::vector sub; for (unsigned i = 0; i < terms.size(); i++) - main.push_back(astnode("declare", - token(varPrefix+unsignedToDecimal(i), m), - m)); - for (unsigned i = 0; i < terms.size(); i++) - main.push_back(astnode("set", - token(varPrefix+unsignedToDecimal(i), m), - terms[i], - m)); - main.push_back(astnode("ref", token(varPrefix+"0", m), m)); - Node sz = token(unsignedToDecimal(terms.size()), m); - return astnode("sload", - astnode("sha3", - astnode("seq", main, m), - sz, - m), - m); + 
sub.push_back(asn("mstore", + asn("add", + tkn(utd(i * 32), m), + asn("get", tkn(t+"pos", m), m), + m), + terms[i], + m)); + sub.push_back(tkn(t+"pos", m)); + Node main = asn("with", + tkn(t+"pos", m), + asn("alloc", tkn(utd(terms.size() * 32), m), m), + asn("seq", sub, m), + m); + Node sz = token(utd(terms.size() * 32), m); + o = astnode("~sha3", + main, + sz, + m); } else { // We add up all the index*coefficients @@ -972,42 +598,92 @@ Node storageTransform(Node node, preprocessAux aux, bool mapstyle=false) { temp.push_back(terms[i]); out = astnode("add", temp, node.metadata); } - std::vector temp2; - temp2.push_back(out); - return astnode("sload", temp2, node.metadata); + o = out; + } + if (ref) return o; + else return astnode("sload", o, node.metadata); +} + +// Basic rewrite rule execution +std::pair rulesTransform(Node node, rewriteRuleSet macros) { + std::string prefix = "_temp_"+mkUniqueToken(); + bool changed = false; + if (!macros.ruleLists.count(node.val)) + return std::pair(node, false); + std::vector rules = macros.ruleLists[node.val]; + for (unsigned pos = 0; pos < rules.size(); pos++) { + rewriteRule macro = rules[pos]; + matchResult mr = match(macro.pattern, node); + if (mr.success) { + node = subst(macro.substitution, mr.map, prefix, node.metadata); + std::pair o = rulesTransform(node, macros); + o.second = true; + return o; + } + } + return std::pair(node, changed); +} + +std::pair synonymTransform(Node node) { + bool changed = false; + if (node.type == ASTNODE && synonymMap.count(node.val)) { + node.val = synonymMap[node.val]; + changed = true; } + return std::pair(node, changed); } +rewriteRuleSet nodeMacros; +rewriteRuleSet setterMacros; -// Recursively applies rewrite rules -Node apply_rules(preprocessResult pr) { +bool dontDescend(std::string s) { + return s == "macro" || s == "comment" || s == "outer"; +} + +// Recursively applies any set of rewrite rules +std::pair apply_rules_iter(preprocessResult pr, rewriteRuleSet rules) { + bool changed 
= false; Node node = pr.first; - // If the rewrite rules have not yet been parsed, parse them - if (!nodeMacros.size()) { - for (int i = 0; i < 9999; i++) { - std::vector o; - if (macros[i][0] == "---END---") break; - o.push_back(parseLLL(macros[i][0])); - o.push_back(parseLLL(macros[i][1])); - nodeMacros.push_back(o); + if (dontDescend(node.val)) + return std::pair(node, false); + std::pair o = rulesTransform(node, rules); + node = o.first; + changed = changed || o.second; + if (node.type == ASTNODE) { + for (unsigned i = 0; i < node.args.size(); i++) { + std::pair r = + apply_rules_iter(preprocessResult(node.args[i], pr.second), rules); + node.args[i] = r.first; + changed = changed || r.second; } } - // Assignment transformations - for (int i = 0; i < 9999; i++) { - if (setters[i][0] == "---END---") break; - if (node.val == setters[i][0]) { - node = astnode("=", - node.args[0], - astnode(setters[i][1], - node.args[0], - node.args[1], - node.metadata), - node.metadata); - } + return std::pair(node, changed); +} + +// Recursively applies rewrite rules and other primary transformations +std::pair mainTransform(preprocessResult pr) { + bool changed = false; + Node node = pr.first; + + // Anything inside "outer" should be treated as a separate program + // and thus recursively compiled in its entirety + if (node.val == "outer") { + node = apply_rules(preprocess(node.args[0])); + changed = true; } + + // Don't descend into comments, macros and inner scopes + if (dontDescend(node.val)) + return std::pair(node, changed); + // Special storage transformation if (isNodeStorageVariable(node)) { node = storageTransform(node, pr.second); + changed = true; + } + if (node.val == "ref" && isNodeStorageVariable(node.args[0])) { + node = storageTransform(node.args[0], pr.second, false, true); + changed = true; } if (node.val == "=" && isNodeStorageVariable(node.args[0])) { Node t = storageTransform(node.args[0], pr.second); @@ -1017,195 +693,213 @@ Node apply_rules(preprocessResult 
pr) { o.push_back(node.args[1]); node = astnode("sstore", o, node.metadata); } + changed = true; } // Main code - unsigned pos = 0; - std::string prefix = "_temp"+mkUniqueToken()+"_"; - while(1) { - if (synonyms[pos][0] == "---END---") { - break; - } - else if (node.type == ASTNODE && node.val == synonyms[pos][0]) { - node.val = synonyms[pos][1]; - } - pos++; - } - for (pos = 0; pos < nodeMacros.size(); pos++) { - Node pattern = nodeMacros[pos][0]; - matchResult mr = match(pattern, node); - if (mr.success) { - Node pattern2 = nodeMacros[pos][1]; - node = subst(pattern2, mr.map, prefix, node.metadata); - pos = 0; - } - } + std::pair pnb = synonymTransform(node); + node = pnb.first; + changed = changed || pnb.second; + // std::cerr << priority << " " << macros.size() << "\n"; + std::pair pnc = rulesTransform(node, nodeMacros); + node = pnc.first; + changed = changed || pnc.second; + + // Special transformations - if (node.val == "outer") { - pr = preprocess(node); - node = pr.first; - } - if (node.val == "array_lit") + if (node.val == "array_lit") { node = array_lit_transform(node); + changed = true; + } if (node.val == "fun" && node.args[0].val == ".") { node = dotTransform(node, pr.second); + changed = true; + } + if (node.val == "text") { + node = string_transform(node); + changed = true; } - if (node.val == "call") - node = call_transform(node, "call"); - if (node.val == "call_code") - node = call_transform(node, "call_code"); if (node.type == ASTNODE) { unsigned i = 0; + // Arg 0 of all of these is a variable, so should not be changed if (node.val == "set" || node.val == "ref" - || node.val == "get" || node.val == "with" - || node.val == "def" || node.val == "declare") { - node.args[0].val = "'" + node.args[0].val; + || node.val == "get" || node.val == "with") { + if (node.args[0].type == TOKEN && + node.args[0].val.size() > 0 && node.args[0].val[0] != '\'') { + node.args[0].val = "'" + node.args[0].val; + changed = true; + } i = 1; } - if (node.val == "def") { 
- for (unsigned j = 0; j < node.args[0].args.size(); j++) { - if (node.args[0].args[j].val == ":") { - node.args[0].args[j].val = "kv"; - node.args[0].args[j].args[0].val = - "'" + node.args[0].args[j].args[0].val; - } - else { - node.args[0].args[j].val = "'" + node.args[0].args[j].val; - } - } + // Convert arglen(x) to '_len_x + else if (node.val == "arglen") { + node.val = "get"; + node.args[0].val = "'_len_" + node.args[0].val; + i = 1; + changed = true; } + // Recursively process children for (; i < node.args.size(); i++) { - node.args[i] = - apply_rules(preprocessResult(node.args[i], pr.second)); + std::pair r = + mainTransform(preprocessResult(node.args[i], pr.second)); + node.args[i] = r.first; + changed = changed || r.second; } } + // Add leading ' to variable names, and wrap them inside get else if (node.type == TOKEN && !isNumberLike(node)) { - node.val = "'" + node.val; - std::vector args; - args.push_back(node); - node = astnode("get", args, node.metadata); + if (node.val.size() && node.val[0] != '\'' && node.val[0] != '$') { + Node n = astnode("get", tkn("'"+node.val), node.metadata); + node = n; + changed = true; + } } - // This allows people to use ~x as a way of having functions with the same - // name and arity as macros; the idea is that ~x is a "final" form, and - // should not be remacroed, but it is converted back at the end - if (node.type == ASTNODE && node.val[0] == '~') - node.val = node.val.substr(1); - return node; + // Convert all numbers to normalized form + else if (node.type == TOKEN && isNumberLike(node) && !isDecimal(node.val)) { + node.val = strToNumeric(node.val); + changed = true; + } + return std::pair(node, changed); } -// Compile-time arithmetic calculations -Node optimize(Node inp) { - if (inp.type == TOKEN) { - Node o = tryNumberize(inp); - if (decimalGt(o.val, tt256, true)) - err("Value too large (exceeds 32 bytes or 2^256)", inp.metadata); - return o; +// Do some preprocessing to convert all of our macro lists into 
compiled +// forms that can then be reused +void parseMacros() { + for (int i = 0; i < 9999; i++) { + std::vector o; + if (macros[i][0] == "---END---") break; + nodeMacros.addRule(rewriteRule( + parseLLL(macros[i][0]), + parseLLL(macros[i][1]) + )); } - for (unsigned i = 0; i < inp.args.size(); i++) { - inp.args[i] = optimize(inp.args[i]); + for (int i = 0; i < 9999; i++) { + std::vector o; + if (setters[i][0] == "---END---") break; + setterMacros.addRule(rewriteRule( + asn(setters[i][0], tkn("$x"), tkn("$y")), + asn("=", tkn("$x"), asn(setters[i][1], tkn("$x"), tkn("$y"))) + )); } - // Degenerate cases for add and mul - if (inp.args.size() == 2) { - if (inp.val == "add" && inp.args[0].type == TOKEN && - inp.args[0].val == "0") { - inp = inp.args[1]; - } - if (inp.val == "add" && inp.args[1].type == TOKEN && - inp.args[1].val == "0") { - inp = inp.args[0]; - } - if (inp.val == "mul" && inp.args[0].type == TOKEN && - inp.args[0].val == "1") { - inp = inp.args[1]; - } - if (inp.val == "mul" && inp.args[1].type == TOKEN && - inp.args[1].val == "1") { - inp = inp.args[0]; - } + for (int i = 0; i < 9999; i++) { + if (synonyms[i][0] == "---END---") break; + synonymMap[synonyms[i][0]] = synonyms[i][1]; + } +} + +Node apply_rules(preprocessResult pr) { + // If the rewrite rules have not yet been parsed, parse them + if (!nodeMacros.ruleLists.size()) parseMacros(); + // Iterate over macros by priority list + std::map::iterator it; + std::pair r; + for(it=pr.second.customMacros.begin(); + it != pr.second.customMacros.end(); it++) { + while (1) { + // std::cerr << "STARTING ARI CYCLE: " << (*it).first <<"\n"; + // std::cerr << printAST(pr.first) << "\n"; + r = apply_rules_iter(pr, (*it).second); + pr.first = r.first; + if (!r.second) break; + } + } + // Apply setter macros + while (1) { + r = apply_rules_iter(pr, setterMacros); + pr.first = r.first; + if (!r.second) break; } - // Arithmetic computation - if (inp.args.size() == 2 - && inp.args[0].type == TOKEN - && 
inp.args[1].type == TOKEN) { - std::string o; - if (inp.val == "add") { - o = decimalMod(decimalAdd(inp.args[0].val, inp.args[1].val), tt256); - } - else if (inp.val == "sub") { - if (decimalGt(inp.args[0].val, inp.args[1].val, true)) - o = decimalSub(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "mul") { - o = decimalMod(decimalMul(inp.args[0].val, inp.args[1].val), tt256); - } - else if (inp.val == "div" && inp.args[1].val != "0") { - o = decimalDiv(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "sdiv" && inp.args[1].val != "0" - && decimalGt(tt255, inp.args[0].val) - && decimalGt(tt255, inp.args[1].val)) { - o = decimalDiv(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "mod" && inp.args[1].val != "0") { - o = decimalMod(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "smod" && inp.args[1].val != "0" - && decimalGt(tt255, inp.args[0].val) - && decimalGt(tt255, inp.args[1].val)) { - o = decimalMod(inp.args[0].val, inp.args[1].val); - } - else if (inp.val == "exp") { - o = decimalModExp(inp.args[0].val, inp.args[1].val, tt256); - } - if (o.length()) return token(o, inp.metadata); + // Apply all other macros + while (1) { + r = mainTransform(pr); + pr.first = r.first; + if (!r.second) break; } - return inp; + return r.first; } +// Pre-validation Node validate(Node inp) { + Metadata m = inp.metadata; if (inp.type == ASTNODE) { int i = 0; - while(valid[i][0] != "---END---") { - if (inp.val == valid[i][0]) { + while(validFunctions[i][0] != "---END---") { + if (inp.val == validFunctions[i][0]) { std::string sz = unsignedToDecimal(inp.args.size()); - if (decimalGt(valid[i][1], sz)) { + if (decimalGt(validFunctions[i][1], sz)) { err("Too few arguments for "+inp.val, inp.metadata); } - if (decimalGt(sz, valid[i][2])) { + if (decimalGt(sz, validFunctions[i][2])) { err("Too many arguments for "+inp.val, inp.metadata); } } i++; } + } + else if (inp.type == TOKEN) { + if (!inp.val.size()) err("??? 
empty token", m); + if (inp.val[0] == '_') err("Variables cannot start with _", m); } for (unsigned i = 0; i < inp.args.size(); i++) validate(inp.args[i]); return inp; } Node postValidate(Node inp) { + // This allows people to use ~x as a way of having functions with the same + // name and arity as macros; the idea is that ~x is a "final" form, and + // should not be remacroed, but it is converted back at the end + if (inp.val.size() > 0 && inp.val[0] == '~') { + inp.val = inp.val.substr(1); + } if (inp.type == ASTNODE) { if (inp.val == ".") err("Invalid object member (ie. a foo.bar not mapped to anything)", inp.metadata); - for (unsigned i = 0; i < inp.args.size(); i++) - postValidate(inp.args[i]); + else if (opcode(inp.val) >= 0) { + if ((signed)inp.args.size() < opinputs(inp.val)) + err("Too few arguments for "+inp.val, inp.metadata); + if ((signed)inp.args.size() > opinputs(inp.val)) + err("Too many arguments for "+inp.val, inp.metadata); + } + else if (isValidLLLFunc(inp.val, inp.args.size())) { + // do nothing + } + else err ("Invalid argument count or LLL function: "+printSimple(inp), inp.metadata); + for (unsigned i = 0; i < inp.args.size(); i++) { + inp.args[i] = postValidate(inp.args[i]); + } } return inp; } -Node outerWrap(Node inp) { - std::vector args; - args.push_back(inp); - return astnode("outer", args, inp.metadata); + +Node rewriteChunk(Node inp) { + return postValidate(optimize(apply_rules( + preprocessResult( + validate(inp), preprocessAux())))); } -Node rewrite(Node inp) { - return postValidate(optimize(apply_rules(preprocessResult( - validate(outerWrap(inp)), preprocessAux())))); +// Flatten nested sequence into flat sequence +Node flattenSeq(Node inp) { + std::vector o; + if (inp.val == "seq" && inp.type == ASTNODE) { + for (unsigned i = 0; i < inp.args.size(); i++) { + if (inp.args[i].val == "seq" && inp.args[i].type == ASTNODE) + o = extend(o, flattenSeq(inp.args[i]).args); + else + o.push_back(flattenSeq(inp.args[i])); + } + } + else if 
(inp.type == ASTNODE) { + for (unsigned i = 0; i < inp.args.size(); i++) { + o.push_back(flattenSeq(inp.args[i])); + } + } + else return inp; + return asn(inp.val, o, inp.metadata); } -Node rewriteChunk(Node inp) { - return postValidate(optimize(apply_rules(preprocessResult( - validate(inp), preprocessAux())))); +Node rewrite(Node inp) { + return postValidate(optimize(apply_rules(preprocess(flattenSeq(inp))))); } using namespace std; diff --git a/libserpent/rewriteutils.cpp b/libserpent/rewriteutils.cpp new file mode 100644 index 000000000..0d810bdbc --- /dev/null +++ b/libserpent/rewriteutils.cpp @@ -0,0 +1,211 @@ +#include +#include +#include +#include +#include "util.h" +#include "lllparser.h" +#include "bignum.h" +#include "rewriteutils.h" +#include "optimize.h" + +// Valid functions and their min and max argument counts +std::string validFunctions[][3] = { + { "if", "2", "3" }, + { "unless", "2", "2" }, + { "while", "2", "2" }, + { "until", "2", "2" }, + { "alloc", "1", "1" }, + { "array", "1", "1" }, + { "call", "2", tt256 }, + { "callcode", "2", tt256 }, + { "create", "1", "4" }, + { "getch", "2", "2" }, + { "setch", "3", "3" }, + { "sha3", "1", "2" }, + { "return", "1", "2" }, + { "inset", "1", "1" }, + { "min", "2", "2" }, + { "max", "2", "2" }, + { "array_lit", "0", tt256 }, + { "seq", "0", tt256 }, + { "log", "1", "6" }, + { "outer", "1", "1" }, + { "set", "2", "2" }, + { "get", "1", "1" }, + { "ref", "1", "1" }, + { "declare", "1", tt256 }, + { "with", "3", "3" }, + { "outer", "1", "1" }, + { "mcopy", "3", "3" }, + { "unsafe_mcopy", "3", "3" }, + { "save", "3", "3" }, + { "load", "2", "2" }, + { "---END---", "", "" } //Keep this line at the end of the list +}; + +std::map vfMap; + +// Is a function name one of the valid functions above? 
+bool isValidFunctionName(std::string f) { + if (vfMap.size() == 0) { + for (int i = 0; ; i++) { + if (validFunctions[i][0] == "---END---") break; + vfMap[validFunctions[i][0]] = true; + } + } + return vfMap.count(f); +} + +// Cool function for debug purposes (named cerrStringList to make +// all prints searchable via 'cerr') +void cerrStringList(std::vector s, std::string suffix) { + for (unsigned i = 0; i < s.size(); i++) std::cerr << s[i] << " "; + std::cerr << suffix << "\n"; +} + +// Convert: +// self.cow -> ["cow"] +// self.horse[0] -> ["horse", "0"] +// self.a[6][7][self.storage[3]].chicken[9] -> +// ["6", "7", (sload 3), "chicken", "9"] +std::vector listfyStorageAccess(Node node) { + std::vector out; + std::vector nodez; + nodez.push_back(node); + while (1) { + if (nodez.back().type == TOKEN) { + out.push_back(token("--" + nodez.back().val, node.metadata)); + std::vector outrev; + for (int i = (signed)out.size() - 1; i >= 0; i--) { + outrev.push_back(out[i]); + } + return outrev; + } + if (nodez.back().val == ".") + nodez.back().args[1].val = "--" + nodez.back().args[1].val; + if (nodez.back().args.size() == 0) + err("Error parsing storage variable statement", node.metadata); + if (nodez.back().args.size() == 1) + out.push_back(token(tt256m1, node.metadata)); + else + out.push_back(nodez.back().args[1]); + nodez.push_back(nodez.back().args[0]); + } +} + +// Is the given node something of the form +// self.cow +// self.horse[0] +// self.a[6][7][self.storage[3]].chicken[9] +bool isNodeStorageVariable(Node node) { + std::vector nodez; + nodez.push_back(node); + while (1) { + if (nodez.back().type == TOKEN) return false; + if (nodez.back().args.size() == 0) return false; + if (nodez.back().val != "." 
&& nodez.back().val != "access") + return false; + if (nodez.back().args[0].val == "self") return true; + nodez.push_back(nodez.back().args[0]); + } +} + +// Main pattern matching routine, for those patterns that can be expressed +// using our standard mini-language above +// +// Returns two values. First, a boolean to determine whether the node matches +// the pattern, second, if the node does match then a map mapping variables +// in the pattern to nodes +matchResult match(Node p, Node n) { + matchResult o; + o.success = false; + if (p.type == TOKEN) { + if (p.val == n.val && n.type == TOKEN) o.success = true; + else if (p.val[0] == '$' || p.val[0] == '@') { + o.success = true; + o.map[p.val.substr(1)] = n; + } + } + else if (n.type==TOKEN || p.val!=n.val || p.args.size()!=n.args.size()) { + // do nothing + } + else { + for (unsigned i = 0; i < p.args.size(); i++) { + matchResult oPrime = match(p.args[i], n.args[i]); + if (!oPrime.success) { + o.success = false; + return o; + } + for (std::map::iterator it = oPrime.map.begin(); + it != oPrime.map.end(); + it++) { + o.map[(*it).first] = (*it).second; + } + } + o.success = true; + } + return o; +} + + +// Fills in the pattern with a dictionary mapping variable names to +// nodes (these dicts are generated by match). Match and subst together +// create a full pattern-matching engine. 
+Node subst(Node pattern, + std::map dict, + std::string varflag, + Metadata m) { + // Swap out patterns at the token level + if (pattern.metadata.ln == -1) + pattern.metadata = m; + if (pattern.type == TOKEN && + pattern.val[0] == '$') { + if (dict.count(pattern.val.substr(1))) { + return dict[pattern.val.substr(1)]; + } + else { + return token(varflag + pattern.val.substr(1), m); + } + } + // Other tokens are untouched + else if (pattern.type == TOKEN) { + return pattern; + } + // Substitute recursively for ASTs + else { + std::vector args; + for (unsigned i = 0; i < pattern.args.size(); i++) { + args.push_back(subst(pattern.args[i], dict, varflag, m)); + } + return asn(pattern.val, args, m); + } +} + +// Transforms a sequence containing two-argument with statements +// into a statement containing those statements in nested form +Node withTransform (Node source) { + Node o = token("--"); + Metadata m = source.metadata; + std::vector args; + for (int i = source.args.size() - 1; i >= 0; i--) { + Node a = source.args[i]; + if (a.val == "with" && a.args.size() == 2) { + std::vector flipargs; + for (int j = args.size() - 1; j >= 0; j--) + flipargs.push_back(args[j]); + if (o.val != "--") + flipargs.push_back(o); + o = asn("with", a.args[0], a.args[1], asn("seq", flipargs, m), m); + args = std::vector(); + } + else { + args.push_back(a); + } + } + std::vector flipargs; + for (int j = args.size() - 1; j >= 0; j--) + flipargs.push_back(args[j]); + if (o.val != "--") + flipargs.push_back(o); + return asn("seq", flipargs, m); +} diff --git a/libserpent/rewriteutils.h b/libserpent/rewriteutils.h new file mode 100644 index 000000000..3a9a837ad --- /dev/null +++ b/libserpent/rewriteutils.h @@ -0,0 +1,76 @@ +#ifndef ETHSERP_REWRITEUTILS +#define ETHSERP_REWRITEUTILS + +#include +#include +#include +#include +#include "util.h" + +// Valid functions and their min and max argument counts +extern std::string validFunctions[][3]; + +extern std::map vfMap; + +bool 
isValidFunctionName(std::string f); + +// Converts deep array access into ordered list of the arguments +// along the descent +std::vector listfyStorageAccess(Node node); + +// Cool function for debug purposes (named cerrStringList to make +// all prints searchable via 'cerr') +void cerrStringList(std::vector s, std::string suffix=""); + +// Is the given node something of the form +// self.cow +// self.horse[0] +// self.a[6][7][self.storage[3]].chicken[9] +bool isNodeStorageVariable(Node node); + +// Applies rewrite rules adding without wrapper +Node rewriteChunk(Node inp); + +// Match result storing object +struct matchResult { + bool success; + std::map map; +}; + +// Match node to pattern +matchResult match(Node p, Node n); + +// Substitute node using pattern +Node subst(Node pattern, + std::map dict, + std::string varflag, + Metadata m); + +Node withTransform(Node source); + +class rewriteRule { + public: + rewriteRule(Node p, Node s) { + pattern = p; + substitution = s; + } + Node pattern; + Node substitution; +}; + +class rewriteRuleSet { + public: + rewriteRuleSet() { + ruleLists = std::map >(); + } + void addRule(rewriteRule r) { + if (!ruleLists.count(r.pattern.val)) + ruleLists[r.pattern.val] = std::vector(); + ruleLists[r.pattern.val].push_back(r); + } + std::map > ruleLists; +}; + + + +#endif diff --git a/libserpent/tokenize.cpp b/libserpent/tokenize.cpp index c6a211593..b60cc8a44 100644 --- a/libserpent/tokenize.cpp +++ b/libserpent/tokenize.cpp @@ -13,8 +13,8 @@ int chartype(char c) { if (c >= '0' && c <= '9') return ALPHANUM; else if (c >= 'a' && c <= 'z') return ALPHANUM; else if (c >= 'A' && c <= 'Z') return ALPHANUM; - else if (std::string("~_$").find(c) != std::string::npos) return ALPHANUM; - else if (c == '\t' || c == ' ' || c == '\n') return SPACE; + else if (std::string("~_$@").find(c) != std::string::npos) return ALPHANUM; + else if (c == '\t' || c == ' ' || c == '\n' || c == '\r') return SPACE; else if (std::string("()[]{}").find(c) != 
std::string::npos) return BRACK; else if (c == '"') return DQUOTE; else if (c == '\'') return SQUOTE; diff --git a/libserpent/util.cpp b/libserpent/util.cpp index fbce5e8b5..5e83c0e41 100644 --- a/libserpent/util.cpp +++ b/libserpent/util.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include "util.h" #include "bignum.h" #include @@ -28,6 +27,11 @@ Node astnode(std::string val, std::vector args, Metadata met) { } //AST node constructors for a specific number of children +Node astnode(std::string val, Metadata met) { + std::vector args; + return astnode(val, args, met); +} + Node astnode(std::string val, Node a, Metadata met) { std::vector args; args.push_back(a); @@ -49,6 +53,16 @@ Node astnode(std::string val, Node a, Node b, Node c, Metadata met) { return astnode(val, args, met); } +Node astnode(std::string val, Node a, Node b, Node c, Node d, Metadata met) { + std::vector args; + args.push_back(a); + args.push_back(b); + args.push_back(c); + args.push_back(d); + return astnode(val, args, met); +} + + // Print token list std::string printTokens(std::vector tokens) { std::string s = ""; @@ -146,6 +160,15 @@ std::string indentLines(std::string inp) { return joinLines(lines); } +// Converts binary to simple numeric format +std::string binToNumeric(std::string inp) { + std::string o = "0"; + for (unsigned i = 0; i < inp.length(); i++) { + o = decimalAdd(decimalMul(o,"256"), unsignedToDecimal((unsigned char)inp[i])); + } + return o; +} + // Converts string to simple numeric format std::string strToNumeric(std::string inp) { std::string o = "0"; @@ -154,7 +177,7 @@ std::string strToNumeric(std::string inp) { } else if ((inp[0] == '"' && inp[inp.length()-1] == '"') || (inp[0] == '\'' && inp[inp.length()-1] == '\'')) { - for (unsigned i = 1; i < inp.length() - 1; i++) { + for (unsigned i = 1; i < inp.length() - 1; i++) { o = decimalAdd(decimalMul(o,"256"), unsignedToDecimal((unsigned char)inp[i])); } } @@ -181,6 +204,14 @@ bool isNumberLike(Node node) { return 
strToNumeric(node.val) != ""; } +// Is the number decimal? +bool isDecimal(std::string inp) { + for (unsigned i = 0; i < inp.length(); i++) { + if (inp[i] < '0' || inp[i] > '9') return false; + } + return true; +} + //Normalizes number representations Node nodeToNumeric(Node node) { std::string o = strToNumeric(node.val); @@ -246,6 +277,14 @@ void err(std::string errtext, Metadata met) { throw(err); } +//Report warning +void warn(std::string errtext, Metadata met) { + std::string err = "Warning (file \"" + met.file + "\", line " + + unsignedToDecimal(met.ln + 1) + ", char " + unsignedToDecimal(met.ch) + + "): " + errtext; + std::cerr << err << "\n"; +} + //Bin to hex std::string binToHex(std::string inp) { std::string o = ""; @@ -280,7 +319,15 @@ std::string upperCase(std::string inp) { //Three-int vector std::vector triple(int a, int b, int c) { - std::vector o; - o.push_back(a); o.push_back(b); o.push_back(c); - return o; + std::vector v; + v.push_back(a); + v.push_back(b); + v.push_back(c); + return v; +} + +//Extend node vector +std::vector extend(std::vector a, std::vector b) { + for (unsigned i = 0; i < b.size(); i++) a.push_back(b[i]); + return a; } diff --git a/libserpent/util.h b/libserpent/util.h index c0a2e9324..e25712d0f 100644 --- a/libserpent/util.h +++ b/libserpent/util.h @@ -28,30 +28,36 @@ const int TOKEN = 0, // Stores metadata about each token class Metadata { public: - Metadata(std::string File="main", int Ln=0, int Ch=0) { + Metadata(std::string File="main", int Ln=-1, int Ch=-1) { file = File; ln = Ln; ch = Ch; + fixed = false; } std::string file; int ln; int ch; + bool fixed; }; std::string mkUniqueToken(); // type can be TOKEN or ASTNODE -struct Node { - int type; - std::string val; - std::vector args; - Metadata metadata; +class Node { + public: + int type; + std::string val; + std::vector args; + Metadata metadata; }; Node token(std::string val, Metadata met=Metadata()); Node astnode(std::string val, std::vector args, Metadata 
met=Metadata()); +Node astnode(std::string val, Metadata met=Metadata()); Node astnode(std::string val, Node a, Metadata met=Metadata()); Node astnode(std::string val, Node a, Node b, Metadata met=Metadata()); Node astnode(std::string val, Node a, Node b, Node c, Metadata met=Metadata()); +Node astnode(std::string val, Node a, Node b, + Node c, Node d, Metadata met=Metadata()); // Number of tokens in a tree int treeSize(Node prog); @@ -74,6 +80,9 @@ std::string joinLines(std::vector lines); // Indent all lines by 4 spaces std::string indentLines(std::string inp); +// Converts binary to simple numeric format +std::string binToNumeric(std::string inp); + // Converts string to simple numeric format std::string strToNumeric(std::string inp); @@ -98,6 +107,9 @@ bool exists(std::string fileName); //Report error void err(std::string errtext, Metadata met); +//Report warning +void warn(std::string errtext, Metadata met); + //Bin to hex std::string binToHex(std::string inp); @@ -110,4 +122,16 @@ std::string upperCase(std::string inp); //Three-int vector std::vector triple(int a, int b, int c); +//Extend node vector +std::vector extend(std::vector a, std::vector b); + +// Is the number decimal? 
+bool isDecimal(std::string inp); + +#define asn astnode +#define tkn token +#define msi std::map +#define msn std::map +#define mss std::map + #endif diff --git a/libsolidity/CompilerStack.cpp b/libsolidity/CompilerStack.cpp index 9fdc88baa..79716fdec 100644 --- a/libsolidity/CompilerStack.cpp +++ b/libsolidity/CompilerStack.cpp @@ -36,13 +36,12 @@ namespace dev namespace solidity { -void CompilerStack::addSource(string const& _name, string const& _content) +bool CompilerStack::addSource(string const& _name, string const& _content) { - if (m_sources.count(_name)) - BOOST_THROW_EXCEPTION(CompilerError() << errinfo_comment("Source by given name already exists.")); - + bool existed = m_sources.count(_name); reset(true); m_sources[_name].scanner = make_shared(CharStream(_content), _name); + return existed; } void CompilerStack::setSource(string const& _sourceCode) @@ -181,6 +180,11 @@ SourceUnit const& CompilerStack::getAST(string const& _sourceName) const return *getSource(_sourceName).ast; } +ContractDefinition const& CompilerStack::getContractDefinition(string const& _contractName) const +{ + return *getContract(_contractName).contract; +} + bytes CompilerStack::staticCompile(std::string const& _sourceCode, bool _optimize) { CompilerStack stack; diff --git a/libsolidity/CompilerStack.h b/libsolidity/CompilerStack.h index 5ad6f0a60..358c8fb77 100644 --- a/libsolidity/CompilerStack.h +++ b/libsolidity/CompilerStack.h @@ -57,7 +57,8 @@ public: CompilerStack(): m_parseSuccessful(false) {} /// Adds a source object (e.g. file) to the parser. After this, parse has to be called again. - void addSource(std::string const& _name, std::string const& _content); + /// @returns true if a source object by the name already existed and was replaced. 
+ bool addSource(std::string const& _name, std::string const& _content); void setSource(std::string const& _sourceCode); /// Parses all source units that were added void parse(); @@ -86,9 +87,13 @@ public: /// Can be one of 3 types defined at @c DocumentationType std::string const& getJsonDocumentation(std::string const& _contractName, DocumentationType _type) const; - /// Returns the previously used scanner, useful for counting lines during error reporting. + /// @returns the previously used scanner, useful for counting lines during error reporting. Scanner const& getScanner(std::string const& _sourceName = "") const; + /// @returns the parsed source unit with the supplied name. SourceUnit const& getAST(std::string const& _sourceName = "") const; + /// @returns the parsed contract with the supplied name. Throws an exception if the contract + /// does not exist. + ContractDefinition const& getContractDefinition(std::string const& _contractName) const; /// Compile the given @a _sourceCode to bytecode. If a scanner is provided, it is used for /// scanning the source code - this is useful for printing exception information. diff --git a/sc/cmdline.cpp b/sc/cmdline.cpp index b44d2538c..a5fed37d6 100644 --- a/sc/cmdline.cpp +++ b/sc/cmdline.cpp @@ -10,6 +10,19 @@ int main(int argv, char** argc) { std::cerr << "Must provide a command and arguments! 
Try parse, rewrite, compile, assemble\n"; return 0; } + if (argv == 2 && (std::string(argc[1]) == "--help" || std::string(argc[1]) == "-h" )) { + std::cout << argc[1] << "\n"; + + std::cout << "serpent command input\n"; + std::cout << "where input -s for from stdin, a file, or interpreted as serpent code if does not exist as file."; + std::cout << "where command: \n"; + std::cout << " parse: Just parses and returns s-expression code.\n"; + std::cout << " rewrite: Parse, use rewrite rules print s-expressions of result.\n"; + std::cout << " compile: Return resulting compiled EVM code in hex.\n"; + std::cout << " assemble: Return result from step before compilation.\n"; + return 0; + } + std::string flag = ""; std::string command = argc[1]; std::string input; diff --git a/test/solidityJSONInterfaceTest.cpp b/test/SolidityABIJSON.cpp similarity index 100% rename from test/solidityJSONInterfaceTest.cpp rename to test/SolidityABIJSON.cpp diff --git a/test/solidityCompiler.cpp b/test/SolidityCompiler.cpp similarity index 100% rename from test/solidityCompiler.cpp rename to test/SolidityCompiler.cpp diff --git a/test/solidityEndToEndTest.cpp b/test/SolidityEndToEndTest.cpp similarity index 99% rename from test/solidityEndToEndTest.cpp rename to test/SolidityEndToEndTest.cpp index 26f5528ab..aa74f8186 100644 --- a/test/solidityEndToEndTest.cpp +++ b/test/SolidityEndToEndTest.cpp @@ -36,7 +36,7 @@ namespace solidity namespace test { -BOOST_FIXTURE_TEST_SUITE(SolidityCompilerEndToEndTest, ExecutionFramework) +BOOST_FIXTURE_TEST_SUITE(SolidityEndToEndTest, ExecutionFramework) BOOST_AUTO_TEST_CASE(smoke_test) { diff --git a/test/solidityExpressionCompiler.cpp b/test/SolidityExpressionCompiler.cpp similarity index 100% rename from test/solidityExpressionCompiler.cpp rename to test/SolidityExpressionCompiler.cpp diff --git a/test/solidityNameAndTypeResolution.cpp b/test/SolidityNameAndTypeResolution.cpp similarity index 100% rename from test/solidityNameAndTypeResolution.cpp 
rename to test/SolidityNameAndTypeResolution.cpp diff --git a/test/solidityNatspecJSON.cpp b/test/SolidityNatspecJSON.cpp similarity index 100% rename from test/solidityNatspecJSON.cpp rename to test/SolidityNatspecJSON.cpp diff --git a/test/solidityOptimizerTest.cpp b/test/SolidityOptimizer.cpp similarity index 98% rename from test/solidityOptimizerTest.cpp rename to test/SolidityOptimizer.cpp index 388e0579c..ef5c6f9b5 100644 --- a/test/solidityOptimizerTest.cpp +++ b/test/SolidityOptimizer.cpp @@ -71,7 +71,7 @@ protected: Address m_nonOptimizedContract; }; -BOOST_FIXTURE_TEST_SUITE(SolidityOptimizerTest, OptimizerTestFramework) +BOOST_FIXTURE_TEST_SUITE(SolidityOptimizer, OptimizerTestFramework) BOOST_AUTO_TEST_CASE(smoke_test) { diff --git a/test/solidityParser.cpp b/test/SolidityParser.cpp similarity index 100% rename from test/solidityParser.cpp rename to test/SolidityParser.cpp diff --git a/test/solidityScanner.cpp b/test/SolidityScanner.cpp similarity index 100% rename from test/solidityScanner.cpp rename to test/SolidityScanner.cpp