Browse Source

Remove serpent.

cl-refactor
Paweł Bylica 9 years ago
parent
commit
58800610f4
  1. 11
      CMakeLists.txt
  2. 9
      libserpent/All.h
  3. 22
      libserpent/CMakeLists.txt
  4. 112
      libserpent/bignum.cpp
  5. 41
      libserpent/bignum.h
  6. 510
      libserpent/compiler.cpp
  7. 40
      libserpent/compiler.h
  8. 35
      libserpent/funcs.cpp
  9. 35
      libserpent/funcs.h
  10. 203
      libserpent/functions.cpp
  11. 39
      libserpent/functions.h
  12. 70
      libserpent/lllparser.cpp
  13. 13
      libserpent/lllparser.h
  14. 154
      libserpent/opcodes.cpp
  15. 45
      libserpent/opcodes.h
  16. 98
      libserpent/optimize.cpp
  17. 19
      libserpent/optimize.h
  18. 437
      libserpent/parser.cpp
  19. 13
      libserpent/parser.h
  20. 327
      libserpent/preprocess.cpp
  21. 50
      libserpent/preprocess.h
  22. 905
      libserpent/rewriter.cpp
  23. 16
      libserpent/rewriter.h
  24. 212
      libserpent/rewriteutils.cpp
  25. 76
      libserpent/rewriteutils.h
  26. 115
      libserpent/tokenize.cpp
  27. 16
      libserpent/tokenize.h
  28. 333
      libserpent/util.cpp
  29. 137
      libserpent/util.h
  30. 3
      libweb3jsonrpc/WebThreeStubServerBase.cpp
  31. 12
      pullSerpent.sh
  32. 13
      pysol/MANIFEST.in
  33. 0
      pysol/README.md
  34. 115
      pysol/pysolidity.cpp
  35. 41
      pysol/setup.py
  36. 19
      sc/CMakeLists.txt
  37. 129
      sc/cmdline.cpp

11
CMakeLists.txt

@ -171,11 +171,11 @@ function(configureProject)
if (ETHASHCUDA)
add_definitions(-DETH_ETHASHCUDA)
endif()
if (ETHSTRATUM)
add_definitions(-DETH_STRATUM)
endif()
if (FATDB)
add_definitions(-DETH_FATDB)
endif()
@ -302,7 +302,6 @@ eth_format_option(ETHASHCL)
eth_format_option(ETHASHCUDA)
eth_format_option(JSCONSOLE)
eth_format_option(OLYMPIC)
eth_format_option(SERPENT)
eth_format_option(ETHSTRATUM)
if (JSCONSOLE)
@ -346,7 +345,6 @@ message("-- OLYMPIC Default to the Olympic network ${OLYMPIC}
message("------------------------------------------------------------- components")
message("-- MINER Build miner ${MINER}")
message("-- TOOLS Build basic tools ${TOOLS}")
message("-- SERPENT Build Serpent language components ${SERPENT}")
message("-- GUI Build GUI components ${GUI}")
message("-- TESTS Build tests ${TESTS}")
message("-- ETHASHCL Build OpenCL components ${ETHASHCL}")
@ -389,11 +387,6 @@ if (GENERAL)
add_subdirectory(liblll)
endif ()
if (SERPENT)
add_subdirectory(libserpent)
add_subdirectory(sc)
endif ()
if (TOOLS)
add_subdirectory(lllc)
endif ()

9
libserpent/All.h

@ -1,9 +0,0 @@
#pragma once
#include "compiler.h"
#include "funcs.h"
#include "lllparser.h"
#include "parser.h"
#include "rewriter.h"
#include "tokenize.h"
#include "util.h"

22
libserpent/CMakeLists.txt

@ -1,22 +0,0 @@
# Build script for the serpent library target.
# CMP0015 NEW: relative link_directories() paths are resolved against the source directory.
cmake_policy(SET CMP0015 NEW)
# No Qt moc processing for this target.
set(CMAKE_AUTOMOC OFF)
# Define the STATICLIB preprocessor symbol for every source compiled here.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB")
# Collect every source file of this directory into SRC_LIST.
aux_source_directory(. SRC_LIST)
# Put the parent directory first on the include path.
include_directories(BEFORE ..)
# Target name; despite the variable name this is built as a library below.
set(EXECUTABLE serpent)
# Headers are added to the target and installed further down.
file(GLOB HEADERS "*.h")
add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS})
# Libraries the serpent target links against.
target_link_libraries(${EXECUTABLE} lll)
target_link_libraries(${EXECUTABLE} evmcore)
target_link_libraries(${EXECUTABLE} devcore)
# Install the library and its public headers.
install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )

112
libserpent/bignum.cpp

@ -1,112 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "bignum.h"
// Converts an unsigned integer to its decimal string representation.
std::string unsignedToDecimal(unsigned branch) {
    // Start with the least significant digit, then prepend the rest.
    std::string digits = nums.substr(branch % 10, 1);
    for (unsigned rest = branch / 10; rest > 0; rest /= 10) {
        digits = nums.substr(rest % 10, 1) + digits;
    }
    return digits;
}
// Adds two non-negative decimal numbers given as digit strings.
// The result is as wide as the wider operand, plus one digit if the
// addition carries out of the most significant position.
std::string decimalAdd(std::string a, std::string b) {
    std::string sum = a;
    // Left-pad both operands with zeros to a common width
    if (b.length() < a.length()) b.insert(0, a.length() - b.length(), '0');
    if (sum.length() < b.length()) sum.insert(0, b.length() - sum.length(), '0');
    bool carry = false;
    // Walk from the least significant digit to the most significant one
    for (int pos = sum.length() - 1; pos >= 0; pos--) {
        char digit = sum[pos] + b[pos] - '0';
        if (carry) digit++;
        carry = digit > '9';
        if (carry) digit -= 10;
        sum[pos] = digit;
    }
    if (carry) sum = "1" + sum;
    return sum;
}
// Helper function for decimalMul: multiplies the decimal string `a` by the
// small non-negative integer `dig` through repeated addition.
// A non-positive `dig` yields "0"; previously a negative value (e.g. produced
// from a non-digit character in the multiplier) recursed without bound.
std::string decimalDigitMul(std::string a, int dig) {
    std::string product = "0";
    for (int count = 0; count < dig; count++) {
        product = decimalAdd(a, product);
    }
    return product;
}
// Multiplies two decimal digit strings using schoolbook long multiplication:
// one partial product per digit of `b`, shifted by its place value.
std::string decimalMul(std::string a, std::string b) {
    std::string total = "0";
    const unsigned digitCount = b.length();
    for (unsigned pos = 0; pos < digitCount; pos++) {
        std::string partial = decimalDigitMul(a, b[pos] - '0');
        // Shift the partial product left by the number of digits that follow
        if (partial != "0") partial.append(digitCount - pos - 1, '0');
        total = decimalAdd(total, partial);
    }
    return total;
}
// Modular exponentiation on decimal strings: computes (b ^ e) mod m by
// square-and-multiply, recursing on e / 2.
// The e == "1" case now reduces the base modulo m; it used to return `b`
// unchanged, which is wrong whenever b >= m.
// NOTE(review): e == "0" still returns "1" without reduction, and m == "0"
// is not guarded (decimalMod has no check for a zero divisor).
std::string decimalModExp(std::string b, std::string e, std::string m) {
    if (e == "0") return "1";
    if (e == "1") return decimalMod(b, m);
    // Result for the halved exponent, squared below
    std::string half = decimalModExp(b, decimalDiv(e, "2"), m);
    std::string squared = decimalMul(half, half);
    if (decimalMod(e, "2") == "0") {
        return decimalMod(squared, m);
    }
    // Odd exponent: one extra factor of the base
    return decimalMod(decimalMul(squared, b), m);
}
// Compares two decimal digit strings: is a greater than b?
// When the strings are identical the answer is `eqAllowed`.
std::string::size_type decimalGtUnused_();
bool decimalGt(std::string a, std::string b, bool eqAllowed) {
    if (a == b) return eqAllowed;
    // A longer string is the larger number; equal lengths compare digit-wise
    if (a.length() != b.length()) return a.length() > b.length();
    return a > b;
}
// Subtracts decimal string b from decimal string a using the nines'
// complement of b: a - b == (a + complement(b)) with the leading carry digit
// dropped, plus one.
std::string decimalSub(std::string a, std::string b) {
    if (b == "0") return a;
    if (b == a) return "0";
    // Left-pad b with zeros up to the width of a
    if (b.length() < a.length()) b.insert(0, a.length() - b.length(), '0');
    std::string complement = b;
    for (unsigned k = 0; k < complement.length(); k++) {
        complement[k] = '0' + ('9' - complement[k]);
    }
    std::string withCarry = decimalAdd(a, complement);
    std::string diff = decimalAdd(withCarry.substr(1), "1");
    // Strip leading zeros but always keep at least one digit
    std::string::size_type firstSignificant = diff.find_first_not_of('0');
    if (firstSignificant == std::string::npos) {
        if (diff.size() > 1) diff = diff.substr(diff.size() - 1);
    }
    else {
        diff = diff.substr(firstSignificant);
    }
    return diff;
}
// Divides the two strings representing decimal values (integer division,
// a / b), by recursive long division:
//   1. scale the divisor by powers of ten while it still fits into a,
//   2. count by repeated subtraction how often the scaled divisor fits,
//   3. recurse on what is left of a for the lower-order part of the quotient.
// NOTE(review): there is no guard for b == "0"; in that case the subtraction
// loop below never changes a and therefore never terminates.
std::string decimalDiv(std::string a, std::string b) {
std::string c = b;
// Divisor larger than dividend: quotient is zero (also ends the recursion)
if (decimalGt(c, a)) return "0";
// zeroes counts how many zeros were appended to the divisor, minus one,
// because the loop overshoots by one step
int zeroes = -1;
while (decimalGt(a, c, true)) {
zeroes += 1;
c = c + "0";
}
// Undo the overshoot: c is now the largest b * 10^zeroes that is <= a
c = c.substr(0, c.size() - 1);
std::string quot = "0";
// Count how many times the scaled divisor fits into a
while (decimalGt(a, c, true)) {
a = decimalSub(a, c);
quot = decimalAdd(quot, "1");
}
// Scale the count back up by 10^zeroes
for (int i = 0; i < zeroes; i++) quot += "0";
// a now holds the remainder with respect to c; divide it by the original b
return decimalAdd(quot, decimalDiv(a, b));
}
// Remainder of a divided by b for decimal digit strings,
// computed as a - (a / b) * b.
std::string decimalMod(std::string a, std::string b) {
    std::string quotient = decimalDiv(a, b);
    std::string wholePart = decimalMul(quotient, b);
    return decimalSub(a, wholePart);
}
// Converts a decimal digit string to an unsigned integer.
// An empty string yields 0; values beyond the range of `unsigned` wrap around.
unsigned decimalToUnsigned(std::string a) {
    unsigned value = 0;
    // Accumulate from the most significant digit downwards
    for (std::string::size_type k = 0; k < a.size(); k++) {
        value = value * 10 + (a[k] - '0');
    }
    return value;
}

41
libserpent/bignum.h

@ -1,41 +0,0 @@
#ifndef ETHSERP_BIGNUM
#define ETHSERP_BIGNUM
// Digit characters indexed by their value; used for integer -> string conversion.
const std::string nums = "0123456789";
// 2^256 as a decimal string
const std::string tt256 =
"115792089237316195423570985008687907853269984665640564039457584007913129639936"
;
// 2^256 - 1 as a decimal string
const std::string tt256m1 =
"115792089237316195423570985008687907853269984665640564039457584007913129639935"
;
// 2^255 as a decimal string
const std::string tt255 =
"57896044618658097711785492504343953926634992332820282019728792003956564819968";
// 2^176 as a decimal string
const std::string tt176 =
"95780971304118053647396689196894323976171195136475136";
std::string unsignedToDecimal(unsigned branch);
std::string decimalAdd(std::string a, std::string b);
std::string decimalMul(std::string a, std::string b);
std::string decimalSub(std::string a, std::string b);
std::string decimalDiv(std::string a, std::string b);
std::string decimalMod(std::string a, std::string b);
std::string decimalModExp(std::string b, std::string e, std::string m);
bool decimalGt(std::string a, std::string b, bool eqAllowed=false);
unsigned decimalToUnsigned(std::string a);
#define utd unsignedToDecimal
#define dtu decimalToUnsigned
#endif

510
libserpent/compiler.cpp

@ -1,510 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "bignum.h"
#include "opcodes.h"
// Auxiliary data that is gathered while compiling
struct programAux {
// While compiling: variable name -> memory address (decimal string).
// While resolving labels (buildDict/substDict): label name -> byte offset.
std::map<std::string, std::string> vars;
// Next free memory address for a variable; starts at 32 and grows by 32
int nextVarMem;
// Set once an alloc (or an lll block with a non-"0" second argument) is seen
bool allocUsed;
// Read by finalize; initialised to false by Aux()
bool calldataUsed;
// Running byte offset while buildDict walks the fragment tree
int step;
// NOTE(review): not assigned by Aux(); the label width is passed around
// as a separate function argument instead
int labelLength;
};
// Auxiliary data that gets passed down vertically
// but not back up
struct programVerticalAux {
// Current stack height relative to the start of compilation
int height;
std::string innerScopeName;
// Stack variable name -> stack height at which it was bound (see "with")
std::map<std::string, int> dupvars;
std::map<std::string, int> funvars;
std::vector<mss> scopes;
};
// Compilation result
struct programData {
// Bookkeeping state after compiling this fragment
programAux aux;
// The compiled code fragment tree
Node code;
// Number of values the fragment leaves on the stack
int outs;
};
// Builds a programAux in its initial state: no variables, nothing allocated,
// byte offset zero, and the first variable slot at memory address 32.
// Every scalar member is initialised, including labelLength, because the
// struct is copied by value throughout the compiler and copying an
// uninitialised int reads an indeterminate value.
programAux Aux() {
    programAux o;
    o.allocUsed = false;
    o.calldataUsed = false;
    o.step = 0;
    o.nextVarMem = 32;
    o.labelLength = 0;
    return o;
}
// Builds the initial top-down context: stack height zero and no
// stack variables, function variables or scopes.
programVerticalAux verticalAux() {
    programVerticalAux fresh;
    fresh.scopes = std::vector<mss>();
    fresh.funvars = std::map<std::string, int>();
    fresh.dupvars = std::map<std::string, int>();
    fresh.height = 0;
    return fresh;
}
// Bundles bookkeeping state, a code fragment and its stack output count
// into a programData value.
programData pd(programAux aux = Aux(), Node code=token("_"), int outs=0) {
    programData result;
    result.outs = outs;
    result.code = code;
    result.aux = aux;
    return result;
}
// Wraps the first `len` entries of `nodes` into a single "_" sequence node
// carrying the metadata `met`.
Node multiToken(Node nodes[], int len, Metadata met) {
    std::vector<Node> collected;
    int idx = 0;
    while (idx < len) {
        collected.push_back(nodes[idx]);
        idx++;
    }
    return astnode("_", collected, met);
}
Node finalize(programData c);
// Appends a POP to a fragment so that the value it leaves on the stack
// is discarded.
Node popwrap(Node node) {
    std::vector<Node> sequence;
    sequence.push_back(node);
    sequence.push_back(token("POP", node.metadata));
    return astnode("_", sequence, node.metadata);
}
// Turns LLL tree into tree of code fragments.
//
// node: the LLL (sub)tree to compile.
// aux:  bookkeeping threaded through the whole compilation; the updated
//       copy is handed back in the result's `aux` field.
// vaux: context passed down to children only (stack height, stack variables).
//
// Returns a programData whose `code` is the fragment tree and whose `outs`
// is the number of values the fragment leaves on the stack.
// Tokens of the form "$label" and "~label" emitted here are label references
// and label definitions; buildDict and substDict resolve them later.
programData opcodeify(Node node,
programAux aux=Aux(),
programVerticalAux vaux=verticalAux()) {
// Unique suffix so that labels generated for this node cannot clash
std::string symb = "_"+mkUniqueToken();
Metadata m = node.metadata;
// Numbers
if (node.type == TOKEN) {
return pd(aux, nodeToNumeric(node), 1);
}
else if (node.val == "ref" || node.val == "get" || node.val == "set") {
std::string varname = node.args[0].val;
// Determine reference to variable
// (first use of a name claims the next 32-byte memory slot)
if (!aux.vars.count(node.args[0].val)) {
aux.vars[node.args[0].val] = utd(aux.nextVarMem);
aux.nextVarMem += 32;
}
Node varNode = tkn(aux.vars[varname], m);
//std::cerr << varname << " " << printSimple(varNode) << "\n";
// Set variable
if (node.val == "set") {
programData sub = opcodeify(node.args[1], aux, vaux);
if (!sub.outs)
err("Value to set variable must have nonzero arity!", m);
// What if we are setting a stack variable?
if (vaux.dupvars.count(node.args[0].val)) {
// Distance from the top of the stack down to the variable
int h = vaux.height - vaux.dupvars[node.args[0].val];
if (h > 16) err("Too deep for stack variable (max 16)", m);
// Swap the new value into the variable's slot, then drop the old value
Node nodelist[] = {
sub.code,
token("SWAP"+unsignedToDecimal(h), m),
token("POP", m)
};
return pd(sub.aux, multiToken(nodelist, 3, m), 0);
}
// Setting a memory variable
else {
Node nodelist[] = {
sub.code,
varNode,
token("MSTORE", m),
};
return pd(sub.aux, multiToken(nodelist, 3, m), 0);
}
}
// Get variable
else if (node.val == "get") {
// Getting a stack variable
if (vaux.dupvars.count(node.args[0].val)) {
int h = vaux.height - vaux.dupvars[node.args[0].val];
if (h > 16) err("Too deep for stack variable (max 16)", m);
// NOTE(review): unlike the other tokens, this DUP token is created
// without metadata
return pd(aux, token("DUP"+unsignedToDecimal(h)), 1);
}
// Getting a memory variable
else {
Node nodelist[] =
{ varNode, token("MLOAD", m) };
return pd(aux, multiToken(nodelist, 2, m), 1);
}
}
// Refer variable
else if (node.val == "ref") {
if (vaux.dupvars.count(node.args[0].val))
err("Cannot ref stack variable!", m);
return pd(aux, varNode, 1);
}
}
// Comments do nothing
else if (node.val == "comment") {
return pd(aux, astnode("_", m), 0);
}
// Custom operation sequence
// eg. (ops bytez id msize swap1 msize add 0 swap1 mstore) == alloc
if (node.val == "ops") {
std::vector<Node> subs2;
// Net stack effect of the whole sequence
int depth = 0;
for (unsigned i = 0; i < node.args.size(); i++) {
std::string op = upperCase(node.args[i].val);
// Sub-expressions and anything that is not a known opcode get compiled
// recursively; known opcodes are emitted verbatim
if (node.args[i].type == ASTNODE || opinputs(op) == -1) {
programVerticalAux vaux2 = vaux;
vaux2.height = vaux.height - i - 1 + node.args.size();
programData sub = opcodeify(node.args[i], aux, vaux2);
aux = sub.aux;
depth += sub.outs;
subs2.push_back(sub.code);
}
else {
subs2.push_back(token(op, m));
depth += opoutputs(op) - opinputs(op);
}
}
if (depth < 0 || depth > 1) err("Stack depth mismatch", m);
// NOTE(review): outs is reported as 0 even when depth is 1 - confirm
// that this is intended
return pd(aux, astnode("_", subs2, m), 0);
}
// Code blocks
if (node.val == "lll" && node.args.size() == 2) {
if (node.args[1].val != "0") aux.allocUsed = true;
std::vector<Node> o;
// The inner program is compiled with fresh auxiliary state
o.push_back(finalize(opcodeify(node.args[0])));
programData sub = opcodeify(node.args[1], aux, vaux);
Node code = astnode("____CODE", o, m);
// Push the code length twice, copy the embedded code to the address given
// by the second argument, then jump over the embedded code
Node nodelist[] = {
token("$begincode"+symb+".endcode"+symb, m), token("DUP1", m),
token("$begincode"+symb, m), sub.code, token("CODECOPY", m),
token("$endcode"+symb, m), token("JUMP", m),
token("~begincode"+symb, m), code,
token("~endcode"+symb, m), token("JUMPDEST", m)
};
return pd(sub.aux, multiToken(nodelist, 11, m), 1);
}
// Stack variables
if (node.val == "with") {
programData initial = opcodeify(node.args[1], aux, vaux);
programVerticalAux vaux2 = vaux;
// Remember at which stack height the variable lives
vaux2.dupvars[node.args[0].val] = vaux.height;
vaux2.height += 1;
if (!initial.outs)
err("Initial variable value must have nonzero arity!", m);
programData sub = opcodeify(node.args[2], initial.aux, vaux2);
Node nodelist[] = {
initial.code,
sub.code
};
programData o = pd(sub.aux, multiToken(nodelist, 2, m), sub.outs);
// Remove the variable from the stack; if the body left a result on top,
// swap it below the variable first so that the result survives the POP
if (sub.outs)
o.code.args.push_back(token("SWAP1", m));
o.code.args.push_back(token("POP", m));
return o;
}
// Seq of multiple statements
if (node.val == "seq") {
std::vector<Node> children;
int lastOut = 0;
for (unsigned i = 0; i < node.args.size(); i++) {
programData sub = opcodeify(node.args[i], aux, vaux);
aux = sub.aux;
// Only the last statement may leave a value; earlier results are popped
if (sub.outs == 1) {
if (i < node.args.size() - 1) sub.code = popwrap(sub.code);
else lastOut = 1;
}
children.push_back(sub.code);
}
return pd(aux, astnode("_", children, m), lastOut);
}
// 2-part conditional (if gets rewritten to unless in rewrites)
else if (node.val == "unless" && node.args.size() == 2) {
programData cond = opcodeify(node.args[0], aux, vaux);
programData action = opcodeify(node.args[1], cond.aux, vaux);
aux = action.aux;
if (!cond.outs) err("Condition of if/unless statement has arity 0", m);
if (action.outs) action.code = popwrap(action.code);
// Jump past the action when the condition is nonzero
Node nodelist[] = {
cond.code,
token("$endif"+symb, m), token("JUMPI", m),
action.code,
token("~endif"+symb, m), token("JUMPDEST", m)
};
return pd(aux, multiToken(nodelist, 6, m), 0);
}
// 3-part conditional
else if (node.val == "if" && node.args.size() == 3) {
programData ifd = opcodeify(node.args[0], aux, vaux);
programData thend = opcodeify(node.args[1], ifd.aux, vaux);
programData elsed = opcodeify(node.args[2], thend.aux, vaux);
aux = elsed.aux;
if (!ifd.outs)
err("Condition of if/unless statement has arity 0", m);
// Handle cases where one conditional outputs something
// and the other does not
int outs = (thend.outs && elsed.outs) ? 1 : 0;
if (thend.outs > outs) thend.code = popwrap(thend.code);
if (elsed.outs > outs) elsed.code = popwrap(elsed.code);
Node nodelist[] = {
ifd.code,
token("ISZERO", m),
token("$else"+symb, m), token("JUMPI", m),
thend.code,
token("$endif"+symb, m), token("JUMP", m),
token("~else"+symb, m), token("JUMPDEST", m),
elsed.code,
token("~endif"+symb, m), token("JUMPDEST", m)
};
return pd(aux, multiToken(nodelist, 12, m), outs);
}
// While (rewritten to this in rewrites)
else if (node.val == "until") {
programData cond = opcodeify(node.args[0], aux, vaux);
programData action = opcodeify(node.args[1], cond.aux, vaux);
aux = action.aux;
if (!cond.outs)
err("Condition of while/until loop has arity 0", m);
if (action.outs) action.code = popwrap(action.code);
// Loop: leave as soon as the condition is nonzero, otherwise run the
// action and jump back to the start
Node nodelist[] = {
token("~beg"+symb, m), token("JUMPDEST", m),
cond.code,
token("$end"+symb, m), token("JUMPI", m),
action.code,
token("$beg"+symb, m), token("JUMP", m),
token("~end"+symb, m), token("JUMPDEST", m),
};
// outs defaults to 0 here
return pd(aux, multiToken(nodelist, 10, m));
}
// Memory allocations
else if (node.val == "alloc") {
programData bytez = opcodeify(node.args[0], aux, vaux);
aux = bytez.aux;
if (!bytez.outs)
err("Alloc input has arity 0", m);
aux.allocUsed = true;
// Same opcode sequence as the "ops" example above
Node nodelist[] = {
bytez.code,
token("MSIZE", m), token("SWAP1", m), token("MSIZE", m),
token("ADD", m),
token("0", m), token("SWAP1", m), token("MSTORE", m)
};
return pd(aux, multiToken(nodelist, 8, m), 1);
}
// All other functions/operators
else {
std::vector<Node> subs2;
int depth = opinputs(upperCase(node.val));
if (depth == -1)
err("Not a function or opcode: "+node.val, m);
if ((int)node.args.size() != depth)
err("Invalid arity for "+node.val, m);
// Arguments are compiled last-to-first, so the first argument is
// evaluated last
for (int i = node.args.size() - 1; i >= 0; i--) {
programVerticalAux vaux2 = vaux;
// Account for the arguments that were already compiled
vaux2.height = vaux.height - i - 1 + node.args.size();
programData sub = opcodeify(node.args[i], aux, vaux2);
aux = sub.aux;
if (!sub.outs)
err("Input "+unsignedToDecimal(i)+" has arity 0", sub.code.metadata);
subs2.push_back(sub.code);
}
subs2.push_back(token(upperCase(node.val), m));
int outdepth = opoutputs(upperCase(node.val));
return pd(aux, astnode("_", subs2, m), outdepth);
}
}
// Wraps a compiled program with the prologue it needs.
// When the program uses variables together with alloc or calldata, a zero
// byte is first stored at the last variable address (nextVarMem - 1).
Node finalize(programData c) {
    Metadata m = c.code.metadata;
    std::vector<Node> bottom;
    const bool hasVars = c.aux.vars.size() > 0;
    const bool needsPrefill = c.aux.allocUsed || c.aux.calldataUsed;
    if (needsPrefill && hasVars) {
        Node prefill[] = {
            token("0", m),
            token(unsignedToDecimal(c.aux.nextVarMem - 1)),
            token("MSTORE8", m)
        };
        bottom.push_back(multiToken(prefill, 3, m));
    }
    // The program itself follows the optional prologue
    bottom.push_back(c.code);
    return astnode("_", bottom, m);
}
// LLL -> code fragment tree: compile the tree, then add the program wrappers
Node buildFragmentTree(Node node) {
    programData compiled = opcodeify(node);
    return finalize(compiled);
}
// Walks the fragment tree and records, in aux.vars, the byte offset at which
// every "~label" definition ends up; aux.step is the running byte offset.
void buildDict(Node program, programAux &aux, int labelLength) {
    Metadata m = program.metadata;
    if (program.type != TOKEN) {
        // A sub-program ("____CODE", ie. LLL) counts its offsets from zero;
        // the enclosing offset is added back once it has been walked
        const bool isSubProgram = program.val == "____CODE";
        int outerStep = aux.step;
        if (isSubProgram) aux.step = 0;
        for (unsigned child = 0; child < program.args.size(); child++) {
            buildDict(program.args[child], aux, labelLength);
        }
        if (isSubProgram) aux.step += outerStep;
        return;
    }
    // Token: a number takes one push opcode plus its bytes
    if (isNumberLike(program)) {
        aux.step += 1 + toByteArr(program.val, m).size();
        return;
    }
    switch (program.val[0]) {
        case '~':
            // Label definition: occupies no bytes, just remember the offset
            aux.vars[program.val.substr(1)] = unsignedToDecimal(aux.step);
            break;
        case '$':
            // Label reference: one push opcode plus labelLength bytes
            aux.step += labelLength + 1;
            break;
        default:
            // Plain opcode
            aux.step += 1;
            break;
    }
}
// Applies that dictionary
void substDict(Node program, programAux aux, int labelLength, std::vector<Node> &out) {
Metadata m = program.metadata;
std::vector<Node> inner;
if (program.type == TOKEN) {
if (program.val[0] == '$') {
std::string tokStr = "PUSH"+unsignedToDecimal(labelLength);
out.push_back(token(tokStr, m));
int dotLoc = program.val.find('.');
if (dotLoc == -1) {
std::string val = aux.vars[program.val.substr(1)];
inner = toByteArr(val, m, labelLength);
}
else {
std::string start = aux.vars[program.val.substr(1, dotLoc-1)],
end = aux.vars[program.val.substr(dotLoc + 1)],
dist = decimalSub(end, start);
inner = toByteArr(dist, m, labelLength);
}
for (unsigned i = 0; i < inner.size(); i++) out.push_back(inner[i]);
}
else if (program.val[0] == '~') { }
else if (isNumberLike(program)) {
inner = toByteArr(program.val, m);
out.push_back(token("PUSH"+unsignedToDecimal(inner.size())));
for (unsigned i = 0; i < inner.size(); i++) out.push_back(inner[i]);
}
else out.push_back(program);
}
else {
for (unsigned i = 0; i < program.args.size(); i++) {
substDict(program.args[i], aux, labelLength, out);
}
}
}
// Compiled fragtree -> flat token list without labels.
// The label width in bytes is chosen from an upper estimate of the code
// size (four times the tree size).
std::vector<Node> dereference(Node program) {
    int labelLength = 1;
    for (int estimate = treeSize(program) * 4; estimate >= 256; estimate /= 256) {
        labelLength += 1;
    }
    programAux labels = Aux();
    buildDict(program, labels, labelLength);
    std::vector<Node> flat;
    substDict(program, labels, labelLength, flat);
    return flat;
}
// Opcodes -> bin
std::string serialize(std::vector<Node> codons) {
std::string o;
for (unsigned i = 0; i < codons.size(); i++) {
int v;
if (isNumberLike(codons[i])) {
v = decimalToUnsigned(codons[i].val);
}
else if (codons[i].val.substr(0,4) == "PUSH") {
v = 95 + decimalToUnsigned(codons[i].val.substr(4));
}
else {
v = opcode(codons[i].val);
}
o += (char)v;
}
return o;
}
// Bin -> opcodes
std::vector<Node> deserialize(std::string ser) {
std::vector<Node> o;
int backCount = 0;
for (unsigned i = 0; i < ser.length(); i++) {
unsigned char v = (unsigned char)ser[i];
std::string oper = op((int)v);
if (oper != "" && backCount <= 0) o.push_back(token(oper));
else if (v >= 96 && v < 128 && backCount <= 0) {
o.push_back(token("PUSH"+unsignedToDecimal(v - 95)));
}
else o.push_back(token(unsignedToDecimal(v)));
if (v >= 96 && v < 128 && backCount <= 0) {
backCount = v - 95;
}
else backCount--;
}
return o;
}
// Fragtree -> bin: resolve the labels, then serialize the flat token list
std::string assemble(Node fragTree) {
    std::vector<Node> flat = dereference(fragTree);
    return serialize(flat);
}
// Fragtree -> tokens: the label-free token list, not serialized
std::vector<Node> prettyAssemble(Node fragTree) {
    std::vector<Node> flat = dereference(fragTree);
    return flat;
}
// LLL -> bin: build the fragment tree and assemble it
std::string compileLLL(Node program) {
    Node fragTree = buildFragmentTree(program);
    return assemble(fragTree);
}
// LLL -> tokens: build the fragment tree and return its label-free tokens
std::vector<Node> prettyCompileLLL(Node program) {
    Node fragTree = buildFragmentTree(program);
    return prettyAssemble(fragTree);
}
// Converts a list of integer values to binary transaction data
std::string encodeDatalist(std::vector<std::string> vals) {
std::string o;
for (unsigned i = 0; i < vals.size(); i++) {
std::vector<Node> n = toByteArr(strToNumeric(vals[i]), Metadata(), 32);
for (unsigned j = 0; j < n.size(); j++) {
int v = decimalToUnsigned(n[j].val);
o += (char)v;
}
}
return o;
}
// Converts binary transaction data into a list of integer values.
// The input is read in 32-byte big-endian words; each word becomes one
// decimal string. A trailing partial word is treated as if it were padded
// with zero bytes on the right, instead of indexing past the end of `ser`
// as the previous version did.
std::vector<std::string> decodeDatalist(std::string ser) {
    std::vector<std::string> out;
    const std::string::size_type total = ser.length();
    for (std::string::size_type i = 0; i < total; i += 32) {
        std::string o = "0";
        for (std::string::size_type j = i; j < i + 32; j++) {
            // Bytes missing from a short final word count as zero
            int vj = j < total ? (int)(unsigned char)ser[j] : 0;
            o = decimalAdd(decimalMul(o, "256"), unsignedToDecimal(vj));
        }
        out.push_back(o);
    }
    return out;
}

40
libserpent/compiler.h

@ -1,40 +0,0 @@
#ifndef ETHSERP_COMPILER
#define ETHSERP_COMPILER
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
// Compiled fragtree -> compiled fragtree without labels
std::vector<Node> dereference(Node program);
// LLL -> fragtree
Node buildFragmentTree(Node program);
// opcodes -> bin
std::string serialize(std::vector<Node> codons);
// Fragtree -> bin
std::string assemble(Node fragTree);
// Fragtree -> opcodes
std::vector<Node> prettyAssemble(Node fragTree);
// LLL -> bin
std::string compileLLL(Node program);
// LLL -> opcodes
std::vector<Node> prettyCompileLLL(Node program);
// bin -> opcodes
std::vector<Node> deserialize(std::string ser);
// Converts a list of integer values to binary transaction data
std::string encodeDatalist(std::vector<std::string> vals);
// Converts binary transaction data into a list of integer values
std::vector<std::string> decodeDatalist(std::string ser);
#endif

35
libserpent/funcs.cpp

@ -1,35 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include "funcs.h"
#include "bignum.h"
#include "util.h"
#include "parser.h"
#include "lllparser.h"
#include "compiler.h"
#include "rewriter.h"
#include "tokenize.h"
// Serpent source -> LLL tree: parse, then apply the rewrite rules
Node compileToLLL(std::string input) {
    Node ast = parseSerpent(input);
    return rewrite(ast);
}
// Serpent source -> LLL tree, using the chunk variant of the rewriter
Node compileChunkToLLL(std::string input) {
    Node ast = parseSerpent(input);
    return rewriteChunk(ast);
}
std::string compile(std::string input) {
return compileLLL(compileToLLL(input));
}
std::vector<Node> prettyCompile(std::string input) {
return prettyCompileLLL(compileToLLL(input));
}
std::string compileChunk(std::string input) {
return compileLLL(compileChunkToLLL(input));
}
std::vector<Node> prettyCompileChunk(std::string input) {
return prettyCompileLLL(compileChunkToLLL(input));
}

35
libserpent/funcs.h

@ -1,35 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include "bignum.h"
#include "util.h"
#include "parser.h"
#include "lllparser.h"
#include "compiler.h"
#include "rewriter.h"
#include "tokenize.h"
// Function listing:
//
// parseSerpent (serpent -> AST) std::string -> Node
// parseLLL (LLL -> AST) std::string -> Node
// rewrite (apply rewrite rules) Node -> Node
// compileToLLL (serpent -> LLL) std::string -> Node
// compileLLL (LLL -> EVMhex) Node -> std::string
// prettyCompileLLL (LLL -> EVMasm) Node -> std::vector<Node>
// prettyCompile (serpent -> EVMasm) std::string -> std::vector<Node>
// compile (serpent -> EVMhex) std::string -> std::string
// get_file_contents (filename -> file) std::string -> std::string
// exists (does file exist?) std::string -> bool
Node compileToLLL(std::string input);
Node compileChunkToLLL(std::string input);
std::string compile(std::string input);
std::vector<Node> prettyCompile(std::string input);
std::string compileChunk(std::string input);
std::vector<Node> prettyCompileChunk(std::string input);

203
libserpent/functions.cpp

@ -1,203 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "lllparser.h"
#include "bignum.h"
#include "optimize.h"
#include "rewriteutils.h"
#include "preprocess.h"
#include "functions.h"
// Derives a signature string from a function's argument list, one character
// per argument: 's' for "name:s", 'a' for "name:a", and 'i' for everything else.
std::string getSignature(std::vector<Node> args) {
    std::string signature;
    for (unsigned idx = 0; idx < args.size(); idx++) {
        char kind = 'i';
        if (args[idx].val == ":") {
            // The type tag is the second child of the ":" node
            if (args[idx].args[1].val == "s") kind = 's';
            else if (args[idx].args[1].val == "a") kind = 'a';
        }
        signature += kind;
    }
    return signature;
}
// Convert a list of arguments into a node containing a
// < datastart, datasz > pair.
//
// args:  the call arguments; "=" nodes are skipped, ":" nodes carry a
//        variable-sized value together with its length.
// sig:   one character per argument ('i' int, 's' string, 'a' array);
//        an empty signature treats every argument as 'i'.
// funId: stored in the first byte of the packed data.
// m:     metadata attached to the generated node.
//
// The generated code lays the data out as: one byte holding the function id,
// one 32-byte size word per variable-sized argument, one 32-byte word per
// plain argument, then the variable-sized payloads.
Node packArguments(std::vector<Node> args, std::string sig,
int funId, Metadata m) {
// Plain old 32 byte arguments
std::vector<Node> nargs;
// Variable-sized arguments
std::vector<Node> vargs;
// Variable sizes
std::vector<Node> sizes;
// Is a variable an array?
std::vector<bool> isArray;
// Fill up the argument lists above
int argCount = 0;
for (unsigned i = 0; i < args.size(); i++) {
// Note: shadows the parameter m for error reporting inside the loop
Metadata m = args[i].metadata;
if (args[i].val == "=") {
// do nothing
}
else {
// Determine the correct argument type
char argType;
if (sig.size() > 0) {
if (argCount >= (signed)sig.size())
err("Too many args", m);
argType = sig[argCount];
}
else argType = 'i';
// Integer (also usable for short strings)
if (argType == 'i') {
if (args[i].val == ":")
err("Function asks for int, provided string or array", m);
nargs.push_back(args[i]);
}
// Long string
else if (argType == 's') {
if (args[i].val != ":")
err("Must specify string length", m);
vargs.push_back(args[i].args[0]);
sizes.push_back(args[i].args[1]);
isArray.push_back(false);
}
// Array
else if (argType == 'a') {
if (args[i].val != ":")
err("Must specify array length", m);
vargs.push_back(args[i].args[0]);
sizes.push_back(args[i].args[1]);
isArray.push_back(true);
}
else err("Invalid arg type in signature", m);
argCount++;
}
}
// Function id byte plus one 32-byte word per size and per plain argument
int static_arg_size = 1 + (vargs.size() + nargs.size()) * 32;
// Start off by saving the size variables and calculating the total
msn kwargs;
kwargs["funid"] = tkn(utd(funId), m);
std::string pattern =
"(with _sztot "+utd(static_arg_size)+" "
" (with _sizes (alloc "+utd(sizes.size() * 32)+") "
" (seq ";
for (unsigned i = 0; i < sizes.size(); i++) {
// Array sizes count 32-byte elements, string sizes count bytes
std::string sizeIncrement =
isArray[i] ? "(mul 32 _x)" : "_x";
pattern +=
"(with _x $sz"+utd(i)+"(seq "
" (mstore (add _sizes "+utd(i * 32)+") _x) "
" (set _sztot (add _sztot "+sizeIncrement+" )))) ";
kwargs["sz"+utd(i)] = sizes[i];
}
// Allocate memory, and set first data byte
pattern +=
"(with _datastart (alloc (add _sztot 32)) (seq "
" (mstore8 _datastart $funid) ";
// Copy over size variables
for (unsigned i = 0; i < sizes.size(); i++) {
// Offset 1 skips the function id byte; v-1 is the offset within _sizes
int v = 1 + i * 32;
pattern +=
" (mstore "
" (add _datastart "+utd(v)+") "
" (mload (add _sizes "+utd(v-1)+"))) ";
}
// Store normal arguments
for (unsigned i = 0; i < nargs.size(); i++) {
// Plain arguments follow the size words
int v = 1 + (i + sizes.size()) * 32;
pattern +=
" (mstore (add _datastart "+utd(v)+") $"+utd(i)+") ";
kwargs[utd(i)] = nargs[i];
}
// Loop through variable-sized arguments, store them
pattern +=
" (with _pos (add _datastart "+utd(static_arg_size)+") (seq";
for (unsigned i = 0; i < vargs.size(); i++) {
// Number of bytes to copy: stored size, times 32 for arrays
std::string copySize =
isArray[i] ? "(mul 32 (mload (add _sizes "+utd(i * 32)+")))"
: "(mload (add _sizes "+utd(i * 32)+"))";
pattern +=
" (unsafe_mcopy _pos $vl"+utd(i)+" "+copySize+") "
" (set _pos (add _pos "+copySize+")) ";
kwargs["vl"+utd(i)] = vargs[i];
}
// Return a 2-item array containing the start and size
pattern += " (array_lit _datastart _sztot))))))))";
std::string prefix = "_temp_"+mkUniqueToken();
// Fill in the pattern and return the resulting node
return subst(parseLLL(pattern), kwargs, prefix, m);
}
// Create a node for argument unpacking.
//
// vars: the declared parameters; a ":" node with tag "s" (string) or "a"
//       (array) declares a variable-sized parameter, anything else a plain one.
// m:    metadata attached to the generated nodes.
//
// Returns a "seq" node that loads every parameter from the call data. The
// length words of the variable-sized parameters are read first, followed by
// the plain parameters; the variable-sized payloads are copied last.
Node unpackArguments(std::vector<Node> vars, Metadata m) {
std::vector<std::string> varNames;
std::vector<std::string> longVarNames;
std::vector<bool> longVarIsArray;
// Fill in variable and long variable names, as well as which
// long variables are arrays and which are strings
for (unsigned i = 0; i < vars.size(); i++) {
if (vars[i].val == ":") {
if (vars[i].args.size() != 2)
err("Malformed def!", m);
longVarNames.push_back(vars[i].args[0].val);
std::string tag = vars[i].args[1].val;
if (tag == "s")
longVarIsArray.push_back(false);
else if (tag == "a")
longVarIsArray.push_back(true);
else
err("Function value can only be string or array", m);
}
else {
varNames.push_back(vars[i].val);
}
}
std::vector<Node> sub;
if (!varNames.size() && !longVarNames.size()) {
// do nothing if we have no arguments
}
else {
// Long variables first, then plain ones: this is the order of the
// 32-byte words in the call data
std::vector<Node> varNodes;
for (unsigned i = 0; i < longVarNames.size(); i++)
varNodes.push_back(token(longVarNames[i], m));
for (unsigned i = 0; i < varNames.size(); i++)
varNodes.push_back(token(varNames[i], m));
// Copy over variable lengths and short variables
for (unsigned i = 0; i < varNodes.size(); i++) {
// Offset 1 skips the first byte of the call data
int pos = 1 + i * 32;
// For long variables the word is a length, stored as _len_<name>
std::string prefix = (i < longVarNames.size()) ? "_len_" : "";
sub.push_back(asn("untyped", asn("set",
token(prefix+varNodes[i].val, m),
asn("calldataload", tkn(utd(pos), m), m),
m)));
}
// Copy over long variables
if (longVarNames.size() > 0) {
std::vector<Node> sub2;
// The payloads start right after all the 32-byte words
int pos = varNodes.size() * 32 + 1;
// Running call data offset of the next payload
Node tot = tkn("_tot", m);
for (unsigned i = 0; i < longVarNames.size(); i++) {
Node var = tkn(longVarNames[i], m);
// Payload size in bytes: the length, times 32 for arrays
Node varlen = longVarIsArray[i]
? asn("mul", tkn("32", m), tkn("_len_"+longVarNames[i], m))
: tkn("_len_"+longVarNames[i], m);
sub2.push_back(asn("untyped",
asn("set", var, asn("alloc", varlen))));
sub2.push_back(asn("calldatacopy", var, tot, varlen));
sub2.push_back(asn("set", tot, asn("add", tot, varlen)));
}
std::string prefix = "_temp_"+mkUniqueToken();
sub.push_back(subst(
astnode("with", tot, tkn(utd(pos), m), asn("seq", sub2)),
msn(),
prefix,
m));
}
}
return asn("seq", sub, m);
}

39
libserpent/functions.h

@ -1,39 +0,0 @@
#ifndef ETHSERP_FUNCTIONS
#define ETHSERP_FUNCTIONS
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "lllparser.h"
#include "bignum.h"
#include "optimize.h"
#include "rewriteutils.h"
#include "preprocess.h"
// Bundle of three nodes: a preamble plus the start and the size of the
// packed argument data.
class argPack {
    public:
        Node pre;
        Node datastart;
        Node datasz;
        argPack(Node a, Node b, Node c)
            : pre(a), datastart(b), datasz(c) {}
};
// Get a signature from a function
std::string getSignature(std::vector<Node> args);
// Convert a list of arguments into a node that evaluates to a
// <datastart, datasz> pair, given the signature of a function
Node packArguments(std::vector<Node> args, std::string sig,
int funId, Metadata m);
// Create a node for argument unpacking
Node unpackArguments(std::vector<Node> vars, Metadata m);
#endif

70
libserpent/lllparser.cpp

@ -1,70 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "lllparser.h"
#include "tokenize.h"
// Result of one _parse step
struct _parseOutput {
// The subtree that was parsed
Node node;
// Index of the first token after that subtree
int newpos;
};
// Helper, returns subtree and position of start of next node.
//
// inp: the token stream (taken by const reference so that it is not copied
//      on every recursive call).
// pos: index of the token at which the subtree starts.
//
// Fixes over the previous version:
//  - the bracket kind is taken from the opening token itself; it used to be
//    read after advancing, i.e. from the token following the bracket;
//  - the list ends at the matching kind of closing bracket; "]" was never
//    recognised before;
//  - running off the end of the token stream is reported instead of
//    indexing out of bounds.
_parseOutput _parse(const std::vector<Node> &inp, int pos) {
    _parseOutput o;
    const int count = (int)inp.size();
    if (pos >= count) {
        std::cerr << "Error: unexpected end of input\n";
        o.newpos = pos + 1;
        return o;
    }
    Metadata met = inp[pos].metadata;
    // Bracket: keep grabbing tokens until we get to the
    // corresponding closing bracket
    if (inp[pos].val == "(" || inp[pos].val == "[") {
        std::string fun, rbrack;
        std::vector<Node> args;
        // Square brackets are shorthand for an "access" call
        if (inp[pos].val == "[") {
            fun = "access";
            rbrack = "]";
        }
        else rbrack = ")";
        pos += 1;
        // First argument is the function
        while (pos < count && inp[pos].val != rbrack) {
            _parseOutput po = _parse(inp, pos);
            // type 1 is presumably ASTNODE, ie. a nested list in function
            // position - confirm against util.h
            if (fun.length() == 0 && po.node.type == 1) {
                std::cerr << "Error: first arg must be function\n";
                fun = po.node.val;
            }
            else if (fun.length() == 0) {
                fun = po.node.val;
            }
            else {
                args.push_back(po.node);
            }
            pos = po.newpos;
        }
        if (pos >= count) {
            std::cerr << "Error: missing closing bracket " << rbrack << "\n";
        }
        // Skip the closing bracket
        o.newpos = pos + 1;
        o.node = astnode(fun, args, met);
    }
    // Normal token, return it and advance to next token
    else {
        o.newpos = pos + 1;
        o.node = token(inp[pos].val, met);
    }
    return o;
}
// Token stream -> lisp parse tree: parses the node that starts at the
// first token and returns it
Node parseLLLTokenStream(std::vector<Node> inp) {
    return _parse(inp, 0).node;
}
// Parses LLL.
//
// s is LLL source text, or - when allowFileRead is set and exists(s)
// reports a file of that name - the path of a file to read the source
// from. The metadata of the resulting tree carries the file name, or
// "main" when the text was passed in directly.
Node parseLLL(std::string s, bool allowFileRead) {
    std::string input = s;
    std::string file = "main";
    // Only touch the file system when the caller allowed file reads
    if (allowFileRead && exists(s)) {
        file = s;
        input = get_file_contents(s);
    }
    // Tokenize the resolved source text; tokenizing s here would parse
    // the file name rather than the contents that were just read
    return parseLLLTokenStream(tokenize(input, Metadata(file, 0, 0), true));
}

13
libserpent/lllparser.h

@ -1,13 +0,0 @@
#ifndef ETHSERP_LLLPARSER
#define ETHSERP_LLLPARSER
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
// LLL text -> parse tree
Node parseLLL(std::string s, bool allowFileRead=false);
#endif

154
libserpent/opcodes.cpp

@ -1,154 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "opcodes.h"
#include "util.h"
#include "bignum.h"
// Opcode table. Each entry is Mapping(name, opcode byte, in, out), in the
// order of the Mapping constructor's parameters; in/out are presumably the
// number of stack items consumed/produced - confirm against the users of
// opinputs/opoutputs. The "---END---" entry is a sentinel: _opdata stops
// reading the table when it reaches it. PUSH, DUP and SWAP are not listed
// here.
Mapping mapping[] = {
Mapping("STOP", 0x00, 0, 0),
Mapping("ADD", 0x01, 2, 1),
Mapping("MUL", 0x02, 2, 1),
Mapping("SUB", 0x03, 2, 1),
Mapping("DIV", 0x04, 2, 1),
Mapping("SDIV", 0x05, 2, 1),
Mapping("MOD", 0x06, 2, 1),
Mapping("SMOD", 0x07, 2, 1),
Mapping("ADDMOD", 0x08, 3, 1),
Mapping("MULMOD", 0x09, 3, 1),
Mapping("EXP", 0x0a, 2, 1),
Mapping("SIGNEXTEND", 0x0b, 2, 1),
Mapping("LT", 0x10, 2, 1),
Mapping("GT", 0x11, 2, 1),
Mapping("SLT", 0x12, 2, 1),
Mapping("SGT", 0x13, 2, 1),
Mapping("EQ", 0x14, 2, 1),
Mapping("ISZERO", 0x15, 1, 1),
Mapping("AND", 0x16, 2, 1),
Mapping("OR", 0x17, 2, 1),
Mapping("XOR", 0x18, 2, 1),
Mapping("NOT", 0x19, 1, 1),
Mapping("BYTE", 0x1a, 2, 1),
Mapping("SHA3", 0x20, 2, 1),
Mapping("ADDRESS", 0x30, 0, 1),
Mapping("BALANCE", 0x31, 1, 1),
Mapping("ORIGIN", 0x32, 0, 1),
Mapping("CALLER", 0x33, 0, 1),
Mapping("CALLVALUE", 0x34, 0, 1),
Mapping("CALLDATALOAD", 0x35, 1, 1),
Mapping("CALLDATASIZE", 0x36, 0, 1),
Mapping("CALLDATACOPY", 0x37, 3, 0),
Mapping("CODESIZE", 0x38, 0, 1),
Mapping("CODECOPY", 0x39, 3, 0),
Mapping("GASPRICE", 0x3a, 0, 1),
Mapping("EXTCODESIZE", 0x3b, 1, 1),
Mapping("EXTCODECOPY", 0x3c, 4, 0),
Mapping("BLOCKHASH", 0x40, 1, 1),
Mapping("COINBASE", 0x41, 0, 1),
Mapping("TIMESTAMP", 0x42, 0, 1),
Mapping("NUMBER", 0x43, 0, 1),
Mapping("DIFFICULTY", 0x44, 0, 1),
Mapping("GASLIMIT", 0x45, 0, 1),
Mapping("POP", 0x50, 1, 0),
Mapping("MLOAD", 0x51, 1, 1),
Mapping("MSTORE", 0x52, 2, 0),
Mapping("MSTORE8", 0x53, 2, 0),
Mapping("SLOAD", 0x54, 1, 1),
Mapping("SSTORE", 0x55, 2, 0),
Mapping("JUMP", 0x56, 1, 0),
Mapping("JUMPI", 0x57, 2, 0),
Mapping("PC", 0x58, 0, 1),
Mapping("MSIZE", 0x59, 0, 1),
Mapping("GAS", 0x5a, 0, 1),
Mapping("JUMPDEST", 0x5b, 0, 0),
Mapping("LOG0", 0xa0, 2, 0),
Mapping("LOG1", 0xa1, 3, 0),
Mapping("LOG2", 0xa2, 4, 0),
Mapping("LOG3", 0xa3, 5, 0),
Mapping("LOG4", 0xa4, 6, 0),
Mapping("CREATE", 0xf0, 3, 1),
Mapping("CALL", 0xf1, 7, 1),
Mapping("CALLCODE", 0xf2, 7, 1),
Mapping("RETURN", 0xf3, 2, 0),
Mapping("SUICIDE", 0xff, 1, 0),
Mapping("---END---", 0x00, 0, 0),
};
// Lookup tables, filled by _opdata the first time it is called:
// opcodes maps an operation name to its (opcode, in, out) triple,
// reverseOpcodes maps an opcode number back to a name
std::map<std::string, std::vector<int> > opcodes;
std::map<int, std::string> reverseOpcodes;
// Fetches everything EXCEPT PUSH1..32
std::pair<std::string, std::vector<int> > _opdata(std::string ops, int opi) {
if (!opcodes.size()) {
int i = 0;
while (mapping[i].op != "---END---") {
Mapping mi = mapping[i];
opcodes[mi.op] = triple(mi.opcode, mi.in, mi.out);
i++;
}
for (i = 1; i <= 16; i++) {
opcodes["DUP"+unsignedToDecimal(i)] = triple(0x7f + i, i, i+1);
opcodes["SWAP"+unsignedToDecimal(i)] = triple(0x8f + i, i+1, i+1);
}
for (std::map<std::string, std::vector<int> >::iterator it=opcodes.begin();
it != opcodes.end();
it++) {
reverseOpcodes[(*it).second[0]] = (*it).first;
}
}
ops = upperCase(ops);
std::string op;
std::vector<int> opdata;
op = reverseOpcodes.count(opi) ? reverseOpcodes[opi] : "";
opdata = opcodes.count(ops) ? opcodes[ops] : triple(-1, -1, -1);
return std::pair<std::string, std::vector<int> >(op, opdata);
}
int opcode(std::string op) {
return _opdata(op, -1).second[0];
}
int opinputs(std::string op) {
return _opdata(op, -1).second[1];
}
int opoutputs(std::string op) {
return _opdata(op, -1).second[2];
}
// Name registered for an opcode number ("" when there is none)
std::string op(int opcode) {
    std::pair<std::string, std::vector<int> > found = _opdata("", opcode);
    return found.first;
}
// Special LLL forms. Each row is { name, minimum argument count, maximum
// argument count } as decimal strings (isValidLLLFunc converts them with
// dtu and checks argc against both bounds); 2147483647 serves as an
// effectively unbounded maximum. The "---END---" row is a sentinel that
// ends the table.
std::string lllSpecials[][3] = {
{ "ref", "1", "1" },
{ "get", "1", "1" },
{ "set", "2", "2" },
{ "with", "3", "3" },
{ "comment", "0", "2147483647" },
{ "ops", "0", "2147483647" },
{ "lll", "2", "2" },
{ "seq", "0", "2147483647" },
{ "if", "3", "3" },
{ "unless", "2", "2" },
{ "until", "2", "2" },
{ "alloc", "1", "1" },
{ "---END---", "0", "0" },
};
// name -> (minimum argc, maximum argc); built from lllSpecials on the
// first call to isValidLLLFunc
std::map<std::string, std::pair<int, int> > lllMap;
// Is f one of the special LLL forms listed in lllSpecials, and is argc
// within the argument-count bounds recorded for it?
bool isValidLLLFunc(std::string f, int argc) {
    // Build the lookup map from the table on first use
    if (!lllMap.size()) {
        for (int row = 0; lllSpecials[row][0] != "---END---"; row++) {
            lllMap[lllSpecials[row][0]] = std::pair<int, int>(
                dtu(lllSpecials[row][1]), dtu(lllSpecials[row][2]));
        }
    }
    if (!lllMap.count(f))
        return false;
    if (argc < lllMap[f].first)
        return false;
    return argc <= lllMap[f].second;
}

45
libserpent/opcodes.h

@ -1,45 +0,0 @@
#ifndef ETHSERP_OPCODES
#define ETHSERP_OPCODES
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
// One row of the opcode table: an operation name, its opcode number and
// its in/out counts
class Mapping {
    public:
        // Stores the four values in the members of the same (lower-case) name
        Mapping(std::string Op, int Opcode, int In, int Out)
            : op(Op), opcode(Opcode), in(In), out(Out) {}
        std::string op;
        int opcode;
        int in;
        int out;
};
extern Mapping mapping[];
extern std::map<std::string, std::vector<int> > opcodes;
extern std::map<int, std::string> reverseOpcodes;
std::pair<std::string, std::vector<int> > _opdata(std::string ops, int opi);
int opcode(std::string op);
int opinputs(std::string op);
int opoutputs(std::string op);
std::string op(int opcode);
extern std::string lllSpecials[][3];
extern std::map<std::string, std::pair<int, int> > lllMap;
bool isValidLLLFunc(std::string f, int argc);
#endif

98
libserpent/optimize.cpp

@ -1,98 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "lllparser.h"
#include "bignum.h"
// Compile-time arithmetic calculations.
//
// Tokens are passed through tryNumberize, and a value that decimalGt
// reports as exceeding tt256 is rejected through err. For AST nodes the
// arguments are optimized first, operator symbols are renamed to their
// word form (+ -> add, / -> sdiv, ...), additions of 0 and
// multiplications by 1 are replaced by the other operand, and operations
// on two tokens are folded into a single token where a result is
// computed. Anything else is returned with only its arguments optimized.
Node optimize(Node inp) {
    if (inp.type == TOKEN) {
        Node o = tryNumberize(inp);
        if (decimalGt(o.val, tt256, true))
            err("Value too large (exceeds 32 bytes or 2^256)", inp.metadata);
        return o;
    }
    for (unsigned i = 0; i < inp.args.size(); i++) {
        inp.args[i] = optimize(inp.args[i]);
    }
    // Arithmetic-specific transform
    if (inp.val == "+") inp.val = "add";
    if (inp.val == "*") inp.val = "mul";
    if (inp.val == "-") inp.val = "sub";
    if (inp.val == "/") inp.val = "sdiv";
    if (inp.val == "^") inp.val = "exp";
    if (inp.val == "**") inp.val = "exp";
    if (inp.val == "%") inp.val = "smod";
    // Degenerate cases for add and mul. Every case replaces inp by one of
    // its arguments, so the argument count is re-checked for each case:
    // the replacement need not have exactly two arguments, and testing
    // args[0]/args[1] on it unchecked could read out of bounds or discard
    // further arguments.
    if (inp.args.size() == 2 && inp.val == "add"
        && inp.args[0].type == TOKEN && inp.args[0].val == "0") {
        Node x = inp.args[1];
        inp = x;
    }
    if (inp.args.size() == 2 && inp.val == "add"
        && inp.args[1].type == TOKEN && inp.args[1].val == "0") {
        Node x = inp.args[0];
        inp = x;
    }
    if (inp.args.size() == 2 && inp.val == "mul"
        && inp.args[0].type == TOKEN && inp.args[0].val == "1") {
        Node x = inp.args[1];
        inp = x;
    }
    if (inp.args.size() == 2 && inp.val == "mul"
        && inp.args[1].type == TOKEN && inp.args[1].val == "1") {
        Node x = inp.args[0];
        inp = x;
    }
    // Arithmetic computation
    if (inp.args.size() == 2
        && inp.args[0].type == TOKEN
        && inp.args[1].type == TOKEN) {
        // Stays empty when the operation is not folded
        std::string o;
        if (inp.val == "add") {
            o = decimalMod(decimalAdd(inp.args[0].val, inp.args[1].val), tt256);
        }
        else if (inp.val == "sub") {
            // Only folded when the decimalGt comparison of the operands holds
            if (decimalGt(inp.args[0].val, inp.args[1].val, true))
                o = decimalSub(inp.args[0].val, inp.args[1].val);
        }
        else if (inp.val == "mul") {
            o = decimalMod(decimalMul(inp.args[0].val, inp.args[1].val), tt256);
        }
        else if (inp.val == "div" && inp.args[1].val != "0") {
            o = decimalDiv(inp.args[0].val, inp.args[1].val);
        }
        // Signed operations are only folded when both operands are below tt255
        else if (inp.val == "sdiv" && inp.args[1].val != "0"
              && decimalGt(tt255, inp.args[0].val)
              && decimalGt(tt255, inp.args[1].val)) {
            o = decimalDiv(inp.args[0].val, inp.args[1].val);
        }
        else if (inp.val == "mod" && inp.args[1].val != "0") {
            o = decimalMod(inp.args[0].val, inp.args[1].val);
        }
        else if (inp.val == "smod" && inp.args[1].val != "0"
              && decimalGt(tt255, inp.args[0].val)
              && decimalGt(tt255, inp.args[1].val)) {
            o = decimalMod(inp.args[0].val, inp.args[1].val);
        }
        else if (inp.val == "exp") {
            o = decimalModExp(inp.args[0].val, inp.args[1].val, tt256);
        }
        if (o.length()) return token(o, inp.metadata);
    }
    return inp;
}
// A node is degenerate (ie. trivial to calculate) when optimizing it
// leaves nothing but a token
bool isDegenerate(Node n) {
    Node reduced = optimize(n);
    return reduced.type == TOKEN;
}
// A node is purely arithmetic when its optimized form is number-like
bool isPureArithmetic(Node n) {
    Node reduced = optimize(n);
    return isNumberLike(reduced);
}

19
libserpent/optimize.h

@ -1,19 +0,0 @@
#ifndef ETHSERP_OPTIMIZER
#define ETHSERP_OPTIMIZER
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
// Compile-time arithmetic calculations
Node optimize(Node inp);
// Is a node degenerate (ie. trivial to calculate) ?
bool isDegenerate(Node n);
// Is a node purely arithmetic?
bool isPureArithmetic(Node n);
#endif

437
libserpent/parser.cpp

@ -1,437 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "parser.h"
#include "tokenize.h"
// Extended BEDMAS precedence order. Returns -1 for the "." token
// splitter, a positive level for operators (shuntingYard outputs stacked
// operators whose level is less than or equal to the incoming one first)
// and 0 for everything else.
int precedence(Node tok) {
    const std::string& op = tok.val;
    if (op == ".")
        return -1;
    if (op == "!" || op == "not")
        return 1;
    if (op == "^" || op == "**")
        return 2;
    if (op == "*" || op == "/" || op == "%")
        return 3;
    if (op == "+" || op == "-")
        return 4;
    if (op == "<" || op == ">" || op == "<=" || op == ">=")
        return 5;
    if (op == "&" || op == "|" || op == "xor" || op == "==" || op == "!=")
        return 6;
    if (op == "&&" || op == "and")
        return 7;
    if (op == "||" || op == "or")
        return 8;
    // Plain and compound assignment share one level
    if (op == "=" || op == "+=" || op == "-=" || op == "*=" || op == "/="
        || op == "%=")
        return 10;
    if (op == ":" || op == "::")
        return 11;
    return 0;
}
// Token classification for shunting-yard purposes. AST nodes are
// COMPOUND; tokens are classified by their text, and anything that is
// neither a bracket, comma, operator nor splitter is ALPHANUM. An
// unquoted ALPHANUM token containing a SYMB character is reported
// through err.
int toktype(Node tok) {
    if (tok.type == ASTNODE)
        return COMPOUND;
    const std::string& text = tok.val;
    if (text == "(" || text == "[" || text == "{")
        return LPAREN;
    if (text == ")" || text == "]" || text == "}")
        return RPAREN;
    if (text == ",")
        return COMMA;
    if (text == "!" || text == "~" || text == "not")
        return UNARY_OP;
    const int level = precedence(tok);
    if (level > 0)
        return BINARY_OP;
    if (level < 0)
        return TOKEN_SPLITTER;
    // Quoted strings may contain any character; everything else is checked
    const bool quoted = text[0] == '"' || text[0] == '\'';
    if (!quoted) {
        for (unsigned k = 0; k < text.length(); k++) {
            if (chartype(text[k]) == SYMB) {
                err("Invalid symbol: "+tok.val, tok.metadata);
            }
        }
    }
    return ALPHANUM;
}
// Converts to reverse polish notation
//
// Takes the tokens of one line and returns them reordered so that
// operators follow their operands. Brackets are kept in the output, and
// an "id" token is emitted before a left bracket that does not directly
// follow an alphanumeric token or a right bracket, so that treefy always
// finds something to treat as the function of a bracketed group.
std::vector<Node> shuntingYard(std::vector<Node> tokens) {
// Input queue holds the tokens in reverse, so that taking from the back
// yields them in their original order
std::vector<Node> iq;
for (int i = tokens.size() - 1; i >= 0; i--) {
iq.push_back(tokens[i]);
}
// Output queue and operator stack
std::vector<Node> oq;
std::vector<Node> stack;
// Current and previous token with their classifications; the previous
// type is what tells a call bracket / binary minus from a grouping
// bracket / unary minus
Node prev, tok;
int prevtyp = 0, toktyp = 0;
while (iq.size()) {
prev = tok;
prevtyp = toktyp;
tok = iq.back();
toktyp = toktype(tok);
iq.pop_back();
// Alphanumerics go straight to output queue
if (toktyp == ALPHANUM) {
oq.push_back(tok);
}
// Left parens go on stack and output queue
else if (toktyp == LPAREN) {
// Pending token splitters are flushed to the output first
while (stack.size() && toktype(stack.back()) == TOKEN_SPLITTER) {
oq.push_back(stack.back());
stack.pop_back();
}
// No callee in front of the bracket: insert the dummy "id" function
if (prevtyp != ALPHANUM && prevtyp != RPAREN) {
oq.push_back(token("id", tok.metadata));
}
stack.push_back(tok);
oq.push_back(tok);
}
// If rparen, keep moving from stack to output queue until lparen
else if (toktyp == RPAREN) {
while (stack.size() && toktype(stack.back()) != LPAREN) {
oq.push_back(stack.back());
stack.pop_back();
}
// Drop the matching lparen from the stack, if there is one
if (stack.size()) {
stack.pop_back();
}
oq.push_back(tok);
}
else if (toktyp == UNARY_OP) {
stack.push_back(tok);
}
// If token splitter, just push it to the stack
else if (toktyp == TOKEN_SPLITTER) {
// Splitters already on top of the stack are output first
while (stack.size() && toktype(stack.back()) == TOKEN_SPLITTER) {
oq.push_back(stack.back());
stack.pop_back();
}
stack.push_back(tok);
}
// If binary op, keep popping from stack while higher bedmas precedence
else if (toktyp == BINARY_OP) {
// A minus with no operand in front of it is a negation: it is
// rewritten as a subtraction from an inserted "0"
if (tok.val == "-" && prevtyp != ALPHANUM && prevtyp != RPAREN) {
stack.push_back(tok);
oq.push_back(token("0", tok.metadata));
}
else {
int prec = precedence(tok);
while (stack.size()
&& (toktype(stack.back()) == BINARY_OP
|| toktype(stack.back()) == UNARY_OP
|| toktype(stack.back()) == TOKEN_SPLITTER)
&& precedence(stack.back()) <= prec) {
oq.push_back(stack.back());
stack.pop_back();
}
stack.push_back(tok);
}
}
// Comma means finish evaluating the argument
else if (toktyp == COMMA) {
while (stack.size() && toktype(stack.back()) != LPAREN) {
oq.push_back(stack.back());
stack.pop_back();
}
}
}
// Whatever is left on the stack goes to the output, top first
while (stack.size()) {
oq.push_back(stack.back());
stack.pop_back();
}
return oq;
}
// Converts reverse polish notation into tree.
//
// stream is the output of shuntingYard. Operators wrap the one or two
// nodes before them, and a right bracket collects everything back to its
// left bracket plus the node in front of that bracket, which names the
// function. The result is the single remaining node, or a "multi" node
// wrapping all of them if more than one is left. Malformed input is
// reported through err, which is assumed not to return.
Node treefy(std::vector<Node> stream) {
    // Reversed copy of the input, consumed from the back
    std::vector<Node> iq;
    for (int i = stream.size() -1; i >= 0; i--) {
        iq.push_back(stream[i]);
    }
    std::vector<Node> oq;
    while (iq.size()) {
        Node tok = iq.back();
        iq.pop_back();
        int typ = toktype(tok);
        // If unary, take node off end of oq and wrap it with the operator
        // If binary, do the same with two nodes
        if (typ == UNARY_OP || typ == BINARY_OP || typ == TOKEN_SPLITTER) {
            std::vector<Node> args;
            int rounds = (typ == UNARY_OP) ? 1 : 2;
            for (int i = 0; i < rounds; i++) {
                if (oq.size() == 0) {
                    err("Line malformed, not enough args for "+tok.val,
                        tok.metadata);
                }
                args.push_back(oq.back());
                oq.pop_back();
            }
            // Operands were collected last-first; restore their order
            std::vector<Node> args2;
            while (args.size()) {
                args2.push_back(args.back());
                args.pop_back();
            }
            oq.push_back(astnode(tok.val, args2, tok.metadata));
        }
        // If rparen, keep grabbing until we get to an lparen
        else if (typ == RPAREN) {
            std::vector<Node> args;
            // A closing bracket with nothing before it has no match;
            // check before looking at the back of an empty queue
            if (!oq.size()) err("Bracket without matching", tok.metadata);
            while (toktype(oq.back()) != LPAREN) {
                args.push_back(oq.back());
                oq.pop_back();
                if (!oq.size()) err("Bracket without matching", tok.metadata);
            }
            // Discard the lparen itself
            oq.pop_back();
            // The node in front of the lparen is the function being
            // applied; it must exist
            if (!oq.size()) err("Bracket without matching", tok.metadata);
            args.push_back(oq.back());
            oq.pop_back();
            // We represent a[b] as (access a b)
            if (tok.val == "]")
                args.push_back(token("access", tok.metadata));
            if (args.back().type == ASTNODE)
                args.push_back(token("fun", tok.metadata));
            std::string fun = args.back().val;
            args.pop_back();
            // We represent [1,2,3] as (array_lit 1 2 3)
            if (fun == "access" && args.size() && args.back().val == "id") {
                fun = "array_lit";
                args.pop_back();
            }
            // Arguments were collected last-first; restore their order
            std::vector<Node> args2;
            while (args.size()) {
                args2.push_back(args.back());
                args.pop_back();
            }
            // When evaluating 2 + (3 * 5), the shunting yard algo turns that
            // into 2 ( id 3 5 * ) +, effectively putting "id" as a dummy
            // function where the algo was expecting a function to call the
            // thing inside the brackets. This reverses that step
            if (fun == "id" && args2.size() == 1) {
                oq.push_back(args2[0]);
            }
            else {
                oq.push_back(astnode(fun, args2, tok.metadata));
            }
        }
        else oq.push_back(tok);
        // This is messy, but has to be done. Import/inset other files here
        std::string v = oq.back().val;
        if ((v == "inset" || v == "import" || v == "create")
                && oq.back().args.size() == 1
                && oq.back().args[0].type == TOKEN) {
            // File names are resolved relative to the directory of the
            // file the statement appears in
            int lastSlashPos = tok.metadata.file.rfind("/");
            std::string root;
            if (lastSlashPos >= 0)
                root = tok.metadata.file.substr(0, lastSlashPos) + "/";
            else
                root = "";
            std::string filename = oq.back().args[0].val;
            // Strip the first and last character (the quotes)
            filename = filename.substr(1, filename.length() - 2);
            if (!exists(root + filename))
                err("File does not exist: "+root + filename, tok.metadata);
            if (v == "inset") {
                // inset: the parsed file replaces the statement
                oq.pop_back();
                oq.push_back(parseSerpent(root + filename));
            }
            else {
                // import/create: the file name argument is replaced by
                // the parsed file wrapped in an "outer" node
                oq.back().args.pop_back();
                oq.back().args.push_back(
                    asn("outer", parseSerpent(root + filename), tok.metadata));
            }
        }
        //Useful for debugging
        //for (int i = 0; i < oq.size(); i++) {
        //    std::cerr << printSimple(oq[i]) << " ";
        //}
        //std::cerr << " <-\n";
    }
    // Output must have one argument
    if (oq.size() == 0) {
        err("Output blank", Metadata());
    }
    else if (oq.size() > 1) {
        return asn("multi", oq, oq[0].metadata);
    }
    return oq[0];
}
// Parses one line of serpent: the tokens are first put into reverse
// polish order and then assembled into a tree
Node parseSerpentTokenStream(std::vector<Node> s) {
    std::vector<Node> rpn = shuntingYard(s);
    return treefy(rpn);
}
// Number of leading whitespace characters of a line; spaces and tabs
// each count as one
int spaceCount(std::string s) {
    unsigned count = 0;
    for (; count < s.length(); count++) {
        if (s[count] != ' ' && s[count] != '\t')
            break;
    }
    return count;
}
// Is this a command that takes an argument on the same line?
bool bodied(std::string tok) {
    static const char* const commands[] = {
        "if", "elif", "while", "with", "def", "extern", "data",
        "assert", "return", "fun", "scope", "macro", "type"
    };
    const unsigned total = sizeof(commands) / sizeof(commands[0]);
    for (unsigned k = 0; k < total; k++) {
        if (tok == commands[k])
            return true;
    }
    return false;
}
// Are the two commands meant to continue each other? True exactly when
// an "if" or "elif" is followed by an "elif" or "else".
bool bodiedContinued(std::string prev, std::string tok) {
    if (prev != "if" && prev != "elif")
        return false;
    return tok == "elif" || tok == "else";
}
// Is a line of code empty? A line counts as empty when it yields no
// tokens or when its first token starts a comment ("#" or "//").
bool isLineEmpty(std::string line) {
    std::vector<Node> tokens = tokenize(line);
    if (tokens.size() == 0)
        return true;
    const std::string& first = tokens[0].val;
    return first == "#" || first == "//";
}
// Parse lines of serpent (helper function)
//
// lines is a block of source lines that all belong to one indentation
// level, metadata describes where the block starts (its ln field is the
// line number of lines[0]) and sp is the number of leading whitespace
// characters every statement of the block must have. Returns the single
// statement of the block, or a "seq" node wrapping all of them.
Node parseLines(std::vector<std::string> lines, Metadata metadata, int sp) {
// Statements parsed so far
std::vector<Node> o;
int origLine = metadata.ln;
unsigned i = 0;
while (i < lines.size()) {
metadata.ln = origLine + i;
std::string main = lines[i];
// Blank and comment-only lines are skipped
if (isLineEmpty(main)) {
i += 1;
continue;
}
int spaces = spaceCount(main);
if (spaces != sp) {
err("Indent mismatch", metadata);
}
// Tokenize current line
std::vector<Node> tokens = tokenize(main.substr(sp), metadata);
// Remove comments
std::vector<Node> tokens2;
for (unsigned j = 0; j < tokens.size(); j++) {
if (tokens[j].val == "#" || tokens[j].val == "//") break;
tokens2.push_back(tokens[j]);
}
// A trailing colon announces an indented block that belongs to this line
bool expectingChildBlock = false;
if (tokens2.size() > 0 && tokens2.back().val == ":") {
tokens2.pop_back();
expectingChildBlock = true;
}
// Parse current line
Node out = parseSerpentTokenStream(tokens2);
// Parse child block
// Collect the following lines that are indented deeper than sp, and
// track the smallest indentation seen among them; i ends up at the
// first line that belongs to this level again
int childIndent = 999999;
std::vector<std::string> childBlock;
while (1) {
i++;
if (i >= lines.size())
break;
bool ile = isLineEmpty(lines[i]);
if (!ile) {
int spaces = spaceCount(lines[i]);
if (spaces <= sp) break;
childBlock.push_back(lines[i]);
if (spaces < childIndent) childIndent = spaces;
}
// Empty lines are kept as placeholders, which keeps the line
// numbering of the child block aligned with the source
else childBlock.push_back("");
}
// Child block empty?
bool cbe = true;
for (unsigned i = 0; i < childBlock.size(); i++) {
if (childBlock[i].length() > 0) { cbe = false; break; }
}
// Add child block to AST
if (expectingChildBlock) {
if (cbe)
err("Expected indented child block!", out.metadata);
out.type = ASTNODE;
// The child block starts on the line after the current one
metadata.ln += 1;
out.args.push_back(parseLines(childBlock, metadata, childIndent));
metadata.ln -= 1;
}
else if (!cbe)
err("Did not expect indented child block!", out.metadata);
// Inline form "cmd cond: body": the last argument is a ":" node, which
// is replaced by its two arguments
else if (out.args.size() && out.args[out.args.size() - 1].val == ":") {
Node n = out.args[out.args.size() - 1];
out.args.pop_back();
out.args.push_back(n.args[0]);
out.args.push_back(n.args[1]);
}
// Bring back if / elif into AST
// A line that starts with a bodied command and parsed into a "multi"
// node is turned into a node named after that command
if (bodied(tokens[0].val)) {
if (out.val != "multi") {
// token not being used in bodied form
}
else if (out.args[0].val == "id")
out = astnode(tokens[0].val, out.args[1].args, out.metadata);
else if (out.args[0].type == TOKEN) {
std::vector<Node> out2;
for (unsigned i = 1; i < out.args.size(); i++)
out2.push_back(out.args[i]);
out = astnode(tokens[0].val, out2, out.metadata);
}
else
out = astnode("fun", out.args, out.metadata);
}
// Multi not supported
if (out.val == "multi")
err("Multiple expressions or unclosed bracket", out.metadata);
// Convert top-level colon expressions into non-colon expressions;
// makes if statements and the like equivalent indented or not
//if (out.val == ":" && out.args[0].type == TOKEN)
//    out = asn(out.args[0].val, out.args[1], out.metadata);
//if (bodied(tokens[0].val) && out.args[0].val == ":")
//    out = asn(tokens[0].val, out.args[0].args);
// Nothing to attach a continuation to: just append the statement
if (o.size() == 0 || o.back().type == TOKEN) {
o.push_back(out);
continue;
}
// This is a little complicated. Basically, the idea here is to build
// constructions like [if [< x 5] [a] [elif [< x 10] [b] [else [c]]]]
// u holds the chain of nested if/elif nodes, outermost first
std::vector<Node> u;
u.push_back(o.back());
if (bodiedContinued(o.back().val, out.val)) {
// Walk down the last arguments to the innermost node that out continues
while (1) {
if (!bodiedContinued(u.back().val, out.val)) {
u.pop_back();
break;
}
if (!u.back().args.size()
|| !bodiedContinued(u.back().val, u.back().args.back().val)) {
break;
}
u.push_back(u.back().args.back());
}
u.back().args.push_back(out);
// Write the modified copies back into their parents, innermost first
while (u.size() > 1) {
Node v = u.back();
u.pop_back();
u.back().args.pop_back();
u.back().args.push_back(v);
}
o.pop_back();
o.push_back(u[0]);
}
else o.push_back(out);
}
// A single statement is returned as is, several are wrapped in a seq
if (o.size() == 1)
return o[0];
else if (o.size())
return astnode("seq", o, o[0].metadata);
else
return astnode("seq", o, Metadata());
}
// Parses serpent code. s is treated as a file name when exists(s)
// reports such a file, in which case the file's contents are parsed;
// otherwise s itself is the source text and the file is called "main".
Node parseSerpent(std::string s) {
    std::string file("main");
    std::string input(s);
    if (exists(s)) {
        input = get_file_contents(s);
        file = s;
    }
    std::vector<std::string> lines = splitLines(input);
    return parseLines(lines, Metadata(file, 0, 0), 0);
}
using namespace std;

13
libserpent/parser.h

@ -1,13 +0,0 @@
#ifndef ETHSERP_PARSER
#define ETHSERP_PARSER
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
// Serpent text -> parse tree
Node parseSerpent(std::string s);
#endif

327
libserpent/preprocess.cpp

@ -1,327 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "lllparser.h"
#include "bignum.h"
#include "rewriteutils.h"
#include "optimize.h"
#include "preprocess.h"
#include "functions.h"
#include "opcodes.h"
// Convert a function of the form (def (f x y z) (do stuff)) into
// (if (__funid equals functionCount) (seq (unpack x y z) (do stuff)))
Node convFunction(Node node, int functionCount) {
    // Not used below; kept because mkUniqueToken() is still called here
    std::string prefix = "_temp"+mkUniqueToken()+"_";
    Metadata m = node.metadata;
    if (node.args.size() != 2)
        err("Malformed def!", m);
    // Code that unpacks the declared arguments
    Node unpack = unpackArguments(node.args[0].args, m);
    // The function body itself
    Node body = node.args[1];
    // Condition selecting this function: (eq (get __funid) <functionCount>)
    Node selector = astnode("eq",
                            astnode("get", token("__funid", m), m),
                            token(unsignedToDecimal(functionCount), m),
                            m);
    Node guarded = astnode("seq", unpack, body, m);
    return astnode("if", selector, guarded);
}
// Populate an svObj with the arguments needed to determine
// the storage position of a node
//
// pre is the object built up so far, node the declaration to add, prefix
// the dotted name of the enclosing declarations and index the position of
// node within its parent. Returns pre extended with the entries for node
// (and, for tuples, for all of its members).
svObj getStorageVars(svObj pre, Node node, std::string prefix,
int index) {
Metadata m = node.metadata;
if (!pre.globalOffset.size()) pre.globalOffset = "0";
// h: the declaration as a list; h[0] is used as the name below and
// h[1..] as the array sizes
std::vector<Node> h;
// Running products of the array sizes, starting from the element size
std::vector<std::string> coefficients;
// Array accesses or atoms
if (node.val == "access" || node.type == TOKEN) {
std::string tot = "1";
h = listfyStorageAccess(node);
coefficients.push_back("1");
for (unsigned i = h.size() - 1; i >= 1; i--) {
// Array sizes must be constant or at least arithmetically
// evaluable at compile time
if (!isPureArithmetic(h[i]))
err("Array size must be fixed value", m);
// Create a list of the coefficient associated with each
// array index
coefficients.push_back(decimalMul(coefficients.back(), h[i].val));
}
}
// Tuples
else {
int startc;
// Handle the (fun <fun_astnode> args...) case
if (node.val == "fun") {
startc = 1;
h = listfyStorageAccess(node.args[0]);
}
// Handle the (<fun_name> args...) case, which
// the serpent parser produces when the function
// is a simple name and not a complex astnode
else {
startc = 0;
h = listfyStorageAccess(token(node.val, m));
}
// Members are laid out relative to the start of the tuple, so the
// offset counter restarts at zero for them
svObj sub = pre;
sub.globalOffset = "0";
// Evaluate tuple elements recursively
for (unsigned i = startc; i < node.args.size(); i++) {
sub = getStorageVars(sub,
node.args[i],
prefix+h[0].val.substr(2)+".",
i-startc);
}
// The total offset consumed by the members is the size of one tuple
coefficients.push_back(sub.globalOffset);
for (unsigned i = h.size() - 1; i >= 1; i--) {
// Array sizes must be constant or at least arithmetically
// evaluable at compile time
if (!isPureArithmetic(h[i]))
err("Array size must be fixed value", m);
// Create a list of the coefficient associated with each
// array index
coefficients.push_back(decimalMul(coefficients.back(), h[i].val));
}
pre.offsets = sub.offsets;
pre.coefficients = sub.coefficients;
pre.nonfinal = sub.nonfinal;
pre.nonfinal[prefix+h[0].val.substr(2)] = true;
}
// Register the declaration itself under its prefixed name; substr(2)
// drops the first two characters of h[0].val (presumably a marker added
// by listfyStorageAccess - confirm there)
pre.coefficients[prefix+h[0].val.substr(2)] = coefficients;
pre.offsets[prefix+h[0].val.substr(2)] = pre.globalOffset;
pre.indices[prefix+h[0].val.substr(2)] = index;
// Advance the running offset by the total size of the declaration, but
// only while that size stays below tt176
if (decimalGt(tt176, coefficients.back()))
pre.globalOffset = decimalAdd(pre.globalOffset, coefficients.back());
return pre;
}
// Preprocess input containing functions
//
// localExterns is a map of the form, eg,
//
// { x: { foo: 0, bar: 1, baz: 2 }, y: { qux: 0, foo: 1 } ... }
//
// localExternSigs is a map of the form, eg,
//
// { x : { foo: iii, bar: iis, baz: ia }, y: { qux: i, foo: as } ... }
//
// Signifying that x.foo = 0, x.baz = 2, y.foo = 1, etc
// and that x.foo has three integers as arguments, x.bar has two
// integers and a variable-length string, and baz has an integer
// and an array
//
// globalExterns is a one-level map, eg from above
//
// { foo: 1, bar: 1, baz: 2, qux: 0 }
//
// globalExternSigs is a one-level map, eg from above
//
// { foo: as, bar: iis, baz: ia, qux: i}
//
// Note that globalExterns and globalExternSigs may be ambiguous
// Also, a null signature implies an infinite tail of integers
// Splits the top-level statements of a program into functions, extern
// declarations, macros, type declarations, storage declarations and
// plain code, records what it finds in a preprocessAux, and returns the
// restructured program together with that record.
//
// The returned tree consists of the "shared" and "init" bodies (when
// non-empty) followed by a ~return of an lll node holding the shared
// body, the plain code and the converted functions; when functions
// exist, that code is wrapped in a "with" that binds __funid to byte 0
// of the first calldata word.
preprocessResult preprocessInit(Node inp) {
    Metadata m = inp.metadata;
    // Treat a single statement as a sequence of one
    if (inp.val != "seq")
        inp = astnode("seq", inp, m);
    std::vector<Node> empty = std::vector<Node>();
    Node init = astnode("seq", empty, m);
    Node shared = astnode("seq", empty, m);
    std::vector<Node> any;
    std::vector<Node> functions;
    preprocessAux out = preprocessAux();
    out.localExterns["self"] = std::map<std::string, int>();
    int functionCount = 0;
    int storageDataCount = 0;
    for (unsigned i = 0; i < inp.args.size(); i++) {
        Node obj = inp.args[i];
        // Functions
        if (obj.val == "def") {
            if (obj.args.size() == 0)
                err("Empty def", m);
            std::string funName = obj.args[0].val;
            // Init, shared and any are special functions
            if (funName == "init" || funName == "shared" || funName == "any") {
                if (obj.args[0].args.size())
                    err(funName+" cannot have arguments", m);
            }
            if (funName == "init") init = obj.args[1];
            else if (funName == "shared") shared = obj.args[1];
            else if (funName == "any") any.push_back(obj.args[1]);
            else {
                // Other functions
                functions.push_back(convFunction(obj, functionCount));
                out.localExterns["self"][obj.args[0].val] = functionCount;
                out.localExternSigs["self"][obj.args[0].val]
                    = getSignature(obj.args[0].args);
                functionCount++;
            }
        }
        // Extern declarations
        else if (obj.val == "extern") {
            std::string externName = obj.args[0].val;
            Node al = obj.args[1];
            if (!out.localExterns.count(externName))
                out.localExterns[externName] = std::map<std::string, int>();
            // The position of a function in the list is its id
            for (unsigned i = 0; i < al.args.size(); i++) {
                // name:signature entry
                if (al.args[i].val == ":") {
                    std::string v = al.args[i].args[0].val;
                    std::string sig = al.args[i].args[1].val;
                    out.globalExterns[v] = i;
                    out.globalExternSigs[v] = sig;
                    out.localExterns[externName][v] = i;
                    out.localExternSigs[externName][v] = sig;
                }
                // Bare name: no signature given
                else {
                    std::string v = al.args[i].val;
                    out.globalExterns[v] = i;
                    out.globalExternSigs[v] = "";
                    out.localExterns[externName][v] = i;
                    out.localExternSigs[externName][v] = "";
                }
            }
        }
        // Custom macros
        else if (obj.val == "macro" || (obj.val == "fun" && obj.args[0].val == "macro")) {
            // Rules for valid macros:
            //
            // There are only five categories of valid macros:
            //
            // 1. a macro where the outer function is something
            //    which is NOT an existing valid function/extern/datum
            // 2. a macro of the form set(c(x), d) where c must NOT
            //    be an existing valid function/extern/datum
            // 3. something of the form access(c(x)), where c must NOT
            //    be an existing valid function/extern/datum
            // 4. something of the form set(access(c(x)), d) where c must
            //    NOT be an existing valid function/extern/datum
            // 5. something of the form with(c(x), d, e) where c must
            //    NOT be an existing valid function/extern/datum
            bool valid = false;
            Node pattern;
            Node substitution;
            int priority;
            // Priority not set: default zero
            if (obj.val == "macro") {
                pattern = obj.args[0];
                substitution = obj.args[1];
                priority = 0;
            }
            // Specified priority
            else {
                pattern = obj.args[1];
                substitution = obj.args[2];
                if (obj.args[0].args.size())
                    priority = dtu(obj.args[0].args[0].val);
                else
                    priority = 0;
            }
            // Categories 2-5 look at the first argument of the pattern
            // (and category 4 at that argument's first argument), so
            // make sure those exist before indexing them
            bool hasArg = pattern.args.size() > 0;
            bool hasNestedArg = hasArg && pattern.args[0].args.size() > 0;
            // Category 1
            if (opcode(pattern.val) < 0 && !isValidFunctionName(pattern.val))
                valid = true;
            // Category 2
            if (pattern.val == "set" && hasArg &&
                    opcode(pattern.args[0].val) < 0 &&
                    !isValidFunctionName(pattern.args[0].val))
                valid = true;
            // Category 3. Without its own "valid = true" this check
            // used to act as the condition of the next one, which made
            // categories 3 and 4 impossible to satisfy
            if (pattern.val == "access" && hasArg &&
                    opcode(pattern.args[0].val) < 0 &&
                    !isValidFunctionName(pattern.args[0].val))
                valid = true;
            // Category 4
            if (pattern.val == "set" && hasNestedArg &&
                    pattern.args[0].val == "access" &&
                    opcode(pattern.args[0].args[0].val) < 0 &&
                    !isValidFunctionName(pattern.args[0].args[0].val))
                valid = true;
            // Category 5
            if (pattern.val == "with" && hasArg &&
                    opcode(pattern.args[0].val) < 0 &&
                    !isValidFunctionName(pattern.args[0].val))
                valid = true;
            if (valid) {
                if (!out.customMacros.count(priority))
                    out.customMacros[priority] = rewriteRuleSet();
                out.customMacros[priority].addRule
                    (rewriteRule(pattern, substitution));
            }
            else warn("Macro does not fit valid template: "+printSimple(pattern), m);
        }
        // Variable types
        else if (obj.val == "type") {
            std::string typeName = obj.args[0].val;
            std::vector<Node> vars = obj.args[1].args;
            for (unsigned i = 0; i < vars.size(); i++)
                out.types[vars[i].val] = typeName;
        }
        // Storage variables/structures
        else if (obj.val == "data") {
            out.storageVars = getStorageVars(out.storageVars,
                                             obj.args[0],
                                             "",
                                             storageDataCount);
            storageDataCount += 1;
        }
        // Everything else is plain code
        else any.push_back(obj);
    }
    // Set up top-level AST structure
    std::vector<Node> main;
    if (shared.args.size()) main.push_back(shared);
    if (init.args.size()) main.push_back(init);

    // The code part: shared code, then plain code, then functions
    std::vector<Node> code;
    if (shared.args.size()) code.push_back(shared);
    for (unsigned i = 0; i < any.size(); i++)
        code.push_back(any[i]);
    for (unsigned i = 0; i < functions.size(); i++)
        code.push_back(functions[i]);
    Node codeNode;
    if (functions.size() > 0) {
        // __funid = byte 0 of calldataload(0); the converted functions
        // compare against it
        codeNode = astnode("with",
                           token("__funid", m),
                           astnode("byte",
                                   token("0", m),
                                   astnode("calldataload", token("0", m), m),
                                   m),
                           astnode("seq", code, m),
                           m);
    }
    else codeNode = astnode("seq", code, m);
    main.push_back(astnode("~return",
                           token("0", m),
                           astnode("lll",
                                   codeNode,
                                   token("0", m),
                                   m),
                           m));

    Node result;
    if (main.size() == 1) result = main[0];
    else result = astnode("seq", main, inp.metadata);
    return preprocessResult(result, out);
}
// Applies the declared variable types to a tree: a token whose name has
// a declared type is wrapped in a node named after that type, an
// "untyped" node is replaced by its first argument, an "outer" node is
// left untouched, and every other node has its arguments processed
// recursively. The auxiliary data is passed through unchanged.
preprocessResult processTypes (preprocessResult pr) {
    preprocessAux aux = pr.second;
    Node node = pr.first;
    if (node.type == TOKEN && aux.types.count(node.val)) {
        Node typed = asn(aux.types[node.val], node, node.metadata);
        return preprocessResult(typed, aux);
    }
    if (node.val == "untyped")
        return preprocessResult(node.args[0], aux);
    if (node.val != "outer") {
        for (unsigned k = 0; k < node.args.size(); k++) {
            preprocessResult child = processTypes(preprocessResult(node.args[k], aux));
            node.args[k] = child.first;
        }
    }
    return preprocessResult(node, aux);
}
// Full preprocessing: restructure the program with preprocessInit, then
// apply the declared types with processTypes
preprocessResult preprocess(Node n) {
    preprocessResult initial = preprocessInit(n);
    return processTypes(initial);
}

50
libserpent/preprocess.h

@ -1,50 +0,0 @@
#ifndef ETHSERP_PREPROCESSOR
#define ETHSERP_PREPROCESSOR
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "rewriteutils.h"
// Storage variable index storing object
// All maps are keyed by the (dot-prefixed) name of a storage declaration;
// the entries are written by getStorageVars.
struct svObj {
// Value of globalOffset at the time the declaration was registered
std::map<std::string, std::string> offsets;
// Position of the declaration within its parent
std::map<std::string, int> indices;
// Per declaration, the list of coefficients computed from its array sizes
std::map<std::string, std::vector<std::string> > coefficients;
// Set to true for declarations that have members (tuples)
std::map<std::string, bool> nonfinal;
// Running offset as a decimal string; advanced after each declaration
std::string globalOffset;
};
// Preprocessing result storing object: everything preprocessing learns
// about a program besides the tree itself
class preprocessAux {
    public:
        // All members start out empty, except that localExterns already
        // has an (empty) entry for "self"
        preprocessAux() {
            localExterns["self"] = std::map<std::string, int>();
        }
        // Function name -> id, across all extern declarations
        std::map<std::string, int> globalExterns;
        // Function name -> signature, across all extern declarations
        std::map<std::string, std::string> globalExternSigs;
        // Extern name -> (function name -> id)
        std::map<std::string, std::map<std::string, int> > localExterns;
        // Extern name -> (function name -> signature)
        std::map<std::string, std::map<std::string, std::string> > localExternSigs;
        // Priority -> custom macro rules
        std::map<int, rewriteRuleSet > customMacros;
        // Variable name -> declared type name
        std::map<std::string, std::string> types;
        // Storage variable layout
        svObj storageVars;
};
// A (syntax tree, auxiliary data) pair. This is a macro rather than a
// typedef, so the name is substituted textually wherever it appears.
#define preprocessResult std::pair<Node, preprocessAux>
// Populate an svObj with the arguments needed to determine
// the storage position of a node
svObj getStorageVars(svObj pre, Node node, std::string prefix="",
int index=0);
// Preprocess a function (see cpp for details)
preprocessResult preprocess(Node inp);
#endif

905
libserpent/rewriter.cpp

@ -1,905 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "lllparser.h"
#include "bignum.h"
#include "optimize.h"
#include "rewriteutils.h"
#include "preprocess.h"
#include "functions.h"
#include "opcodes.h"
// Rewrite rules: every row is { pattern, substitution }, both written as
// LLL text and compiled once by parseMacros().
//
//  - A $name in a pattern binds whatever subtree sits at that position.
//  - A $name in a substitution that the pattern did not bind ($1, $2,
//    $location, ...) is turned into a fresh temporary variable by subst().
//  - Rules with the same head symbol are tried in table order and the
//    first match wins, so a more specific pattern only takes effect when
//    it is listed before a more general one of the same arity.
//  - A leading ~ marks a "final" form that is not rewritten again; the ~
//    is stripped by postValidate().
std::string macros[][2] = {
    { "(seq $x)", "$x" },
    { "(seq (seq) $x)", "$x" },
    // Compound assignment
    { "(+= $a $b)", "(set $a (+ $a $b))" },
    { "(*= $a $b)", "(set $a (* $a $b))" },
    { "(-= $a $b)", "(set $a (- $a $b))" },
    { "(/= $a $b)", "(set $a (/ $a $b))" },
    { "(%= $a $b)", "(set $a (% $a $b))" },
    { "(^= $a $b)", "(set $a (^ $a $b))" },
    { "(!= $a $b)", "(iszero (eq $a $b))" },
    { "(assert $x)", "(unless $x (stop))" },
    // min / max (unsigned and signed)
    { "(min $a $b)", "(with $1 $a (with $2 $b (if (lt $1 $2) $1 $2)))" },
    { "(max $a $b)", "(with $1 $a (with $2 $b (if (lt $1 $2) $2 $1)))" },
    { "(smin $a $b)", "(with $1 $a (with $2 $b (if (slt $1 $2) $1 $2)))" },
    { "(smax $a $b)", "(with $1 $a (with $2 $b (if (slt $1 $2) $2 $1)))" },
    { "(if $cond $do (else $else))", "(if $cond $do $else)" },
    { "(code $code)", "$code" },
    { "(slice $arr $pos)", "(add $arr (mul 32 $pos))" },
    { "(array $len)", "(alloc (mul 32 $len))" },
    // Control flow
    { "(while $cond $do)", "(until (iszero $cond) $do)" },
    { "(while (iszero $cond) $do)", "(until $cond $do)" },
    { "(if $cond $do)", "(unless (iszero $cond) $do)" },
    { "(if (iszero $cond) $do)", "(unless $cond $do)" },
    // Storage and memory access
    { "(access (. self storage) $ind)", "(sload $ind)" },
    { "(access $var $ind)", "(mload (add $var (mul 32 $ind)))" },
    { "(set (access (. self storage) $ind) $val)", "(sstore $ind $val)" },
    { "(set (sload $ind) $val)", "(sstore $ind $val)" },
    { "(set (access $var $ind) $val)", "(mstore (add $var (mul 32 $ind)) $val)" },
    { "(getch $var $ind)", "(mod (mload (sub (add $var $ind) 31)) 256)" },
    { "(setch $var $ind $val)", "(mstore8 (add $var $ind) $val)" },
    // Sending value
    { "(send $to $value)", "(~call (sub (gas) 25) $to $value 0 0 0 0)" },
    { "(send $gas $to $value)", "(~call $gas $to $value 0 0 0 0)" },
    // sha3: single value, byte-counted range, word-counted range
    { "(sha3 $x)", "(seq (set $1 $x) (~sha3 (ref $1) 32))" },
    { "(sha3 $mstart (= chars $msize))", "(~sha3 $mstart $msize)" },
    { "(sha3 $mstart $msize)", "(~sha3 $mstart (mul 32 $msize))" },
    { "(id $0)", "$0" },
    // return: single value, byte-counted range, word-counted range
    { "(return $x)", "(seq (set $1 $x) (~return (ref $1) 32))" },
    { "(return $mstart (= chars $msize))", "(~return $mstart $msize)" },
    { "(return $start $len)", "(~return $start (mul 32 $len))" },
    // Logic and comparison
    { "(&& $x $y)", "(if $x $y 0)" },
    { "(|| $x $y)", "(with $1 $x (if $1 $1 $y))" },
    { "(>= $x $y)", "(iszero (slt $x $y))" },
    { "(<= $x $y)", "(iszero (sgt $x $y))" },
    // Contract creation. (create $code) matches every one-argument
    // create, so no further one-argument create rule can ever fire.
    { "(create $code)", "(create 0 $code)" },
    { "(create $endowment $code)",
      "(with $1 (msize) (create $endowment (get $1) (lll $code (msize))))" },
    // Hashing / signature helpers implemented as calls to addresses 2, 3
    // and 1. In every rule $1 holds the address of the scratch buffer, so
    // the buffer is always referred to as (get $1).
    { "(sha256 $x)",
      "(with $1 (alloc 64) (seq (mstore (add (get $1) 32) $x) (pop (~call 101 2 0 (add (get $1) 32) 32 (get $1) 32)) (mload (get $1))))" },
    { "(sha256 $arr (= chars $sz))",
      "(with $1 (alloc 32) (seq (pop (~call 101 2 0 $arr $sz (get $1) 32)) (mload (get $1))))" },
    { "(sha256 $arr $sz)",
      "(with $1 (alloc 32) (seq (pop (~call 101 2 0 $arr (mul 32 $sz) (get $1) 32)) (mload (get $1))))" },
    { "(ripemd160 $x)",
      "(with $1 (alloc 64) (seq (mstore (add (get $1) 32) $x) (pop (~call 101 3 0 (add (get $1) 32) 32 (get $1) 32)) (mload (get $1))))" },
    { "(ripemd160 $arr (= chars $sz))",
      "(with $1 (alloc 32) (seq (pop (~call 101 3 0 $arr $sz (get $1) 32)) (mload (get $1))))" },
    { "(ripemd160 $arr $sz)",
      "(with $1 (alloc 32) (seq (pop (~call 101 3 0 $arr (mul 32 $sz) (get $1) 32)) (mload (get $1))))" },
    { "(ecrecover $h $v $r $s)",
      "(with $1 (alloc 160) (seq (mstore (get $1) $h) (mstore (add (get $1) 32) $v) (mstore (add (get $1) 64) $r) (mstore (add (get $1) 96) $s) (pop (~call 101 1 0 (get $1) 128 (add (get $1) 128) 32)) (mload (add (get $1) 128))))" },
    { "(inset $x)", "$x" },
    { "(with (= $var $val) $cond)", "(with $var $val $cond)" },
    // Logging: topics only, then array data plus topics
    { "(log $t1)", "(~log1 0 0 $t1)" },
    { "(log $t1 $t2)", "(~log2 0 0 $t1 $t2)" },
    { "(log $t1 $t2 $t3)", "(~log3 0 0 $t1 $t2 $t3)" },
    { "(log $t1 $t2 $t3 $t4)", "(~log4 0 0 $t1 $t2 $t3 $t4)" },
    { "(logarr $a $sz)", "(~log0 $a (mul 32 $sz))" },
    { "(logarr $a $sz $t1)", "(~log1 $a (mul 32 $sz) $t1)" },
    { "(logarr $a $sz $t1 $t2)", "(~log2 $a (mul 32 $sz) $t1 $t2)" },
    { "(logarr $a $sz $t1 $t2 $t3)", "(~log3 $a (mul 32 $sz) $t1 $t2 $t3)" },
    { "(logarr $a $sz $t1 $t2 $t3 $t4)", "(~log4 $a (mul 32 $sz) $t1 $t2 $t3 $t4)" },
    // Bulk copies between memory arrays and storage
    { "(save $loc $array (= chars $count))",
      "(with $location (ref $loc) (with $c $count (with $end (div $c 32) (with $i 0 (seq (while (slt $i $end) (seq (sstore (add $i $location) (access $array $i)) (set $i (add $i 1)))) (sstore (add $i $location) (~and (access $array $i) (sub 0 (exp 256 (sub 32 (mod $c 32)))))))))))" },
    { "(save $loc $array $count)",
      "(with $location (ref $loc) (with $end $count (with $i 0 (while (slt $i $end) (seq (sstore (add $i $location) (access $array $i)) (set $i (add $i 1)))))))" },
    { "(load $loc (= chars $count))",
      "(with $location (ref $loc) (with $c $count (with $a (alloc $c) (with $i 0 (seq (while (slt $i (div $c 32)) (seq (set (access $a $i) (sload (add $location $i))) (set $i (add $i 1)))) (set (access $a $i) (~and (sload (add $location $i)) (sub 0 (exp 256 (sub 32 (mod $c 32)))))) $a)))))" },
    { "(load $loc $count)",
      "(with $location (ref $loc) (with $c $count (with $a (alloc $c) (with $i 0 (seq (while (slt $i $c) (seq (set (access $a $i) (sload (add $location $i))) (set $i (add $i 1)))) $a)))))" },
    // Memory copies. The arguments are bound once to _sz/_from/_to and
    // only those bindings are used afterwards, so each argument
    // expression is evaluated a single time.
    { "(unsafe_mcopy $to $from $sz)",
      "(with _sz $sz (with _from $from (with _to $to (seq (comment STARTING UNSAFE MCOPY) (with _i 0 (while (lt _i _sz) (seq (mstore (add _to _i) (mload (add _from _i))) (set _i (add _i 32)))))))))" },
    // Copies whole words first, then merges the trailing partial word
    // with a bitwise mask (~and; a plain "and" would be rewritten to the
    // logical && by the synonym table).
    { "(mcopy $to $from $sz)",
      "(with _to $to (with _from $from (with _sz $sz (seq (comment STARTING MCOPY) (with _i 0 (seq (while (lt (add _i 31) _sz) (seq (mstore (add _to _i) (mload (add _from _i))) (set _i (add _i 32)))) (with _mask (exp 256 (sub 32 (mod _sz 32))) (mstore (add _to _i) (add (mod (mload (add _to _i)) _mask) (~and (mload (add _from _i)) (sub 0 _mask)))))))))))" },
    // Environment accessors
    { "(. msg sender)", "(caller)" },
    { "(. msg value)", "(callvalue)" },
    { "(. tx gasprice)", "(gasprice)" },
    { "(. tx origin)", "(origin)" },
    { "(. tx gas)", "(gas)" },
    { "(. $x balance)", "(balance $x)" },
    { "self", "(address)" },
    { "(. block prevhash)", "(prevhash)" },
    { "(. block coinbase)", "(coinbase)" },
    { "(. block timestamp)", "(timestamp)" },
    { "(. block number)", "(number)" },
    { "(. block difficulty)", "(difficulty)" },
    { "(. block gaslimit)", "(gaslimit)" },
    { "stop", "(stop)" },
    { "---END---", "" } //Keep this line at the end of the list
};
// Token synonyms: { spelling in source, canonical function name }.
// parseMacros() loads the table into synonymMap; synonymTransform() then
// renames any AST node whose head is one of the left-hand spellings.
std::string synonyms[][2] = {
    // logical and bitwise operators
    { "or",     "||"     },
    { "and",    "&&"     },
    { "|",      "~or"    },
    { "&",      "~and"   },
    { "elif",   "if"     },
    { "!",      "iszero" },
    { "~",      "~not"   },
    { "not",    "iszero" },
    { "string", "alloc"  },
    // arithmetic
    { "+",      "add"    },
    { "-",      "sub"    },
    { "*",      "mul"    },
    { "/",      "sdiv"   },
    { "^",      "exp"    },
    { "**",     "exp"    },
    { "%",      "smod"   },
    // comparison and assignment
    { "<",      "slt"    },
    { ">",      "sgt"    },
    { "=",      "set"    },
    { "==",     "eq"     },
    { ":",      "kv"     },
    { "---END---", "" } //Keep this line at the end of the list
};
// Lookup form of the table above; empty until parseMacros() has run
std::map<std::string, std::string> synonymMap;
// Custom setters (need to be registered separately
// for use with managed storage): { compound operator, plain operator }.
// parseMacros() turns each row into a rule (OP= $x $y) -> (= $x (OP $x $y)).
std::string setters[][2] = {
    { "+=", "+" }, { "-=", "-" },
    { "*=", "*" }, { "/=", "/" },
    { "%=", "%" }, { "^=", "^" },
    { "---END---", "" } //Keep this line at the end of the list
};
// Not filled by any code visible in this file section
std::map<std::string, std::string> setterMap;
// Processes mutable array literals: generates code that allocates 32
// bytes per element, stores element k at byte offset 32*k and finally
// evaluates to the address of the array.
Node array_lit_transform(Node node) {
    // Unique prefix so the temporary $arr cannot clash with other variables
    std::string prefix = "_temp"+mkUniqueToken() + "_";
    Metadata m = node.metadata;
    std::map<std::string, Node> bindings;
    std::string code = "(seq (set $arr (alloc "+utd(node.args.size()*32)+"))";
    for (unsigned k = 0; k < node.args.size(); k++) {
        const std::string slot = utd(k);
        code += " (mstore (add (get $arr) "+utd(k * 32)+") $"+slot+")";
        // $<k> in the template stands for the k-th element expression
        bindings[slot] = node.args[k];
    }
    code += " (get $arr))";
    return subst(parseLLL(code), bindings, prefix, m);
}
// Processes long text literals: the quoted text is cut into 32-character
// chunks, each chunk becomes one numeric token, and the resulting words
// are emitted as an array literal. A final chunk shorter than 32
// characters is multiplied by 256 once per missing character.
Node string_transform(Node node) {
    Metadata m = node.metadata;
    if (!node.args.size())
        err("Empty text!", m);
    std::string quoted = node.args[0].val;
    if (quoted.size() < 2
            || quoted[0] != '"'
            || quoted[quoted.size() - 1] != '"')
        err("Text contents don't look like a string!", m);
    // Strip the surrounding double quotes
    std::string bin = quoted.substr(1, quoted.size() - 2);
    unsigned sz = bin.size();
    std::vector<Node> words;
    for (unsigned start = 0; start < sz; start += 32) {
        std::string word = binToNumeric(bin.substr(start, 32));
        // Only the last chunk can be short; pad it up to 32 characters
        for (unsigned have = sz - start; have < 32; have++)
            word = decimalMul(word, "256");
        words.push_back(token(word, node.metadata));
    }
    return array_lit_transform(astnode("array_lit", words, node.metadata));
}
Node apply_rules(preprocessResult pr);
// Transform "<variable>.<fun>(args...)" into
// a call
//
// node is expected to be (fun (. owner member) arg...). Arguments of the
// form name=value are inspected for these keywords:
//   as=<extern>   look the function up in that extern's table
//   call=code     use callcode instead of call
//   gas=<expr>    gas to forward (default: (gas) - 25)
//   value=<expr>  value to send (default: 0)
//   outsz=<expr>  return an array of that many words instead of one word
// Calls on "self" are looked up in the "self" extern table. Reports an
// error through err() when the member is a computed expression, when
// "as" is combined with self, or when the function is unknown.
Node dotTransform(Node node, preprocessAux aux) {
    Metadata m = node.metadata;
    // We're gonna make lots of temporary variables,
    // so set up a unique flag for them
    std::string prefix = "_temp"+mkUniqueToken()+"_";
    // Check that the function name is a token
    if (node.args[0].args[1].type == ASTNODE)
        err("Function name must be static", m);
    Node dotOwner = node.args[0].args[0];
    std::string dotMember = node.args[0].args[1].val;
    // kwargs = map of special arguments
    std::map<std::string, Node> kwargs;
    kwargs["value"] = token("0", m);
    kwargs["gas"] = subst(parseLLL("(- (gas) 25)"), msn(), prefix, m);
    // Search for as=? and call=code keywords, and isolate the actual
    // function arguments
    // NOTE(review): keyword arguments stay in fnargs as well; presumably
    // packArguments skips them -- confirm.
    std::vector<Node> fnargs;
    std::string as = "";
    std::string op = "call";
    for (unsigned i = 1; i < node.args.size(); i++) {
        fnargs.push_back(node.args[i]);
        const Node &arg = node.args[i];
        // Only a well-formed name=value node can carry a keyword
        if ((arg.val == "=" || arg.val == "set") && arg.args.size() >= 2) {
            const std::string &keyword = arg.args[0].val;
            if (keyword == "as")
                as = arg.args[1].val;
            if (keyword == "call" && arg.args[1].val == "code")
                op = "callcode";
            if (keyword == "gas")
                kwargs["gas"] = arg.args[1];
            if (keyword == "value")
                kwargs["value"] = arg.args[1];
            if (keyword == "outsz")
                kwargs["outsz"] = arg.args[1];
        }
    }
    if (dotOwner.val == "self") {
        if (as.size()) err("Cannot use \"as\" when calling self!", m);
        as = dotOwner.val;
    }
    // Determine the funId and sig assuming the "as" keyword was used
    int funId = 0;
    std::string sig;
    if (as.size() > 0 && aux.localExterns.count(as)) {
        if (!aux.localExterns[as].count(dotMember))
            err("Invalid call: "+printSimple(dotOwner)+"."+dotMember, m);
        funId = aux.localExterns[as][dotMember];
        sig = aux.localExternSigs[as][dotMember];
    }
    // Determine the funId and sig otherwise
    else if (!as.size()) {
        if (!aux.globalExterns.count(dotMember))
            err("Invalid call: "+printSimple(dotOwner)+"."+dotMember, m);
        funId = aux.globalExterns[dotMember];
        sig = aux.globalExternSigs[dotMember];
    }
    // "as" names an extern that was never declared
    else err("Invalid call: "+printSimple(dotOwner)+"."+dotMember, m);
    // Pack arguments
    kwargs["data"] = packArguments(fnargs, sig, funId, m);
    kwargs["to"] = dotOwner;
    Node main;
    // Pack output
    if (!kwargs.count("outsz")) {
        // Single-word result, received in a temporary variable
        main = parseLLL(
            "(with _data $data (seq "
                "(pop (~"+op+" $gas $to $value (access _data 0) (access _data 1) (ref $dataout) 32))"
                "(get $dataout)))");
    }
    else {
        // Array result of $outsz words, received in freshly allocated memory
        main = parseLLL(
            "(with _data $data (with _outsz (mul 32 $outsz) (with _out (alloc _outsz) (seq "
                "(pop (~"+op+" $gas $to $value (access _data 0) (access _data 1) _out _outsz))"
                "(get _out)))))");
    }
    // Set up main call
    return subst(main, kwargs, prefix, m);
}
// Transform an access of the form self.bob, self.users[5], etc into
// a storage access
//
// There exist two types of objects: finite objects, and infinite
// objects. Finite objects are packed optimally tightly into storage
// accesses; for example:
//
// data obj[100](a, b[2][4], c)
//
// obj[0].a -> 0
// obj[0].b[0][0] -> 1
// obj[0].b[1][3] -> 8
// obj[45].c -> 459
//
// Infinite objects are accessed by sha3([v1, v2, v3 ... ]), where
// the values are a list of array indices and keyword indices, for
// example:
// data obj[](a, b[2][4], c)
// data obj2[](a, b[][], c)
//
// obj[0].a -> sha3([0, 0, 0])
// obj[5].b[1][3] -> sha3([0, 5, 1, 1, 3])
// obj[45].c -> sha3([0, 45, 2])
// obj2[0].a -> sha3([1, 0, 0])
// obj2[5].b[1][3] -> sha3([1, 5, 1, 1, 3])
// obj2[45].c -> sha3([1, 45, 2])
// Parameters:
//   node      the storage access expression (self.x, self.x[i].y, ...)
//   aux       preprocessing data; aux.storageVars describes the layout
//   mapstyle  true to compute the index as a sha3 hash of the access
//             terms (infinite objects) instead of offset + sum of
//             index*coefficient (finite objects)
//   ref       true to return the storage index expression itself; false
//             to wrap it in an sload
// Returns node unchanged when the accessed path is not a known storage
// variable; lookup-depth mismatches are reported through err().
Node storageTransform(Node node, preprocessAux aux,
bool mapstyle=false, bool ref=false) {
Metadata m = node.metadata;
// Get a list of all of the "access parameters" used in order
// eg. self.users[5].cow[4][m[2]][woof] ->
// [--self, --users, 5, --cow, 4, m[2], woof]
std::vector<Node> hlist = listfyStorageAccess(node);
// For infinite arrays, the terms array will just provide a list
// of indices. For finite arrays, it's a list of index*coefficient
std::vector<Node> terms;
std::string offset = "0";
std::string prefix = "";
// NOTE(review): varPrefix is never read afterwards; the call still
// consumes one unique token
std::string varPrefix = "_temp"+mkUniqueToken()+"_";
// c counts the array indices seen since the last member name
int c = 0;
std::vector<std::string> coefficients;
coefficients.push_back("");
// Start at 1: hlist[0] is the leading --self entry
for (unsigned i = 1; i < hlist.size(); i++) {
// We pre-add the -- flag to parameter-like terms. For example,
// self.users[m] -> [--self, --users, m]
// self.users.m -> [--self, --users, --m]
if (hlist[i].val.substr(0, 2) == "--") {
prefix += hlist[i].val.substr(2) + ".";
// Path so far without the trailing dot
std::string tempPrefix = prefix.substr(0, prefix.size()-1);
if (!aux.storageVars.offsets.count(tempPrefix))
return node;
// The previous member must have received exactly as many indices
// as it has dimensions (coefficients.size() - 1)
if (c < (signed)coefficients.size() - 1)
err("Too few array index lookups", m);
if (c > (signed)coefficients.size() - 1)
err("Too many array index lookups", m);
coefficients = aux.storageVars.coefficients[tempPrefix];
// If the size of an object exceeds 2^176, we make it an infinite
// array
if (decimalGt(coefficients.back(), tt176) && !mapstyle)
return storageTransform(node, aux, true, ref);
offset = decimalAdd(offset, aux.storageVars.offsets[tempPrefix]);
c = 0;
if (mapstyle)
terms.push_back(token(unsignedToDecimal(
aux.storageVars.indices[tempPrefix])));
}
else if (mapstyle) {
// Map style: the raw index becomes one of the hashed terms
terms.push_back(hlist[i]);
c += 1;
}
else {
if (c > (signed)coefficients.size() - 2)
err("Too many array index lookups", m);
// Finite style: scale the index by the coefficient of its dimension
terms.push_back(
astnode("mul",
hlist[i],
token(coefficients[coefficients.size() - 2 - c], m),
m));
c += 1;
}
}
if (aux.storageVars.nonfinal.count(prefix.substr(0, prefix.size()-1)))
err("Storage variable access not deep enough", m);
// Same depth check as inside the loop, for the last member of the path
if (c < (signed)coefficients.size() - 1) {
err("Too few array index lookups", m);
}
if (c > (signed)coefficients.size() - 1) {
err("Too many array index lookups", m);
}
Node o;
if (mapstyle) {
// Write the terms into a freshly allocated buffer, one 32-byte word
// each, and hash that buffer
std::string t = "_temp_"+mkUniqueToken();
std::vector<Node> sub;
for (unsigned i = 0; i < terms.size(); i++)
sub.push_back(asn("mstore",
asn("add",
tkn(utd(i * 32), m),
asn("get", tkn(t+"pos", m), m),
m),
terms[i],
m));
// The sequence evaluates to the buffer variable
sub.push_back(tkn(t+"pos", m));
Node main = asn("with",
tkn(t+"pos", m),
asn("alloc", tkn(utd(terms.size() * 32), m), m),
asn("seq", sub, m),
m);
Node sz = token(utd(terms.size() * 32), m);
o = astnode("~sha3",
main,
sz,
m);
}
else {
// We add up all the index*coefficients
Node out = token(offset, node.metadata);
for (unsigned i = 0; i < terms.size(); i++) {
std::vector<Node> temp;
temp.push_back(out);
temp.push_back(terms[i]);
out = astnode("add", temp, node.metadata);
}
o = out;
}
if (ref) return o;
else return astnode("sload", o, node.metadata);
}
// Basic rewrite rule execution: looks up the rules registered under the
// node's head symbol, applies the first one whose pattern matches and
// then keeps rewriting the result until no rule matches any more.
// Returns the final node and whether at least one rule was applied.
std::pair<Node, bool> rulesTransform(Node node, rewriteRuleSet macros) {
    // Prefix for temporaries introduced by the substitution
    std::string prefix = "_temp_"+mkUniqueToken();
    if (macros.ruleLists.count(node.val) == 0)
        return std::pair<Node, bool>(node, false);
    std::vector<rewriteRule> candidates = macros.ruleLists[node.val];
    for (std::vector<rewriteRule>::iterator rule = candidates.begin();
         rule != candidates.end(); ++rule) {
        matchResult mr = match(rule->pattern, node);
        if (!mr.success)
            continue;
        Node rewritten = subst(rule->substitution, mr.map, prefix, node.metadata);
        // The result may itself be the left-hand side of another rule
        std::pair<Node, bool> result = rulesTransform(rewritten, macros);
        result.second = true;
        return result;
    }
    return std::pair<Node, bool>(node, false);
}
// Renames an AST node whose head symbol has an entry in synonymMap.
// Tokens and unknown heads are returned as they are. The bool tells
// whether a rename took place.
std::pair<Node, bool> synonymTransform(Node node) {
    if (node.type != ASTNODE)
        return std::pair<Node, bool>(node, false);
    std::map<std::string, std::string>::iterator hit = synonymMap.find(node.val);
    if (hit == synonymMap.end())
        return std::pair<Node, bool>(node, false);
    node.val = hit->second;
    return std::pair<Node, bool>(node, true);
}
// Compiled form of the built-in macros table; filled by parseMacros()
rewriteRuleSet nodeMacros;
// Compiled form of the setters table (+=, -=, ...); filled by parseMacros()
rewriteRuleSet setterMacros;
// True for the node names whose contents the rewriting passes must leave
// alone: macro definitions, comments and nested "outer" programs.
bool dontDescend(std::string s) {
    static const char* const opaque[] = { "macro", "comment", "outer" };
    for (unsigned k = 0; k < sizeof(opaque) / sizeof(opaque[0]); k++) {
        if (s == opaque[k])
            return true;
    }
    return false;
}
// Recursively applies any set of rewrite rules: first to the node
// itself, then to each argument of the (possibly rewritten) node.
// Subtrees named macro/comment/outer are skipped. The bool reports
// whether anything in the subtree was rewritten.
std::pair<Node, bool> apply_rules_iter(preprocessResult pr, rewriteRuleSet rules) {
    Node node = pr.first;
    if (dontDescend(node.val))
        return std::pair<Node, bool>(node, false);
    std::pair<Node, bool> top = rulesTransform(node, rules);
    node = top.first;
    bool changed = top.second;
    if (node.type != ASTNODE)
        return std::pair<Node, bool>(node, changed);
    for (unsigned k = 0; k < node.args.size(); k++) {
        std::pair<Node, bool> sub =
            apply_rules_iter(preprocessResult(node.args[k], pr.second), rules);
        node.args[k] = sub.first;
        if (sub.second)
            changed = true;
    }
    return std::pair<Node, bool>(node, changed);
}
// Recursively applies rewrite rules and other primary transformations
// Returns the transformed node together with a flag telling whether
// anything changed; apply_rules() repeats the pass until the flag is
// false.
std::pair<Node, bool> mainTransform(preprocessResult pr) {
bool changed = false;
Node node = pr.first;
// Anything inside "outer" should be treated as a separate program
// and thus recursively compiled in its entirety
if (node.val == "outer") {
node = apply_rules(preprocess(node.args[0]));
changed = true;
}
// Don't descend into comments, macros and inner scopes
if (dontDescend(node.val))
return std::pair<Node, bool>(node, changed);
// Special storage transformation
// Reading a storage variable
if (isNodeStorageVariable(node)) {
node = storageTransform(node, pr.second);
changed = true;
}
// (ref <storage variable>) yields the storage index instead of the value
if (node.val == "ref" && isNodeStorageVariable(node.args[0])) {
node = storageTransform(node.args[0], pr.second, false, true);
changed = true;
}
// Assigning to a storage variable: turn the sload of the target into an
// sstore of the assigned value
if (node.val == "=" && isNodeStorageVariable(node.args[0])) {
Node t = storageTransform(node.args[0], pr.second);
if (t.val == "sload") {
std::vector<Node> o;
o.push_back(t.args[0]);
o.push_back(node.args[1]);
node = astnode("sstore", o, node.metadata);
}
changed = true;
}
// Main code
// Rename synonyms first so the macro patterns below see canonical names
std::pair<Node, bool> pnb = synonymTransform(node);
node = pnb.first;
changed = changed || pnb.second;
// std::cerr << priority << " " << macros.size() << "\n";
std::pair<Node, bool> pnc = rulesTransform(node, nodeMacros);
node = pnc.first;
changed = changed || pnc.second;
// Special transformations
if (node.val == "array_lit") {
node = array_lit_transform(node);
changed = true;
}
if (node.val == "fun" && node.args[0].val == ".") {
node = dotTransform(node, pr.second);
changed = true;
}
if (node.val == "text") {
node = string_transform(node);
changed = true;
}
if (node.type == ASTNODE) {
// i is the index of the first argument that is processed recursively
unsigned i = 0;
// Arg 0 of all of these is a variable, so should not be changed
if (node.val == "set" || node.val == "ref"
|| node.val == "get" || node.val == "with") {
// Mark the variable name with a leading ' (only once)
if (node.args[0].type == TOKEN &&
node.args[0].val.size() > 0 && node.args[0].val[0] != '\'') {
node.args[0].val = "'" + node.args[0].val;
changed = true;
}
i = 1;
}
// Convert arglen(x) to '_len_x
else if (node.val == "arglen") {
node.val = "get";
node.args[0].val = "'_len_" + node.args[0].val;
i = 1;
changed = true;
}
// Recursively process children
for (; i < node.args.size(); i++) {
std::pair<Node, bool> r =
mainTransform(preprocessResult(node.args[i], pr.second));
node.args[i] = r.first;
changed = changed || r.second;
}
}
// Add leading ' to variable names, and wrap them inside get
else if (node.type == TOKEN && !isNumberLike(node)) {
// Names already starting with ' or $ are left alone
if (node.val.size() && node.val[0] != '\'' && node.val[0] != '$') {
// NOTE(review): the inner token is created without metadata, unlike
// the surrounding get node -- confirm this is intended
Node n = astnode("get", tkn("'"+node.val), node.metadata);
node = n;
changed = true;
}
}
// Convert all numbers to normalized form
else if (node.type == TOKEN && isNumberLike(node) && !isDecimal(node.val)) {
node.val = strToNumeric(node.val);
changed = true;
}
return std::pair<Node, bool>(node, changed);
}
// Do some preprocessing to convert all of our macro lists into compiled
// forms that can then be reused. Each table is read up to its
// "---END---" row (and never beyond 9999 rows).
void parseMacros() {
    // Built-in macros: both columns are LLL source text
    for (int i = 0; i < 9999 && macros[i][0] != "---END---"; i++) {
        nodeMacros.addRule(rewriteRule(
            parseLLL(macros[i][0]),
            parseLLL(macros[i][1])
        ));
    }
    // Setters: (OP= $x $y) is rewritten to (= $x (OP $x $y))
    for (int i = 0; i < 9999 && setters[i][0] != "---END---"; i++) {
        setterMacros.addRule(rewriteRule(
            asn(setters[i][0], tkn("$x"), tkn("$y")),
            asn("=", tkn("$x"), asn(setters[i][1], tkn("$x"), tkn("$y")))
        ));
    }
    // Synonyms: plain name -> name lookup
    for (int i = 0; i < 9999 && synonyms[i][0] != "---END---"; i++)
        synonymMap[synonyms[i][0]] = synonyms[i][1];
}
// Runs every rewriting stage on the tree, each one repeated until it
// reports no further change:
//   1. the user-defined macro sets, in ascending key order
//   2. the setter macros (+=, -=, ...)
//   3. the main transformation (synonyms, built-in macros, storage, ...)
Node apply_rules(preprocessResult pr) {
    // If the rewrite rules have not yet been parsed, parse them
    if (!nodeMacros.ruleLists.size()) parseMacros();
    std::pair<Node, bool> r;
    // Iterate over macros by priority list
    for (std::map<int, rewriteRuleSet >::iterator it = pr.second.customMacros.begin();
         it != pr.second.customMacros.end(); it++) {
        do {
            r = apply_rules_iter(pr, it->second);
            pr.first = r.first;
        } while (r.second);
    }
    // Apply setter macros
    do {
        r = apply_rules_iter(pr, setterMacros);
        pr.first = r.first;
    } while (r.second);
    // Apply all other macros
    do {
        r = mainTransform(pr);
        pr.first = r.first;
    } while (r.second);
    return pr.first;
}
// Pre-validation, run before any rewriting. For AST nodes whose name is
// listed in validFunctions the argument count is checked against the
// listed minimum and maximum; tokens must be non-empty and must not start
// with an underscore. Problems are reported through err(). The input is
// returned unmodified.
Node validate(Node inp) {
    Metadata m = inp.metadata;
    if (inp.type == ASTNODE) {
        for (int row = 0; validFunctions[row][0] != "---END---"; row++) {
            if (inp.val != validFunctions[row][0])
                continue;
            // Counts are compared as decimal strings
            std::string argc = unsignedToDecimal(inp.args.size());
            if (decimalGt(validFunctions[row][1], argc))
                err("Too few arguments for "+inp.val, inp.metadata);
            if (decimalGt(argc, validFunctions[row][2]))
                err("Too many arguments for "+inp.val, inp.metadata);
        }
    }
    else if (inp.type == TOKEN) {
        if (!inp.val.size()) err("??? empty token", m);
        if (inp.val[0] == '_') err("Variables cannot start with _", m);
    }
    for (unsigned k = 0; k < inp.args.size(); k++)
        validate(inp.args[k]);
    return inp;
}
// Validation run after rewriting. Strips the leading ~ from names and
// checks that every remaining AST node is either an opcode called with
// exactly its number of inputs or a valid LLL function of that arity.
Node postValidate(Node inp) {
    // This allows people to use ~x as a way of having functions with the same
    // name and arity as macros; the idea is that ~x is a "final" form, and
    // should not be remacroed, but it is converted back at the end
    if (!inp.val.empty() && inp.val[0] == '~')
        inp.val = inp.val.substr(1);
    if (inp.type != ASTNODE)
        return inp;
    if (inp.val == ".")
        err("Invalid object member (ie. a foo.bar not mapped to anything)",
            inp.metadata);
    else if (opcode(inp.val) >= 0) {
        const int supplied = static_cast<int>(inp.args.size());
        if (supplied < opinputs(inp.val))
            err("Too few arguments for "+inp.val, inp.metadata);
        if (supplied > opinputs(inp.val))
            err("Too many arguments for "+inp.val, inp.metadata);
    }
    else if (!isValidLLLFunc(inp.val, inp.args.size()))
        err ("Invalid argument count or LLL function: "+printSimple(inp), inp.metadata);
    for (unsigned k = 0; k < inp.args.size(); k++)
        inp.args[k] = postValidate(inp.args[k]);
    return inp;
}
// Rewrites a code fragment: validate, apply the rules with empty
// auxiliary data (the preprocess step is not run), optimize, post-validate.
Node rewriteChunk(Node inp) {
    Node checked = validate(inp);
    Node rewritten = apply_rules(preprocessResult(checked, preprocessAux()));
    return postValidate(optimize(rewritten));
}
// Flatten nested sequence into flat sequence: a seq that is a direct
// argument of another seq is replaced by its own (already flattened)
// arguments. All other nodes are rebuilt with flattened arguments;
// tokens are returned as they are.
Node flattenSeq(Node inp) {
    if (inp.type != ASTNODE)
        return inp;
    const bool parentIsSeq = inp.val == "seq";
    std::vector<Node> o;
    for (unsigned k = 0; k < inp.args.size(); k++) {
        Node child = flattenSeq(inp.args[k]);
        const bool childIsSeq =
            inp.args[k].val == "seq" && inp.args[k].type == ASTNODE;
        if (parentIsSeq && childIsSeq)
            o = extend(o, child.args);
        else
            o.push_back(child);
    }
    return asn(inp.val, o, inp.metadata);
}
// Full rewriting pipeline for a program: flatten nested sequences,
// preprocess, apply the rewrite rules, optimize and post-validate.
Node rewrite(Node inp) {
    preprocessResult prepared = preprocess(flattenSeq(inp));
    Node rewritten = apply_rules(prepared);
    return postValidate(optimize(rewritten));
}
using namespace std;

16
libserpent/rewriter.h

@ -1,16 +0,0 @@
#ifndef ETHSERP_REWRITER
#define ETHSERP_REWRITER
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
// Applies rewrite rules
Node rewrite(Node inp);
// Applies rewrite rules without adding a wrapper (the preprocess step is skipped)
Node rewriteChunk(Node inp);
#endif

212
libserpent/rewriteutils.cpp

@ -1,212 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include <string>
#include "util.h"
#include "lllparser.h"
#include "bignum.h"
#include "rewriteutils.h"
#include "optimize.h"
// Valid functions and their min and max argument counts. The counts are
// decimal strings because validate() compares them with decimalGt().
// Rows whose maximum is tt256 are effectively unbounded -- presumably
// tt256 is the decimal form of 2^256; confirm in bignum.h.
// Each function name appears exactly once.
std::string validFunctions[][3] = {
    { "if", "2", "3" },
    { "unless", "2", "2" },
    { "while", "2", "2" },
    { "until", "2", "2" },
    { "alloc", "1", "1" },
    { "array", "1", "1" },
    { "call", "2", tt256 },
    { "callcode", "2", tt256 },
    { "create", "1", "4" },
    { "getch", "2", "2" },
    { "setch", "3", "3" },
    { "sha3", "1", "2" },
    { "return", "1", "2" },
    { "inset", "1", "1" },
    { "min", "2", "2" },
    { "max", "2", "2" },
    { "array_lit", "0", tt256 },
    { "seq", "0", tt256 },
    { "log", "1", "6" },
    { "outer", "1", "1" },
    { "set", "2", "2" },
    { "get", "1", "1" },
    { "ref", "1", "1" },
    { "declare", "1", tt256 },
    { "with", "3", "3" },
    { "mcopy", "3", "3" },
    { "unsafe_mcopy", "3", "3" },
    { "save", "3", "3" },
    { "load", "2", "2" },
    { "---END---", "", "" } //Keep this line at the end of the list
};
// Set of the names listed in validFunctions; built on first use
std::map<std::string, bool> vfMap;
// Is a function name one of the valid functions above?
bool isValidFunctionName(std::string f) {
    if (vfMap.empty()) {
        // First call: index the table by function name
        for (int row = 0; validFunctions[row][0] != "---END---"; row++)
            vfMap[validFunctions[row][0]] = true;
    }
    return vfMap.find(f) != vfMap.end();
}
// Debug helper (named cerrStringList to make all prints searchable via
// 'cerr'): writes every string followed by a space, then the suffix and
// a newline, to standard error.
void cerrStringList(std::vector<std::string> s, std::string suffix) {
    for (std::vector<std::string>::const_iterator it = s.begin();
         it != s.end(); ++it)
        std::cerr << *it << " ";
    std::cerr << suffix << "\n";
}
// Converts a storage access into the ordered list of its access
// parameters. Member names (including the root) are tokens prefixed with
// "--"; array indices are kept as the original expressions:
// self.cow -> [--self, --cow]
// self.horse[0] -> [--self, --horse, 0]
// self.a[6][7][self.storage[3]].chicken[9] ->
// [--self, --a, 6, 7, self.storage[3], --chicken, 9]
// A node with a single argument contributes tt256m1 in place of the
// missing index. A node without arguments is reported through err().
std::vector<Node> listfyStorageAccess(Node node) {
    // Collected innermost-last while walking down args[0]; reversed at the end
    std::vector<Node> out;
    Node cur = node;
    while (cur.type != TOKEN) {
        // Check the argument count before touching any argument
        if (cur.args.size() == 0)
            err("Error parsing storage variable statement", node.metadata);
        // The right-hand side of a member access is a name, not an index
        if (cur.val == "." && cur.args.size() >= 2)
            cur.args[1].val = "--" + cur.args[1].val;
        if (cur.args.size() == 1)
            out.push_back(token(tt256m1, node.metadata));
        else
            out.push_back(cur.args[1]);
        // Copy first: cur must not be assigned from one of its own members
        Node next = cur.args[0];
        cur = next;
    }
    // Reached the root token of the access chain
    out.push_back(token("--" + cur.val, node.metadata));
    return std::vector<Node>(out.rbegin(), out.rend());
}
// Is the given node something of the form
// self.cow
// self.horse[0]
// self.a[6][7][self.storage[3]].chicken[9]
// i.e. a chain of "." / "access" nodes whose first arguments lead down to
// something named "self"?
bool isNodeStorageVariable(Node node) {
    const Node* cur = &node;
    for (;;) {
        if (cur->type == TOKEN || cur->args.size() == 0)
            return false;
        if (cur->val != "." && cur->val != "access")
            return false;
        if (cur->args[0].val == "self")
            return true;
        // Keep following the left-hand side of the chain
        cur = &cur->args[0];
    }
}
// Main pattern matching routine, for those patterns that can be expressed
// using our standard mini-language above
//
// Returns two values. First, a boolean to determine whether the node matches
// the pattern, second, if the node does match then a map mapping variables
// in the pattern to nodes
//
// A token pattern matches an identical token; a token pattern starting
// with $ or @ matches anything and binds it under the rest of its name.
// An AST pattern matches a node with the same name and argument count
// whose arguments all match.
matchResult match(Node p, Node n) {
    matchResult result;
    result.success = false;
    if (p.type == TOKEN) {
        if (p.val == n.val && n.type == TOKEN) {
            result.success = true;
        }
        else if (p.val[0] == '$' || p.val[0] == '@') {
            result.success = true;
            result.map[p.val.substr(1)] = n;
        }
        return result;
    }
    if (n.type==TOKEN || p.val!=n.val || p.args.size()!=n.args.size())
        return result;
    for (unsigned k = 0; k < p.args.size(); k++) {
        matchResult sub = match(p.args[k], n.args[k]);
        if (!sub.success) {
            result.success = false;
            return result;
        }
        // Merge the bindings found in this argument
        for (std::map<std::string, Node>::iterator it = sub.map.begin();
             it != sub.map.end(); it++)
            result.map[it->first] = it->second;
    }
    result.success = true;
    return result;
}
// Fills in the pattern with a dictionary mapping variable names to
// nodes (these dicts are generated by match). Match and subst together
// create a full pattern-matching engine.
//
// A $name token found in dict is replaced by the bound node; a $name
// token not found in dict becomes the token varflag+name, i.e. a fresh
// variable. Other tokens are returned as they are, AST nodes are rebuilt
// from their substituted arguments with metadata m.
Node subst(Node pattern,
           std::map<std::string, Node> dict,
           std::string varflag,
           Metadata m) {
    // Patterns without a source position inherit the caller's metadata
    if (pattern.metadata.ln == -1)
        pattern.metadata = m;
    // Substitute recursively for ASTs
    if (pattern.type != TOKEN) {
        std::vector<Node> filled;
        for (unsigned k = 0; k < pattern.args.size(); k++)
            filled.push_back(subst(pattern.args[k], dict, varflag, m));
        return asn(pattern.val, filled, m);
    }
    // Other tokens are untouched
    if (pattern.val[0] != '$')
        return pattern;
    // Swap out patterns at the token level
    const std::string name = pattern.val.substr(1);
    std::map<std::string, Node>::iterator bound = dict.find(name);
    if (bound != dict.end())
        return bound->second;
    return token(varflag + name, m);
}
// Transforms a sequence containing two-argument with statements
// into a statement containing those statements in nested form
//
// The arguments are walked from last to first. Each two-argument
// (with var val) absorbs everything that follows it in the sequence:
//   (seq a (with x 1) b c)  ->  (seq a (with x 1 (seq b c)))
// Statements before the first such with stay at the top level.
Node withTransform (Node source) {
    // "--" marks "no nested with has been built yet"
    Node o = token("--");
    Metadata m = source.metadata;
    // Statements seen since the last with, in reverse source order
    std::vector<Node> args;
    for (int i = source.args.size() - 1; i >= 0; i--) {
        Node a = source.args[i];
        if (a.val == "with" && a.args.size() == 2) {
            // Restore source order of the statements that follow this with
            std::vector<Node> flipargs;
            for (int j = args.size() - 1; j >= 0; j--)
                flipargs.push_back(args[j]);
            // ...followed by the with that was built from later statements
            if (o.val != "--")
                flipargs.push_back(o);
            o = asn("with", a.args[0], a.args[1], asn("seq", flipargs, m), m);
            args = std::vector<Node>();
        }
        else {
            args.push_back(a);
        }
    }
    // Whatever precedes the first with forms the outer sequence
    std::vector<Node> flipargs;
    for (int j = args.size() - 1; j >= 0; j--)
        flipargs.push_back(args[j]);
    if (o.val != "--")
        flipargs.push_back(o);
    return asn("seq", flipargs, m);
}

76
libserpent/rewriteutils.h

@ -1,76 +0,0 @@
#ifndef ETHSERP_REWRITEUTILS
#define ETHSERP_REWRITEUTILS
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
// Valid functions and their min and max argument counts
extern std::string validFunctions[][3];
extern std::map<std::string, bool> vfMap;
bool isValidFunctionName(std::string f);
// Converts deep array access into ordered list of the arguments
// along the descent
std::vector<Node> listfyStorageAccess(Node node);
// Cool function for debug purposes (named cerrStringList to make
// all prints searchable via 'cerr')
void cerrStringList(std::vector<std::string> s, std::string suffix="");
// Is the given node something of the form
// self.cow
// self.horse[0]
// self.a[6][7][self.storage[3]].chicken[9]
bool isNodeStorageVariable(Node node);
// Applies rewrite rules without adding a wrapper (the preprocess step is skipped)
Node rewriteChunk(Node inp);
// Match result storing object
struct matchResult {
// True when the node matched the pattern. There is no constructor, so
// the flag is indeterminate until assigned; match() sets it first thing.
bool success;
// Pattern variable name (without the leading $ or @) -> matched node
std::map<std::string, Node> map;
};
// Match node to pattern
matchResult match(Node p, Node n);
// Substitute node using pattern
Node subst(Node pattern,
std::map<std::string, Node> dict,
std::string varflag,
Metadata m);
Node withTransform(Node source);
// A single rewrite rule: nodes matching `pattern` are replaced by
// `substitution` with the pattern variables filled in.
class rewriteRule {
    public:
        rewriteRule(Node p, Node s)
            : pattern(p), substitution(s) {}
        Node pattern;
        Node substitution;
};
// A collection of rewrite rules, grouped by the head symbol of their
// pattern. Within a group the rules keep the order in which they were
// added.
class rewriteRuleSet {
    public:
        // Starts out with no rules
        rewriteRuleSet() {}
        // Appends r to the group for its pattern's head symbol, creating
        // the group on first use
        void addRule(rewriteRule r) {
            ruleLists[r.pattern.val].push_back(r);
        }
        std::map<std::string, std::vector<rewriteRule> > ruleLists;
};
#endif

115
libserpent/tokenize.cpp

@ -1,115 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
// These appear as independent tokens even if inside a stream of symbols:
// the two comment markers and the three kinds of brackets.
const std::string atoms[] = {
    "#", "//",
    "(", ")",
    "[", "]",
    "{", "}"
};
// Number of entries in atoms
const int numAtoms = sizeof(atoms) / sizeof(atoms[0]);
// Is the char alphanumeric, a space, a bracket, a quote, a symbol?
// Letters, digits and the characters ~ _ $ @ count as ALPHANUM; anything
// not covered by another class is SYMB.
int chartype(char c) {
    if ((c >= '0' && c <= '9')
            || (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z'))
        return ALPHANUM;
    switch (c) {
        case '~': case '_': case '$': case '@':
            return ALPHANUM;
        case '\t': case ' ': case '\n': case '\r':
            return SPACE;
        case '(': case ')': case '[': case ']': case '{': case '}':
            return BRACK;
        case '"':
            return DQUOTE;
        case '\'':
            return SQUOTE;
        default:
            return SYMB;
    }
}
// Splits source text into a flat list of token nodes.
// "y = f(45,124)/3" -> [ "y", "f", "(", "45", ",", "124", ")", "/", "3"]
//   inp:      text to tokenize (a trailing space is appended so the last
//             token is flushed by the type-boundary logic)
//   metadata: copied into every emitted token; ch is reset to 0 here and
//             ln/ch are advanced as the scan proceeds
//   lispMode: when true a single quote is treated as an ALPHANUM character
//             instead of opening a quoted string
// Quoted strings are emitted as one token that includes both quote
// characters.
std::vector<Node> tokenize(std::string inp, Metadata metadata, bool lispMode) {
// Character class of the text currently accumulated in cur
int curtype = SPACE;
unsigned pos = 0;
int lastNewline = 0;
metadata.ch = 0;
std::string cur;
std::vector<Node> out;
inp += " ";
while (pos < inp.length()) {
int headtype = chartype(inp[pos]);
if (lispMode) {
if (inp[pos] == '\'') headtype = ALPHANUM;
}
// Are we inside a quote?
if (curtype == SQUOTE || curtype == DQUOTE) {
// Close quote
if (headtype == curtype) {
cur += inp[pos];
out.push_back(token(cur, metadata));
cur = "";
metadata.ch = pos - lastNewline;
curtype = SPACE;
pos += 1;
}
// eg. \xc3
// NOTE(review): only lower-case hex digits are looked up; any other
// character makes find() return npos and the appended value is then
// meaningless — confirm whether validation is wanted
else if (inp.length() >= pos + 4 && inp.substr(pos, 2) == "\\x") {
cur += (std::string("0123456789abcdef").find(inp[pos+2]) * 16
+ std::string("0123456789abcdef").find(inp[pos+3]));
pos += 4;
}
// Newline
else if (inp.substr(pos, 2) == "\\n") {
cur += '\n';
pos += 2;
}
// Backslash escape
else if (inp.length() >= pos + 2 && inp[pos] == '\\') {
cur += inp[pos + 1];
pos += 2;
}
// Normal character
else {
cur += inp[pos];
pos += 1;
}
}
else {
// Handle atoms ( '//', '#', brackets )
// If cur ends with an atom, emit whatever precedes it (if anything)
// and then the atom itself as separate tokens
for (int i = 0; i < numAtoms; i++) {
int split = cur.length() - atoms[i].length();
if (split >= 0 && cur.substr(split) == atoms[i]) {
if (split > 0) {
out.push_back(token(cur.substr(0, split), metadata));
}
metadata.ch += split;
out.push_back(token(cur.substr(split), metadata));
metadata.ch = pos - lastNewline;
cur = "";
curtype = SPACE;
}
}
// Special case the minus sign
// (the same split is applied to a trailing '!')
if (cur.length() > 1 && (cur.substr(cur.length() - 1) == "-"
|| cur.substr(cur.length() - 1) == "!")) {
out.push_back(token(cur.substr(0, cur.length() - 1), metadata));
out.push_back(token(cur.substr(cur.length() - 1), metadata));
cur = "";
}
// Boundary between different char types
if (headtype != curtype) {
if (curtype != SPACE && cur != "") {
out.push_back(token(cur, metadata));
}
metadata.ch = pos - lastNewline;
cur = "";
}
cur += inp[pos];
curtype = headtype;
pos += 1;
}
// Line bookkeeping. This runs after pos has been advanced, so it
// inspects the next unread character; on the final iteration pos equals
// inp.length().
// NOTE(review): confirm that updating ln/ch before the newline itself is
// consumed is the intended position accounting
if (inp[pos] == '\n') {
lastNewline = pos;
metadata.ch = 0;
metadata.ln += 1;
}
}
return out;
}

16
libserpent/tokenize.h

@ -1,16 +0,0 @@
// Public interface of the tokenizer
#ifndef ETHSERP_TOKENIZE
#define ETHSERP_TOKENIZE
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
// Returns the character class of c (one of the int constants in util.h)
int chartype(char c);
// Splits inp into token nodes; meta is attached to each token and
// lispMode changes how a single quote is classified
std::vector<Node> tokenize(std::string inp,
Metadata meta=Metadata(),
bool lispMode=false);
#endif

333
libserpent/util.cpp

@ -1,333 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include "util.h"
#include "bignum.h"
#include <fstream>
#include <cerrno>
// Builds a leaf node (type 0) holding a value and its source metadata;
// args is left empty
Node token(std::string val, Metadata met) {
    Node leaf;
    leaf.metadata = met;
    leaf.val = val;
    leaf.type = 0;
    return leaf;
}
// Builds an inner AST node (type 1) named val with the given children
Node astnode(std::string val, std::vector<Node> args, Metadata met) {
    Node inner;
    inner.metadata = met;
    inner.args = args;
    inner.val = val;
    inner.type = 1;
    return inner;
}
// Fixed-arity conveniences: each one gathers its child arguments into a
// vector (in the order given) and forwards to the general constructor
Node astnode(std::string val, Metadata met) {
    return astnode(val, std::vector<Node>(), met);
}
Node astnode(std::string val, Node a, Metadata met) {
    std::vector<Node> children(1, a);
    return astnode(val, children, met);
}
Node astnode(std::string val, Node a, Node b, Metadata met) {
    std::vector<Node> children;
    children.reserve(2);
    children.push_back(a);
    children.push_back(b);
    return astnode(val, children, met);
}
Node astnode(std::string val, Node a, Node b, Node c, Metadata met) {
    std::vector<Node> children;
    children.reserve(3);
    children.push_back(a);
    children.push_back(b);
    children.push_back(c);
    return astnode(val, children, met);
}
Node astnode(std::string val, Node a, Node b, Node c, Node d, Metadata met) {
    std::vector<Node> children;
    children.reserve(4);
    children.push_back(a);
    children.push_back(b);
    children.push_back(c);
    children.push_back(d);
    return astnode(val, children, met);
}
// Print token list
std::string printTokens(std::vector<Node> tokens) {
std::string s = "";
for (unsigned i = 0; i < tokens.size(); i++) {
s += tokens[i].val + " ";
}
return s;
}
// Renders an AST as a one-line s-expression: a token is its bare value,
// any other node is "(val child child ...)"
std::string printSimple(Node ast) {
    if (ast.type == TOKEN) return ast.val;
    std::string rendered = "(" + ast.val;
    for (std::vector<Node>::const_iterator child = ast.args.begin();
         child != ast.args.end(); ++child) {
        rendered += " ";
        rendered += printSimple(*child);
    }
    rendered += ")";
    return rendered;
}
// Counts the TOKEN leaves below (and including) prog; a non-token node
// without children therefore contributes 0
int treeSize(Node prog) {
    if (prog.type == TOKEN) return 1;
    int total = 0;
    for (std::vector<Node>::const_iterator child = prog.args.begin();
         child != prog.args.end(); ++child) {
        total += treeSize(*child);
    }
    return total;
}
// Pretty-prints a lisp AST.
//   ast:           tree to render; a TOKEN is rendered as its bare value
//   printMetadata: when true every non-token node is prefixed with
//                  "file ln ch: " taken from its metadata
// Arguments are packed onto the head line while they are single-line and
// the line stays under 80 characters; the rest go on their own lines,
// indented via indentLines(). Arguments of a "seq" node always go on their
// own lines.
std::string printAST(Node ast, bool printMetadata) {
    if (ast.type == TOKEN) return ast.val;
    std::string o = "(";
    if (printMetadata) {
        o += ast.metadata.file + " ";
        o += unsignedToDecimal(ast.metadata.ln) + " ";
        o += unsignedToDecimal(ast.metadata.ch) + ": ";
    }
    o += ast.val;
    std::vector<std::string> subs;
    for (unsigned i = 0; i < ast.args.size(); i++) {
        subs.push_back(printAST(ast.args[i], printMetadata));
    }
    unsigned k = 0;
    std::string out = " ";
    // Test the node name itself rather than the text built so far: with
    // printMetadata enabled that text carries the metadata prefix and would
    // never equal "(seq"
    const bool isSeq = ast.val == "seq";
    // As many arguments as possible go on the same line as the function,
    // except when seq is used
    while (k < subs.size() && !isSeq) {
        if (subs[k].find("\n") != std::string::npos || (out + subs[k]).length() >= 80) break;
        out += subs[k] + " ";
        k += 1;
    }
    // All remaining arguments go on their own lines
    if (k < subs.size()) {
        o += out + "\n";
        std::vector<std::string> subsSliceK;
        for (unsigned i = k; i < subs.size(); i++) subsSliceK.push_back(subs[i]);
        o += indentLines(joinLines(subsSliceK));
        o += "\n)";
    }
    else {
        // Drop the trailing space left by the packing loop
        o += out.substr(0, out.size() - 1) + ")";
    }
    return o;
}
// Splits s at every '\n'. The separators are dropped and the result always
// has one more element than there are newlines (so "" yields one empty line)
std::vector<std::string> splitLines(std::string s) {
    std::vector<std::string> pieces;
    std::string::size_type lineStart = 0;
    std::string::size_type newlineAt = s.find('\n');
    while (newlineAt != std::string::npos) {
        pieces.push_back(s.substr(lineStart, newlineAt - lineStart));
        lineStart = newlineAt + 1;
        newlineAt = s.find('\n', lineStart);
    }
    // Whatever follows the last newline (possibly nothing)
    pieces.push_back(s.substr(lineStart));
    return pieces;
}
// Inverse of splitLines: joins the lines with a single '\n' between
// consecutive elements (no leading or trailing newline is added)
std::string joinLines(std::vector<std::string> lines) {
    std::string joined;
    for (std::vector<std::string>::size_type i = 0; i < lines.size(); ++i) {
        if (i > 0) joined += "\n";
        joined += lines[i];
    }
    return joined;
}
// Prefixes every line of inp with the indent literal below and re-joins
// the lines with '\n'.
// NOTE(review): the historical comment says "4 spaces" but the literal as it
// appears here is a single space — confirm against upstream
std::string indentLines(std::string inp) {
    std::vector<std::string> lines = splitLines(inp);
    for (std::vector<std::string>::iterator line = lines.begin();
         line != lines.end(); ++line) {
        *line = " " + *line;
    }
    return joinLines(lines);
}
// Binary to numeric: reads inp as a base-256 number, most significant byte
// first, accumulating with the decimal* string-arithmetic helpers.
// An empty input yields "0".
std::string binToNumeric(std::string inp) {
    std::string acc = "0";
    for (std::string::const_iterator it = inp.begin(); it != inp.end(); ++it) {
        // Go through unsigned char so bytes >= 0x80 are not seen as negative
        const unsigned char byte = *it;
        acc = decimalAdd(decimalMul(acc, "256"), unsignedToDecimal(byte));
    }
    return acc;
}
// Converts string to simple numeric format.
// Accepted forms:
//   ""            -> ""
//   "..." / '...' -> the bytes between the quotes read as a base-256 number
//   0x...         -> hexadecimal, upper or lower case digits
//   all digits    -> returned unchanged
// Anything else, including a 0x literal containing a non-hex character,
// yields "" to signal "not a number".
std::string strToNumeric(std::string inp) {
    std::string o = "0";
    if (inp == "") {
        o = "";
    }
    else if ((inp[0] == '"' && inp[inp.length()-1] == '"')
             || (inp[0] == '\'' && inp[inp.length()-1] == '\'')) {
        for (unsigned i = 1; i < inp.length() - 1; i++) {
            o = decimalAdd(decimalMul(o,"256"), unsignedToDecimal((unsigned char)inp[i]));
        }
    }
    else if (inp.substr(0,2) == "0x") {
        // Lower- and upper-case digits side by side: index % 16 is the value
        const std::string hexDigits = "0123456789abcdef0123456789ABCDEF";
        for (unsigned i = 2; i < inp.length(); i++) {
            std::string::size_type idx = hexDigits.find(inp[i]);
            // Test for "not found" before reducing modulo 16: npos % 16 is
            // 15, which would silently read any invalid character as 'f'
            if (idx == std::string::npos) return "";
            o = decimalAdd(decimalMul(o,"16"),
                           unsignedToDecimal(static_cast<unsigned>(idx % 16)));
        }
    }
    else {
        bool isPureNum = true;
        for (unsigned i = 0; i < inp.length(); i++) {
            isPureNum = isPureNum && inp[i] >= '0' && inp[i] <= '9';
        }
        o = isPureNum ? inp : "";
    }
    return o;
}
// True when node is not an ASTNODE and strToNumeric() accepts its value
// (eg. 124, 0xf012c, "george")
bool isNumberLike(Node node) {
    const bool isLeaf = node.type != ASTNODE;
    return isLeaf && strToNumeric(node.val) != "";
}
// True when inp consists solely of the characters '0'..'9'
// (the empty string counts as decimal)
bool isDecimal(std::string inp) {
    return inp.find_first_not_of("0123456789") == std::string::npos;
}
// Returns a token whose value is the numeric form of node.val, or node.val
// itself when strToNumeric() rejects it; metadata is carried over
Node nodeToNumeric(Node node) {
    std::string numeric = strToNumeric(node.val);
    if (numeric == "") return token(node.val, node.metadata);
    return token(numeric, node.metadata);
}
// Normalizes node when it is a number-like token; any other node is
// returned unchanged
Node tryNumberize(Node node) {
    if (node.type != TOKEN) return node;
    if (!isNumberLike(node)) return node;
    return nodeToNumeric(node);
}
//Converts a value to an array of byte number nodes
std::vector<Node> toByteArr(std::string val, Metadata metadata, int minLen) {
std::vector<Node> o;
int L = 0;
while (val != "0" || L < minLen) {
o.push_back(token(decimalMod(val, "256"), metadata));
val = decimalDiv(val, "256");
L++;
}
std::vector<Node> o2;
for (int i = o.size() - 1; i >= 0; i--) o2.push_back(o[i]);
return o2;
}
// Number of unique tokens handed out so far
int counter = 0;
// Makes a unique token: increments the counter and returns its new value
// as text, so the first call yields the text for 1
std::string mkUniqueToken() {
    return unsignedToDecimal(++counter);
}
// Does a file exist? Answers by trying to open it for reading
// (http://stackoverflow.com/questions/12774207)
bool exists(std::string fileName) {
    std::ifstream probe(fileName.c_str());
    const bool opened = probe.good();
    return opened;
}
// Reads a whole file in binary mode and returns its bytes
// (http://stackoverflow.com/questions/2602013).
// Throws the current errno value (an int) when the file cannot be opened.
std::string get_file_contents(std::string filename)
{
    std::ifstream in(filename.c_str(), std::ios::in | std::ios::binary);
    if (!in)
        throw(errno);
    std::string contents;
    // Seek to the end to learn the size, then rewind and read it in one go
    in.seekg(0, std::ios::end);
    contents.resize(in.tellg());
    in.seekg(0, std::ios::beg);
    in.read(&contents[0], contents.size());
    in.close();
    return contents;
}
// Report error: prints a message with file/line/char position to stderr and
// then throws that same message as a std::string. The line is printed as
// met.ln + 1.
void err(std::string errtext, Metadata met) {
    std::string message = "Error (file \"";
    message += met.file;
    message += "\", line ";
    message += unsignedToDecimal(met.ln + 1);
    message += ", char ";
    message += unsignedToDecimal(met.ch);
    message += "): ";
    message += errtext;
    std::cerr << message << std::endl;
    throw(message);
}
// Report warning: prints a message with file/line/char position to stderr
// and returns normally. The line is printed as met.ln + 1.
void warn(std::string errtext, Metadata met) {
    std::string message = "Warning (file \"";
    message += met.file;
    message += "\", line ";
    message += unsignedToDecimal(met.ln + 1);
    message += ", char ";
    message += unsignedToDecimal(met.ch);
    message += "): ";
    message += errtext;
    std::cerr << message << std::endl;
}
// Bin to hex: encodes every byte of inp as two lower-case hex digits
std::string binToHex(std::string inp) {
    const char* const digits = "0123456789abcdef";
    std::string hex;
    hex.reserve(inp.length() * 2);
    for (std::string::const_iterator it = inp.begin(); it != inp.end(); ++it) {
        // Go through unsigned char so bytes >= 0x80 index correctly
        const unsigned char byte = *it;
        hex += digits[byte >> 4];
        hex += digits[byte & 15];
    }
    return hex;
}
// Hex to bin: decodes pairs of hex digits into bytes.
// Both lower- and upper-case digits are accepted. A trailing unpaired
// character is ignored.
// NOTE(review): non-hex characters are not rejected; as before, they yield
// an unspecified byte value. Callers that need validation must do it first.
std::string hexToBin(std::string inp) {
    // Lower- and upper-case digits side by side: index % 16 is the value
    const std::string digits = "0123456789abcdef0123456789ABCDEF";
    std::string o = "";
    for (unsigned i = 0; i+1 < inp.length(); i+=2) {
        std::string::size_type hi = digits.find(inp[i]);
        std::string::size_type lo = digits.find(inp[i+1]);
        // Only fold valid indices; npos is left alone so input that was
        // never valid keeps producing exactly what it produced before
        if (hi != std::string::npos) hi %= 16;
        if (lo != std::string::npos) lo %= 16;
        o += (char)(hi * 16 + lo);
    }
    return o;
}
// Lower to upper: maps the characters 'a'..'z' to 'A'..'Z' and leaves every
// other character untouched
std::string upperCase(std::string inp) {
    std::string upper = inp;
    for (std::string::size_type i = 0; i < upper.length(); ++i) {
        // 'a'..'z' are codes 97..122; the matching capital is 32 lower
        if (upper[i] >= 'a' && upper[i] <= 'z') upper[i] = upper[i] - 32;
    }
    return upper;
}
// Three-int vector: returns {a, b, c}
std::vector<int> triple(int a, int b, int c) {
    std::vector<int> v(3);
    v[0] = a;
    v[1] = b;
    v[2] = c;
    return v;
}
//Extend node vector
std::vector<Node> extend(std::vector<Node> a, std::vector<Node> b) {
for (unsigned i = 0; i < b.size(); i++) a.push_back(b[i]);
return a;
}

137
libserpent/util.h

@ -1,137 +0,0 @@
#ifndef ETHSERP_UTIL
#define ETHSERP_UTIL
#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include <fstream>
#include <cerrno>
// Integer tags shared across the compiler.
// Node kinds, stored in Node::type
const int TOKEN = 0,
ASTNODE = 1,
// Character classes returned by chartype()
SPACE = 2,
BRACK = 3,
SQUOTE = 4,
DQUOTE = 5,
SYMB = 6,
ALPHANUM = 7,
// NOTE(review): the tags below are not used by the tokenizer or the helpers
// in util.cpp; presumably parser token categories — confirm against
// parser.cpp
LPAREN = 8,
RPAREN = 9,
COMMA = 10,
COLON = 11,
UNARY_OP = 12,
BINARY_OP = 13,
COMPOUND = 14,
TOKEN_SPLITTER = 15;
// Stores metadata about each token: where in the source it came from
class Metadata {
    public:
        // Source file name; "main" unless told otherwise
        std::string file;
        // Line number (-1 when unknown)
        int ln;
        // Character position within the line (-1 when unknown)
        int ch;
        // Always starts out false
        // NOTE(review): its meaning is not visible in this header — confirm
        bool fixed;
        Metadata(std::string File="main", int Ln=-1, int Ch=-1)
            : file(File), ln(Ln), ch(Ch), fixed(false) {}
};
std::string mkUniqueToken();
// A node of the token list / syntax tree.
// type can be TOKEN or ASTNODE
// There is no constructor, so a default-constructed Node leaves `type`
// uninitialised; build nodes with token() / astnode() instead.
class Node {
public:
// TOKEN (leaf) or ASTNODE (inner node)
int type;
// Token text for a leaf, node name for an inner node
std::string val;
// Child nodes; token() leaves this empty
std::vector<Node> args;
// Source position this node originated from
Metadata metadata;
};
// Token or value node constructor (leaf node)
Node token(std::string val, Metadata met=Metadata());
// AST node constructor taking the children as a vector
Node astnode(std::string val, std::vector<Node> args, Metadata met=Metadata());
// AST node constructors for zero to four children given individually
Node astnode(std::string val, Metadata met=Metadata());
Node astnode(std::string val, Node a, Metadata met=Metadata());
Node astnode(std::string val, Node a, Node b, Metadata met=Metadata());
Node astnode(std::string val, Node a, Node b, Node c, Metadata met=Metadata());
Node astnode(std::string val, Node a, Node b,
Node c, Node d, Metadata met=Metadata());
// Number of tokens in a tree
int treeSize(Node prog);
// Print token list
std::string printTokens(std::vector<Node> tokens);
// Prints a lisp AST on one line
std::string printSimple(Node ast);
// Pretty-prints a lisp AST
std::string printAST(Node ast, bool printMetadata=false);
// Splits text by line
std::vector<std::string> splitLines(std::string s);
// Inverse of splitLines
std::string joinLines(std::vector<std::string> lines);
// Indent all lines by 4 spaces
std::string indentLines(std::string inp);
// Converts binary to simple numeric format
std::string binToNumeric(std::string inp);
// Converts string to simple numeric format
std::string strToNumeric(std::string inp);
// Does the node contain a number (eg. 124, 0xf012c, "george")
bool isNumberLike(Node node);
//Normalizes number representations
Node nodeToNumeric(Node node);
//If a node is numeric, normalize its representation
Node tryNumberize(Node node);
//Converts a value to an array of byte number nodes
std::vector<Node> toByteArr(std::string val, Metadata metadata, int minLen=1);
//Reads a file
std::string get_file_contents(std::string filename);
//Does a file exist?
bool exists(std::string fileName);
//Report error
void err(std::string errtext, Metadata met);
//Report warning
void warn(std::string errtext, Metadata met);
//Bin to hex
std::string binToHex(std::string inp);
//Hex to bin
std::string hexToBin(std::string inp);
//Lower to upper
std::string upperCase(std::string inp);
//Three-int vector
std::vector<int> triple(int a, int b, int c);
//Extend node vector
std::vector<Node> extend(std::vector<Node> a, std::vector<Node> b);
// Is the number decimal?
bool isDecimal(std::string inp);
// Short aliases. These are plain macros, so they are visible in every file
// that includes this header.
// Node constructors
#define asn astnode
#define tkn token
// Frequently used map types keyed by string
#define msi std::map<std::string, int>
#define msn std::map<std::string, Node>
#define mss std::map<std::string, std::string>
#endif

3
libweb3jsonrpc/WebThreeStubServerBase.cpp

@ -41,9 +41,6 @@
#include <libethcore/CommonJS.h>
#include <libwhisper/Message.h>
#include <libwhisper/WhisperHost.h>
#if ETH_SERPENT || !ETH_TRUE
#include <libserpent/funcs.h>
#endif
#include "AccountHolder.h"
#include "JsonHelper.h"
using namespace std;

12
pullSerpent.sh

@ -1,12 +0,0 @@
#!/bin/bash
# Refreshes the vendored serpent sources from a sibling checkout at
# ../serpent: updates that checkout (keeping local changes via stash),
# copies the library files into libserpent/ and the command-line front end
# into sc/, then rewrites the funcs.h include in sc/ to the in-tree path.
opwd="$PWD"
# Abort if the checkout is missing; otherwise the git and cp commands below
# would run in the current repository
cd ../serpent || exit 1
git stash
git pull
git stash pop
cp bignum.* compiler.* funcs.* lllparser.* opcodes.h parser.* rewriter.* tokenize.* util.* ../cpp-ethereum/libserpent/
cp cmdline.* "$opwd/sc/"
cd "$opwd" || exit 1
perl -i -p -e 's:include "funcs.h":include <libserpent/funcs.h>:gc' sc/*

13
pysol/MANIFEST.in

@ -1,13 +0,0 @@
include pysol/*.cpp
include *.py
include libdevcore/*cpp
include libdevcore/*h
include libdevcrypto/*cpp
include libdevcrypto/*h
include libethcore/*cpp
include libethcore/*h
include libsolidity/*cpp
include libsolidity/*h
include libevmcore/*cpp
include libevmcore/*h
include pysol/README.md

0
pysol/README.md

115
pysol/pysolidity.cpp

@ -1,115 +0,0 @@
#include <Python.h>
#include "structmember.h"
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <vector>
#include "../libdevcore/CommonData.h"
#include <libsolidity/Compiler.h>
#include <libsolidity/CompilerStack.h>
#include <libsolidity/CompilerUtils.h>
#include <libsolidity/SourceReferenceFormatter.h>
// Compiles src with a fresh CompilerStack and returns the resulting bytes
// packed into a std::string. On a dev::Exception the formatted error text
// is thrown as a std::string.
std::string compile(std::string src) {
    dev::solidity::CompilerStack compiler;
    try
    {
        std::vector<uint8_t> bytecode = compiler.compile(src, false);
        std::string packed(bytecode.begin(), bytecode.end());
        return packed;
    }
    catch (dev::Exception const& exception)
    {
        std::ostringstream report;
        dev::solidity::SourceReferenceFormatter::printExceptionInformation(report, exception, "Error", compiler);
        throw(report.str());
    }
}
// Compiles src and returns compiler.getInterface(""). On a dev::Exception
// the formatted error text is thrown as a std::string.
std::string mk_full_signature(std::string src) {
    dev::solidity::CompilerStack compiler;
    try
    {
        compiler.compile(src);
        return compiler.getInterface("");
    }
    catch (dev::Exception const& exception)
    {
        std::ostringstream report;
        dev::solidity::SourceReferenceFormatter::printExceptionInformation(report, exception, "Error", compiler);
        throw(report.str());
    }
}
// Returns src concatenated with itself
std::string bob(std::string src) {
    std::string doubled = src;
    doubled += src;
    return doubled;
}
// Defines a static Python-callable wrapper called `name`.
//   FROM   macro that unpacks `args` into a local variable (here named med)
//          and returns NULL from the wrapper on a parse failure
//   method C++ function applied to that variable
//   TO     function converting the result into a PyObject*
// A std::string thrown by `method` is turned into a Python Exception and
// NULL is returned; exceptions of any other type are not caught here.
#define PYMETHOD(name, FROM, method, TO) \
static PyObject * name(PyObject *, PyObject *args) { \
try { \
FROM(med) \
return TO(method(med)); \
} \
catch (std::string e) { \
PyErr_SetString(PyExc_Exception, e.c_str()); \
return NULL; \
} \
}
// Argument-unpacking prologue for PYMETHOD: parses a single string
// argument (with explicit length, so embedded NUL bytes survive) out of
// `args` and declares it as std::string v. Returns NULL from the enclosing
// function when parsing fails.
// The last line deliberately carries no line continuation, so the macro
// ends here and does not swallow whatever source line follows it.
#define FROMSTR(v) \
    const char *command; \
    int len; \
    if (!PyArg_ParseTuple(args, "s#", &command, &len)) \
        return NULL; \
    std::string v = std::string(command, len);
// Convert string into python wrapper form
// The length is passed explicitly so embedded NUL bytes are preserved.
// Without PY_SSIZE_T_CLEAN the "s#" unit reads the length as an int, so the
// size_t from length() is converted before it goes through the varargs call.
PyObject* pyifyString(std::string s) {
    return Py_BuildValue("s#", s.c_str(), static_cast<int>(s.length()));
}
// Convert integer into python wrapper form
// Uses the unsigned format unit "I": the signed "i" unit would turn values
// above INT_MAX into negative Python integers.
PyObject* pyifyInteger(unsigned int i) {
    return Py_BuildValue("I", i);
}
// Convert pyobject int into normal form
// Throws a string literal (const char*) when o cannot be parsed as an int
int cppifyInt(PyObject* o) {
    int value;
    const bool parsed = PyArg_Parse(o, "i", &value) != 0;
    if (!parsed)
        throw("Argument should be integer");
    return value;
}
// Convert pyobject string into normal form
// Throws a string literal (const char*) when o cannot be parsed as a string.
// The result is built from a NUL-terminated pointer, so it stops at the
// first NUL byte.
std::string cppifyString(PyObject* o) {
    const char *text;
    const bool parsed = PyArg_Parse(o, "s", &text) != 0;
    if (!parsed)
        throw("Argument should be string");
    std::string converted(text);
    return converted;
}
// Passes the first character of i to dev::fromHex and returns the result.
// i must not be empty.
int fh(std::string i) {
    const char first = i[0];
    return dev::fromHex(first);
}
// Python-callable wrappers generated from the C++ functions above: each
// takes one string argument and returns a string
PYMETHOD(ps_compile, FROMSTR, compile, pyifyString)
PYMETHOD(ps_mk_full_signature, FROMSTR, mk_full_signature, pyifyString)
// Method table of the module: Python name, wrapper, calling convention and
// docstring for each exported function, terminated by an all-NULL entry
static PyMethodDef PyextMethods[] = {
{"compile", ps_compile, METH_VARARGS,
"Compile code."},
{"mk_full_signature", ps_mk_full_signature, METH_VARARGS,
"Get the signature of a piece of code."},
{NULL, NULL, 0, NULL} /* Sentinel */
};
// Module initialisation entry point: registers the functions listed in
// PyextMethods under the module name "solidity"
PyMODINIT_FUNC initsolidity(void)
{
Py_InitModule( "solidity", PyextMethods );
}

41
pysol/setup.py

@ -1,41 +0,0 @@
import os
# The source paths passed to setup() are written relative to the parent
# directory (e.g. 'pysol/pysolidity.cpp'), so build from there
os.chdir('..')
from setuptools import setup, Extension
from distutils.sysconfig import get_config_vars
# Drop -Wstrict-prototypes from the default compiler options
# (presumably because the flag is not meaningful for C++ sources).
# get_config_vars() yields None for a variable the platform does not
# define, so fall back to an empty option string in that case.
(opt,) = get_config_vars('OPT')
os.environ['OPT'] = " ".join(
    flag for flag in (opt or '').split() if flag != '-Wstrict-prototypes'
)
# Package definition: a single C++ extension module named 'solidity' built
# from the listed library sources plus the wrapper in pysol/pysolidity.cpp
setup(
# Name of this package
name="ethereum-solidity",
# Package version
version='1.8.0',
description='Solidity compiler python wrapper',
maintainer='Vitalik Buterin',
maintainer_email='v@buterin.com',
license='WTFPL',
url='http://www.ethereum.org/',
# Describes how to build the actual extension module from C source files.
ext_modules=[
Extension(
'solidity', # Python name of the module
sources= ['libdevcore/Common.cpp', 'libdevcore/CommonData.cpp', 'libdevcore/CommonIO.cpp', 'libdevcore/FixedHash.cpp', 'libdevcore/Guards.cpp', 'libdevcore/Log.cpp', 'libdevcore/RangeMask.cpp', 'libdevcore/RLP.cpp', 'libdevcore/Worker.cpp', 'libdevcrypto/AES.cpp', 'libdevcrypto/Common.cpp', 'libdevcrypto/CryptoPP.cpp', 'libdevcrypto/ECDHE.cpp', 'libdevcrypto/FileSystem.cpp', 'libdevcrypto/MemoryDB.cpp', 'libdevcrypto/OverlayDB.cpp', 'libdevcrypto/SHA3.cpp', 'libdevcrypto/TrieCommon.cpp', 'libdevcrypto/TrieDB.cpp', 'libethcore/CommonEth.cpp', 'libethcore/CommonJS.cpp', 'libethcore/Exceptions.cpp', 'libsolidity/AST.cpp', 'libsolidity/ASTJsonConverter.cpp', 'libsolidity/ASTPrinter.cpp', 'libsolidity/CompilerContext.cpp', 'libsolidity/Compiler.cpp', 'libsolidity/CompilerStack.cpp', 'libsolidity/CompilerUtils.cpp', 'libsolidity/DeclarationContainer.cpp', 'libsolidity/ExpressionCompiler.cpp', 'libsolidity/GlobalContext.cpp', 'libsolidity/InterfaceHandler.cpp', 'libsolidity/NameAndTypeResolver.cpp', 'libsolidity/Parser.cpp', 'libsolidity/Scanner.cpp', 'libsolidity/SourceReferenceFormatter.cpp', 'libsolidity/Token.cpp', 'libsolidity/Types.cpp', 'libevmcore/Assembly.cpp', 'libevmcore/Instruction.cpp', 'pysol/pysolidity.cpp'],
# Libraries the extension is linked against
libraries=['boost_python', 'boost_filesystem', 'boost_chrono', 'boost_thread', 'cryptopp', 'leveldb', 'jsoncpp'],
include_dirs=['/usr/include/boost', '..', '../..', '.'],
extra_compile_args=['--std=c++11', '-Wno-unknown-pragmas']
)],
# No pure-Python modules, scripts or entry points are shipped
py_modules=[
],
scripts=[
],
entry_points={
}
# NOTE(review): the trailing comma below turns this statement into a
# one-element tuple expression; harmless, but probably unintended
),

19
sc/CMakeLists.txt

@ -1,19 +0,0 @@
# Builds and installs the `sc` command-line executable from every source
# file found in this directory
cmake_policy(SET CMP0015 NEW)
aux_source_directory(. SRC_LIST)
# Put the parent directory first on the include path
include_directories(BEFORE ..)
set(EXECUTABLE sc)
add_executable(${EXECUTABLE} ${SRC_LIST})
# The serpent library is linked for every compiler except MSVC
if (NOT ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC"))
target_link_libraries(${EXECUTABLE} serpent)
endif()
target_link_libraries(${EXECUTABLE} lll)
target_link_libraries(${EXECUTABLE} evmcore)
target_link_libraries(${EXECUTABLE} devcore)
install( TARGETS ${EXECUTABLE} DESTINATION bin )

129
sc/cmdline.cpp

@ -1,129 +0,0 @@
#include <stdio.h>
#include <string>
#include <iostream>
#include <vector>
#include <map>
#include <libserpent/funcs.h>
// Command-line front end for the serpent compiler.
//
//   sc command input [secondInput]
//   sc -s command [secondInput]      (input is read from stdin)
//
// `command` selects the compiler stage to run on `input`; the result is
// written to stdout. For the commands that print an AST, a non-empty
// secondInput turns on metadata printing; for "biject" it is the code
// position to look up.
//
// Note that the parameter names are swapped relative to convention:
// `argv` is the argument count and `argc` the argument vector.
//
// Returns 0 on success (and for the usage/help paths), 1 when required
// arguments are missing or the biject position is out of range.
int main(int argv, char** argc) {
    if (argv == 1) {
        std::cerr << "Must provide a command and arguments! Try parse, rewrite, compile, assemble\n";
        return 0;
    }
    if (argv == 2 && (std::string(argc[1]) == "--help" || std::string(argc[1]) == "-h" )) {
        std::cout << argc[1] << "\n";
        std::cout << "serpent command input\n";
        std::cout << "where input -s for from stdin, a file, or interpreted as serpent code if does not exist as file.";
        std::cout << "where command: \n";
        std::cout << " parse: Just parses and returns s-expression code.\n";
        std::cout << " rewrite: Parse, use rewrite rules print s-expressions of result.\n";
        std::cout << " compile: Return resulting compiled EVM code in hex.\n";
        std::cout << " assemble: Return result from step before compilation.\n";
        return 0;
    }
    // Every remaining form needs at least one argument after the first;
    // without this check argc[2] below would be a null pointer
    if (argv == 2) {
        std::cerr << "Not enough arguments for serpent cmdline\n";
        return 1;
    }
    std::string command = argc[1];
    std::string input;
    std::string secondInput;
    if (command == "-s") {
        // Input comes from stdin; the command follows the flag
        command = argc[2];
        input = "";
        std::string line;
        while (std::getline(std::cin, line)) {
            input += line + "\n";
        }
        secondInput = argv == 3 ? "" : argc[3];
    }
    else {
        input = argc[2];
        secondInput = argv == 3 ? "" : argc[3];
    }
    bool haveSec = secondInput.length() > 0;
    if (command == "parse" || command == "parse_serpent") {
        std::cout << printAST(parseSerpent(input), haveSec) << "\n";
    }
    else if (command == "rewrite") {
        std::cout << printAST(rewrite(parseLLL(input, true)), haveSec) << "\n";
    }
    else if (command == "compile_to_lll") {
        std::cout << printAST(compileToLLL(input), haveSec) << "\n";
    }
    else if (command == "rewrite_chunk") {
        std::cout << printAST(rewriteChunk(parseLLL(input, true)), haveSec) << "\n";
    }
    else if (command == "compile_chunk_to_lll") {
        std::cout << printAST(compileChunkToLLL(input), haveSec) << "\n";
    }
    else if (command == "build_fragtree") {
        std::cout << printAST(buildFragmentTree(parseLLL(input, true))) << "\n";
    }
    else if (command == "compile_lll") {
        std::cout << binToHex(compileLLL(parseLLL(input, true))) << "\n";
    }
    else if (command == "dereference") {
        std::cout << printTokens(dereference(parseLLL(input, true))) <<"\n";
    }
    else if (command == "pretty_assemble") {
        std::cout << printTokens(prettyAssemble(parseLLL(input, true))) <<"\n";
    }
    else if (command == "pretty_compile_lll") {
        std::cout << printTokens(prettyCompileLLL(parseLLL(input, true))) << "\n";
    }
    else if (command == "pretty_compile") {
        std::cout << printTokens(prettyCompile(input)) << "\n";
    }
    else if (command == "pretty_compile_chunk") {
        std::cout << printTokens(prettyCompileChunk(input)) << "\n";
    }
    else if (command == "assemble") {
        std::cout << assemble(parseLLL(input, true)) << "\n";
    }
    else if (command == "serialize") {
        std::cout << binToHex(serialize(tokenize(input, Metadata(), false))) << "\n";
    }
    else if (command == "deserialize") {
        std::cout << printTokens(deserialize(hexToBin(input))) << "\n";
    }
    else if (command == "compile") {
        std::cout << binToHex(compile(input)) << "\n";
    }
    else if (command == "compile_chunk") {
        std::cout << binToHex(compileChunk(input)) << "\n";
    }
    else if (command == "encode_datalist") {
        std::vector<Node> tokens = tokenize(input);
        std::vector<std::string> o;
        for (int i = 0; i < (int)tokens.size(); i++) {
            o.push_back(tokens[i].val);
        }
        std::cout << binToHex(encodeDatalist(o)) << "\n";
    }
    else if (command == "decode_datalist") {
        std::vector<std::string> o = decodeDatalist(hexToBin(input));
        std::vector<Node> tokens;
        for (int i = 0; i < (int)o.size(); i++)
            tokens.push_back(token(o[i]));
        std::cout << printTokens(tokens) << "\n";
    }
    else if (command == "tokenize") {
        std::cout << printTokens(tokenize(input));
    }
    else if (command == "biject") {
        // Maps a position in the compiled code back to its source location
        if (argv == 3) {
            std::cerr << "Not enough arguments for biject\n";
            return 1;
        }
        int pos = decimalToUnsigned(secondInput);
        std::vector<Node> n = prettyCompile(input);
        // Stop here instead of indexing past the end of n
        if (pos < 0 || pos >= (int)n.size()) {
            std::cerr << "Code position too high\n";
            return 1;
        }
        Metadata m = n[pos].metadata;
        std::cout << "Opcode: " << n[pos].val << ", file: " << m.file <<
            ", line: " << m.ln << ", char: " << m.ch << "\n";
    }
    return 0;
}
Loading…
Cancel
Save