Gav Wood
11 years ago
113 changed files with 2535 additions and 178 deletions
@ -0,0 +1,61 @@ |
|||
/*
|
|||
This file is part of cpp-ethereum. |
|||
|
|||
cpp-ethereum is free software: you can redistribute it and/or modify |
|||
it under the terms of the GNU General Public License as published by |
|||
the Free Software Foundation, either version 3 of the License, or |
|||
(at your option) any later version. |
|||
|
|||
cpp-ethereum is distributed in the hope that it will be useful, |
|||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
GNU General Public License for more details. |
|||
|
|||
You should have received a copy of the GNU General Public License |
|||
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
|||
*/ |
|||
/** @file FixedHash.h
|
|||
* @author Gav Wood <i@gavwood.com> |
|||
* @date 2014 |
|||
* |
|||
* The FixedHash fixed-size "hash" container type. |
|||
*/ |
|||
|
|||
#pragma once |
|||
|
|||
#include <string> |
|||
#include <libethential/FixedHash.h> |
|||
#include <libethential/vector_ref.h> |
|||
|
|||
namespace eth |
|||
{ |
|||
|
|||
// SHA-3 convenience routines.
|
|||
|
|||
/// Calculate SHA3-256 hash of the given input and load it into the given output.
|
|||
void sha3(bytesConstRef _input, bytesRef _output); |
|||
|
|||
/// Calculate SHA3-256 hash of the given input, possibly interpreting it as nibbles, and return the hash as a string filled with binary data.
|
|||
std::string sha3(std::string const& _input, bool _isNibbles); |
|||
|
|||
/// Calculate SHA3-256 hash of the given input, returning as a byte array.
|
|||
bytes sha3Bytes(bytesConstRef _input); |
|||
|
|||
/// Calculate SHA3-256 hash of the given input (presented as a binary string), returning as a byte array.
|
|||
inline bytes sha3Bytes(std::string const& _input) { return sha3Bytes((std::string*)&_input); } |
|||
|
|||
/// Calculate SHA3-256 hash of the given input, returning as a byte array.
|
|||
inline bytes sha3Bytes(bytes const& _input) { return sha3Bytes((bytes*)&_input); } |
|||
|
|||
/// Calculate SHA3-256 hash of the given input, returning as a 256-bit hash.
|
|||
h256 sha3(bytesConstRef _input); |
|||
|
|||
/// Calculate SHA3-256 hash of the given input, returning as a 256-bit hash.
|
|||
inline h256 sha3(bytes const& _input) { return sha3(bytesConstRef((bytes*)&_input)); } |
|||
|
|||
/// Calculate SHA3-256 hash of the given input (presented as a binary-filled string), returning as a 256-bit hash.
|
|||
inline h256 sha3(std::string const& _input) { return sha3(bytesConstRef(_input)); } |
|||
|
|||
extern h256 EmptySHA3; |
|||
|
|||
} |
@ -0,0 +1,51 @@ |
|||
cmake_policy(SET CMP0015 NEW)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB")

aux_source_directory(. SRC_LIST)

set(EXECUTABLE ethential)

# Locate Boost up front: the UNIX branch below reads ${Boost_THREAD_LIBRARY},
# which is only populated once find_package(Boost) has run.
if (UNIX)
    find_package(Boost 1.53 REQUIRED COMPONENTS thread date_time system filesystem program_options signals serialization chrono unit_test_framework locale)
endif()

if(APPLE)
    # set(CMAKE_INSTALL_PREFIX ../lib)
    add_library(${EXECUTABLE} SHARED ${SRC_LIST})
else()
    add_library(${EXECUTABLE} ${SRC_LIST})
endif()
file(GLOB HEADERS "*.h")

include_directories(..)

# This target *is* ethential, so it must not list itself as a link dependency.
target_link_libraries(${EXECUTABLE} gmp)

# Quote the variable so the comparison stays well-formed when TARGET_PLATFORM is unset.
if("${TARGET_PLATFORM}" STREQUAL "w64")
    include_directories(/usr/x86_64-w64-mingw32/include/cryptopp)
    target_link_libraries(${EXECUTABLE} boost_thread_win32-mt-s)
    target_link_libraries(${EXECUTABLE} iphlpapi)
    target_link_libraries(${EXECUTABLE} ws2_32)
    target_link_libraries(${EXECUTABLE} mswsock)
    target_link_libraries(${EXECUTABLE} shlwapi)
elseif (APPLE)
    # Latest mavericks boost libraries only come with -mt
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
elseif (UNIX)
    # CMAKE_THREAD_LIBS_INIT is set by the Threads package, so find it here
    # just like the other branches do.
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${Boost_THREAD_LIBRARY})
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
else ()
    target_link_libraries(${EXECUTABLE} boost_thread)
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
endif ()

message("Installation path: ${CMAKE_INSTALL_PREFIX}")

install( TARGETS ${EXECUTABLE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
|||
|
@ -0,0 +1,25 @@ |
|||
/*
|
|||
This file is part of cpp-ethereum. |
|||
|
|||
cpp-ethereum is free software: you can redistribute it and/or modify |
|||
it under the terms of the GNU General Public License as published by |
|||
the Free Software Foundation, either version 3 of the License, or |
|||
(at your option) any later version. |
|||
|
|||
cpp-ethereum is distributed in the hope that it will be useful, |
|||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
GNU General Public License for more details. |
|||
|
|||
You should have received a copy of the GNU General Public License |
|||
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
|||
*/ |
|||
/** @file FixedHash.cpp
|
|||
* @author Gav Wood <i@gavwood.com> |
|||
* @date 2014 |
|||
*/ |
|||
|
|||
#include "FixedHash.h" |
|||
|
|||
using namespace std; |
|||
using namespace eth; |
@ -0,0 +1,51 @@ |
|||
cmake_policy(SET CMP0015 NEW)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB")

aux_source_directory(. SRC_LIST)

set(EXECUTABLE evmface)

# Locate Boost up front: the UNIX branch below reads ${Boost_THREAD_LIBRARY},
# which is only populated once find_package(Boost) has run.
if (UNIX)
    find_package(Boost 1.53 REQUIRED COMPONENTS thread date_time system filesystem program_options signals serialization chrono unit_test_framework locale)
endif()

if(APPLE)
    # set(CMAKE_INSTALL_PREFIX ../lib)
    add_library(${EXECUTABLE} SHARED ${SRC_LIST})
else()
    add_library(${EXECUTABLE} ${SRC_LIST})
endif()
file(GLOB HEADERS "*.h")

include_directories(..)

target_link_libraries(${EXECUTABLE} ethential)
target_link_libraries(${EXECUTABLE} gmp)

# Quote the variable so the comparison stays well-formed when TARGET_PLATFORM is unset.
if("${TARGET_PLATFORM}" STREQUAL "w64")
    include_directories(/usr/x86_64-w64-mingw32/include/cryptopp)
    target_link_libraries(${EXECUTABLE} boost_thread_win32-mt-s)
    target_link_libraries(${EXECUTABLE} iphlpapi)
    target_link_libraries(${EXECUTABLE} ws2_32)
    target_link_libraries(${EXECUTABLE} mswsock)
    target_link_libraries(${EXECUTABLE} shlwapi)
elseif (APPLE)
    # Latest mavericks boost libraries only come with -mt
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
elseif (UNIX)
    # CMAKE_THREAD_LIBS_INIT is set by the Threads package, so find it here
    # just like the other branches do.
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${Boost_THREAD_LIBRARY})
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
else ()
    target_link_libraries(${EXECUTABLE} boost_thread)
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
endif ()

message("Installation path: ${CMAKE_INSTALL_PREFIX}")

install( TARGETS ${EXECUTABLE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
|||
|
@ -0,0 +1,98 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "bignum.h" |
|||
|
|||
// Integer to string conversion.
//
// Returns the base-10 text of `branch`. Non-negative values produce the same
// digits as before; negative values now yield a leading '-' instead of
// failing inside a substr() call with an out-of-range position.
std::string intToDecimal(int branch) {
    // Widen first so that negating the most negative int cannot overflow.
    long long magnitude = branch;
    const bool negative = magnitude < 0;
    if (negative) magnitude = -magnitude;

    std::string digits;
    // do/while so that zero still produces the single digit "0".
    do {
        digits.insert(digits.begin(), static_cast<char>('0' + magnitude % 10));
        magnitude /= 10;
    } while (magnitude > 0);

    if (negative) digits.insert(digits.begin(), '-');
    return digits;
}
|||
|
|||
// Adds two decimal strings digit by digit and returns the sum as a string.
// The shorter operand is left-padded with '0' so both have the same width;
// leading zeros of the inputs are therefore kept in the result.
std::string decimalAdd(std::string a, std::string b) {
    std::string sum = a;
    if (b.length() < a.length()) b.insert(0, a.length() - b.length(), '0');
    if (sum.length() < b.length()) sum.insert(0, b.length() - sum.length(), '0');

    bool carry = false;
    // Walk from the least significant digit (rightmost) to the most significant.
    for (std::string::size_type idx = sum.length(); idx-- > 0; ) {
        sum[idx] = sum[idx] + b[idx] - '0';
        if (carry) sum[idx]++;
        carry = sum[idx] > '9';
        if (carry) sum[idx] -= 10;
    }
    // A carry out of the top digit widens the result by one.
    if (carry) sum.insert(0, 1, '1');
    return sum;
}
|||
|
|||
//Helper function for decimalMul
|
|||
std::string decimalDigitMul(std::string a, int dig) { |
|||
if (dig == 0) return "0"; |
|||
else return decimalAdd(a, decimalDigitMul(a, dig - 1)); |
|||
} |
|||
|
|||
//Multiply two strings representing decimal values
|
|||
std::string decimalMul(std::string a, std::string b) { |
|||
std::string o = "0"; |
|||
for (unsigned i = 0; i < b.length(); i++) { |
|||
std::string n = decimalDigitMul(a, b[i] - '0'); |
|||
if (n != "0") { |
|||
for (unsigned j = i + 1; j < b.length(); j++) n += "0"; |
|||
} |
|||
o = decimalAdd(o, n); |
|||
} |
|||
return o; |
|||
} |
|||
|
|||
// Compares two decimal strings: true when a > b.
// Identical strings return `eqAllowed`. Otherwise the longer string wins,
// and strings of equal length are compared lexicographically.
bool decimalGt(std::string a, std::string b, bool eqAllowed) {
    if (a == b) return eqAllowed;
    if (a.length() != b.length()) return a.length() > b.length();
    return a > b;
}
|||
|
|||
//Subtract the two strings representing decimal values
|
|||
std::string decimalSub(std::string a, std::string b) { |
|||
if (b == "0") return a; |
|||
if (b == a) return "0"; |
|||
while (b.length() < a.length()) b = "0" + b; |
|||
std::string c = b; |
|||
for (unsigned i = 0; i < c.length(); i++) c[i] = '0' + ('9' - c[i]); |
|||
std::string o = decimalAdd(decimalAdd(a, c).substr(1), "1"); |
|||
while (o.size() > 1 && o[0] == '0') o = o.substr(1); |
|||
return o; |
|||
} |
|||
|
|||
//Divide the two strings representing decimal values
|
|||
std::string decimalDiv(std::string a, std::string b) { |
|||
std::string c = b; |
|||
if (decimalGt(c, a)) return "0"; |
|||
int zeroes = -1; |
|||
while (decimalGt(a, c, true)) { |
|||
zeroes += 1; |
|||
c = c + "0"; |
|||
} |
|||
c = c.substr(0, c.size() - 1); |
|||
std::string quot = "0"; |
|||
while (decimalGt(a, c, true)) { |
|||
a = decimalSub(a, c); |
|||
quot = decimalAdd(quot, "1"); |
|||
} |
|||
for (int i = 0; i < zeroes; i++) quot += "0"; |
|||
return decimalAdd(quot, decimalDiv(a, b)); |
|||
} |
|||
|
|||
//Modulo the two strings representing decimal values
|
|||
std::string decimalMod(std::string a, std::string b) { |
|||
return decimalSub(a, decimalMul(decimalDiv(a, b), b)); |
|||
} |
|||
|
|||
// Converts a decimal string to int, most significant digit first.
// An empty string yields 0. No range or digit validation is performed.
int decimalToInt(std::string a) {
    int value = 0;
    for (std::string::size_type i = 0; i < a.size(); ++i) {
        value = (a[i] - '0') + value * 10;
    }
    return value;
}
@ -0,0 +1,30 @@ |
|||
#ifndef ETHSERP_BIGNUM
#define ETHSERP_BIGNUM

// This header names std::string in every declaration, so it includes
// <string> itself rather than relying on the includer's include order.
#include <string>

// The ten decimal digit characters, indexable by digit value.
const std::string nums = "0123456789";

// 2^256 as a decimal string.
const std::string tt256 =
"115792089237316195423570985008687907853269984665640564039457584007913129639936"
;

// 2^255 as a decimal string.
const std::string tt255 =
"57896044618658097711785492504343953926634992332820282019728792003956564819968"
;

// Integer -> decimal string.
std::string intToDecimal(int branch);

// Arithmetic on decimal strings: a + b, a * b, a - b, a / b, a % b.
std::string decimalAdd(std::string a, std::string b);

std::string decimalMul(std::string a, std::string b);

std::string decimalSub(std::string a, std::string b);

std::string decimalDiv(std::string a, std::string b);

std::string decimalMod(std::string a, std::string b);

// Is a greater than b? eqAllowed is the result returned for equal strings.
bool decimalGt(std::string a, std::string b, bool eqAllowed=false);

// Decimal string -> int.
int decimalToInt(std::string a);

#endif
@ -0,0 +1,407 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
#include "bignum.h" |
|||
#include "opcodes.h" |
|||
|
|||
// Bookkeeping carried alongside the code while a program is compiled.
struct programAux {
    // Name -> decimal-string value. Used both for variable slots and for
    // label positions, depending on the compilation stage.
    std::map<std::string, std::string> vars;
    // Set when the program uses memory allocation.
    bool allocUsed;
    // Set when the program refers to "msg.data".
    bool calldataUsed;
    // Running byte offset while label positions are being computed.
    int step;
    int labelLength;

    // Start every field from a known value so that a default-constructed
    // object can be copied safely.
    programAux()
        : allocUsed(false), calldataUsed(false), step(0), labelLength(0) {}
};
|||
|
|||
struct programData { |
|||
programAux aux; |
|||
Node code; |
|||
}; |
|||
|
|||
// Builds a fresh programAux with no variables, all flags cleared and all
// counters at zero.
programAux Aux() {
    programAux o;
    o.allocUsed = false;
    o.calldataUsed = false;
    o.step = 0;
    // Previously left unset, so every copy of the result read an
    // indeterminate value.
    o.labelLength = 0;
    return o;
}
|||
|
|||
// Convenience constructor: bundles a programAux and a code Node into a
// programData. Defaults are a fresh Aux() and the token "_".
programData pd(programAux aux = Aux(), Node code=token("_")) {
    programData bundle;
    bundle.code = code;
    bundle.aux = aux;
    return bundle;
}
|||
|
|||
// Wraps the first `len` entries of `nodes` into a single "_" AST node
// carrying metadata `met`.
Node multiToken(Node nodes[], int len, Metadata met) {
    std::vector<Node> children;
    if (len > 0) children.insert(children.end(), nodes, nodes + len);
    return astnode("_", children, met);
}
|||
|
|||
Node finalize(programData c); |
|||
|
|||
// Turns LLL tree into tree of code fragments
|
|||
programData opcodeify(Node node, programAux aux=Aux()) { |
|||
std::string symb = "_"+mkUniqueToken(); |
|||
Metadata m = node.metadata; |
|||
// Numbers
|
|||
if (node.type == TOKEN) { |
|||
return pd(aux, nodeToNumeric(node)); |
|||
} |
|||
else if (node.val == "ref" || node.val == "get" || node.val == "set") { |
|||
std::string varname = node.args[0].val; |
|||
if (!aux.vars.count(varname)) { |
|||
aux.vars[varname] = intToDecimal(aux.vars.size() * 32); |
|||
} |
|||
if (varname == "msg.data") aux.calldataUsed = true; |
|||
// Set variable
|
|||
if (node.val == "set") { |
|||
programData sub = opcodeify(node.args[1], aux); |
|||
Node nodelist[] = { |
|||
sub.code, |
|||
token(aux.vars[varname], m), |
|||
token("MSTORE", m), |
|||
}; |
|||
return pd(sub.aux, multiToken(nodelist, 3, m)); |
|||
} |
|||
// Get variable
|
|||
else if (node.val == "get") { |
|||
Node nodelist[] = |
|||
{ token(aux.vars[varname], m), token("MLOAD", m) }; |
|||
return pd(aux, multiToken(nodelist, 2, m)); |
|||
} |
|||
// Refer variable
|
|||
else return pd(aux, token(aux.vars[varname], m)); |
|||
} |
|||
// Code blocks
|
|||
if (node.val == "lll" && node.args.size() == 2) { |
|||
if (node.args[1].val != "0") aux.allocUsed = true; |
|||
std::vector<Node> o; |
|||
o.push_back(finalize(opcodeify(node.args[0]))); |
|||
programData sub = opcodeify(node.args[1], aux); |
|||
Node code = astnode("____CODE", o, m); |
|||
Node nodelist[] = { |
|||
token("$begincode"+symb+".endcode"+symb, m), token("DUP", m), |
|||
sub.code, |
|||
token("$begincode"+symb, m), token("CODECOPY", m), |
|||
token("$endcode"+symb, m), token("JUMP", m), |
|||
token("~begincode"+symb, m), code, token("~endcode"+symb, m) |
|||
}; |
|||
return pd(sub.aux, multiToken(nodelist, 10, m)); |
|||
} |
|||
std::vector<Node> subs; |
|||
for (unsigned i = 0; i < node.args.size(); i++) { |
|||
programData sub = opcodeify(node.args[i], aux); |
|||
aux = sub.aux; |
|||
subs.push_back(sub.code); |
|||
} |
|||
// Seq of multiple statements
|
|||
if (node.val == "seq") { |
|||
return pd(aux, astnode("_", subs, m)); |
|||
} |
|||
// 2-part conditional (if gets rewritten to unless in rewrites)
|
|||
else if (node.val == "unless" && node.args.size() == 2) { |
|||
Node nodelist[] = { |
|||
subs[0], |
|||
token("$endif"+symb, m), token("JUMPI", m), |
|||
subs[1], |
|||
token("~endif"+symb, m) |
|||
}; |
|||
return pd(aux, multiToken(nodelist, 5, m)); |
|||
} |
|||
// 3-part conditional
|
|||
else if (node.val == "if" && node.args.size() == 3) { |
|||
Node nodelist[] = { |
|||
subs[0], |
|||
token("NOT", m), token("$else"+symb, m), token("JUMPI", m), |
|||
subs[1], |
|||
token("$endif"+symb, m), token("JUMP", m), token("~else"+symb, m), |
|||
subs[2], |
|||
token("~endif"+symb, m) |
|||
}; |
|||
return pd(aux, multiToken(nodelist, 10, m)); |
|||
} |
|||
// While (rewritten to this in rewrites)
|
|||
else if (node.val == "until") { |
|||
Node nodelist[] = { |
|||
token("~beg"+symb, m), |
|||
subs[0], |
|||
token("$end"+symb, m), token("JUMPI", m), |
|||
subs[1], |
|||
token("$beg"+symb, m), token("JUMP", m), token("~end"+symb, m) |
|||
}; |
|||
return pd(aux, multiToken(nodelist, 8, m)); |
|||
} |
|||
// Memory allocations
|
|||
else if (node.val == "alloc") { |
|||
aux.allocUsed = true; |
|||
Node nodelist[] = { |
|||
subs[0], |
|||
token("MSIZE", m), token("SWAP", m), token("MSIZE", m), |
|||
token("ADD", m), token("1", m), token("SWAP", m), token("SUB", m), |
|||
token("0", m), token("SWAP", m), token("MSTORE8", m) |
|||
}; |
|||
return pd(aux, multiToken(nodelist, 11, m)); |
|||
} |
|||
// Array literals
|
|||
else if (node.val == "array_lit") { |
|||
aux.allocUsed = true; |
|||
std::vector<Node> nodes; |
|||
if (!subs.size()) { |
|||
nodes.push_back(token("MSIZE", m)); |
|||
return pd(aux, astnode("_", nodes, m)); |
|||
} |
|||
nodes.push_back(token("MSIZE", m)); |
|||
nodes.push_back(token("0", m)); |
|||
nodes.push_back(token("MSIZE", m)); |
|||
nodes.push_back(token(intToDecimal(subs.size() * 32 - 1), m)); |
|||
nodes.push_back(token("ADD", m)); |
|||
nodes.push_back(token("MSTORE8", m)); |
|||
for (unsigned i = 0; i < subs.size(); i++) { |
|||
nodes.push_back(token("DUP", m)); |
|||
nodes.push_back(subs[i]); |
|||
nodes.push_back(token("SWAP", m)); |
|||
if (i > 0) { |
|||
nodes.push_back(token(intToDecimal(i * 32), m)); |
|||
nodes.push_back(token("ADD", m)); |
|||
} |
|||
nodes.push_back(token("MSTORE", m)); |
|||
} |
|||
return pd(aux, astnode("_", nodes, m)); |
|||
} |
|||
// All other functions/operators
|
|||
else { |
|||
std::vector<Node> subs2; |
|||
while (subs.size()) { |
|||
subs2.push_back(subs.back()); |
|||
subs.pop_back(); |
|||
} |
|||
subs2.push_back(token(upperCase(node.val), m)); |
|||
return pd(aux, astnode("_", subs2, m)); |
|||
} |
|||
} |
|||
|
|||
// Adds necessary wrappers to a program
|
|||
Node finalize(programData c) { |
|||
std::vector<Node> bottom; |
|||
Metadata m = c.code.metadata; |
|||
// If we are using both alloc and variables, we need to pre-zfill
|
|||
// some memory
|
|||
if (c.aux.allocUsed && c.aux.vars.size() > 0) { |
|||
Node nodelist[] = { |
|||
token("0", m), |
|||
token(intToDecimal(c.aux.vars.size() * 32 - 1)), |
|||
token("MSTORE8", m) |
|||
}; |
|||
bottom.push_back(multiToken(nodelist, 3, m)); |
|||
} |
|||
// If msg.data is being used as an array, then we need to copy it
|
|||
if (c.aux.calldataUsed) { |
|||
Node nodelist[] = { |
|||
token("MSIZE", m), token("CALLDATASIZE", m), token("MSIZE", m), |
|||
token("0", m), token("CALLDATACOPY", m), |
|||
token(c.aux.vars["msg.data"], m), token("MSTORE", m) |
|||
}; |
|||
bottom.push_back(multiToken(nodelist, 7, m)); |
|||
} |
|||
// The actual code
|
|||
bottom.push_back(c.code); |
|||
return astnode("_", bottom, m); |
|||
} |
|||
|
|||
//LLL -> code fragment tree
|
|||
Node buildFragmentTree(Node node) { |
|||
return finalize(opcodeify(node)); |
|||
} |
|||
|
|||
|
|||
// Builds a dictionary mapping labels to variable names
|
|||
programAux buildDict(Node program, programAux aux, int labelLength) { |
|||
Metadata m = program.metadata; |
|||
// Token
|
|||
if (program.type == TOKEN) { |
|||
if (isNumberLike(program)) { |
|||
aux.step += 1 + toByteArr(program.val, m).size(); |
|||
} |
|||
else if (program.val[0] == '~') { |
|||
aux.vars[program.val.substr(1)] = intToDecimal(aux.step); |
|||
} |
|||
else if (program.val[0] == '$') { |
|||
aux.step += labelLength + 1; |
|||
} |
|||
else aux.step += 1; |
|||
} |
|||
// A sub-program (ie. LLL)
|
|||
else if (program.val == "____CODE") { |
|||
programAux auks = Aux(); |
|||
for (unsigned i = 0; i < program.args.size(); i++) { |
|||
auks = buildDict(program.args[i], auks, labelLength); |
|||
} |
|||
for (std::map<std::string,std::string>::iterator it=auks.vars.begin(); |
|||
it != auks.vars.end(); |
|||
it++) { |
|||
aux.vars[(*it).first] = (*it).second; |
|||
} |
|||
aux.step += auks.step; |
|||
} |
|||
// Normal sub-block
|
|||
else { |
|||
for (unsigned i = 0; i < program.args.size(); i++) { |
|||
aux = buildDict(program.args[i], aux, labelLength); |
|||
} |
|||
} |
|||
return aux; |
|||
} |
|||
|
|||
// Applies that dictionary
|
|||
Node substDict(Node program, programAux aux, int labelLength) { |
|||
Metadata m = program.metadata; |
|||
std::vector<Node> out; |
|||
std::vector<Node> inner; |
|||
if (program.type == TOKEN) { |
|||
if (program.val[0] == '$') { |
|||
std::string tokStr = "PUSH"+intToDecimal(labelLength); |
|||
out.push_back(token(tokStr, m)); |
|||
int dotLoc = program.val.find('.'); |
|||
if (dotLoc == -1) { |
|||
std::string val = aux.vars[program.val.substr(1)]; |
|||
inner = toByteArr(val, m, labelLength); |
|||
} |
|||
else { |
|||
std::string start = aux.vars[program.val.substr(1, dotLoc-1)], |
|||
end = aux.vars[program.val.substr(dotLoc + 1)], |
|||
dist = decimalSub(end, start); |
|||
inner = toByteArr(dist, m, labelLength); |
|||
} |
|||
out.push_back(astnode("_", inner, m)); |
|||
} |
|||
else if (program.val[0] == '~') { } |
|||
else if (isNumberLike(program)) { |
|||
inner = toByteArr(program.val, m); |
|||
out.push_back(token("PUSH"+intToDecimal(inner.size()))); |
|||
out.push_back(astnode("_", inner, m)); |
|||
} |
|||
else return program; |
|||
} |
|||
else { |
|||
for (unsigned i = 0; i < program.args.size(); i++) { |
|||
Node n = substDict(program.args[i], aux, labelLength); |
|||
if (n.type == TOKEN || n.args.size()) out.push_back(n); |
|||
} |
|||
} |
|||
return astnode("_", out, m); |
|||
} |
|||
|
|||
// Compiled fragtree -> compiled fragtree without labels
|
|||
Node dereference(Node program) { |
|||
int sz = treeSize(program) * 4; |
|||
int labelLength = 1; |
|||
while (sz >= 256) { labelLength += 1; sz /= 256; } |
|||
programAux aux = buildDict(program, Aux(), labelLength); |
|||
return substDict(program, aux, labelLength); |
|||
} |
|||
|
|||
// Dereferenced fragtree -> opcodes
|
|||
std::vector<Node> flatten(Node derefed) { |
|||
std::vector<Node> o; |
|||
if (derefed.type == TOKEN) { |
|||
o.push_back(derefed); |
|||
} |
|||
else { |
|||
for (unsigned i = 0; i < derefed.args.size(); i++) { |
|||
std::vector<Node> oprime = flatten(derefed.args[i]); |
|||
for (unsigned j = 0; j < oprime.size(); j++) o.push_back(oprime[j]); |
|||
} |
|||
} |
|||
return o; |
|||
} |
|||
|
|||
// Opcodes -> bin
|
|||
std::string serialize(std::vector<Node> codons) { |
|||
std::string o; |
|||
for (unsigned i = 0; i < codons.size(); i++) { |
|||
int v; |
|||
if (isNumberLike(codons[i])) { |
|||
v = decimalToInt(codons[i].val); |
|||
} |
|||
else if (codons[i].val.substr(0,4) == "PUSH") { |
|||
v = 95 + decimalToInt(codons[i].val.substr(4)); |
|||
} |
|||
else { |
|||
v = opcode(codons[i].val); |
|||
} |
|||
o += (char)v; |
|||
} |
|||
return o; |
|||
} |
|||
|
|||
// Bin -> opcodes
|
|||
std::vector<Node> deserialize(std::string ser) { |
|||
std::vector<Node> o; |
|||
int backCount = 0; |
|||
for (unsigned i = 0; i < ser.length(); i++) { |
|||
unsigned char v = (unsigned char)ser[i]; |
|||
std::string oper = op((int)v); |
|||
if (oper != "" && backCount <= 0) o.push_back(token(oper)); |
|||
else if (v >= 96 && v < 128 && backCount <= 0) { |
|||
o.push_back(token("PUSH"+intToDecimal(v - 95))); |
|||
} |
|||
else o.push_back(token(intToDecimal(v))); |
|||
if (v >= 96 && v < 128 && backCount <= 0) { |
|||
backCount = v - 95; |
|||
} |
|||
else backCount--; |
|||
} |
|||
return o; |
|||
} |
|||
|
|||
// Fragtree -> bin
|
|||
std::string assemble(Node fragTree) { |
|||
return serialize(flatten(dereference(fragTree))); |
|||
} |
|||
|
|||
// Fragtree -> tokens
|
|||
std::vector<Node> prettyAssemble(Node fragTree) { |
|||
return flatten(dereference(fragTree)); |
|||
} |
|||
|
|||
// LLL -> bin
|
|||
std::string compileLLL(Node program) { |
|||
return assemble(buildFragmentTree(program)); |
|||
} |
|||
|
|||
// LLL -> tokens
|
|||
std::vector<Node> prettyCompileLLL(Node program) { |
|||
return prettyAssemble(buildFragmentTree(program)); |
|||
} |
|||
|
|||
// Converts a list of integer values to binary transaction data
|
|||
std::string encodeDatalist(std::vector<std::string> vals) { |
|||
std::string o; |
|||
for (unsigned i = 0; i < vals.size(); i++) { |
|||
std::vector<Node> n = toByteArr(strToNumeric(vals[i]), Metadata(), 32); |
|||
for (unsigned j = 0; j < n.size(); j++) { |
|||
int v = decimalToInt(n[j].val); |
|||
o += (char)v; |
|||
} |
|||
} |
|||
return o; |
|||
} |
|||
|
|||
// Converts binary transaction data into a list of integer values
|
|||
std::vector<std::string> decodeDatalist(std::string ser) { |
|||
std::vector<std::string> out; |
|||
for (unsigned i = 0; i < ser.length(); i+= 32) { |
|||
std::string o = "0"; |
|||
for (unsigned j = i; j < i + 32; j++) { |
|||
int vj = (int)(unsigned char)ser[j]; |
|||
o = decimalAdd(decimalMul(o, "256"), intToDecimal(vj)); |
|||
} |
|||
out.push_back(o); |
|||
} |
|||
return out; |
|||
} |
@ -0,0 +1,43 @@ |
|||
#ifndef ETHSERP_COMPILER |
|||
#define ETHSERP_COMPILER |
|||
|
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
|
|||
// Compiled fragtree -> compiled fragtree without labels
|
|||
Node dereference(Node program); |
|||
|
|||
// LLL -> fragtree
|
|||
Node buildFragmentTree(Node program); |
|||
|
|||
// Dereferenced fragtree -> opcodes
|
|||
std::vector<Node> flatten(Node derefed); |
|||
|
|||
// opcodes -> bin
|
|||
std::string serialize(std::vector<Node> codons); |
|||
|
|||
// Fragtree -> bin
|
|||
std::string assemble(Node fragTree); |
|||
|
|||
// Fragtree -> opcodes
|
|||
std::vector<Node> prettyAssemble(Node fragTree); |
|||
|
|||
// LLL -> bin
|
|||
std::string compileLLL(Node program); |
|||
|
|||
// LLL -> opcodes
|
|||
std::vector<Node> prettyCompileLLL(Node program); |
|||
|
|||
// bin -> opcodes
|
|||
std::vector<Node> deserialize(std::string ser); |
|||
|
|||
// Converts a list of integer values to binary transaction data
|
|||
std::string encodeDatalist(std::vector<std::string> vals); |
|||
|
|||
// Converts binary transaction data into a list of integer values
|
|||
std::vector<std::string> decodeDatalist(std::string ser); |
|||
|
|||
#endif |
@ -0,0 +1,11 @@ |
|||
#include <libserpent/funcs.h> |
|||
#include <libserpent/bignum.h> |
|||
#include <iostream> |
|||
|
|||
using namespace std; |
|||
|
|||
int main() { |
|||
cout << printAST(compileToLLL(get_file_contents("examples/namecoin.se"))) << "\n"; |
|||
cout << decimalSub("10234", "10234") << "\n"; |
|||
cout << decimalSub("10234", "10233") << "\n"; |
|||
} |
@ -0,0 +1,23 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include "funcs.h" |
|||
#include "bignum.h" |
|||
#include "util.h" |
|||
#include "parser.h" |
|||
#include "lllparser.h" |
|||
#include "compiler.h" |
|||
#include "rewriter.h" |
|||
#include "tokenize.h" |
|||
|
|||
// Serpent source -> LLL: parse, then apply the rewrite rules.
Node compileToLLL(std::string input) {
    Node parsed = parseSerpent(input);
    return rewrite(parsed);
}
|||
|
|||
std::string compile(std::string input) { |
|||
return compileLLL(compileToLLL(input)); |
|||
} |
|||
|
|||
std::vector<Node> prettyCompile(std::string input) { |
|||
return prettyCompileLLL(compileToLLL(input)); |
|||
} |
@ -0,0 +1,29 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include "bignum.h" |
|||
#include "util.h" |
|||
#include "parser.h" |
|||
#include "lllparser.h" |
|||
#include "compiler.h" |
|||
#include "rewriter.h" |
|||
#include "tokenize.h" |
|||
|
|||
// Function listing:
|
|||
//
|
|||
// parseSerpent (serpent -> AST) std::string -> Node
|
|||
// parseLLL (LLL -> AST) std::string -> Node
|
|||
// rewrite (apply rewrite rules) Node -> Node
|
|||
// compileToLLL (serpent -> LLL) std::string -> Node
|
|||
// compileLLL (LLL -> EVMhex) Node -> std::string
|
|||
// prettyCompileLLL (LLL -> EVMasm) Node -> std::vector<Node>
|
|||
// prettyCompile (serpent -> EVMasm) std::string -> std::vector<Node>
|
|||
// compile (serpent -> EVMhex) std::string -> std::string
|
|||
// get_file_contents (filename -> file) std::string -> std::string
|
|||
// exists (does file exist?) std::string -> bool
|
|||
|
|||
Node compileToLLL(std::string input); |
|||
|
|||
std::string compile(std::string input); |
|||
|
|||
std::vector<Node> prettyCompile(std::string input); |
@ -0,0 +1,70 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
#include "lllparser.h" |
|||
#include "tokenize.h" |
|||
|
|||
struct _parseOutput { |
|||
Node node; |
|||
int newpos; |
|||
}; |
|||
|
|||
// Helper, returns subtree and position of start of next node
|
|||
_parseOutput _parse(std::vector<Node> inp, int pos) { |
|||
Metadata met = inp[pos].metadata; |
|||
_parseOutput o; |
|||
// Bracket: keep grabbing tokens until we get to the
|
|||
// corresponding closing bracket
|
|||
if (inp[pos].val == "(" || inp[pos].val == "[") { |
|||
std::string fun, rbrack; |
|||
std::vector<Node> args; |
|||
pos += 1; |
|||
if (inp[pos].val == "[") { |
|||
fun = "access"; |
|||
rbrack = "]"; |
|||
} |
|||
else rbrack = ")"; |
|||
// First argument is the function
|
|||
while (inp[pos].val != ")") { |
|||
_parseOutput po = _parse(inp, pos); |
|||
if (fun.length() == 0 && po.node.type == 1) { |
|||
std::cerr << "Error: first arg must be function\n"; |
|||
fun = po.node.val; |
|||
} |
|||
else if (fun.length() == 0) { |
|||
fun = po.node.val; |
|||
} |
|||
else { |
|||
args.push_back(po.node); |
|||
} |
|||
pos = po.newpos; |
|||
} |
|||
o.newpos = pos + 1; |
|||
o.node = astnode(fun, args, met); |
|||
} |
|||
// Normal token, return it and advance to next token
|
|||
else { |
|||
o.newpos = pos + 1; |
|||
o.node = token(inp[pos].val, met); |
|||
} |
|||
return o; |
|||
} |
|||
|
|||
// stream of tokens -> lisp parse tree
|
|||
Node parseLLLTokenStream(std::vector<Node> inp) { |
|||
_parseOutput o = _parse(inp, 0); |
|||
return o.node; |
|||
} |
|||
|
|||
// Parses LLL
|
|||
Node parseLLL(std::string s, bool allowFileRead) { |
|||
std::string input = s; |
|||
std::string file = "main"; |
|||
if (exists(s) && allowFileRead) { |
|||
file = s; |
|||
input = get_file_contents(s); |
|||
} |
|||
return parseLLLTokenStream(tokenize(s, Metadata(file, 0, 0))); |
|||
} |
@ -0,0 +1,13 @@ |
|||
#ifndef ETHSERP_LLLPARSER |
|||
#define ETHSERP_LLLPARSER |
|||
|
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
|
|||
// LLL text -> parse tree
|
|||
Node parseLLL(std::string s, bool allowFileRead=false); |
|||
|
|||
#endif |
@ -0,0 +1,91 @@ |
|||
#ifndef ETHSERP_OPCODES
#define ETHSERP_OPCODES

#include <stdio.h>
#include <iostream>
#include <vector>
#include <map>
#include <string>

// Everything below is defined (not just declared) in this header, so it
// is given internal linkage: each translation unit that includes the
// header gets its own copy and linking several of them does not produce
// multiple-definition errors.

// Mnemonic -> opcode value. Filled on first use by _opcode().
static std::map<std::string, int> opcodes;
// Opcode value -> mnemonic. Filled on first use by _opcode().
static std::map<int, std::string> reverseOpcodes;

// Fetches everything EXCEPT PUSH1..32.
//
// Looks up both directions at once:
//   .first  is the mnemonic for value `opi`, or "" when there is none;
//   .second is the value for mnemonic `ops`, or -1 when there is none.
static inline std::pair<std::string, int> _opcode(std::string ops, int opi) {
    if (!opcodes.size()) {
        static const struct { const char* name; int code; } table[] = {
            { "STOP", 0x00 },
            { "ADD", 0x01 },
            { "MUL", 0x02 },
            { "SUB", 0x03 },
            { "DIV", 0x04 },
            { "SDIV", 0x05 },
            { "MOD", 0x06 },
            { "SMOD", 0x07 },
            { "EXP", 0x08 },
            { "NEG", 0x09 },
            { "LT", 0x0a },
            { "GT", 0x0b },
            { "SLT", 0x0c },
            { "SGT", 0x0d },
            { "EQ", 0x0e },
            { "NOT", 0x0f },
            { "AND", 0x10 },
            { "OR", 0x11 },
            { "XOR", 0x12 },
            { "BYTE", 0x13 },
            { "SHA3", 0x20 },
            { "ADDRESS", 0x30 },
            { "BALANCE", 0x31 },
            { "ORIGIN", 0x32 },
            { "CALLER", 0x33 },
            { "CALLVALUE", 0x34 },
            { "CALLDATALOAD", 0x35 },
            { "CALLDATASIZE", 0x36 },
            { "CALLDATACOPY", 0x37 },
            { "CODESIZE", 0x38 },
            { "CODECOPY", 0x39 },
            { "GASPRICE", 0x3a },
            { "PREVHASH", 0x40 },
            { "COINBASE", 0x41 },
            { "TIMESTAMP", 0x42 },
            { "NUMBER", 0x43 },
            { "DIFFICULTY", 0x44 },
            { "GASLIMIT", 0x45 },
            { "POP", 0x50 },
            { "DUP", 0x51 },
            { "SWAP", 0x52 },
            { "MLOAD", 0x53 },
            { "MSTORE", 0x54 },
            { "MSTORE8", 0x55 },
            { "SLOAD", 0x56 },
            { "SSTORE", 0x57 },
            { "JUMP", 0x58 },
            { "JUMPI", 0x59 },
            { "PC", 0x5a },
            { "MSIZE", 0x5b },
            { "GAS", 0x5c },
            { "CREATE", 0xf0 },
            { "CALL", 0xf1 },
            { "RETURN", 0xf2 },
            { "SUICIDE", 0xff }
        };
        const unsigned count = sizeof(table) / sizeof(table[0]);
        // Every value in the table is distinct, so both maps can be
        // filled in a single pass.
        for (unsigned i = 0; i < count; i++) {
            opcodes[table[i].name] = table[i].code;
            reverseOpcodes[table[i].code] = table[i].name;
        }
    }
    std::string op;
    int opcode = -1;
    // Use find() so that a failed lookup never inserts a new entry.
    std::map<int, std::string>::const_iterator byCode = reverseOpcodes.find(opi);
    if (byCode != reverseOpcodes.end()) op = byCode->second;
    std::map<std::string, int>::const_iterator byName = opcodes.find(ops);
    if (byName != opcodes.end()) opcode = byName->second;
    return std::pair<std::string, int>(op, opcode);
}

// Mnemonic -> opcode value, or -1 when the mnemonic is unknown.
static inline int opcode(std::string op) {
    return _opcode(op, 0).second;
}

// Opcode value -> mnemonic, or "" when the value has no mnemonic.
static inline std::string op(int opcode) {
    return _opcode("", opcode).first;
}

#endif
@ -0,0 +1,387 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
#include "parser.h" |
|||
#include "tokenize.h" |
|||
|
|||
// Extended BEDMAS precedence order
|
|||
int precedence(Node tok) { |
|||
std::string v = tok.val; |
|||
if (v == "!" || v == "not") return 0; |
|||
else if (v=="^" || v == "**") return 1; |
|||
else if (v=="*" || v=="/" || v=="@/" || v=="%" || v=="@%") return 2; |
|||
else if (v=="+" || v=="-") return 3; |
|||
else if (v=="<" || v==">" || v=="<=" || v==">=") return 4; |
|||
else if (v=="@<" || v=="@>" || v=="@<=" || v=="@>=") return 4; |
|||
else if (v=="&" || v=="|" || v=="xor" || v=="==") return 5; |
|||
else if (v=="&&" || v=="and") return 6; |
|||
else if (v=="||" || v=="or") return 7; |
|||
else if (v=="=") return 10; |
|||
else if (v=="+=" || v=="-=" || v=="*=" || v=="/=" || v=="%=") return 10; |
|||
else if (v=="@/=" || v=="@%=") return 10; |
|||
else return -1; |
|||
} |
|||
|
|||
// Token classification for shunting-yard purposes
|
|||
int toktype(Node tok) { |
|||
if (tok.type == ASTNODE) return COMPOUND; |
|||
std::string v = tok.val; |
|||
if (v == "(" || v == "[") return LPAREN; |
|||
else if (v == ")" || v == "]") return RPAREN; |
|||
else if (v == ",") return COMMA; |
|||
else if (v == ":") return COLON; |
|||
else if (v == "!" || v == "not") return UNARY_OP; |
|||
else if (precedence(tok) >= 0) return BINARY_OP; |
|||
if (tok.val[0] != '"' && tok.val[0] != '\'') { |
|||
for (unsigned i = 0; i < tok.val.length(); i++) { |
|||
if (chartype(tok.val[i]) == SYMB) { |
|||
err("Invalid symbol: "+tok.val, tok.metadata); |
|||
} |
|||
} |
|||
} |
|||
return ALPHANUM; |
|||
} |
|||
|
|||
|
|||
// Converts to reverse polish notation
|
|||
std::vector<Node> shuntingYard(std::vector<Node> tokens) { |
|||
std::vector<Node> iq; |
|||
for (int i = tokens.size() - 1; i >= 0; i--) { |
|||
iq.push_back(tokens[i]); |
|||
} |
|||
std::vector<Node> oq; |
|||
std::vector<Node> stack; |
|||
Node prev, tok; |
|||
int prevtyp, toktyp; |
|||
|
|||
while (iq.size()) { |
|||
prev = tok; |
|||
prevtyp = toktyp; |
|||
tok = iq.back(); |
|||
toktyp = toktype(tok); |
|||
iq.pop_back(); |
|||
// Alphanumerics go straight to output queue
|
|||
if (toktyp == ALPHANUM) { |
|||
oq.push_back(tok); |
|||
} |
|||
// Left parens go on stack and output queue
|
|||
else if (toktyp == LPAREN) { |
|||
if (prevtyp != ALPHANUM && prevtyp != RPAREN) { |
|||
oq.push_back(token("id", tok.metadata)); |
|||
} |
|||
Node fun = oq.back(); |
|||
oq.pop_back(); |
|||
oq.push_back(tok); |
|||
oq.push_back(fun); |
|||
stack.push_back(tok); |
|||
} |
|||
// If rparen, keep moving from stack to output queue until lparen
|
|||
else if (toktyp == RPAREN) { |
|||
while (stack.size() && toktype(stack.back()) != LPAREN) { |
|||
oq.push_back(stack.back()); |
|||
stack.pop_back(); |
|||
} |
|||
if (stack.size()) stack.pop_back(); |
|||
oq.push_back(tok); |
|||
} |
|||
// If binary op, keep popping from stack while higher bedmas precedence
|
|||
else if (toktyp == UNARY_OP || toktyp == BINARY_OP) { |
|||
if (tok.val == "-" && prevtyp != ALPHANUM && prevtyp != RPAREN) { |
|||
oq.push_back(token("0", tok.metadata)); |
|||
} |
|||
int prec = precedence(tok); |
|||
while (stack.size() |
|||
&& toktype(stack.back()) == BINARY_OP |
|||
&& precedence(stack.back()) <= prec) { |
|||
oq.push_back(stack.back()); |
|||
stack.pop_back(); |
|||
} |
|||
stack.push_back(tok); |
|||
} |
|||
// Comma and colon mean finish evaluating the argument
|
|||
else if (toktyp == COMMA || toktyp == COLON) { |
|||
while (stack.size() && toktype(stack.back()) != LPAREN) { |
|||
oq.push_back(stack.back()); |
|||
stack.pop_back(); |
|||
} |
|||
if (toktyp == COLON) oq.push_back(tok); |
|||
} |
|||
} |
|||
while (stack.size()) { |
|||
oq.push_back(stack.back()); |
|||
stack.pop_back(); |
|||
} |
|||
return oq; |
|||
} |
|||
|
|||
// Converts reverse polish notation into tree
|
|||
Node treefy(std::vector<Node> stream) { |
|||
std::vector<Node> iq; |
|||
for (int i = stream.size() -1; i >= 0; i--) { |
|||
iq.push_back(stream[i]); |
|||
} |
|||
std::vector<Node> oq; |
|||
while (iq.size()) { |
|||
Node tok = iq.back(); |
|||
iq.pop_back(); |
|||
int typ = toktype(tok); |
|||
// If unary, take node off end of oq and wrap it with the operator
|
|||
// If binary, do the same with two nodes
|
|||
if (typ == UNARY_OP || typ == BINARY_OP) { |
|||
std::vector<Node> args; |
|||
int rounds = (typ == BINARY_OP) ? 2 : 1; |
|||
for (int i = 0; i < rounds; i++) { |
|||
if (oq.size() == 0) { |
|||
err("Line malformed, not enough args for "+tok.val, |
|||
tok.metadata); |
|||
} |
|||
args.push_back(oq.back()); |
|||
oq.pop_back(); |
|||
} |
|||
std::vector<Node> args2; |
|||
while (args.size()) { |
|||
args2.push_back(args.back()); |
|||
args.pop_back(); |
|||
} |
|||
oq.push_back(astnode(tok.val, args2, tok.metadata)); |
|||
} |
|||
// If rparen, keep grabbing until we get to an lparen
|
|||
else if (toktype(tok) == RPAREN) { |
|||
std::vector<Node> args; |
|||
while (1) { |
|||
args.push_back(oq.back()); |
|||
oq.pop_back(); |
|||
if (!oq.size()) err("Bracket without matching", tok.metadata); |
|||
if (toktype(oq.back()) == LPAREN) break; |
|||
} |
|||
oq.pop_back(); |
|||
// We represent a[b] as (access a b)
|
|||
if (tok.val == "]") args.push_back(token("access", tok.metadata)); |
|||
std::string fun = args.back().val; |
|||
args.pop_back(); |
|||
// We represent [1,2,3] as (array_lit 1 2 3)
|
|||
if (fun == "access" && args.size() && args.back().val == "id") { |
|||
fun = "array_lit"; |
|||
args.pop_back(); |
|||
} |
|||
std::vector<Node> args2; |
|||
while (args.size()) { |
|||
args2.push_back(args.back()); |
|||
args.pop_back(); |
|||
} |
|||
// When evaluating 2 + (3 * 5), the shunting yard algo turns that
|
|||
// into 2 ( id 3 5 * ) +, effectively putting "id" as a dummy
|
|||
// function where the algo was expecting a function to call the
|
|||
// thing inside the brackets. This reverses that step
|
|||
if (fun == "id" && args2.size()) { |
|||
oq.push_back(args2[0]); |
|||
} |
|||
else { |
|||
oq.push_back(astnode(fun, args2, tok.metadata)); |
|||
} |
|||
} |
|||
else oq.push_back(tok); |
|||
// This is messy, but has to be done. Import/inset other files here
|
|||
std::string v = oq.back().val; |
|||
if ((v == "inset" || v == "import" || v == "create") |
|||
&& oq.back().args.size() == 1 |
|||
&& oq.back().args[0].type == TOKEN) { |
|||
int lastSlashPos = tok.metadata.file.rfind("/"); |
|||
std::string root; |
|||
if (lastSlashPos >= 0) |
|||
root = tok.metadata.file.substr(0, lastSlashPos) + "/"; |
|||
else |
|||
root = ""; |
|||
std::string filename = oq.back().args[0].val; |
|||
filename = filename.substr(1, filename.length() - 2); |
|||
if (!exists(root + filename)) |
|||
err("File does not exist: "+root + filename, tok.metadata); |
|||
oq.back().args.pop_back(); |
|||
oq.back().args.push_back(parseSerpent(root + filename)); |
|||
} |
|||
// Useful for debugging
|
|||
// for (int i = 0; i < oq.size(); i++) {
|
|||
// std::cerr << printSimple(oq[i]) << " ";
|
|||
// }
|
|||
// std::cerr << "\n";
|
|||
} |
|||
// Output must have one argument
|
|||
if (oq.size() == 0) { |
|||
err("Output blank", Metadata()); |
|||
} |
|||
else if (oq.size() > 1) { |
|||
err("Multiple expressions or unclosed bracket", oq[1].metadata); |
|||
} |
|||
|
|||
return oq[0]; |
|||
} |
|||
|
|||
|
|||
// Parses one line of serpent
|
|||
Node parseSerpentTokenStream(std::vector<Node> s) { |
|||
return treefy(shuntingYard(s)); |
|||
} |
|||
|
|||
|
|||
// Count the whitespace characters (spaces and tabs, one each) at the
// beginning of a line. A line made only of whitespace counts in full.
int spaceCount(std::string s) {
    std::string::size_type first = s.find_first_not_of(" \t");
    if (first == std::string::npos) first = s.length();
    return first;
}
|||
|
|||
// Is this a command that takes an argument on the same line?
// (if / elif / while are followed by their condition.)
bool bodied(std::string tok) {
    static const char* const keywords[] = { "if", "elif", "while" };
    const unsigned count = sizeof(keywords) / sizeof(keywords[0]);
    for (unsigned k = 0; k < count; k++) {
        if (tok == keywords[k]) return true;
    }
    return false;
}
|||
|
|||
// Is this a command that takes an argument as a child block?
// (i.e. one that must be followed by an indented block.)
bool childBlocked(std::string tok) {
    static const char* const keywords[] = {
        "if", "elif", "else", "code", "shared", "init", "while"
    };
    const unsigned count = sizeof(keywords) / sizeof(keywords[0]);
    for (unsigned k = 0; k < count; k++) {
        if (tok == keywords[k]) return true;
    }
    return false;
}
|||
|
|||
// Are the two commands meant to continue each other?
// True when `tok` is a legal continuation of a construct opened by `prev`:
//   if / elif -> elif, else
//   init      -> code
//   shared    -> code, init
bool bodiedContinued(std::string prev, std::string tok) {
    if (prev == "if" || prev == "elif")
        return tok == "elif" || tok == "else";
    if (prev == "init")
        return tok == "code";
    if (prev == "shared")
        return tok == "code" || tok == "init";
    return false;
}
|||
|
|||
// Is a line of code empty?
|
|||
bool isLineEmpty(std::string line) { |
|||
std::vector<Node> tokens = tokenize(line); |
|||
if (!tokens.size() || tokens[0].val == "#" || tokens[0].val == "//") |
|||
return true; |
|||
return false; |
|||
} |
|||
|
|||
// Parse lines of serpent (helper function)
|
|||
Node parseLines(std::vector<std::string> lines, Metadata metadata, int sp) { |
|||
std::vector<Node> o; |
|||
int origLine = metadata.ln; |
|||
unsigned i = 0; |
|||
while (i < lines.size()) { |
|||
metadata.ln = origLine + i; |
|||
std::string main = lines[i]; |
|||
if (isLineEmpty(main)) { |
|||
i += 1; |
|||
continue; |
|||
} |
|||
int spaces = spaceCount(main); |
|||
if (spaces != sp) { |
|||
err("Indent mismatch", metadata); |
|||
} |
|||
// Tokenize current line
|
|||
std::vector<Node> tokens = tokenize(main.substr(sp), metadata); |
|||
// Remove extraneous tokens, including if / elif
|
|||
std::vector<Node> tokens2; |
|||
for (unsigned j = 0; j < tokens.size(); j++) { |
|||
if (tokens[j].val == "#" || tokens[j].val == "//") break; |
|||
if (j >= 1 || !bodied(tokens[j].val)) { |
|||
tokens2.push_back(tokens[j]); |
|||
} |
|||
} |
|||
if (tokens2.size() > 0 && tokens2.back().val == ":") |
|||
tokens2.pop_back(); |
|||
// Parse current line
|
|||
Node out = parseSerpentTokenStream(tokens2); |
|||
// Parse child block
|
|||
int childIndent = 999999; |
|||
std::vector<std::string> childBlock; |
|||
while (1) { |
|||
i++; |
|||
if (i >= lines.size()) |
|||
break; |
|||
bool ile = isLineEmpty(lines[i]); |
|||
if (!ile) { |
|||
int spaces = spaceCount(lines[i]); |
|||
if (spaces <= sp) break; |
|||
childBlock.push_back(lines[i]); |
|||
if (spaces < childIndent) childIndent = spaces; |
|||
} |
|||
else childBlock.push_back(""); |
|||
} |
|||
// Child block empty?
|
|||
bool cbe = true; |
|||
for (unsigned i = 0; i < childBlock.size(); i++) { |
|||
if (childBlock[i].length() > 0) { cbe = false; break; } |
|||
} |
|||
// Bring back if / elif into AST
|
|||
if (bodied(tokens[0].val)) { |
|||
std::vector<Node> args; |
|||
args.push_back(out); |
|||
out = astnode(tokens[0].val, args, out.metadata); |
|||
} |
|||
// Add child block to AST
|
|||
if (childBlocked(tokens[0].val)) { |
|||
if (cbe) |
|||
err("Expected indented child block!", out.metadata); |
|||
out.type = ASTNODE; |
|||
metadata.ln += 1; |
|||
out.args.push_back(parseLines(childBlock, metadata, childIndent)); |
|||
metadata.ln -= 1; |
|||
} |
|||
else if (!cbe) |
|||
err("Did not expect indented child block!", out.metadata); |
|||
if (o.size() == 0 || o.back().type == TOKEN) { |
|||
o.push_back(out); |
|||
continue; |
|||
} |
|||
// This is a little complicated. Basically, the idea here is to build
|
|||
// constructions like [if [< x 5] [a] [elif [< x 10] [b] [else [c]]]]
|
|||
std::vector<Node> u; |
|||
u.push_back(o.back()); |
|||
if (bodiedContinued(o.back().val, out.val)) { |
|||
while (1) { |
|||
if (!bodiedContinued(u.back().val, out.val)) { |
|||
u.pop_back(); |
|||
break; |
|||
} |
|||
if (!u.back().args.size() |
|||
|| !bodiedContinued(u.back().val, u.back().args.back().val)) { |
|||
break; |
|||
} |
|||
u.push_back(u.back().args.back()); |
|||
} |
|||
u.back().args.push_back(out); |
|||
while (u.size() > 1) { |
|||
Node v = u.back(); |
|||
u.pop_back(); |
|||
u.back().args.pop_back(); |
|||
u.back().args.push_back(v); |
|||
} |
|||
o.pop_back(); |
|||
o.push_back(u[0]); |
|||
} |
|||
else o.push_back(out); |
|||
} |
|||
if (o.size() == 1) |
|||
return o[0]; |
|||
else if (o.size()) |
|||
return astnode("seq", o, o[0].metadata); |
|||
else |
|||
return astnode("seq", o, Metadata()); |
|||
} |
|||
|
|||
// Parses serpent code
|
|||
Node parseSerpent(std::string s) { |
|||
std::string input = s; |
|||
std::string file = "main"; |
|||
if (exists(s)) { |
|||
file = s; |
|||
input = get_file_contents(s); |
|||
} |
|||
return parseLines(splitLines(input), Metadata(file, 0, 0), 0); |
|||
} |
|||
|
|||
|
|||
using namespace std; |
@ -0,0 +1,13 @@ |
|||
#ifndef ETHSERP_PARSER |
|||
#define ETHSERP_PARSER |
|||
|
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
|
|||
// Serpent text -> parse tree
|
|||
Node parseSerpent(std::string s); |
|||
|
|||
#endif |
@ -0,0 +1,466 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
#include "lllparser.h" |
|||
#include "bignum.h" |
|||
|
|||
std::string valid[][3] = { |
|||
{ "if", "2", "3" }, |
|||
{ "unless", "2", "2" }, |
|||
{ "while", "2", "2" }, |
|||
{ "until", "2", "2" }, |
|||
{ "code", "1", "2" }, |
|||
{ "init", "2", "2" }, |
|||
{ "shared", "2", "3" }, |
|||
{ "alloc", "1", "1" }, |
|||
{ "array", "1", "1" }, |
|||
{ "call", "2", "4" }, |
|||
{ "create", "1", "4" }, |
|||
{ "msg", "4", "6" }, |
|||
{ "getch", "2", "2" }, |
|||
{ "setch", "3", "3" }, |
|||
{ "sha3", "1", "2" }, |
|||
{ "return", "1", "2" }, |
|||
{ "inset", "1", "1" }, |
|||
{ "import", "1", "1" }, |
|||
{ "array_lit", "0", tt256 }, |
|||
{ "seq", "0", tt256 }, |
|||
{ "---END---", "", "" } //Keep this line at the end of the list
|
|||
}; |
|||
|
|||
// Rewrite rules as { pattern, replacement } pairs of LLL text. Names starting
// with '$' are pattern variables. apply_rules() tries the rules in the order
// listed here, each one against the result of the previous ones, so order
// matters. A '$' name in a replacement that the pattern did not bind becomes a
// fresh prefixed temporary (see subst()).
std::string macros[][2] = {
    // Compound assignment
    { "(+= $a $b)", "(set $a (+ $a $b))" },
    { "(*= $a $b)", "(set $a (* $a $b))" },
    { "(-= $a $b)", "(set $a (- $a $b))" },
    { "(/= $a $b)", "(set $a (/ $a $b))" },
    { "(%= $a $b)", "(set $a (% $a $b))" },
    { "(^= $a $b)", "(set $a (^ $a $b))" },
    { "(@/= $a $b)", "(set $a (@/ $a $b))" },
    { "(@%= $a $b)", "(set $a (@% $a $b))" },
    // Control flow and data access
    { "(if $cond $do (else $else))", "(if $cond $do $else)" },
    { "(code $code)", "$code" },
    { "(access msg.data $ind)", "(calldataload (mul 32 $ind))" },
    { "(array $len)", "(alloc (mul 32 $len))" },
    { "(while $cond $do)", "(until (not $cond) $do)" },
    { "(while (not $cond) $do)", "(until $cond $do)" },
    { "(if $cond $do)", "(unless (not $cond) $do)" },
    { "(if (not $cond) $do)", "(unless $cond $do)" },
    { "(access contract.storage $ind)", "(sload $ind)" },
    { "(access $var $ind)", "(mload (add $var (mul 32 $ind)))" },
    { "(set (access contract.storage $ind) $val)", "(sstore $ind $val)" },
    { "(set (access $var $ind) $val)",
      "(mstore (add $var (mul 32 $ind)) $val)" },
    { "(getch $var $ind)", "(mod (mload (add $var $ind)) 256)" },
    { "(setch $var $ind $val)", "(mstore8 (add $var $ind) $val)" },
    { "(send $to $value)", "(call (sub (gas) 25) $to $value 0 0 0 0)" },
    { "(send $gas $to $value)", "(call $gas $to $value 0 0 0 0)" },
    { "(sha3 $x)", "(seq (set $1 $x) (sha3 (ref $1) 32))" },
    { "(id $0)", "$0" },
    { "(return $x)", "(seq (set $1 $x) (~return (ref $1) 32))" },
    { "(return $start $len)", "(~return $start (mul 32 $len))" },
    // Logic and comparisons
    { "(&& $x $y)", "(if $x $y 0)" },
    { "(|| $x $y)", "(seq (set $1 $x) (if (get $1) (get $1) $y))" },
    { "(>= $x $y)", "(not (slt $x $y))" },
    { "(<= $x $y)", "(not (sgt $x $y))" },
    { "(@>= $x $y)", "(not (lt $x $y))" },
    { "(@<= $x $y)", "(not (gt $x $y))" },
    // Contract creation and messaging
    { "(create $code)", "(create 0 $code)" },
    { "(create $endowment $code)",
      "(seq (set $1 (msize)) (create $endowment (get $1) (lll (outer $code) (msize))))" },
    { "(call $f $dataval)", "(msg (sub (gas) 45) $f 0 $dataval)" },
    { "(call $f $inp $inpsz)", "(msg (sub (gas) 25) $f 0 $inp $inpsz)" },
    { "(call $f $inp $inpsz $outsz)",
      "(seq (set $1 $outsz) (set $2 (alloc (mul 32 (get $1)))) (pop (call (sub (gas) (add 25 (get $1))) $f 0 $inp (mul 32 $inpsz) (ref $2) (mul 32 (get $1)))) (get $2))" },
    { "(msg $gas $to $val $inp $inpsz)",
      "(seq (call $gas $to $val $inp (mul 32 $inpsz) (ref $1) 32) (get $1))" },
    { "(msg $gas $to $val $dataval)",
      "(seq (set $1 $dataval) (call $gas $to $val (ref $1) 32 (ref $2) 32) (get $2))" },
    { "(msg $gas $to $val $inp $inpsz $outsz)",
      "(seq (set $1 (mul 32 $outsz)) (set $2 (alloc (get $1))) (pop (call $gas $to $val $inp (mul 32 $inpsz) (ref $2) (get $1))) (get $2))" },
    // Program structure
    { "(outer (init $init $code))", "(seq $init (~return 0 (lll $code 0)))" },
    { "(outer (shared $shared (init $init (code $code))))",
      "(seq $shared $init (~return 0 (lll (seq $shared $code) 0)))" },
    { "(outer $code)", "(~return 0 (lll $code 0))" },
    { "(seq (seq) $x)", "$x" },
    { "(inset $x)", "$x" },
    { "(create $val (import $code))",
      "(seq (set $1 (msize)) (create $val (get $1) (lll $code (get $1))))" },
    // NOTE(review): the next two patterns bind only $x, yet their
    // replacements use $val and $code, which would therefore be substituted
    // as fresh temporaries - confirm whether that is intended.
    { "(create (import $x))",
      "(seq (set $1 (msize)) (create $val (get $1) (lll $code (get $1))))" },
    { "(create $x)",
      "(seq (set $1 (msize)) (create $val (get $1) (lll $code (get $1))))" },
    // Bare names
    { "msg.datasize", "(div (calldatasize) 32)" },
    { "msg.sender", "(caller)" },
    { "msg.value", "(callvalue)" },
    { "tx.gasprice", "(gasprice)" },
    { "tx.origin", "(origin)" },
    { "tx.gas", "(gas)" },
    { "contract.balance", "(balance)" },
    { "contract.address", "(address)" },
    { "block.prevhash", "(prevhash)" },
    { "block.coinbase", "(coinbase)" },
    { "block.timestamp", "(timestamp)" },
    { "block.number", "(number)" },
    { "block.difficulty", "(difficulty)" },
    { "block.gaslimit", "(gaslimit)" },
    { "stop", "(stop)" },
    { "---END---", "" } //Keep this line at the end of the list
};
|||
|
|||
// Parsed form of `macros`, filled by apply_rules() the first time it runs:
// each entry is { parsed pattern, parsed replacement }.
std::vector< std::vector<Node> > nodeMacros;
|||
|
|||
// Function-name rewrites as { from, to } pairs. apply_rules() walks the list
// once from top to bottom, so a name can be rewritten more than once in the
// same pass (e.g. "|" -> "or" -> "||").
std::string synonyms[][2] = {
    { "|", "or" },
    { "or", "||" },
    { "&", "and" },
    { "and", "&&" },
    { "elif", "if" },
    { "!", "not" },
    { "string", "alloc" },
    { "+", "add" },
    { "-", "sub" },
    { "*", "mul" },
    { "/", "sdiv" },
    { "^", "exp" },
    { "**", "exp" },
    { "%", "smod" },
    { "@/", "div" },
    { "@%", "mod" },
    { "@<", "lt" },
    { "@>", "gt" },
    { "<", "slt" },
    { ">", "sgt" },
    { "=", "set" },
    { "==", "eq" },
    { "---END---", "" } //Keep this line at the end of the list
};
|||
|
|||
struct matchResult { |
|||
bool success; |
|||
std::map<std::string, Node> map; |
|||
}; |
|||
|
|||
// Returns two values. First, a boolean to determine whether the node matches
|
|||
// the pattern, second, if the node does match then a map mapping variables
|
|||
// in the pattern to nodes
|
|||
matchResult match(Node p, Node n) { |
|||
matchResult o; |
|||
o.success = false; |
|||
if (p.type == TOKEN) { |
|||
if (p.val == n.val) o.success = true; |
|||
else if (p.val[0] == '$') { |
|||
o.success = true; |
|||
o.map[p.val.substr(1)] = n; |
|||
} |
|||
} |
|||
else if (n.type==TOKEN || p.val!=n.val || p.args.size()!=n.args.size()) { |
|||
} |
|||
else { |
|||
for (unsigned i = 0; i < p.args.size(); i++) { |
|||
matchResult oPrime = match(p.args[i], n.args[i]); |
|||
if (!oPrime.success) { |
|||
o.success = false; |
|||
return o; |
|||
} |
|||
for (std::map<std::string, Node>::iterator it = oPrime.map.begin(); |
|||
it != oPrime.map.end(); |
|||
it++) { |
|||
o.map[(*it).first] = (*it).second; |
|||
} |
|||
} |
|||
o.success = true; |
|||
} |
|||
return o; |
|||
} |
|||
|
|||
// Fills in the pattern with a dictionary mapping variable names to
|
|||
// nodes (these dicts are generated by match). Match and subst together
|
|||
// create a full pattern-matching engine.
|
|||
Node subst(Node pattern, |
|||
std::map<std::string, Node> dict, |
|||
std::string varflag, |
|||
Metadata metadata) { |
|||
if (pattern.type == TOKEN && pattern.val[0] == '$') { |
|||
if (dict.count(pattern.val.substr(1))) { |
|||
return dict[pattern.val.substr(1)]; |
|||
} |
|||
else { |
|||
return token(varflag + pattern.val.substr(1), metadata); |
|||
} |
|||
} |
|||
else if (pattern.type == TOKEN) { |
|||
return pattern; |
|||
} |
|||
else { |
|||
std::vector<Node> args; |
|||
for (unsigned i = 0; i < pattern.args.size(); i++) { |
|||
args.push_back(subst(pattern.args[i], dict, varflag, metadata)); |
|||
} |
|||
return astnode(pattern.val, args, metadata); |
|||
} |
|||
} |
|||
|
|||
// Recursively applies rewrite rules
|
|||
Node apply_rules(Node node) { |
|||
// If the rewrite rules have not yet been parsed, parse them
|
|||
if (!nodeMacros.size()) { |
|||
for (int i = 0; i < 9999; i++) { |
|||
std::vector<Node> o; |
|||
if (macros[i][0] == "---END---") break; |
|||
o.push_back(parseLLL(macros[i][0])); |
|||
o.push_back(parseLLL(macros[i][1])); |
|||
nodeMacros.push_back(o); |
|||
} |
|||
} |
|||
// Main code
|
|||
unsigned pos = 0; |
|||
std::string prefix = "_temp"+mkUniqueToken()+"_"; |
|||
while(1) { |
|||
if (synonyms[pos][0] == "---END---") { |
|||
break; |
|||
} |
|||
else if (node.type == ASTNODE && node.val == synonyms[pos][0]) { |
|||
node.val = synonyms[pos][1]; |
|||
} |
|||
pos++; |
|||
} |
|||
for (pos = 0; pos < nodeMacros.size(); pos++) { |
|||
Node pattern = nodeMacros[pos][0]; |
|||
matchResult mr = match(pattern, node); |
|||
if (mr.success) { |
|||
Node pattern2 = nodeMacros[pos][1]; |
|||
node = subst(pattern2, mr.map, prefix, node.metadata); |
|||
} |
|||
} |
|||
if (node.type == ASTNODE && node.val != "ref" && node.val != "get") { |
|||
unsigned i = 0; |
|||
if (node.val == "set") i = 1; |
|||
for (i = i; i < node.args.size(); i++) { |
|||
node.args[i] = apply_rules(node.args[i]); |
|||
} |
|||
} |
|||
else if (node.type == TOKEN && !isNumberLike(node)) { |
|||
std::vector<Node> args; |
|||
args.push_back(node); |
|||
node = astnode("get", args, node.metadata); |
|||
} |
|||
// This allows people to use ~x as a way of having functions with the same
|
|||
// name and arity as macros; the idea is that ~x is a "final" form, and
|
|||
// should not be remacroed, but it is converted back at the end
|
|||
if (node.type == ASTNODE && node.val[0] == '~') |
|||
node.val = node.val.substr(1); |
|||
return node; |
|||
} |
|||
|
|||
// Constant folding. Tokens are passed through tryNumberize(); compound nodes
// have their arguments optimized first and, when the node has exactly two
// token arguments and is one of add / sub / mul / div / sdiv / mod / smod,
// are replaced by the computed token where the guards below allow it.
// Anything else is returned with only its arguments optimized.
Node optimize(Node inp) {
    if (inp.type == TOKEN) return tryNumberize(inp);
    for (unsigned i = 0; i < inp.args.size(); i++) {
        inp.args[i] = optimize(inp.args[i]);
    }
    const bool twoTokens = inp.args.size() == 2
                           && inp.args[0].type == TOKEN
                           && inp.args[1].type == TOKEN;
    if (!twoTokens) return inp;

    const std::string& fn = inp.val;
    const std::string& lhs = inp.args[0].val;
    const std::string& rhs = inp.args[1].val;
    std::string folded;
    if (fn == "add") {
        folded = decimalMod(decimalAdd(lhs, rhs), tt256);
    }
    else if (fn == "sub") {
        // Only folded when decimalGt(lhs, rhs, true) holds
        if (decimalGt(lhs, rhs, true))
            folded = decimalSub(lhs, rhs);
    }
    else if (fn == "mul") {
        folded = decimalMod(decimalMul(lhs, rhs), tt256);
    }
    else if (fn == "div" && rhs != "0") {
        folded = decimalDiv(lhs, rhs);
    }
    else if (fn == "sdiv" && rhs != "0"
             && decimalGt(tt255, lhs)
             && decimalGt(tt255, rhs)) {
        // Signed division is only folded for operands below tt255
        folded = decimalDiv(lhs, rhs);
    }
    else if (fn == "mod" && rhs != "0") {
        folded = decimalMod(lhs, rhs);
    }
    else if (fn == "smod" && rhs != "0"
             && decimalGt(tt255, lhs)
             && decimalGt(tt255, rhs)) {
        folded = decimalMod(lhs, rhs);
    }
    if (folded.length()) return token(folded, inp.metadata);
    return inp;
}
|||
|
|||
// Checks argument counts against the `valid` table, recursively over the
// whole tree. Violations are reported through err(). Returns `inp` unchanged.
Node validate(Node inp) {
    if (inp.type == ASTNODE) {
        for (int row = 0; valid[row][0] != "---END---"; row++) {
            if (inp.val != valid[row][0]) continue;
            const std::string argc = intToDecimal(inp.args.size());
            if (decimalGt(valid[row][1], argc)) {
                err("Too few arguments for "+inp.val, inp.metadata);
            }
            if (decimalGt(argc, valid[row][2])) {
                err("Too many arguments for "+inp.val, inp.metadata);
            }
        }
    }
    for (unsigned i = 0; i < inp.args.size(); i++) {
        validate(inp.args[i]);
    }
    return inp;
}
|||
|
|||
// Wraps the program in an (outer ...) node, which the "outer" macros then
// rewrite.
Node preprocess(Node inp) {
    std::vector<Node> wrapped(1, inp);
    return astnode("outer", wrapped, inp.metadata);
}
|||
|
|||
// Full rewriting pipeline: preprocess -> validate -> apply_rules -> optimize.
Node rewrite(Node inp) {
    Node wrapped = preprocess(inp);
    Node checked = validate(wrapped);
    Node expanded = apply_rules(checked);
    return optimize(expanded);
}
|||
|
|||
using namespace std; |
@ -0,0 +1,13 @@ |
|||
#ifndef ETHSERP_REWRITER |
|||
#define ETHSERP_REWRITER |
|||
|
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
|
|||
// Applies rewrite rules
|
|||
Node rewrite(Node inp); |
|||
|
|||
#endif |
@ -0,0 +1,111 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
|
|||
// These appear as independent tokens even if inside a stream of symbols
// (comment openers and every kind of bracket).
const std::string atoms[] = {
    "#", "//",
    "(", ")",
    "[", "]",
    "{", "}"
};
// Number of entries in `atoms`
const int numAtoms = sizeof(atoms) / sizeof(atoms[0]);
|||
|
|||
// Is the char alphanumeric, a space, a bracket, a quote, a symbol?
|
|||
int chartype(char c) { |
|||
if (c >= '0' && c <= '9') return ALPHANUM; |
|||
else if (c >= 'a' && c <= 'z') return ALPHANUM; |
|||
else if (c >= 'A' && c <= 'Z') return ALPHANUM; |
|||
else if (std::string("~._$").find(c) != std::string::npos) return ALPHANUM; |
|||
else if (c == '\t' || c == ' ' || c == '\n') return SPACE; |
|||
else if (std::string("()[]{}").find(c) != std::string::npos) return BRACK; |
|||
else if (c == '"') return DQUOTE; |
|||
else if (c == '\'') return SQUOTE; |
|||
else return SYMB; |
|||
} |
|||
|
|||
// "y = f(45,124)/3" -> [ "y", "f", "(", "45", ",", "124", ")", "/", "3"]
|
|||
std::vector<Node> tokenize(std::string inp, Metadata metadata) { |
|||
int curtype = SPACE; |
|||
unsigned pos = 0; |
|||
int lastNewline = 0; |
|||
metadata.ch = 0; |
|||
std::string cur; |
|||
std::vector<Node> out; |
|||
|
|||
inp += " "; |
|||
while (pos < inp.length()) { |
|||
int headtype = chartype(inp[pos]); |
|||
// Are we inside a quote?
|
|||
if (curtype == SQUOTE || curtype == DQUOTE) { |
|||
// Close quote
|
|||
if (headtype == curtype) { |
|||
cur += inp[pos]; |
|||
out.push_back(token(cur, metadata)); |
|||
cur = ""; |
|||
metadata.ch = pos - lastNewline; |
|||
curtype = SPACE; |
|||
pos += 1; |
|||
} |
|||
// eg. \xc3
|
|||
else if (inp.length() >= pos + 4 && inp.substr(pos, 2) == "\\x") { |
|||
cur += (std::string("0123456789abcdef").find(inp[pos+2]) * 16 |
|||
+ std::string("0123456789abcdef").find(inp[pos+3])); |
|||
pos += 4; |
|||
} |
|||
// Newline
|
|||
else if (inp.substr(pos, 2) == "\\n") { |
|||
cur += '\n'; |
|||
pos += 2; |
|||
} |
|||
// Backslash escape
|
|||
else if (inp.length() >= pos + 2 && inp[pos] == '\\') { |
|||
cur += inp[pos + 1]; |
|||
pos += 2; |
|||
} |
|||
// Normal character
|
|||
else { |
|||
cur += inp[pos]; |
|||
pos += 1; |
|||
} |
|||
} |
|||
else { |
|||
// Handle atoms ( '//', '#', brackets )
|
|||
for (int i = 0; i < numAtoms; i++) { |
|||
int split = cur.length() - atoms[i].length(); |
|||
if (split >= 0 && cur.substr(split) == atoms[i]) { |
|||
if (split > 0) { |
|||
out.push_back(token(cur.substr(0, split), metadata)); |
|||
} |
|||
metadata.ch += split; |
|||
out.push_back(token(cur.substr(split), metadata)); |
|||
metadata.ch = pos - lastNewline; |
|||
cur = ""; |
|||
curtype = SPACE; |
|||
} |
|||
} |
|||
// Special case the minus sign
|
|||
if (cur.length() > 1 && cur[cur.length() - 1] == '-') { |
|||
out.push_back(token(cur.substr(0, cur.length() - 1), metadata)); |
|||
out.push_back(token("-", metadata)); |
|||
cur = ""; |
|||
} |
|||
// Boundary between different char types
|
|||
if (headtype != curtype) { |
|||
if (curtype != SPACE && cur != "") { |
|||
out.push_back(token(cur, metadata)); |
|||
} |
|||
metadata.ch = pos - lastNewline; |
|||
cur = ""; |
|||
} |
|||
cur += inp[pos]; |
|||
curtype = headtype; |
|||
pos += 1; |
|||
} |
|||
if (inp[pos] == '\n') { |
|||
lastNewline = pos; |
|||
metadata.ch = 0; |
|||
metadata.ln += 1; |
|||
} |
|||
} |
|||
return out; |
|||
} |
|||
|
|||
|
@ -0,0 +1,14 @@ |
|||
#ifndef ETHSERP_TOKENIZE |
|||
#define ETHSERP_TOKENIZE |
|||
|
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
|
|||
int chartype(char c); |
|||
|
|||
std::vector<Node> tokenize(std::string inp, Metadata meta=Metadata()); |
|||
|
|||
#endif |
@ -0,0 +1,256 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include "util.h" |
|||
#include "bignum.h" |
|||
#include <fstream> |
|||
#include <cerrno> |
|||
|
|||
//Token or value node constructor
|
|||
Node token(std::string val, Metadata met) { |
|||
Node o; |
|||
o.type = 0; |
|||
o.val = val; |
|||
o.metadata = met; |
|||
return o; |
|||
} |
|||
|
|||
//AST node constructor
|
|||
Node astnode(std::string val, std::vector<Node> args, Metadata met) { |
|||
Node o; |
|||
o.type = 1; |
|||
o.val = val; |
|||
o.args = args; |
|||
o.metadata = met; |
|||
return o; |
|||
} |
|||
|
|||
// Print token list
|
|||
std::string printTokens(std::vector<Node> tokens) { |
|||
std::string s = ""; |
|||
for (unsigned i = 0; i < tokens.size(); i++) { |
|||
s += tokens[i].val + " "; |
|||
} |
|||
return s; |
|||
} |
|||
|
|||
// Prints a lisp AST on one line
|
|||
std::string printSimple(Node ast) { |
|||
if (ast.type == TOKEN) return ast.val; |
|||
std::string o = "(" + ast.val; |
|||
std::vector<std::string> subs; |
|||
for (unsigned i = 0; i < ast.args.size(); i++) { |
|||
o += " " + printSimple(ast.args[i]); |
|||
} |
|||
return o + ")"; |
|||
} |
|||
|
|||
// Number of tokens in a tree
|
|||
int treeSize(Node prog) { |
|||
if (prog.type == TOKEN) return 1; |
|||
int o = 0; |
|||
for (unsigned i = 0; i < prog.args.size(); i++) o += treeSize(prog.args[i]); |
|||
return o; |
|||
} |
|||
|
|||
// Pretty-prints a lisp AST
|
|||
std::string printAST(Node ast, bool printMetadata) { |
|||
if (ast.type == TOKEN) return ast.val; |
|||
std::string o = "("; |
|||
if (printMetadata) { |
|||
o += ast.metadata.file + " "; |
|||
o += intToDecimal(ast.metadata.ln) + " "; |
|||
o += intToDecimal(ast.metadata.ch) + ": "; |
|||
} |
|||
o += ast.val; |
|||
std::vector<std::string> subs; |
|||
for (unsigned i = 0; i < ast.args.size(); i++) { |
|||
subs.push_back(printAST(ast.args[i], printMetadata)); |
|||
} |
|||
unsigned k = 0; |
|||
std::string out = " "; |
|||
// As many arguments as possible go on the same line as the function,
|
|||
// except when seq is used
|
|||
while (k < subs.size() && o != "(seq") { |
|||
if (subs[k].find("\n") != std::string::npos || (out + subs[k]).length() >= 80) break; |
|||
out += subs[k] + " "; |
|||
k += 1; |
|||
} |
|||
// All remaining arguments go on their own lines
|
|||
if (k < subs.size()) { |
|||
o += out + "\n"; |
|||
std::vector<std::string> subsSliceK; |
|||
for (unsigned i = k; i < subs.size(); i++) subsSliceK.push_back(subs[i]); |
|||
o += indentLines(joinLines(subsSliceK)); |
|||
o += "\n)"; |
|||
} |
|||
else { |
|||
o += out.substr(0, out.size() - 1) + ")"; |
|||
} |
|||
return o; |
|||
} |
|||
|
|||
// Splits text by line.
// Cuts the input at every '\n'. The result always has one more element than
// the input has newlines, so "" yields one empty string and a trailing
// newline yields a trailing empty string.
std::vector<std::string> splitLines(std::string s) {
    std::vector<std::string> pieces;
    std::string::size_type start = 0;
    std::string::size_type nl = s.find('\n', start);
    while (nl != std::string::npos) {
        pieces.push_back(s.substr(start, nl - start));
        start = nl + 1;
        nl = s.find('\n', start);
    }
    pieces.push_back(s.substr(start));
    return pieces;
}
|||
|
|||
// Inverse of splitLines.
// Joins the given lines with a single '\n' between neighbours; no newline is
// added before the first or after the last line. An empty list yields "".
std::string joinLines(std::vector<std::string> lines) {
    std::string joined;
    for (std::vector<std::string>::size_type i = 0; i < lines.size(); ++i) {
        if (i != 0) joined += "\n";
        joined += lines[i];
    }
    return joined;
}
|||
|
|||
// Indent all lines by 4 spaces.
// Every line of the input (lines being separated by '\n') is prefixed with
// the indent, including empty lines and the empty input itself.
std::string indentLines(std::string inp) {
    // Spell the width out numerically so it cannot drift from the documented
    // 4 columns.
    const std::string indent(4, ' ');
    std::string o = indent;
    for (std::string::size_type i = 0; i < inp.length(); i++) {
        o += inp[i];
        // A newline starts a new line, which needs its own indent.
        if (inp[i] == '\n') o += indent;
    }
    return o;
}
|||
|
|||
// Converts string to simple numeric format
|
|||
std::string strToNumeric(std::string inp) { |
|||
std::string o = "0"; |
|||
if (inp == "") { |
|||
o = ""; |
|||
} |
|||
else if ((inp[0] == '"' && inp[inp.length()-1] == '"') |
|||
|| (inp[0] == '\'' && inp[inp.length()-1] == '\'')) { |
|||
for (unsigned i = 1; i < inp.length() - 1; i++) { |
|||
o = decimalAdd(decimalMul(o,"256"), intToDecimal(inp[i])); |
|||
} |
|||
} |
|||
else if (inp.substr(0,2) == "0x") { |
|||
for (unsigned i = 2; i < inp.length(); i++) { |
|||
int dig = std::string("0123456789abcdef").find(inp[i]); |
|||
if (dig < 0) return ""; |
|||
o = decimalAdd(decimalMul(o,"16"), intToDecimal(dig)); |
|||
} |
|||
} |
|||
else { |
|||
bool isPureNum = true; |
|||
for (unsigned i = 0; i < inp.length(); i++) { |
|||
isPureNum = isPureNum && inp[i] >= '0' && inp[i] <= '9'; |
|||
} |
|||
o = isPureNum ? inp : ""; |
|||
} |
|||
return o; |
|||
} |
|||
|
|||
// Does the node contain a number (eg. 124, 0xf012c, "george")
|
|||
bool isNumberLike(Node node) { |
|||
if (node.type == ASTNODE) return false; |
|||
return strToNumeric(node.val) != ""; |
|||
} |
|||
|
|||
//Normalizes number representations
|
|||
Node nodeToNumeric(Node node) { |
|||
std::string o = strToNumeric(node.val); |
|||
return token(o == "" ? node.val : o, node.metadata); |
|||
} |
|||
|
|||
// If a node is a number-like token, returns its normalized numeric form;
// every other node is returned unchanged.
Node tryNumberize(Node node) {
    if (node.type != TOKEN || !isNumberLike(node)) return node;
    return nodeToNumeric(node);
}
|||
|
|||
//Converts a value to an array of byte number nodes
|
|||
std::vector<Node> toByteArr(std::string val, Metadata metadata, int minLen) { |
|||
std::vector<Node> o; |
|||
int L = 0; |
|||
while (val != "0" || L < minLen) { |
|||
o.push_back(token(decimalMod(val, "256"), metadata)); |
|||
val = decimalDiv(val, "256"); |
|||
L++; |
|||
} |
|||
std::vector<Node> o2; |
|||
for (int i = o.size() - 1; i >= 0; i--) o2.push_back(o[i]); |
|||
return o2; |
|||
} |
|||
|
|||
int counter = 0; |
|||
|
|||
//Makes a unique token
|
|||
std::string mkUniqueToken() { |
|||
counter++; |
|||
return intToDecimal(counter); |
|||
} |
|||
|
|||
// Does a file exist? http://stackoverflow.com/questions/12774207
// Answers by trying to open the file for reading and reporting whether the
// stream is in a good state afterwards.
bool exists(std::string fileName) {
    std::ifstream probe;
    probe.open(fileName.c_str());
    return probe.good();
}
|||
|
|||
// Reads a file: http://stackoverflow.com/questions/2602013
// Returns the entire contents of `filename`, read in binary mode.
// Throws the current errno value (an int) when the file cannot be opened,
// its size cannot be determined, or it cannot be read in full.
std::string get_file_contents(std::string filename)
{
    std::ifstream in(filename.c_str(), std::ios::in | std::ios::binary);
    if (!in) throw(errno);

    in.seekg(0, std::ios::end);
    // tellg() reports -1 on failure; resizing to that would request an
    // absurdly large buffer instead of reporting the error.
    const std::streamoff size = in.tellg();
    if (size < 0) throw(errno);

    std::string contents;
    contents.resize(static_cast<std::string::size_type>(size));
    in.seekg(0, std::ios::beg);
    if (!contents.empty()) {
        in.read(&contents[0], static_cast<std::streamsize>(contents.size()));
    }
    // A failed or short read would otherwise hand back zero-filled data.
    if (!in) throw(errno);
    return contents;
}
|||
|
|||
//Report error
|
|||
void err(std::string errtext, Metadata met) { |
|||
std::string err = "Error (file \"" + met.file + "\", line " + |
|||
intToDecimal(met.ln) + ", char " + intToDecimal(met.ch) + |
|||
"): " + errtext; |
|||
std::cerr << err << "\n"; |
|||
throw(err); |
|||
} |
|||
|
|||
// Bin to hex.
// Encodes every byte of the input as two lowercase hexadecimal digits, high
// nibble first.
std::string binToHex(std::string inp) {
    static const char digits[] = "0123456789abcdef";
    std::string hex;
    for (std::string::const_iterator it = inp.begin(); it != inp.end(); ++it) {
        const unsigned char byte = static_cast<unsigned char>(*it);
        hex += digits[byte >> 4];
        hex += digits[byte & 0x0f];
    }
    return hex;
}
|||
|
|||
// Value (0-15) of a hexadecimal digit in either case, or std::string::npos
// when the character is not a hex digit.
static std::string::size_type hexDigitValue(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return std::string::npos;
}

// Hex to bin.
// Decodes pairs of hexadecimal digits (upper or lower case) into bytes, high
// nibble first. A trailing unpaired digit is ignored.
// Input is not validated: a pair containing a non-hex character still
// produces one byte, but its value is meaningless.
std::string hexToBin(std::string inp) {
    std::string o = "";
    for (unsigned i = 0; i+1 < inp.length(); i+=2) {
        const std::string::size_type hi = hexDigitValue(inp[i]);
        const std::string::size_type lo = hexDigitValue(inp[i+1]);
        o += static_cast<char>(hi * 16 + lo);
    }
    return o;
}
|||
|
|||
// Lower to upper.
// Maps the ASCII letters 'a'-'z' to 'A'-'Z'; every other character is copied
// through unchanged.
std::string upperCase(std::string inp) {
    std::string upper;
    for (std::string::const_iterator it = inp.begin(); it != inp.end(); ++it) {
        const char c = *it;
        const bool isLower = c >= 'a' && c <= 'z';
        upper += isLower ? static_cast<char>(c - ('a' - 'A')) : c;
    }
    return upper;
}
@ -0,0 +1,106 @@ |
|||
#ifndef ETHSERP_UTIL |
|||
#define ETHSERP_UTIL |
|||
|
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include <fstream> |
|||
#include <cerrno> |
|||
|
|||
// Node kinds stored in Node::type.
const int TOKEN = 0;
const int ASTNODE = 1;

// Further category codes.
// NOTE(review): presumably character / token classes used by the tokenizer
// and parser -- confirm against their users.
const int SPACE = 2;
const int BRACK = 3;
const int SQUOTE = 4;
const int DQUOTE = 5;
const int SYMB = 6;
const int ALPHANUM = 7;
const int LPAREN = 8;
const int RPAREN = 9;
const int COMMA = 10;
const int COLON = 11;
const int UNARY_OP = 12;
const int BINARY_OP = 13;
const int COMPOUND = 14;
|||
|
|||
// Stores metadata about each token: the file it came from and its line and
// character position. Defaults to file "main", line 0, character 0.
class Metadata {
    public:
        Metadata(std::string File="main", int Ln=0, int Ch=0)
            : file(File), ln(Ln), ch(Ch) {}

        std::string file;  // source file name
        int ln;            // line number
        int ch;            // character position
};
|||
|
|||
std::string mkUniqueToken(); |
|||
|
|||
// type can be TOKEN or ASTNODE
|
|||
// One element of a token list or lisp-style tree. A leaf (TOKEN) carries only
// `val`; an AST node (ASTNODE) additionally carries its children in `args`.
// Instances are normally built with token() or astnode().
struct Node { |
|||
// Kind tag: TOKEN or ASTNODE.
int type; |
|||
// Token text for a leaf; the node's name for an AST node.
std::string val; |
|||
// Child nodes; filled in by astnode(), left empty by token().
std::vector<Node> args; |
|||
// Source file, line and character associated with this node.
Metadata metadata; |
|||
}; |
|||
Node token(std::string val, Metadata met=Metadata()); |
|||
Node astnode(std::string val, std::vector<Node> args, Metadata met=Metadata()); |
|||
|
|||
// Number of tokens in a tree
|
|||
int treeSize(Node prog); |
|||
|
|||
// Print token list
|
|||
std::string printTokens(std::vector<Node> tokens); |
|||
|
|||
// Prints a lisp AST on one line
|
|||
std::string printSimple(Node ast); |
|||
|
|||
// Pretty-prints a lisp AST
|
|||
std::string printAST(Node ast, bool printMetadata=false); |
|||
|
|||
// Splits text by line
|
|||
std::vector<std::string> splitLines(std::string s); |
|||
|
|||
// Inverse of splitLines
|
|||
std::string joinLines(std::vector<std::string> lines); |
|||
|
|||
// Indent all lines by 4 spaces
|
|||
std::string indentLines(std::string inp); |
|||
|
|||
// Converts string to simple numeric format
|
|||
std::string strToNumeric(std::string inp); |
|||
|
|||
// Does the node contain a number (eg. 124, 0xf012c, "george")
|
|||
bool isNumberLike(Node node); |
|||
|
|||
//Normalizes number representations
|
|||
Node nodeToNumeric(Node node); |
|||
|
|||
//If a node is numeric, normalize its representation
|
|||
Node tryNumberize(Node node); |
|||
|
|||
//Converts a value to an array of byte number nodes
|
|||
std::vector<Node> toByteArr(std::string val, Metadata metadata, int minLen=1); |
|||
|
|||
//Reads a file
|
|||
std::string get_file_contents(std::string filename); |
|||
|
|||
//Does a file exist?
|
|||
bool exists(std::string fileName); |
|||
|
|||
//Report error
|
|||
void err(std::string errtext, Metadata met); |
|||
|
|||
//Bin to hex
|
|||
std::string binToHex(std::string inp); |
|||
|
|||
//Hex to bin
|
|||
std::string hexToBin(std::string inp); |
|||
|
|||
//Lower to upper
|
|||
std::string upperCase(std::string inp); |
|||
|
|||
#endif |
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue