Gav Wood
11 years ago
27 changed files with 473 additions and 1003 deletions
@ -0,0 +1,58 @@ |
|||||
|
# Build script for the pyserpent shared library (Boost.Python bindings built
# from every source file in this directory).
cmake_policy(SET CMP0015 NEW)

aux_source_directory(. SRC_LIST)

set(EXECUTABLE pyserpent)

# set(CMAKE_INSTALL_PREFIX ../lib)
add_library(${EXECUTABLE} SHARED ${SRC_LIST})

if (UNIX)
	find_package(Boost 1.53 REQUIRED COMPONENTS python)
endif()
file(GLOB HEADERS "*.h")

include_directories(..)

add_definitions(-DETH_PYTHON)
# NOTE(review): PYTHON_ID / PYTHON_LS are not set in this file; presumably the
# parent CMakeLists provides the Python include dir and libraries -- confirm.
include_directories(${PYTHON_ID})
target_link_libraries(${EXECUTABLE} ${PYTHON_LS})

# Each library is listed exactly once (gmp used to be linked twice).
target_link_libraries(${EXECUTABLE} serpent)
target_link_libraries(${EXECUTABLE} lll)
target_link_libraries(${EXECUTABLE} evmface)
target_link_libraries(${EXECUTABLE} ethential)
target_link_libraries(${EXECUTABLE} gmp)

#g++ $(CXXFLAGS) -shared $(PLATFORM_OPTS) $(TARGET).o -L$(BOOST_LIB) -lboost_python -L/usr/lib/python$(PYTHON_VERSION)/config -lpython$(PYTHON_VERSION) $(COMMON_OBJS) -o $(TARGET).so

# The variable is quoted so that if() still parses when TARGET_PLATFORM is
# unset or empty (an unquoted empty expansion leaves "STREQUAL" without a
# left-hand operand and aborts configuration).
if ("${TARGET_PLATFORM}" STREQUAL "w64")
	target_link_libraries(${EXECUTABLE} boost_python_win32-mt-s)
	target_link_libraries(${EXECUTABLE} boost_thread_win32-mt-s)
	target_link_libraries(${EXECUTABLE} iphlpapi)
	target_link_libraries(${EXECUTABLE} ws2_32)
	target_link_libraries(${EXECUTABLE} mswsock)
	target_link_libraries(${EXECUTABLE} shlwapi)
elseif (APPLE)
	# Latest mavericks boost libraries only come with -mt
	target_link_libraries(${EXECUTABLE} boost_python-mt)
	target_link_libraries(${EXECUTABLE} boost_thread-mt)
	find_package(Threads REQUIRED)
	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
elseif (UNIX)
	target_link_libraries(${EXECUTABLE} ${Boost_PYTHON_LIBRARY})
	target_link_libraries(${EXECUTABLE} ${Boost_THREAD_LIBRARY})
	# Locate the thread library here as the other branches do, instead of
	# relying on CMAKE_THREAD_LIBS_INIT having been set elsewhere.
	find_package(Threads REQUIRED)
	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
else ()
	target_link_libraries(${EXECUTABLE} boost_python)
	target_link_libraries(${EXECUTABLE} boost_thread)
	find_package(Threads REQUIRED)
	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
endif ()

message("Installation path: ${CMAKE_INSTALL_PREFIX}")

install( TARGETS ${EXECUTABLE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
||||
|
|
@ -0,0 +1,132 @@ |
|||||
|
#include <boost/python.hpp> |
||||
|
#include <boost/python/stl_iterator.hpp> |
||||
|
#include <Python.h> |
||||
|
#include <libserpent/funcs.h> |
||||
|
|
||||
|
// Provide a python wrapper for the C++ functions
|
||||
|
|
||||
|
using namespace boost::python; |
||||
|
|
||||
|
//std::vector to python list converter
|
||||
|
//http://stackoverflow.com/questions/5314319/how-to-export-stdvector
|
||||
|
template<class T> |
||||
|
struct VecToList |
||||
|
{ |
||||
|
static PyObject* convert(const std::vector<T>& vec) |
||||
|
{ |
||||
|
boost::python::list* l = new boost::python::list(); |
||||
|
for(size_t i = 0; i < vec.size(); i++) |
||||
|
(*l).append(vec[i]); |
||||
|
|
||||
|
return l->ptr(); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
// python list to std::vector converter
|
||||
|
//http://code.activestate.com/lists/python-cplusplus-sig/16463/
|
||||
|
template<typename T> |
||||
|
struct Vector_from_python_list |
||||
|
{ |
||||
|
|
||||
|
Vector_from_python_list() |
||||
|
{ |
||||
|
using namespace boost::python; |
||||
|
using namespace boost::python::converter; |
||||
|
registry::push_back(&Vector_from_python_list<T>::convertible, |
||||
|
&Vector_from_python_list<T>::construct, |
||||
|
type_id<std::vector<T> |
||||
|
>()); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// Determine if obj_ptr can be converted in a std::vector<T>
|
||||
|
static void* convertible(PyObject* obj_ptr) |
||||
|
{ |
||||
|
if (!PyList_Check(obj_ptr)){ |
||||
|
return 0; |
||||
|
} |
||||
|
return obj_ptr; |
||||
|
} |
||||
|
|
||||
|
// Convert obj_ptr into a std::vector<T>
|
||||
|
static void construct( |
||||
|
PyObject* obj_ptr, |
||||
|
boost::python::converter::rvalue_from_python_stage1_data* data) |
||||
|
{ |
||||
|
using namespace boost::python; |
||||
|
// Extract the character data from the python string
|
||||
|
// const char* value = PyString_AsString(obj_ptr);
|
||||
|
list l(handle<>(borrowed(obj_ptr))); |
||||
|
|
||||
|
// // Verify that obj_ptr is a string (should be ensured by convertible())
|
||||
|
// assert(value);
|
||||
|
|
||||
|
// Grab pointer to memory into which to construct the new std::vector<T>
|
||||
|
void* storage = ( |
||||
|
(boost::python::converter::rvalue_from_python_storage<std::vector<T> |
||||
|
>*) |
||||
|
|
||||
|
data)->storage.bytes; |
||||
|
|
||||
|
// in-place construct the new std::vector<T> using the character data
|
||||
|
// extraced from the python object
|
||||
|
std::vector<T>& v = *(new (storage) std::vector<T>()); |
||||
|
|
||||
|
// populate the vector from list contains !!!
|
||||
|
int le = len(l); |
||||
|
v.resize(le); |
||||
|
for(int i = 0;i!=le;++i){ |
||||
|
v[i] = extract<T>(l[i]); |
||||
|
} |
||||
|
|
||||
|
// Stash the memory chunk pointer for later use by boost.python
|
||||
|
data->convertible = storage; |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
// Renders a Metadata record as "[<file> <line> <char>]"; bound to Python as
// Metadata.__str__ and Metadata.__repr__.
std::string printMetadata(Metadata m) {
	std::string text = "[";
	text += m.file;
	text += " ";
	text += intToDecimal(m.ln);
	text += " ";
	text += intToDecimal(m.ch);
	text += "]";
	return text;
}
||||
|
|
||||
|
// Overload dispatchers: each wrapped function below can be called from Python
// with either 1 or 2 arguments.
BOOST_PYTHON_FUNCTION_OVERLOADS(tokenize_overloads, tokenize, 1, 2);
BOOST_PYTHON_FUNCTION_OVERLOADS(printast_overloads, printAST, 1, 2);
BOOST_PYTHON_FUNCTION_OVERLOADS(parselll_overloads, parseLLL, 1, 2);

// Definition of the Python extension module "pyserpent".
BOOST_PYTHON_MODULE(pyserpent)
{
	// Free functions, exposed under their Python-side names.
	def("tokenize", tokenize, tokenize_overloads());
	def("parse", parseSerpent);
	def("parseLLL", parseLLL, parselll_overloads());
	def("rewrite", rewrite);
	def("compile_to_lll", compileToLLL);
	def("encode_datalist", encodeDatalist);
	def("decode_datalist", decodeDatalist);
	def("compile_lll", compileLLL);
	def("assemble", assemble);
	def("deserialize", deserialize);
	def("dereference", dereference);
	def("flatten", flatten);
	def("serialize", serialize);
	def("compile", compile);
	def("pretty_compile", prettyCompile);
	def("pretty_assemble", prettyAssemble);

	// std::vector <-> Python list conversions for the element types used by
	// the functions above.
	to_python_converter<std::vector<Node,class std::allocator<Node> >,
						VecToList<Node> >();
	to_python_converter<std::vector<std::string,class std::allocator<std::string> >,
						VecToList<std::string> >();
	Vector_from_python_list<Node>();
	Vector_from_python_list<std::string>();

	class_<Metadata>("Metadata",init<>())
		.def(init<std::string, int, int>())
		.def("__str__", printMetadata)
		.def("__repr__", printMetadata)
	;
	// The default constructor is already registered through the class_<>
	// constructor argument; the extra .def(init<>()) that used to follow
	// registered the same overload a second time.
	class_<Node>("Node",init<>())
		.def("__str__", printAST, printast_overloads())
		.def("__repr__", printAST, printast_overloads())
	;
}
@ -0,0 +1,35 @@ |
|||||
|
# Build script for the `sc` command-line tool (built from every source file in
# this directory).
cmake_policy(SET CMP0015 NEW)

aux_source_directory(. SRC_LIST)

include_directories(..)

set(EXECUTABLE sc)

add_executable(${EXECUTABLE} ${SRC_LIST})

# The variable is quoted so that if() still parses when TARGET_PLATFORM is
# unset or empty.
if ("${TARGET_PLATFORM}" STREQUAL "w64")
	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++")
	target_link_libraries(${EXECUTABLE} gcc)
	target_link_libraries(${EXECUTABLE} gdi32)
	target_link_libraries(${EXECUTABLE} ws2_32)
	target_link_libraries(${EXECUTABLE} mswsock)
	target_link_libraries(${EXECUTABLE} shlwapi)
	target_link_libraries(${EXECUTABLE} iphlpapi)
	target_link_libraries(${EXECUTABLE} boost_thread_win32-mt-s)
	set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS)
elseif (UNIX)
	# No extra libraries are linked on UNIX.
else ()
	find_package(Threads REQUIRED)
	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
endif ()

target_link_libraries(${EXECUTABLE} serpent)
target_link_libraries(${EXECUTABLE} lll)
target_link_libraries(${EXECUTABLE} evmface)
target_link_libraries(${EXECUTABLE} ethential)
target_link_libraries(${EXECUTABLE} gmp)

install( TARGETS ${EXECUTABLE} DESTINATION bin )
@ -0,0 +1,106 @@ |
|||||
|
#include <stdio.h> |
||||
|
#include <iostream> |
||||
|
#include <vector> |
||||
|
#include <map> |
||||
|
#include <libserpent/funcs.h> |
||||
|
|
||||
|
int main(int argv, char** argc) { |
||||
|
if (argv == 1) { |
||||
|
std::cerr << "Must provide a command and arguments! Try parse, rewrite, compile, assemble\n"; |
||||
|
return 0; |
||||
|
} |
||||
|
std::string flag = ""; |
||||
|
std::string command = argc[1]; |
||||
|
std::string input; |
||||
|
std::string secondInput; |
||||
|
if (std::string(argc[1]) == "-s") { |
||||
|
flag = command.substr(1); |
||||
|
command = argc[2]; |
||||
|
input = ""; |
||||
|
std::string line; |
||||
|
while (std::getline(std::cin, line)) { |
||||
|
input += line + "\n"; |
||||
|
} |
||||
|
secondInput = argv == 3 ? "" : argc[3]; |
||||
|
} |
||||
|
else { |
||||
|
if (argv == 2) { |
||||
|
std::cerr << "Not enough arguments for serpent cmdline\n"; |
||||
|
throw(0); |
||||
|
} |
||||
|
input = argc[2]; |
||||
|
secondInput = argv == 3 ? "" : argc[3]; |
||||
|
} |
||||
|
bool haveSec = secondInput.length() > 0; |
||||
|
if (command == "parse" || command == "parse_serpent") { |
||||
|
std::cout << printAST(parseSerpent(input), haveSec) << "\n"; |
||||
|
} |
||||
|
else if (command == "rewrite") { |
||||
|
std::cout << printAST(rewrite(parseLLL(input, true)), haveSec) << "\n"; |
||||
|
} |
||||
|
else if (command == "compile_to_lll") { |
||||
|
std::cout << printAST(compileToLLL(input), haveSec) << "\n"; |
||||
|
} |
||||
|
else if (command == "build_fragtree") { |
||||
|
std::cout << printAST(buildFragmentTree(parseLLL(input, true))) << "\n"; |
||||
|
} |
||||
|
else if (command == "compile_lll") { |
||||
|
std::cout << binToHex(compileLLL(parseLLL(input, true))) << "\n"; |
||||
|
} |
||||
|
else if (command == "dereference") { |
||||
|
std::cout << printAST(dereference(parseLLL(input, true)), haveSec) <<"\n"; |
||||
|
} |
||||
|
else if (command == "pretty_assemble") { |
||||
|
std::cout << printTokens(prettyAssemble(parseLLL(input, true))) <<"\n"; |
||||
|
} |
||||
|
else if (command == "pretty_compile_lll") { |
||||
|
std::cout << printTokens(prettyCompileLLL(parseLLL(input, true))) << "\n"; |
||||
|
} |
||||
|
else if (command == "pretty_compile") { |
||||
|
std::cout << printTokens(prettyCompile(input)) << "\n"; |
||||
|
} |
||||
|
else if (command == "assemble") { |
||||
|
std::cout << assemble(parseLLL(input, true)) << "\n"; |
||||
|
} |
||||
|
else if (command == "serialize") { |
||||
|
std::cout << binToHex(serialize(tokenize(input))) << "\n"; |
||||
|
} |
||||
|
else if (command == "flatten") { |
||||
|
std::cout << printTokens(flatten(parseLLL(input, true))) << "\n"; |
||||
|
} |
||||
|
else if (command == "deserialize") { |
||||
|
std::cout << printTokens(deserialize(hexToBin(input))) << "\n"; |
||||
|
} |
||||
|
else if (command == "compile") { |
||||
|
std::cout << binToHex(compile(input)) << "\n"; |
||||
|
} |
||||
|
else if (command == "encode_datalist") { |
||||
|
std::vector<Node> tokens = tokenize(input); |
||||
|
std::vector<std::string> o; |
||||
|
for (int i = 0; i < (int)tokens.size(); i++) { |
||||
|
o.push_back(tokens[i].val); |
||||
|
} |
||||
|
std::cout << binToHex(encodeDatalist(o)) << "\n"; |
||||
|
} |
||||
|
else if (command == "decode_datalist") { |
||||
|
std::vector<std::string> o = decodeDatalist(hexToBin(input)); |
||||
|
std::vector<Node> tokens; |
||||
|
for (int i = 0; i < (int)o.size(); i++) |
||||
|
tokens.push_back(token(o[i])); |
||||
|
std::cout << printTokens(tokens) << "\n"; |
||||
|
} |
||||
|
else if (command == "tokenize") { |
||||
|
std::cout << printTokens(tokenize(input)); |
||||
|
} |
||||
|
else if (command == "biject") { |
||||
|
if (argv == 3) |
||||
|
std::cerr << "Not enough arguments for biject\n"; |
||||
|
int pos = decimalToInt(secondInput); |
||||
|
std::vector<Node> n = prettyCompile(input); |
||||
|
if (pos >= (int)n.size()) |
||||
|
std::cerr << "Code position too high\n"; |
||||
|
Metadata m = n[pos].metadata; |
||||
|
std::cout << "Opcode: " << n[pos].val << ", file: " << m.file << |
||||
|
", line: " << m.ln << ", char: " << m.ch << "\n"; |
||||
|
} |
||||
|
} |
@ -1,2 +0,0 @@ |
|||||
from compiler import * |
|
||||
from parser import * |
|
@ -1,446 +0,0 @@ |
|||||
#!/usr/bin/python |
|
||||
import re |
|
||||
import sys |
|
||||
import os |
|
||||
from parser import parse |
|
||||
from opcodes import opcodes, reverse_opcodes |
|
||||
import json |
|
||||
|
|
||||
# Module-wide counter behind mklabel(); kept in a one-element list so the
# function can update it in place.
label_counter = [0]


def mklabel(prefix):
    """Return `prefix` followed by the next unused label number.

    Numbers start at 0 and increase by one on every call, whatever the prefix.
    """
    index = label_counter[0]
    label_counter[0] = index + 1
    return prefix + str(index)
|
||||
|
|
||||
# Table of built-in functions and operators.
#
# Each entry has the form:
#
#   [ val, inputcount, outputcount, code ]
#
# `code` is a list of assembly tokens; a token '<k>' is replaced with the
# compiled code of the k-th argument.
funtable = [
    # Arithmetic and comparison
    ['+', 2, 1, ['<1>', '<0>', 'ADD']],
    ['-', 2, 1, ['<1>', '<0>', 'SUB']],
    ['*', 2, 1, ['<1>', '<0>', 'MUL']],
    ['/', 2, 1, ['<1>', '<0>', 'DIV']],
    ['^', 2, 1, ['<1>', '<0>', 'EXP']],
    ['%', 2, 1, ['<1>', '<0>', 'MOD']],
    ['#/', 2, 1, ['<1>', '<0>', 'SDIV']],
    ['#%', 2, 1, ['<1>', '<0>', 'SMOD']],
    ['==', 2, 1, ['<1>', '<0>', 'EQ']],
    ['<', 2, 1, ['<1>', '<0>', 'LT']],
    ['<=', 2, 1, ['<1>', '<0>', 'GT', 'NOT']],
    ['>', 2, 1, ['<1>', '<0>', 'GT']],
    ['>=', 2, 1, ['<1>', '<0>', 'LT', 'NOT']],
    ['!', 1, 1, ['<0>', 'NOT']],
    # Logical and bitwise operators
    ['or', 2, 1, ['<1>', '<0>', 'DUP', 4, 'PC',
                  'ADD', 'JUMPI', 'POP', 'SWAP', 'POP']],
    ['||', 2, 1, ['<1>', '<0>', 'DUP', 4, 'PC',
                  'ADD', 'JUMPI', 'POP', 'SWAP', 'POP']],
    ['and', 2, 1, ['<1>', '<0>', 'NOT', 'NOT', 'MUL']],
    ['&&', 2, 1, ['<1>', '<0>', 'NOT', 'NOT', 'MUL']],
    ['xor', 2, 1, ['<1>', '<0>', 'XOR']],
    ['&', 2, 1, ['<1>', '<0>', 'AND']],
    ['|', 2, 1, ['<1>', '<0>', 'OR']],
    ['byte', 2, 1, ['<0>', '<1>', 'BYTE']],
    # Word array methods
    # arr, ind -> val
    ['access', 2, 1, ['<0>', '<1>', 32, 'MUL', 'ADD', 'MLOAD']],
    # arr, ind, val
    ['arrset', 3, 0, ['<2>', '<0>', '<1>', 32, 'MUL', 'ADD', 'MSTORE']],
    # val, pointer -> pointer+32
    ['set_and_inc', 2, 1, ['<1>', 'DUP', '<0>', 'SWAP', 'MSTORE', 32, 'ADD']],
    # len (32 MUL) len*32 (MSIZE) len*32 MSIZE (SWAP) MSIZE len*32 (MSIZE ADD)
    # MSIZE MSIZE+len*32 (1) MSIZE MSIZE+len*32 1 (SWAP SUB) MSIZE
    # MSIZE+len*32-1 (0 SWAP MSTORE8) MSIZE
    ['array', 1, 1, ['<0>', 32, 'MUL', 'MSIZE', 'SWAP', 'MSIZE',
                     'ADD', 1, 'SWAP', 'SUB', 0, 'SWAP', 'MSTORE8']],  # len -> arr
    # String array methods
    # arr, ind -> val
    ['getch', 2, 1, ['<1>', '<0>', 'ADD', 'MLOAD', 255, 'AND']],
    ['setch', 3, 0, ['<2>', '<1>', '<0>', 'ADD', 'MSTORE']],  # arr, ind, val
    # len MSIZE (SWAP) MSIZE len (MSIZE ADD) MSIZE MSIZE+len (1) MSIZE
    # MSIZE+len 1 (SWAP SUB) MSIZE MSIZE+len-1 (0 SWAP MSTORE8) MSIZE
    ['string', 1, 1, ['<0>', 'MSIZE', 'SWAP', 'MSIZE', 'ADD',
                      1, 'SWAP', 'SUB', 0, 'SWAP', 'MSTORE8']],  # len -> arr
    # ['send', 2, 1, [0,0,0,0,0,'<1>','<0>','CALL'] ], # to, value, 0, [] -> /dev/null
    # to, value, gas, [] -> /dev/null
    ['send', 3, 1, [0, 0, 0, 0, '<2>', '<1>', '<0>', 'CALL']],
    # MSIZE 0 MSIZE (MSTORE) MSIZE (DUP) MSIZE MSIZE (...) MSIZE MSIZE 32 <4>
    # <3> <2> <1> <0> (CALL) MSIZE FLAG (POP) MSIZE (MLOAD) RESULT
    # to, value, gas, data, datasize -> out32
    ['msg', 5, 1, ['MSIZE', 0, 'MSIZE', 'MSTORE', 'DUP', 32, 'SWAP',
                   '<4>', 32, 'MUL', '<3>', '<2>', '<1>', '<0>',
                   'CALL', 'POP', 'MLOAD']],
    # <5>*32 (MSIZE SWAP MSIZE SWAP) MSIZE MSIZE <5>*32 (DUP MSIZE ADD) MSIZE
    # MSIZE <5>*32 MEND+1 (1 SWAP SUB) MSIZE MSIZE <5>*32 MEND (0 SWAP MSTORE8)
    # MSIZE MSIZE <5>*32 (SWAP) MSIZE <5>*32 MSIZE
    # to, value, gas, data, datasize, outsize -> out
    ['msg', 6, 1, ['<5>', 32, 'MUL', 'MSIZE', 'SWAP', 'MSIZE', 'SWAP', 'DUP',
                   'MSIZE', 'ADD', 1, 'SWAP', 'SUB', 0, 'SWAP', 'MSTORE8',
                   'SWAP', '<4>', 32, 'MUL', '<3>', '<2>', '<1>', '<0>',
                   'CALL', 'POP']],
    # value, gas, data, datasize
    ['create', 4, 1, ['<3>', '<2>', '<1>', '<0>', 'CREATE']],
    ['sha3', 1, 1, [32, 'MSIZE', '<0>', 'MSIZE', 'MSTORE', 'SHA3']],
    ['sha3bytes', 1, 1, ['SHA3']],
    ['sload', 1, 1, ['<0>', 'SLOAD']],
    ['sstore', 2, 0, ['<1>', '<0>', 'SSTORE']],
    ['calldataload', 1, 1, ['<0>', 32, 'MUL', 'CALLDATALOAD']],
    ['id', 1, 1, ['<0>']],
    # 0 MSIZE (SWAP) MSIZE 0 (MSIZE) MSIZE 0 MSIZE (MSTORE) MSIZE (32 SWAP) 32
    # MSIZE
    # returns single value
    ['return', 1, 0, ['<0>', 'MSIZE', 'SWAP', 'MSIZE', 'MSTORE',
                      32, 'SWAP', 'RETURN']],
    ['return', 2, 0, ['<1>', 32, 'MUL', '<0>', 'RETURN']],
    ['suicide', 1, 0, ['<0>', 'SUICIDE']],
]
|
||||
|
|
||||
# Pseudo-variables: names that compile directly to the listed assembly tokens
# instead of a memory load.
pseudovars = dict([
    ('msg.datasize', [32, 'CALLDATASIZE', 'DIV']),
    ('msg.sender', ['CALLER']),
    ('msg.value', ['CALLVALUE']),
    ('tx.gasprice', ['GASPRICE']),
    ('tx.origin', ['ORIGIN']),
    ('tx.gas', ['GAS']),
    ('contract.balance', ['BALANCE']),
    ('block.prevhash', ['PREVHASH']),
    ('block.coinbase', ['COINBASE']),
    ('block.timestamp', ['TIMESTAMP']),
    ('block.number', ['NUMBER']),
    ('block.difficulty', ['DIFFICULTY']),
    ('block.gaslimit', ['GASLIMIT']),
])
|
||||
|
|
||||
|
|
||||
# A set of methods for detecting raw values (numbers and strings) and |
|
||||
# converting them to integers |
|
||||
def frombytes(b):
    """Interpret the string `b` as a big-endian base-256 number.

    Each character contributes ord(char); the empty string yields 0.
    Implemented as a loop so that long inputs neither hit the recursion limit
    nor re-slice the string at every step.
    """
    value = 0
    for ch in b:
        value = value * 256 + ord(ch)
    return value
|
||||
|
|
||||
|
|
||||
def fromhex(b):
    """Convert a string of hexadecimal digits (no '0x' prefix) to an integer.

    Upper- and lower-case digits are both accepted; the empty string yields 0.

    Raises:
        ValueError: if `b` contains a character that is not a hex digit.
            (Such characters used to be counted as -1, silently producing a
            wrong number.)
    """
    digits = '0123456789abcdef'
    value = 0
    for ch in b:
        digit = digits.find(ch.lower())
        if digit < 0:
            raise ValueError("Invalid hex digit: %r" % ch)
        value = value * 16 + digit
    return value
|
||||
|
|
||||
|
|
||||
def is_numberlike(b):
    """Tell whether `b` is a string spelling a raw value.

    True for strings made only of digits and '-' (including the empty
    string), strings wrapped in a matching pair of single or double quotes,
    and strings starting with '0x'. Anything that is not a string gives False.
    """
    if not isinstance(b, (str, unicode)):
        return False
    if re.match(r'^[0-9\-]*$', b):
        return True
    quote_chars = ["'", '"']
    if b[0] in quote_chars and b[-1] in quote_chars and b[0] == b[-1]:
        return True
    return b[:2] == '0x'
|
||||
|
|
||||
|
|
||||
def numberize(b):
    """Convert a raw-value string to an integer.

    A string starting with a quote is decoded with frombytes() after dropping
    its first and last character; a '0x' string is decoded with fromhex();
    anything else is handed to int().
    """
    opener = b[0]
    if opener == "'" or opener == '"':
        return frombytes(b[1:-1])
    if b[:2] == '0x':
        return fromhex(b[2:])
    return int(b)
|
||||
|
|
||||
|
|
||||
# Apply rewrite rules
def rewrite(ast):
    """Return `ast` with high-level forms replaced by lower-level calls.

    Strings are returned unchanged. The rewritten forms are:
      * set of an access  -> 'sstore' (contract.storage) or 'arrset'
      * access            -> 'calldataload' (msg.data) or 'sload'
                             (contract.storage)
      * array_lit         -> nested 'set_and_inc' calls around 'array'
      * return of an array_lit -> two-argument 'return'
    Any other node is rebuilt with rewrite() applied to each element.
    """
    if isinstance(ast, (str, unicode)):
        return ast
    head = ast[0]
    if head == 'set':
        target = ast[1]
        if target[0] == 'access':
            if target[1] == 'contract.storage':
                return ['sstore', rewrite(target[2]), rewrite(ast[2])]
            return ['arrset', rewrite(target[1]), rewrite(target[2]),
                    rewrite(ast[2])]
    elif head == 'access':
        if ast[1] == 'msg.data':
            return ['calldataload', rewrite(ast[2])]
        elif ast[1] == 'contract.storage':
            return ['sload', rewrite(ast[2])]
    elif head == 'array_lit':
        items = ast[1:]
        o = ['array', str(len(items))]
        for a in items:
            o = ['set_and_inc', rewrite(a), o]
        return ['-', o, str(len(items) * 32)]
    elif head == 'return':
        if len(ast) == 2 and ast[1][0] == 'array_lit':
            return ['return', rewrite(ast[1]), str(len(ast[1][1:]))]
    # Default: rewrite every element, including the head.
    return [rewrite(node) for node in ast]
|
||||
|
|
||||
|
|
||||
# Main compiler code |
|
||||
def arity(ast):
    """Return how many values the expression `ast` produces (0 or 1).

    Strings count as 1; 'set' and 'if' nodes as 0; a 'seq' has the arity of
    its last statement (0 when empty). Other nodes use the output count of the
    first funtable entry with the same name; when none matches, the function
    falls through and returns None.
    """
    if isinstance(ast, (str, unicode)):
        return 1
    head = ast[0]
    if head == 'set' or head == 'if':
        return 0
    if head == 'seq':
        if len(ast[1:]) and arity(ast[-1]) == 1:
            return 1
        return 0
    for entry in funtable:
        if head == entry[0]:
            return entry[2]
|
||||
|
|
||||
|
|
||||
# Debugging |
|
||||
def print_wrapper(f):
    """Debugging decorator: print the first positional argument before the
    call and the result after it, then return the result unchanged."""
    def wrapper(*args, **kwargs):
        print(args[0])
        result = f(*args, **kwargs)
        print(result)
        return result
    return wrapper
|
||||
|
|
||||
|
|
||||
# Right-hand-side expressions (ie. the normal kind) |
|
||||
#@print_wrapper |
|
||||
def compile_expr(ast, varhash, lc=[0]):
    """Compile the (rewritten) expression `ast` to a list of assembly tokens.

    Args:
        ast: a string (keyword, literal, pseudo-variable or variable name) or
            a list whose first element names the operation.
        varhash: dict mapping variable names to memory offsets; updated in
            place when a new variable is met (offsets are multiples of 32).
        lc: one-element list holding the next free label number; updated in
            place. NOTE(review): the default list is shared between calls that
            omit this argument, so numbering then continues across calls.

    Returns:
        A list of opcode names, integers, 'LABEL_n' markers and 'REF_n'
        references.

    Raises:
        Exception: when setting a non-variable or a pseudo-variable, when an
            argument of a function produces no value, or for an unknown
            operation.
    """
    # Stop keyword
    if ast == 'stop':
        return ['STOP']
    # Literals
    elif isinstance(ast, (str, unicode)):
        if is_numberlike(ast):
            return [numberize(ast)]
        elif ast in pseudovars:
            return pseudovars[ast]
        else:
            if ast not in varhash:
                varhash[ast] = len(varhash) * 32
            return [varhash[ast], 'MLOAD']
    # Set (specifically, variables)
    elif ast[0] == 'set':
        if not isinstance(ast[1], (str, unicode)):
            raise Exception("Cannot set the value of " + str(ast[1]))
        elif ast[1] in pseudovars:
            raise Exception("Cannot set a pseudovariable!")
        else:
            # The slot is allocated before the right-hand side is compiled.
            if ast[1] not in varhash:
                varhash[ast[1]] = len(varhash) * 32
            return compile_expr(ast[2], varhash, lc) + [varhash[ast[1]], 'MSTORE']
    # If and if/else statements
    elif ast[0] == 'if':
        f = compile_expr(ast[1], varhash, lc)
        g = compile_expr(ast[2], varhash, lc)
        h = compile_expr(ast[3], varhash, lc) if len(ast) > 3 else None
        label, ref = 'LABEL_' + str(lc[0]), 'REF_' + str(lc[0])
        lc[0] += 1
        label2, ref2 = 'LABEL_' + str(lc[0]), 'REF_' + str(lc[0])
        lc[0] += 1
        if h:
            return f + ['NOT', ref2, 'JUMPI'] + g + [ref, 'JUMP', label2] + h + [label]
        else:
            return f + ['NOT', ref, 'JUMPI'] + g + [label]
    # While loops
    elif ast[0] == 'while':
        f = compile_expr(ast[1], varhash, lc)
        g = compile_expr(ast[2], varhash, lc)
        beglab, begref = 'LABEL_' + str(lc[0]), 'REF_' + str(lc[0])
        endlab, endref = 'LABEL_' + str(lc[0] + 1), 'REF_' + str(lc[0] + 1)
        lc[0] += 2
        return [beglab] + f + ['NOT', endref, 'JUMPI'] + g + [begref, 'JUMP', endlab]
    # Seq
    elif ast[0] == 'seq':
        o = []
        statements = ast[1:]
        last_index = len(statements) - 1
        for index, arg in enumerate(statements):
            o.extend(compile_expr(arg, varhash, lc))
            # Drop the value of every value-producing statement except the
            # final one. The test is by position: comparing `arg` with the
            # last statement by value skipped the POP for any earlier
            # statement that merely looked the same as the last one.
            if arity(arg) == 1 and index != last_index:
                o.append('POP')
        return o
    # Functions and operations
    for f in funtable:
        if ast[0] == f[0] and len(ast[1:]) == f[1]:
            # If arity of all args is 1
            if reduce(lambda x, y: x * arity(y), ast[1:], 1):
                iq = f[3][:]
                oq = []
                while len(iq):
                    tok = iq.pop(0)
                    # '<k>' stands for the compiled k-th argument.
                    if isinstance(tok, (str, unicode)) and tok[0] == '<' and tok[-1] == '>':
                        oq.extend(
                            compile_expr(ast[1 + int(tok[1:-1])], varhash, lc))
                    else:
                        oq.append(tok)
                return oq
            else:
                raise Exception(
                    "Arity of argument mismatches for %s: %s" % (f[0], ast))
    raise Exception("invalid op: " + ast[0])
|
||||
|
|
||||
|
|
||||
# Stuff to add once to each program |
|
||||
def add_wrappers(c, varhash):
    """Prepend the once-per-program prologue when it is needed.

    If any variables exist and the code contains 'MSIZE', the result is
    [0, len(varhash) * 32 - 1, 'MSTORE8'] followed by `c`; otherwise `c` is
    returned as is.
    """
    if not len(varhash) or 'MSIZE' not in c:
        return c
    prologue = [0, len(varhash) * 32 - 1, 'MSTORE8']
    return prologue + c
|
||||
|
|
||||
|
|
||||
# Optimizations |
|
||||
# Constant-folding table used by optimize(): maps an opcode name to a function
# of two integers. ADD, MUL, SUB and EXP wrap modulo 2**256.
# DIV uses explicit floor division so folding always yields an integer,
# whatever the interpreter's meaning of `/` is.
ops = {
    'ADD': lambda x, y: (x + y) % 2 ** 256,
    'MUL': lambda x, y: (x * y) % 2 ** 256,
    'SUB': lambda x, y: (x - y) % 2 ** 256,
    'DIV': lambda x, y: x // y,
    'EXP': lambda x, y: pow(x, y, 2 ** 256),
    'AND': lambda x, y: x & y,
    'OR': lambda x, y: x | y,
    'XOR': lambda x, y: x ^ y
}
|
||||
|
|
||||
|
|
||||
def multipop(li, n):
    """Remove the last `n` items of `li` in place and return `li` itself.

    Nothing is removed when `n` is zero or negative.
    """
    remaining = n
    while remaining > 0:
        li.pop()
        remaining -= 1
    return li
|
||||
|
|
||||
|
|
||||
def optimize(c):
    """Return a simplified copy of the token list `c` (peephole pass).

    Tokens are pushed one by one onto an output stack; after each push the
    top of the stack is checked against these rules:
      1. <int> <int> <op>, with op in `ops`: folded to the computed constant;
      2. NOT NOT: removed;
      3. x 0 ADD with x number-like: the '0 ADD' is dropped;
      4. 0 x SUB/ADD with x number-like: replaced by x.
    """
    oq = []
    for tok in c:
        oq.append(tok)
        if oq[-1] in ops and len(oq) >= 3:
            if isinstance(oq[-2], (int, long)) and isinstance(oq[-3], (int, long)):
                ntok = ops[oq[-1]](oq[-2], oq[-3])
                multipop(oq, 3).append(ntok)
        if oq[-1] == 'NOT' and len(oq) >= 2 and oq[-2] == 'NOT':
            multipop(oq, 2)
        # Removing a NOT/NOT pair can leave the stack empty, so the remaining
        # rules must not index into it unguarded.
        if oq and oq[-1] == 'ADD' and len(oq) >= 3 and oq[-2] == 0 and is_numberlike(oq[-3]):
            multipop(oq, 2)
        if oq and oq[-1] in ['SUB', 'ADD'] and len(oq) >= 3 and oq[-3] == 0 and is_numberlike(oq[-2]):
            ntok = oq[-2]
            multipop(oq, 3).append(ntok)
    return oq
|
||||
|
|
||||
|
|
||||
def compile_to_assembly(source, optimize_flag=1):
    """Compile `source` to a list of assembly tokens.

    `source` may be program text (parsed first) or an already parsed tree.
    The tree goes through rewrite(), compile_expr() and add_wrappers();
    optimize() is applied afterwards unless `optimize_flag` is false.
    """
    tree = parse(source) if isinstance(source, (str, unicode)) else source
    varhash = {}
    rewritten = rewrite(tree)
    raw_code = compile_expr(rewritten, varhash, [0])
    wrapped = add_wrappers(raw_code, varhash)
    if optimize_flag:
        return optimize(wrapped)
    return wrapped
|
||||
|
|
||||
|
|
||||
def get_vars(source):
    """Return the variable table (name -> memory offset) of `source`.

    `source` may be program text (parsed first) or an already parsed tree.
    The program is compiled only for its effect of filling the table; the
    generated code is discarded.
    """
    tree = parse(source) if isinstance(source, (str, unicode)) else source
    variables = {}
    # compile_expr() fills `variables` as a side effect.
    compile_expr(rewrite(tree), variables, [0])
    return variables
|
||||
|
|
||||
|
|
||||
def log256(n):
    """Return the number of bytes needed to write `n` in base 256.

    0 needs 0 bytes, 1..255 need 1, 256..65535 need 2, and so on. Uses
    explicit floor division so the count is the same whatever the
    interpreter's meaning of `/` is.

    Raises:
        ValueError: if `n` is negative (the recursive version never
            terminated for such input).
    """
    if n < 0:
        raise ValueError("log256 is undefined for negative numbers")
    length = 0
    while n != 0:
        n //= 256
        length += 1
    return length
|
||||
|
|
||||
|
|
||||
def tobytearr(n, L):
    """Return the `L` lowest base-256 digits of `n`, most significant first.

    The result always has `L` items (left-padded with zeros; higher digits of
    `n` are dropped). `L` <= 0 gives an empty list. Uses explicit floor
    division so every item is an integer whatever the interpreter's meaning
    of `/` is.
    """
    digits = []
    for _ in range(L):
        digits.append(n % 256)
        n //= 256
    digits.reverse()
    return digits
|
||||
|
|
||||
|
|
||||
# Dereference labels |
|
||||
def dereference(c):
    """Resolve labels in the token list `c` and expand numbers into pushes.

    First pass: 'LABEL_x' markers are removed and their byte position is
    recorded. A 'REF_x' counts as 5 bytes, an integer as 1 plus its byte
    length (at least 1), any other token as 1.
    Second pass: each 'REF_x' becomes 'PUSH4' plus the 4-byte label position,
    each integer becomes 'PUSHn' plus its n bytes; other tokens are copied.
    """
    kept = []
    labelmap = {}
    pos = 0
    for front in c:
        is_text = isinstance(front, str)
        if is_text and front[:6] == 'LABEL_':
            labelmap[front[6:]] = pos
            continue
        kept.append(front)
        if is_text and front[:4] == 'REF_':
            pos += 5
        elif isinstance(front, (int, long)):
            pos += 1 + max(1, log256(front))
        else:
            pos += 1
    oq = []
    for m in kept:
        if isinstance(m, str) and m[:4] == 'REF_':
            oq.append('PUSH4')
            oq.extend(tobytearr(labelmap[m[4:]], 4))
        elif isinstance(m, (int, long)):
            width = max(1, log256(m))
            oq.append('PUSH' + str(width))
            oq.extend(tobytearr(m, width))
        else:
            oq.append(m)
    return oq
|
||||
|
|
||||
|
|
||||
def serialize(source):
    """Encode a dereferenced token list as a string, one character per token.

    Integers are used as they are, opcode names are looked up in
    reverse_opcodes, 'PUSHn' becomes 95 + n, and all-digit strings are
    converted with int().

    Raises:
        Exception: for a token that fits none of the above.
    """
    def to_code(arg):
        if isinstance(arg, (int, long)):
            return arg
        if arg in reverse_opcodes:
            return reverse_opcodes[arg]
        if arg[:4] == 'PUSH':
            return 95 + int(arg[4:])
        if re.match('^[0-9]*$', arg):
            return int(arg)
        raise Exception("Cannot serialize: " + str(arg))
    # All tokens are converted to numbers before any character is produced.
    codes = [to_code(tok) for tok in source]
    return ''.join([chr(code) for code in codes])
|
||||
|
|
||||
|
|
||||
def deserialize(source):
    """Decode a serialized program string back into a token list.

    Byte values 96..127 decode to 'PUSHn' with n = value - 95, and the next n
    bytes are emitted as plain integers. Every other byte is replaced by the
    name stored for it in `opcodes`.

    A push-data byte is never interpreted as an instruction. Previously a
    data byte whose value was in 96..127 restarted the push counter, so the
    bytes after it were decoded as data instead of opcodes.
    """
    o = []
    pending = 0  # push-data bytes still to be copied verbatim
    for ch in source:
        p = ord(ch)
        if pending > 0:
            o.append(p)
            pending -= 1
        elif 96 <= p <= 127:
            o.append('PUSH' + str(p - 95))
            pending = p - 95
        else:
            o.append(opcodes[p][0])
    return o
|
||||
|
|
||||
|
|
||||
def assemble(asm):
    """Turn a token list into its serialized form: dereference(), then
    serialize()."""
    resolved = dereference(asm)
    return serialize(resolved)
|
||||
|
|
||||
|
|
||||
def compile(source):
    """Compile program text to its serialized form: parse(),
    compile_to_assembly(), then assemble()."""
    tree = parse(source)
    assembly = compile_to_assembly(tree)
    return assemble(assembly)
|
||||
|
|
||||
|
|
||||
def encode_datalist(vals):
    """Encode values as a string of 32-byte words.

    `vals` may be a tuple/list of values, the empty string (gives ''), or a
    space-separated string of raw values that are converted with numberize().

    Per value:
      * integer (including True/False, which are integers): 32-byte
        big-endian word;
      * 40-character string: hex-decoded and left-padded with 12 zero bytes;
      * other string: left-padded with zero bytes to 32 characters;
      * None: 32 zero bytes.

    Raises:
        TypeError: for a value of any other type.
    """
    def enc(n):
        if isinstance(n, (int, long)):
            return ''.join(map(chr, tobytearr(n, 32)))
        elif isinstance(n, (str, unicode)) and len(n) == 40:
            return '\x00' * 12 + n.decode('hex')
        elif isinstance(n, (str, unicode)):
            return '\x00' * (32 - len(n)) + n
        elif n is None:
            # Used to return the integer 0, which made the join below fail.
            return '\x00' * 32
        # The separate True/False branches were unreachable (bools are caught
        # by the integer case); anything else is rejected explicitly.
        raise TypeError("Cannot encode value: %r" % (n,))
    if isinstance(vals, (tuple, list)):
        return ''.join(map(enc, vals))
    elif vals == '':
        return ''
    else:
        # Assume you're getting in numbers or 0x...
        return ''.join(map(enc, map(numberize, vals.split(' '))))
|
||||
|
|
||||
|
|
||||
def decode_datalist(arr):
    """Split `arr` into 32-character chunks and decode each with frombytes().

    A list argument is first joined into a string by applying chr() to every
    item. The last chunk may be shorter than 32 characters.
    """
    if isinstance(arr, list):
        arr = ''.join(map(chr, arr))
    return [frombytes(arr[start:start + 32])
            for start in range(0, len(arr), 32)]
|
@ -1,56 +0,0 @@ |
|||||
# Opcode table: byte value -> [name, a, b].
# NOTE(review): the two numbers look like the counts of stack items consumed
# and produced -- confirm before relying on them.
opcodes = {
    0x00: ['STOP', 0, 0],
    0x01: ['ADD', 2, 1],
    0x02: ['MUL', 2, 1],
    0x03: ['SUB', 2, 1],
    0x04: ['DIV', 2, 1],
    0x05: ['SDIV', 2, 1],
    0x06: ['MOD', 2, 1],
    0x07: ['SMOD', 2, 1],
    0x08: ['EXP', 2, 1],
    0x09: ['NEG', 2, 1],
    0x0a: ['LT', 2, 1],
    0x0b: ['GT', 2, 1],
    0x0c: ['EQ', 2, 1],
    0x0d: ['NOT', 1, 1],

    0x10: ['AND', 2, 1],
    0x11: ['OR', 2, 1],
    0x12: ['XOR', 2, 1],
    0x13: ['BYTE', 2, 1],

    0x20: ['SHA3', 2, 1],

    0x30: ['ADDRESS', 0, 1],
    0x31: ['BALANCE', 0, 1],
    0x32: ['ORIGIN', 0, 1],
    0x33: ['CALLER', 0, 1],
    0x34: ['CALLVALUE', 0, 1],
    0x35: ['CALLDATALOAD', 1, 1],
    0x36: ['CALLDATASIZE', 0, 1],
    0x37: ['GASPRICE', 0, 1],

    0x40: ['PREVHASH', 0, 1],
    0x41: ['COINBASE', 0, 1],
    0x42: ['TIMESTAMP', 0, 1],
    0x43: ['NUMBER', 0, 1],
    0x44: ['DIFFICULTY', 0, 1],
    0x45: ['GASLIMIT', 0, 1],

    0x50: ['POP', 1, 0],
    0x51: ['DUP', 1, 2],
    0x52: ['SWAP', 2, 2],
    0x53: ['MLOAD', 1, 1],
    0x54: ['MSTORE', 2, 0],
    0x55: ['MSTORE8', 2, 0],
    0x56: ['SLOAD', 1, 1],
    0x57: ['SSTORE', 2, 0],
    0x58: ['JUMP', 1, 0],
    0x59: ['JUMPI', 2, 0],
    0x5a: ['PC', 0, 1],
    0x5b: ['MSIZE', 0, 1],
    0x5c: ['GAS', 0, 1],

    0x60: ['PUSH', 0, 1],  # encompasses 96...127

    0xf0: ['CREATE', 4, 1],
    0xf1: ['CALL', 7, 1],
    0xf2: ['RETURN', 2, 1],
    0xff: ['SUICIDE', 1, 1],
}

# Inverse lookup: opcode name -> byte value.
reverse_opcodes = dict((entry[0], code) for code, entry in opcodes.items())
|
@ -1,299 +0,0 @@ |
|||||
import re |
|
||||
|
|
||||
# Number of spaces at the beginning of a line |
|
||||
def spaces(ln):
    """Return how many space characters `ln` starts with (tabs do not count)."""
    return len(ln) - len(ln.lstrip(' '))
|
||||
|
|
||||
# Main parse function |
|
||||
def parse(document):
    """Parse program text: split it on newlines and hand the lines to
    parse_lines()."""
    lines = document.split('\n')
    return parse_lines(lines)
|
||||
|
|
||||
def strip_line(ln):
    """Strip surrounding whitespace from `ln`, then cut everything from the
    first '//' onwards. Whitespace left in front of the '//' is kept."""
    return ln.strip().split('//', 1)[0]
|
||||
|
|
||||
# Parse the statement-level structure, including if and while statements
def parse_lines(lns):
    """Parse a list of source lines into a nested-list syntax tree.

    Each unindented, non-empty line is parsed with parse_line(); the indented
    lines that follow it form its child block, which is de-indented by the
    smallest indentation found and parsed recursively.

    Returns the single parsed statement when there is exactly one, otherwise
    ['seq'] followed by all parsed statements.

    Raises Exception when a statement line is indented, when an
    if/else/while/else-if line has no child block, when any other line has
    one, or when the input starts with else / else if.
    """
    o = []
    i = 0
    while i < len(lns):
        main = lns[i]
        # Skip blank lines and lines that hold nothing but a '//' comment.
        # (Testing the comment-stripped text keeps an all-comment line from
        # reaching parse_line() as an empty string.)
        if len(strip_line(main)) == 0:
            i += 1
            continue
        if spaces(main) > 0:
            raise Exception("Line "+str(i)+" indented too much!")
        main = strip_line(main)
        # Collect the child block: every following line up to the next
        # non-empty line that starts in column 0.
        indent = 99999999
        i += 1
        child_lns = []
        while i < len(lns):
            if len(strip_line(lns[i])) > 0:
                sp = spaces(lns[i])
                if sp == 0: break
                indent = min(sp, indent)
                child_lns.append(lns[i])
            i += 1
        # A real list (not map()) so that len() below is always valid.
        child_block = [x[indent:] for x in child_lns]
        # Calls parse_line to parse the individual line
        out = parse_line(main)
        # Include the child block into the parsed expression
        if out[0] in ['if', 'else', 'while', 'else if']:
            if len(child_block) == 0:
                raise Exception("If/else/while statement must have sub-clause! (%d)" % i)
            else:
                out.append(parse_lines(child_block))
        else:
            if len(child_block) > 0:
                raise Exception("Not an if/else/while statement, can't have sub-clause! (%d)" % i)
        # This is somewhat complicated. Essentially, it converts something like
        # "if c1 then s1 elif c2 then s2 elif c3 then s3 else s4" (with appropriate
        # indenting) to [ if c1 s1 [ if c2 s2 [ if c3 s3 s4 ] ] ]
        # NOTE(review): the previous statement is not checked to be an 'if';
        # an else after any other statement is appended to it as well.
        if out[0] == 'else if':
            if len(o) == 0: raise Exception("Cannot start with else if! (%d)" % i)
            u = o[-1]
            # A 4-element node already has an else part; descend into it.
            while len(u) == 4: u = u[-1]
            u.append(['if'] + out[1:])
        elif out[0] == 'else':
            if len(o) == 0: raise Exception("Cannot start with else! (%d)" % i)
            u = o[-1]
            while len(u) == 4: u = u[-1]
            u.append(out[1])
        else:
            # Normal case: just add the parsed line to the output
            o.append(out)
    return o[0] if len(o) == 1 else ['seq'] + o
|
||||
|
|
||||
# Tokens contain one or more chars of the same type, with a few exceptions
def chartype(c):
    """Classify character `c` for the tokenizer.

    Returns 'alphanum' (ASCII letters, digits and '.'), 'space' (tab or
    space), 'brack' (any of '()[]'), 'dquote', 'squote', or 'symb' for
    everything else.
    """
    classes = (
        ('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.', 'alphanum'),
        ('\t ', 'space'),
        ('()[]', 'brack'),
        ('"', 'dquote'),
        ("'", 'squote'),
    )
    for members, label in classes:
        if c in members:
            return label
    return 'symb'
|
||||
|
|
||||
# Converts something like "b[4] = x+2 > y*-3" to
# [ 'b', '[', '4', ']', '=', 'x', '+', '2', '>', 'y', '*', '-', '3' ]
def tokenize(ln):
    """Split one source line into a list of token strings.

    Characters are classified with chartype(); runs of the same class form a
    token, brackets are always single tokens, and quoted strings (including
    their quotes) become one token with \\x.., \\n and \\<char> escapes
    resolved. A trailing ':' token is dropped. An input that yields no
    tokens returns [].

    Raises Exception if a string literal is still open at the end of the line.
    """
    tp = 'space'
    i = 0
    o = []
    # Token being accumulated; kept local so no module-level state is touched.
    cur = ''

    # Emit a finished token. A token of two or more chars ending in '-' is
    # split so the '-' becomes its own token (e.g. '*-' -> '*', '-').
    # Whitespace-only tokens are discarded.
    def flush(token):
        if len(token) >= 2 and token[-1] == '-':
            o.extend([token[:-1], '-'])
        elif len(token.strip()) >= 1:
            o.append(token)

    # Main loop
    while i < len(ln):
        c = chartype(ln[i])
        # Inside a string
        if tp == 'squote' or tp == 'dquote':
            if c == tp:
                # Matching closing quote: finish the string token.
                cur += ln[i]
                flush(cur)
                cur = ''
                i += 1
                tp = 'space'
            elif ln[i:i+2] == '\\x':
                # NOTE(review): str.decode('hex') looks Python 2 specific -
                # confirm the supported interpreter before porting.
                cur += ln[i+2:i+4].decode('hex')
                i += 4
            elif ln[i:i+2] == '\\n':
                cur += '\x0a'
                i += 2
            elif ln[i] == '\\':
                cur += ln[i+1]
                i += 2
            else:
                cur += ln[i]
                i += 1
        # Not inside a string
        else:
            boundary = (
                c == 'brack' or tp == 'brack'
                or c == 'space'
                or (c != 'space' and tp == 'space')
                or (c == 'symb' and tp != 'symb')
                or (c == 'alphanum' and tp == 'symb')
                or c == 'squote' or c == 'dquote'
            )
            if boundary:
                flush(cur)
                cur = ''
            cur += ln[i]
            tp = c
            i += 1
    flush(cur)
    # Guard against an empty token list before looking at the last token.
    if o and o[-1] in [':', ':\n', '\n']: o.pop()
    if tp in ['squote', 'dquote']: raise Exception("Unclosed string: "+ln)
    return o
|
||||
|
|
||||
# This is the part where we turn a token list into an abstract syntax tree.
# Operator table, grouped by level; smaller numbers are applied first by
# shunting_yard().
# NOTE(review): '/' and '%' sit on different levels than '*', '#/' and '#%' -
# confirm this is intended.
precedence = {
    '!': 0,
    '^': 1,
    '*': 2, '#/': 2, '#%': 2,
    '/': 3, '+': 3, '-': 3,
    '%': 4, '<': 4, '<=': 4, '>': 4, '>=': 4,
    '==': 5,
    'and': 6, '&&': 6,
    'or': 7, '||': 7,
}
|
||||
|
|
||||
def toktype(token):
    """Classify a token (or an already-built sub-tree) for shunting_yard().

    Returns None for None, otherwise one of 'left_paren', 'right_paren',
    'comma', 'colon', 'unary_operation', 'compound' (anything that is not a
    str), 'binary_operation' (a key of `precedence`) or 'alphanum' (letters,
    digits, '-', '.', or a token wrapped in matching quotes).

    Raises Exception for a string that fits none of these classes.
    """
    if token is None: return None
    elif token in ['(', '[']: return 'left_paren'
    elif token in [')', ']']: return 'right_paren'
    elif token == ',': return 'comma'
    elif token == ':': return 'colon'
    # '!' is also a key of `precedence`, so it must be tested before the
    # binary-operator lookup below.
    elif token in ['!']: return 'unary_operation'
    # Non-strings are sub-trees; this check also keeps unhashable lists away
    # from the dictionary lookup that follows.
    elif not isinstance(token, str): return 'compound'
    elif token in precedence: return 'binary_operation'
    # Raw string: '\-' and '\.' are regex escapes, not string escapes.
    elif re.match(r'^[0-9a-zA-Z\-\.]*$', token): return 'alphanum'
    elif token[0] in ['"', "'"] and token[0] == token[-1]: return 'alphanum'
    else: raise Exception("Invalid token: "+token)
|
||||
|
|
||||
# https://en.wikipedia.org/wiki/Shunting-yard_algorithm
#
# The algorithm works by maintaining three stacks: iq, stack, oq. Initially,
# the tokens are placed in order on the iq. Then, one by one, the tokens are
# processed. Values are moved immediately to the output queue. Operators are
# pushed onto the stack, but before an operator is pushed, every binary
# operator on top of the stack with a strictly smaller number in the
# `precedence` table (i.e. one that binds tighter) is applied first.
# For example:
# iq = 2 + 3 * 5 + 7, stack = \, oq = \
# iq = + 3 * 5 + 7, stack = \, oq = 2
# iq = 3 * 5 + 7, stack = +, oq = 2
# iq = * 5 + 7, stack = +, oq = 2 3
# iq = 5 + 7, stack = + *, oq = 2 3 (* binds tighter than +, so + stays)
# iq = + 7, stack = + *, oq = 2 3 5
# iq = 7, stack = + +, oq = 2 [* 3 5] (* is applied before the new + is pushed)
# iq = \, stack = + +, oq = 2 [* 3 5] 7
# iq = \, stack = +, oq = 2 [+ [* 3 5] 7]
# iq = \, stack = \, oq = [+ 2 [+ [* 3 5] 7] ]
#
# Functions, where function arguments begin with a left bracket preceded by
# the function name, are separated by commas, and end with a right bracket,
# are also included in this algorithm, though in a different way
def shunting_yard(tokens):
    """Turn a flat token list into a nested-list expression tree.

    Returns the single tree left on the output queue. Raises Exception when
    the tokens do not reduce to exactly one item, when a closing bracket has
    no matching opening bracket, when an operator lacks operands, or when a
    pair of parentheses is empty.

    NOTE(review): operators on the same precedence level are never popped by
    the strict '<' comparison, so chains such as a - b - c group to the
    right; left unchanged here - confirm that this is intended.
    """
    oq = []
    stack = []
    prev, tok = None, None

    # The normal Shunting-Yard algorithm simply converts expressions into
    # reverse polish notation. Here, we try to be slightly more ambitious
    # and build up the AST directly on the output queue
    # eg. say oq = [ 2, 5, 3 ] and we add "+" then "*"
    # we get first [ 2, [ +, 5, 3 ] ] then [ [ *, 2, [ +, 5, 3 ] ] ]
    def popstack(stack, oq):
        tok = stack.pop()
        typ = toktype(tok)
        if typ == 'binary_operation':
            if len(oq) < 2:
                raise Exception("Missing operand for operator: " + str(tok))
            a, b = oq.pop(), oq.pop()
            oq.append([tok, b, a])
        elif typ == 'unary_operation':
            if len(oq) < 1:
                raise Exception("Missing operand for operator: " + str(tok))
            a = oq.pop()
            oq.append([tok, a])
        elif typ == 'right_paren':
            # Gather everything back to the matching left-bracket marker.
            args = []
            while len(oq) and toktype(oq[-1]) != 'left_paren':
                args.insert(0, oq.pop())
            if len(oq) == 0:
                raise Exception("Unmatched closing bracket: " + str(tok))
            oq.pop()
            if tok == ']' and args[0] != 'id':
                oq.append(['access'] + args)
            elif tok == ']':
                oq.append(['array_lit'] + args[1:])
            elif tok == ')' and len(args) and args[0] != 'id':
                oq.append(args)
            else:
                # Plain parenthesised expression: unwrap the 'id' placeholder.
                if len(args) < 2:
                    raise Exception("Empty parentheses")
                oq.append(args[1])
        # Any other token type (e.g. a stray left bracket) is simply dropped.

    # The main loop
    for current in tokens:
        prev, tok = tok, current
        typ = toktype(tok)
        if typ == 'alphanum':
            oq.append(tok)
        elif typ == 'left_paren':
            # Handle cases like 3 * (2 + 5) by using 'id' as a default function
            # name
            if toktype(prev) != 'alphanum' and toktype(prev) != 'right_paren':
                oq.append('id')
            # Say the statement is "... f(45...". At the start, we would have f
            # as the last item on the oq. So we move it onto the stack, put the
            # leftparen on the oq, and move f back to the oq, so we have ( f
            # as the last two items on the oq. We also put the leftparen on the
            # stack so we have a separator on both the stack and the oq
            stack.append(oq.pop())
            oq.append(tok)
            oq.append(stack.pop())
            stack.append(tok)
        elif typ == 'right_paren':
            # eg. f(27, 3 * 5 + 4). First, we finish evaluating all the
            # arithmetic inside the last argument. Then, we run popstack
            # to coalesce all of the function arguments sitting on the
            # oq into a single list
            while len(stack) and toktype(stack[-1]) != 'left_paren':
                popstack(stack, oq)
            if len(stack):
                stack.pop()
            stack.append(tok)
            popstack(stack, oq)
        elif typ == 'unary_operation' or typ == 'binary_operation':
            # -5 -> 0 - 5
            if tok == '-' and toktype(prev) not in ['alphanum', 'right_paren']:
                oq.append('0')
            # Handle BEDMAS operator precedence
            prec = precedence[tok]
            while len(stack) and toktype(stack[-1]) == 'binary_operation' and precedence[stack[-1]] < prec:
                popstack(stack, oq)
            stack.append(tok)
        elif typ == 'comma':
            # Finish evaluating all arithmetic before the comma
            while len(stack) and toktype(stack[-1]) != 'left_paren':
                popstack(stack, oq)
        elif typ == 'colon':
            # Colon is like a comma except it stays in the argument list
            # NOTE(review): this stops at 'right_paren' where the comma branch
            # stops at 'left_paren' - confirm which is intended.
            while len(stack) and toktype(stack[-1]) != 'right_paren':
                popstack(stack, oq)
            oq.append(tok)
    while len(stack):
        popstack(stack, oq)
    if len(oq) == 1:
        return oq[0]
    else:
        raise Exception("Wrong number of items left on stack: "+str(oq))
|
||||
|
|
||||
def parse_line(ln):
    """Parse one statement line (without its child block) into a tree node.

    Returns [keyword, condition] for 'if'/'while', ['else if', condition] for
    both 'else if' and 'elif', ['else'] for a bare else,
    ['set', target, value] when the line contains an '=' token, and otherwise
    the expression tree built by shunting_yard().
    """
    tokens = tokenize(ln.strip())
    head = tokens[0]
    if head in ('if', 'while'):
        return [ head, shunting_yard(tokens[1:]) ]
    if len(tokens) >= 2 and head == 'else' and tokens[1] == 'if':
        return [ 'else if', shunting_yard(tokens[2:]) ]
    if head == 'elif':
        return [ 'else if', shunting_yard(tokens[1:]) ]
    if len(tokens) == 1 and head == 'else':
        return [ 'else' ]
    if '=' in tokens:
        # Split at the first '=' token: left side is the target, right side the value.
        eqplace = tokens.index('=')
        target = shunting_yard(tokens[:eqplace])
        value = shunting_yard(tokens[eqplace+1:])
        return [ 'set', target, value ]
    return shunting_yard(tokens)
|
Loading…
Reference in new issue