Gav Wood
11 years ago
27 changed files with 473 additions and 1003 deletions
@ -0,0 +1,58 @@ |
|||
# Build script for the `pyserpent` shared library (Boost.Python bindings).
#
# Reads PYTHON_ID, PYTHON_LS and TARGET_PLATFORM, none of which are set in
# this file; presumably they come from the including CMakeLists — TODO confirm.
cmake_policy(SET CMP0015 NEW)

aux_source_directory(. SRC_LIST)

set(EXECUTABLE pyserpent)

# set(CMAKE_INSTALL_PREFIX ../lib)
add_library(${EXECUTABLE} SHARED ${SRC_LIST})

if (UNIX)
    find_package(Boost 1.53 REQUIRED COMPONENTS python)
endif()
file(GLOB HEADERS "*.h")

include_directories(..)

add_definitions(-DETH_PYTHON)
include_directories(${PYTHON_ID})
target_link_libraries(${EXECUTABLE} ${PYTHON_LS})

target_link_libraries(${EXECUTABLE} serpent)
target_link_libraries(${EXECUTABLE} lll)
target_link_libraries(${EXECUTABLE} evmface)
target_link_libraries(${EXECUTABLE} ethential)
# gmp was listed twice; once is enough.
target_link_libraries(${EXECUTABLE} gmp)

#g++ $(CXXFLAGS) -shared $(PLATFORM_OPTS) $(TARGET).o -L$(BOOST_LIB) -lboost_python -L/usr/lib/python$(PYTHON_VERSION)/config -lpython$(PYTHON_VERSION) $(COMMON_OBJS) -o $(TARGET).so

# Quote the expansion: an empty/undefined TARGET_PLATFORM would otherwise
# collapse to `if( STREQUAL "w64")`, which is a configure-time error.
if ("${TARGET_PLATFORM}" STREQUAL "w64")
    target_link_libraries(${EXECUTABLE} boost_python_win32-mt-s)
    target_link_libraries(${EXECUTABLE} boost_thread_win32-mt-s)
    target_link_libraries(${EXECUTABLE} iphlpapi)
    target_link_libraries(${EXECUTABLE} ws2_32)
    target_link_libraries(${EXECUTABLE} mswsock)
    target_link_libraries(${EXECUTABLE} shlwapi)
elseif (APPLE)
    # Latest mavericks boost libraries only come with -mt
    target_link_libraries(${EXECUTABLE} boost_python-mt)
    target_link_libraries(${EXECUTABLE} boost_thread-mt)
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
elseif (UNIX)
    target_link_libraries(${EXECUTABLE} ${Boost_PYTHON_LIBRARY})
    # NOTE(review): only the `python` Boost component is requested above, so
    # Boost_THREAD_LIBRARY is likely empty here — confirm whether `thread`
    # should be added to COMPONENTS.
    target_link_libraries(${EXECUTABLE} ${Boost_THREAD_LIBRARY})
    # CMAKE_THREAD_LIBS_INIT is only populated by find_package(Threads); the
    # other branches call it, this one did not.
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
else ()
    target_link_libraries(${EXECUTABLE} boost_python)
    target_link_libraries(${EXECUTABLE} boost_thread)
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
endif ()

message("Installation path: ${CMAKE_INSTALL_PREFIX}")

install( TARGETS ${EXECUTABLE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
|||
|
@ -0,0 +1,132 @@ |
|||
#include <boost/python.hpp> |
|||
#include <boost/python/stl_iterator.hpp> |
|||
#include <Python.h> |
|||
#include <libserpent/funcs.h> |
|||
|
|||
// Provide a python wrapper for the C++ functions
|
|||
|
|||
using namespace boost::python; |
|||
|
|||
//std::vector to python list converter
|
|||
//http://stackoverflow.com/questions/5314319/how-to-export-stdvector
|
|||
template<class T> |
|||
struct VecToList |
|||
{ |
|||
static PyObject* convert(const std::vector<T>& vec) |
|||
{ |
|||
boost::python::list* l = new boost::python::list(); |
|||
for(size_t i = 0; i < vec.size(); i++) |
|||
(*l).append(vec[i]); |
|||
|
|||
return l->ptr(); |
|||
} |
|||
}; |
|||
|
|||
// python list to std::vector converter
|
|||
//http://code.activestate.com/lists/python-cplusplus-sig/16463/
|
|||
template<typename T> |
|||
struct Vector_from_python_list |
|||
{ |
|||
|
|||
Vector_from_python_list() |
|||
{ |
|||
using namespace boost::python; |
|||
using namespace boost::python::converter; |
|||
registry::push_back(&Vector_from_python_list<T>::convertible, |
|||
&Vector_from_python_list<T>::construct, |
|||
type_id<std::vector<T> |
|||
>()); |
|||
|
|||
} |
|||
|
|||
// Determine if obj_ptr can be converted in a std::vector<T>
|
|||
static void* convertible(PyObject* obj_ptr) |
|||
{ |
|||
if (!PyList_Check(obj_ptr)){ |
|||
return 0; |
|||
} |
|||
return obj_ptr; |
|||
} |
|||
|
|||
// Convert obj_ptr into a std::vector<T>
|
|||
static void construct( |
|||
PyObject* obj_ptr, |
|||
boost::python::converter::rvalue_from_python_stage1_data* data) |
|||
{ |
|||
using namespace boost::python; |
|||
// Extract the character data from the python string
|
|||
// const char* value = PyString_AsString(obj_ptr);
|
|||
list l(handle<>(borrowed(obj_ptr))); |
|||
|
|||
// // Verify that obj_ptr is a string (should be ensured by convertible())
|
|||
// assert(value);
|
|||
|
|||
// Grab pointer to memory into which to construct the new std::vector<T>
|
|||
void* storage = ( |
|||
(boost::python::converter::rvalue_from_python_storage<std::vector<T> |
|||
>*) |
|||
|
|||
data)->storage.bytes; |
|||
|
|||
// in-place construct the new std::vector<T> using the character data
|
|||
// extraced from the python object
|
|||
std::vector<T>& v = *(new (storage) std::vector<T>()); |
|||
|
|||
// populate the vector from list contains !!!
|
|||
int le = len(l); |
|||
v.resize(le); |
|||
for(int i = 0;i!=le;++i){ |
|||
v[i] = extract<T>(l[i]); |
|||
} |
|||
|
|||
// Stash the memory chunk pointer for later use by boost.python
|
|||
data->convertible = storage; |
|||
} |
|||
}; |
|||
|
|||
// Render a Metadata record as "[<file> <ln> <ch>]".
std::string printMetadata(Metadata m) {
    std::string text = "[";
    text += m.file;
    text += " ";
    text += intToDecimal(m.ln);
    text += " ";
    text += intToDecimal(m.ch);
    text += "]";
    return text;
}
|||
|
|||
// Overload dispatchers for functions exposed with 1 required and up to 2
// total arguments.
BOOST_PYTHON_FUNCTION_OVERLOADS(tokenize_overloads, tokenize, 1, 2);
BOOST_PYTHON_FUNCTION_OVERLOADS(printast_overloads, printAST, 1, 2);
BOOST_PYTHON_FUNCTION_OVERLOADS(parselll_overloads, parseLLL, 1, 2);

// Python module `pyserpent`: exposes the libserpent free functions, the
// vector<->list converters, and the Metadata and Node classes.
BOOST_PYTHON_MODULE(pyserpent)
{
    def("tokenize", tokenize, tokenize_overloads());
    def("parse", parseSerpent);
    def("parseLLL", parseLLL, parselll_overloads());
    def("rewrite", rewrite);
    def("compile_to_lll", compileToLLL);
    def("encode_datalist", encodeDatalist);
    def("decode_datalist", decodeDatalist);
    def("compile_lll", compileLLL);
    def("assemble", assemble);
    def("deserialize", deserialize);
    def("dereference", dereference);
    def("flatten", flatten);
    def("serialize", serialize);
    def("compile", compile);
    def("pretty_compile", prettyCompile);
    def("pretty_assemble", prettyAssemble);

    // std::vector<Node> / std::vector<std::string> <-> Python list.
    to_python_converter<std::vector<Node,class std::allocator<Node> >,
                        VecToList<Node> >();
    to_python_converter<std::vector<std::string,class std::allocator<std::string> >,
                        VecToList<std::string> >();
    Vector_from_python_list<Node>();
    Vector_from_python_list<std::string>();

    class_<Metadata>("Metadata",init<>())
        .def(init<std::string, int, int>())
        .def("__str__", printMetadata)
        .def("__repr__", printMetadata)
    ;
    // The default constructor is already registered by the init<>() passed to
    // class_; the extra .def(init<>()) that followed was a duplicate.
    class_<Node>("Node",init<>())
        .def("__str__", printAST, printast_overloads())
        .def("__repr__", printAST, printast_overloads())
    ;
}
@ -0,0 +1,35 @@ |
|||
# Build script for the `sc` command-line executable.
# TARGET_PLATFORM is read but not set here; presumably provided by the
# including CMakeLists — TODO confirm.
cmake_policy(SET CMP0015 NEW)

aux_source_directory(. SRC_LIST)

include_directories(..)

set(EXECUTABLE sc)

add_executable(${EXECUTABLE} ${SRC_LIST})

# Quote the expansion: an empty/undefined TARGET_PLATFORM would otherwise
# collapse to `if( STREQUAL "w64")`, which is a configure-time error.
if ("${TARGET_PLATFORM}" STREQUAL "w64")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++")
    target_link_libraries(${EXECUTABLE} gcc)
    target_link_libraries(${EXECUTABLE} gdi32)
    target_link_libraries(${EXECUTABLE} ws2_32)
    target_link_libraries(${EXECUTABLE} mswsock)
    target_link_libraries(${EXECUTABLE} shlwapi)
    target_link_libraries(${EXECUTABLE} iphlpapi)
    target_link_libraries(${EXECUTABLE} boost_thread_win32-mt-s)
    set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS)
elseif (UNIX)
    # No extra libraries are linked on UNIX (branch was empty in the original).
else ()
    find_package(Threads REQUIRED)
    target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
endif ()

target_link_libraries(${EXECUTABLE} serpent)
target_link_libraries(${EXECUTABLE} lll)
target_link_libraries(${EXECUTABLE} evmface)
target_link_libraries(${EXECUTABLE} ethential)
target_link_libraries(${EXECUTABLE} gmp)

install( TARGETS ${EXECUTABLE} DESTINATION bin )
@ -0,0 +1,106 @@ |
|||
#include <stdio.h> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <map> |
|||
#include <libserpent/funcs.h> |
|||
|
|||
int main(int argv, char** argc) { |
|||
if (argv == 1) { |
|||
std::cerr << "Must provide a command and arguments! Try parse, rewrite, compile, assemble\n"; |
|||
return 0; |
|||
} |
|||
std::string flag = ""; |
|||
std::string command = argc[1]; |
|||
std::string input; |
|||
std::string secondInput; |
|||
if (std::string(argc[1]) == "-s") { |
|||
flag = command.substr(1); |
|||
command = argc[2]; |
|||
input = ""; |
|||
std::string line; |
|||
while (std::getline(std::cin, line)) { |
|||
input += line + "\n"; |
|||
} |
|||
secondInput = argv == 3 ? "" : argc[3]; |
|||
} |
|||
else { |
|||
if (argv == 2) { |
|||
std::cerr << "Not enough arguments for serpent cmdline\n"; |
|||
throw(0); |
|||
} |
|||
input = argc[2]; |
|||
secondInput = argv == 3 ? "" : argc[3]; |
|||
} |
|||
bool haveSec = secondInput.length() > 0; |
|||
if (command == "parse" || command == "parse_serpent") { |
|||
std::cout << printAST(parseSerpent(input), haveSec) << "\n"; |
|||
} |
|||
else if (command == "rewrite") { |
|||
std::cout << printAST(rewrite(parseLLL(input, true)), haveSec) << "\n"; |
|||
} |
|||
else if (command == "compile_to_lll") { |
|||
std::cout << printAST(compileToLLL(input), haveSec) << "\n"; |
|||
} |
|||
else if (command == "build_fragtree") { |
|||
std::cout << printAST(buildFragmentTree(parseLLL(input, true))) << "\n"; |
|||
} |
|||
else if (command == "compile_lll") { |
|||
std::cout << binToHex(compileLLL(parseLLL(input, true))) << "\n"; |
|||
} |
|||
else if (command == "dereference") { |
|||
std::cout << printAST(dereference(parseLLL(input, true)), haveSec) <<"\n"; |
|||
} |
|||
else if (command == "pretty_assemble") { |
|||
std::cout << printTokens(prettyAssemble(parseLLL(input, true))) <<"\n"; |
|||
} |
|||
else if (command == "pretty_compile_lll") { |
|||
std::cout << printTokens(prettyCompileLLL(parseLLL(input, true))) << "\n"; |
|||
} |
|||
else if (command == "pretty_compile") { |
|||
std::cout << printTokens(prettyCompile(input)) << "\n"; |
|||
} |
|||
else if (command == "assemble") { |
|||
std::cout << assemble(parseLLL(input, true)) << "\n"; |
|||
} |
|||
else if (command == "serialize") { |
|||
std::cout << binToHex(serialize(tokenize(input))) << "\n"; |
|||
} |
|||
else if (command == "flatten") { |
|||
std::cout << printTokens(flatten(parseLLL(input, true))) << "\n"; |
|||
} |
|||
else if (command == "deserialize") { |
|||
std::cout << printTokens(deserialize(hexToBin(input))) << "\n"; |
|||
} |
|||
else if (command == "compile") { |
|||
std::cout << binToHex(compile(input)) << "\n"; |
|||
} |
|||
else if (command == "encode_datalist") { |
|||
std::vector<Node> tokens = tokenize(input); |
|||
std::vector<std::string> o; |
|||
for (int i = 0; i < (int)tokens.size(); i++) { |
|||
o.push_back(tokens[i].val); |
|||
} |
|||
std::cout << binToHex(encodeDatalist(o)) << "\n"; |
|||
} |
|||
else if (command == "decode_datalist") { |
|||
std::vector<std::string> o = decodeDatalist(hexToBin(input)); |
|||
std::vector<Node> tokens; |
|||
for (int i = 0; i < (int)o.size(); i++) |
|||
tokens.push_back(token(o[i])); |
|||
std::cout << printTokens(tokens) << "\n"; |
|||
} |
|||
else if (command == "tokenize") { |
|||
std::cout << printTokens(tokenize(input)); |
|||
} |
|||
else if (command == "biject") { |
|||
if (argv == 3) |
|||
std::cerr << "Not enough arguments for biject\n"; |
|||
int pos = decimalToInt(secondInput); |
|||
std::vector<Node> n = prettyCompile(input); |
|||
if (pos >= (int)n.size()) |
|||
std::cerr << "Code position too high\n"; |
|||
Metadata m = n[pos].metadata; |
|||
std::cout << "Opcode: " << n[pos].val << ", file: " << m.file << |
|||
", line: " << m.ln << ", char: " << m.ch << "\n"; |
|||
} |
|||
} |
@ -1,2 +0,0 @@ |
|||
from compiler import * |
|||
from parser import * |
@ -1,446 +0,0 @@ |
|||
#!/usr/bin/python |
|||
import re |
|||
import sys |
|||
import os |
|||
from parser import parse |
|||
from opcodes import opcodes, reverse_opcodes |
|||
import json |
|||
|
|||
# Running label number used by mklabel(); a one-element list so that it can be
# updated in place without a `global` statement.
label_counter = [0]


def mklabel(prefix):
    """Return `prefix` followed by the next unused label number (0, 1, ...)."""
    current = label_counter[0]
    label_counter[0] = current + 1
    return prefix + str(current)
|||
|
|||
# Function / operator table.
#
# Each entry has the form:
#
#   [ name, input count, output count, code template ]
#
# In a code template, '<k>' stands for the compiled code of argument k;
# every other item is emitted as-is.
funtable = [
    # --- arithmetic ---
    ['+', 2, 1, ['<1>', '<0>', 'ADD']],
    ['-', 2, 1, ['<1>', '<0>', 'SUB']],
    ['*', 2, 1, ['<1>', '<0>', 'MUL']],
    ['/', 2, 1, ['<1>', '<0>', 'DIV']],
    ['^', 2, 1, ['<1>', '<0>', 'EXP']],
    ['%', 2, 1, ['<1>', '<0>', 'MOD']],
    ['#/', 2, 1, ['<1>', '<0>', 'SDIV']],
    ['#%', 2, 1, ['<1>', '<0>', 'SMOD']],
    # --- comparison ---
    ['==', 2, 1, ['<1>', '<0>', 'EQ']],
    ['<', 2, 1, ['<1>', '<0>', 'LT']],
    ['<=', 2, 1, ['<1>', '<0>', 'GT', 'NOT']],
    ['>', 2, 1, ['<1>', '<0>', 'GT']],
    ['>=', 2, 1, ['<1>', '<0>', 'LT', 'NOT']],
    # --- logic / bitwise ---
    ['!', 1, 1, ['<0>', 'NOT']],
    ['or', 2, 1, ['<1>', '<0>', 'DUP', 4, 'PC',
                  'ADD', 'JUMPI', 'POP', 'SWAP', 'POP']],
    ['||', 2, 1, ['<1>', '<0>', 'DUP', 4, 'PC',
                  'ADD', 'JUMPI', 'POP', 'SWAP', 'POP']],
    ['and', 2, 1, ['<1>', '<0>', 'NOT', 'NOT', 'MUL']],
    ['&&', 2, 1, ['<1>', '<0>', 'NOT', 'NOT', 'MUL']],
    ['xor', 2, 1, ['<1>', '<0>', 'XOR']],
    ['&', 2, 1, ['<1>', '<0>', 'AND']],
    ['|', 2, 1, ['<1>', '<0>', 'OR']],
    ['byte', 2, 1, ['<0>', '<1>', 'BYTE']],

    # --- word array methods ---
    # arr, ind -> val
    ['access', 2, 1, ['<0>', '<1>', 32, 'MUL', 'ADD', 'MLOAD']],
    # arr, ind, val
    ['arrset', 3, 0, ['<2>', '<0>', '<1>', 32, 'MUL', 'ADD', 'MSTORE']],
    # val, pointer -> pointer+32
    ['set_and_inc', 2, 1, ['<1>', 'DUP', '<0>', 'SWAP', 'MSTORE', 32, 'ADD']],
    # len -> arr
    # len (32 MUL) len*32 (MSIZE) len*32 MSIZE (SWAP) MSIZE len*32 (MSIZE ADD)
    # MSIZE MSIZE+len*32 (1) MSIZE MSIZE+len*32 1 (SWAP SUB) MSIZE
    # MSIZE+len*32-1 (0 SWAP MSTORE8) MSIZE
    ['array', 1, 1, ['<0>', 32, 'MUL', 'MSIZE', 'SWAP', 'MSIZE',
                     'ADD', 1, 'SWAP', 'SUB', 0, 'SWAP', 'MSTORE8']],

    # --- string array methods ---
    # arr, ind -> val
    ['getch', 2, 1, ['<1>', '<0>', 'ADD', 'MLOAD', 255, 'AND']],
    # arr, ind, val
    ['setch', 3, 0, ['<2>', '<1>', '<0>', 'ADD', 'MSTORE']],
    # len -> arr
    # len MSIZE (SWAP) MSIZE len (MSIZE ADD) MSIZE MSIZE+len (1) MSIZE
    # MSIZE+len 1 (SWAP SUB) MSIZE MSIZE+len-1 (0 SWAP MSTORE8) MSIZE
    ['string', 1, 1, ['<0>', 'MSIZE', 'SWAP', 'MSIZE', 'ADD',
                      1, 'SWAP', 'SUB', 0, 'SWAP', 'MSTORE8']],

    # --- messages and contract creation ---
    # ['send', 2, 1, [0,0,0,0,0,'<1>','<0>','CALL'] ], # to, value, 0, [] -> /dev/null
    # to, value, gas, [] -> /dev/null
    ['send', 3, 1, [0, 0, 0, 0, '<2>', '<1>', '<0>', 'CALL']],
    # to, value, gas, data, datasize -> out32
    # MSIZE 0 MSIZE (MSTORE) MSIZE (DUP) MSIZE MSIZE (...) MSIZE MSIZE 32 <4>
    # <3> <2> <1> <0> (CALL) MSIZE FLAG (POP) MSIZE (MLOAD) RESULT
    ['msg', 5, 1, ['MSIZE', 0, 'MSIZE', 'MSTORE', 'DUP', 32, 'SWAP',
                   '<4>', 32, 'MUL', '<3>', '<2>', '<1>', '<0>',
                   'CALL', 'POP', 'MLOAD']],
    # to, value, gas, data, datasize, outsize -> out
    # <5>*32 (MSIZE SWAP MSIZE SWAP) MSIZE MSIZE <5>*32 (DUP MSIZE ADD)
    # MSIZE MSIZE <5>*32 MEND+1 (1 SWAP SUB) MSIZE MSIZE <5>*32 MEND
    # (0 SWAP MSTORE8) MSIZE MSIZE <5>*32 (SWAP) MSIZE <5>*32 MSIZE
    ['msg', 6, 1, ['<5>', 32, 'MUL', 'MSIZE', 'SWAP', 'MSIZE', 'SWAP',
                   'DUP', 'MSIZE', 'ADD', 1, 'SWAP', 'SUB', 0, 'SWAP',
                   'MSTORE8', 'SWAP',
                   '<4>', 32, 'MUL', '<3>', '<2>', '<1>', '<0>',
                   'CALL', 'POP']],
    # value, gas, data, datasize
    ['create', 4, 1, ['<3>', '<2>', '<1>', '<0>', 'CREATE']],

    # --- hashing, storage, call data ---
    ['sha3', 1, 1, [32, 'MSIZE', '<0>', 'MSIZE', 'MSTORE', 'SHA3']],
    ['sha3bytes', 1, 1, ['SHA3']],
    ['sload', 1, 1, ['<0>', 'SLOAD']],
    ['sstore', 2, 0, ['<1>', '<0>', 'SSTORE']],
    ['calldataload', 1, 1, ['<0>', 32, 'MUL', 'CALLDATALOAD']],
    ['id', 1, 1, ['<0>']],

    # --- termination ---
    # returns single value
    # 0 MSIZE (SWAP) MSIZE 0 (MSIZE) MSIZE 0 MSIZE (MSTORE) MSIZE (32 SWAP) 32
    # MSIZE
    ['return', 1, 0, ['<0>', 'MSIZE', 'SWAP', 'MSIZE', 'MSTORE',
                      32, 'SWAP', 'RETURN']],
    ['return', 2, 0, ['<1>', 32, 'MUL', '<0>', 'RETURN']],
    ['suicide', 1, 0, ['<0>', 'SUICIDE']],
]
|||
|
|||
# Names that look like variables in source code but compile directly to a
# fixed opcode sequence instead of a memory load.
pseudovars = {
    # message
    'msg.datasize': [32, 'CALLDATASIZE', 'DIV'],
    'msg.sender': ['CALLER'],
    'msg.value': ['CALLVALUE'],
    # transaction
    'tx.gasprice': ['GASPRICE'],
    'tx.origin': ['ORIGIN'],
    'tx.gas': ['GAS'],
    # contract
    'contract.balance': ['BALANCE'],
    # block
    'block.prevhash': ['PREVHASH'],
    'block.coinbase': ['COINBASE'],
    'block.timestamp': ['TIMESTAMP'],
    'block.number': ['NUMBER'],
    'block.difficulty': ['DIFFICULTY'],
    'block.gaslimit': ['GASLIMIT'],
}
|||
|
|||
|
|||
# Helpers for detecting raw values (numbers and strings) and converting them
# to integers.
def frombytes(b):
    """Interpret the string `b` as a big-endian base-256 number ('' -> 0)."""
    total = 0
    for ch in b:
        total = total * 256 + ord(ch)
    return total
|||
|
|||
|
|||
def fromhex(b):
    """Convert a string of hex digits (no '0x' prefix) to an integer.

    Upper- and lower-case digits are both accepted; '' yields 0.

    Raises:
        ValueError: if `b` contains a character that is not a hex digit.
            (Previously such characters, including upper-case A-F, were
            silently counted as -1 and produced a wrong value.)
    """
    value = 0
    for ch in b:
        digit = '0123456789abcdef'.find(ch.lower())
        if digit < 0:
            raise ValueError("Invalid hex digit: " + repr(ch))
        value = value * 16 + digit
    return value
|||
|
|||
|
|||
def is_numberlike(b):
    """Return True if `b` is a string literal that numberize() can convert.

    Accepted forms: an optionally negative decimal integer, a string quoted
    with matching ' or " characters, or anything starting with '0x'.
    Non-string values always yield False.

    The decimal check used to be '^[0-9\\-]*$', which also accepted '', '-'
    and '1-2' -- inputs that numberize() then failed on.
    """
    if isinstance(b, (str, unicode)):
        if re.match('^-?[0-9]+$', b):
            return True
        # Require both an opening and a closing quote character.
        if len(b) >= 2 and b[0] in ["'", '"'] and b[0] == b[-1]:
            return True
        if b[:2] == '0x':
            return True
    return False
|||
|
|||
|
|||
def numberize(b):
    """Convert a literal token to an integer.

    Quoted strings are read as big-endian bytes (via frombytes), '0x...'
    tokens as hexadecimal (via fromhex), anything else with int().
    """
    opening = b[0]
    if opening == "'" or opening == '"':
        return frombytes(b[1:-1])
    if b[:2] == '0x':
        return fromhex(b[2:])
    return int(b)
|||
|
|||
|
|||
# Apply rewrite rules |
|||
def rewrite(ast):
    """Apply the rewrite rules to an AST and return the rewritten tree.

    String nodes are returned unchanged. Storage / msg.data accesses, array
    assignments, array literals and `return` of an array literal are mapped
    to their funtable forms; every other node is rebuilt with its children
    rewritten.
    """
    if isinstance(ast, (str, unicode)):
        return ast
    head = ast[0]
    if head == 'set':
        target = ast[1]
        if target[0] == 'access':
            if target[1] == 'contract.storage':
                return ['sstore', rewrite(target[2]), rewrite(ast[2])]
            return ['arrset', rewrite(target[1]), rewrite(target[2]),
                    rewrite(ast[2])]
    elif head == 'access':
        if ast[1] == 'msg.data':
            return ['calldataload', rewrite(ast[2])]
        if ast[1] == 'contract.storage':
            return ['sload', rewrite(ast[2])]
    elif head == 'array_lit':
        elements = ast[1:]
        # Allocate the array, then store each element and advance the pointer.
        built = ['array', str(len(elements))]
        for element in elements:
            built = ['set_and_inc', rewrite(element), built]
        # Step the pointer back to the start of the array.
        return ['-', built, str(len(elements) * 32)]
    elif head == 'return':
        if len(ast) == 2 and ast[1][0] == 'array_lit':
            return ['return', rewrite(ast[1]), str(len(ast[1][1:]))]
    # No special rule matched: rewrite every child, including the head.
    return [rewrite(child) for child in ast]
|||
|
|||
|
|||
# Main compiler code |
|||
def arity(ast):
    """Return how many values the expression `ast` leaves behind (0 or 1).

    Strings yield 1; 'set' and 'if' yield 0; a 'seq' yields 1 only when it is
    non-empty and its last statement yields 1. Other heads are looked up by
    name in funtable (first match wins); None is returned when there is no
    matching entry.
    """
    if isinstance(ast, (str, unicode)):
        return 1
    head = ast[0]
    if head == 'set' or head == 'if':
        return 0
    if head == 'seq':
        if len(ast) > 1 and arity(ast[-1]) == 1:
            return 1
        return 0
    for entry in funtable:
        if head == entry[0]:
            return entry[2]
    return None
|||
|
|||
|
|||
# Debugging |
|||
def print_wrapper(f):
    """Debugging decorator: print the first positional argument before the
    call and the result after it, then return the result unchanged."""
    def wrapper(*args, **kwargs):
        print(args[0])
        result = f(*args, **kwargs)
        print(result)
        return result
    return wrapper
|||
|
|||
|
|||
# Right-hand-side expressions (ie. the normal kind) |
|||
#@print_wrapper |
|||
def compile_expr(ast, varhash, lc=None):
    """Compile a (rewritten) AST node into a flat list of assembly tokens.

    Args:
        ast: a string (keyword, literal, pseudo-variable or variable name) or
            a list whose first item is the operation name.
        varhash: dict mapping variable names to memory offsets; new variables
            are added to it in place, 32 bytes apart.
        lc: one-element list holding the next label number; mutated in place.
            Defaults to a fresh [0] for each top-level call (the previous
            mutable default `[0]` was shared between calls).

    Returns:
        A list mixing ints, opcode names and 'LABEL_n' / 'REF_n' markers.

    Raises:
        Exception: on an invalid assignment target, an argument that does not
            produce exactly one value, or an unknown operation.
    """
    if lc is None:
        lc = [0]
    # Stop keyword
    if ast == 'stop':
        return ['STOP']
    # Literals
    elif isinstance(ast, (str, unicode)):
        if is_numberlike(ast):
            return [numberize(ast)]
        elif ast in pseudovars:
            # Copy so callers can never mutate the shared table entry.
            return list(pseudovars[ast])
        else:
            if ast not in varhash:
                varhash[ast] = len(varhash) * 32
            return [varhash[ast], 'MLOAD']
    # Set (specifically, variables)
    elif ast[0] == 'set':
        if not isinstance(ast[1], (str, unicode)):
            raise Exception("Cannot set the value of " + str(ast[1]))
        elif ast[1] in pseudovars:
            raise Exception("Cannot set a pseudovariable!")
        else:
            if ast[1] not in varhash:
                varhash[ast[1]] = len(varhash) * 32
            return compile_expr(ast[2], varhash, lc) + [varhash[ast[1]], 'MSTORE']
    # If and if/else statements
    elif ast[0] == 'if':
        f = compile_expr(ast[1], varhash, lc)
        g = compile_expr(ast[2], varhash, lc)
        h = compile_expr(ast[3], varhash, lc) if len(ast) > 3 else None
        label, ref = 'LABEL_' + str(lc[0]), 'REF_' + str(lc[0])
        lc[0] += 1
        label2, ref2 = 'LABEL_' + str(lc[0]), 'REF_' + str(lc[0])
        lc[0] += 1
        if h:
            return f + ['NOT', ref2, 'JUMPI'] + g + [ref, 'JUMP', label2] + h + [label]
        else:
            return f + ['NOT', ref, 'JUMPI'] + g + [label]
    # While loops
    elif ast[0] == 'while':
        f = compile_expr(ast[1], varhash, lc)
        g = compile_expr(ast[2], varhash, lc)
        beglab, begref = 'LABEL_' + str(lc[0]), 'REF_' + str(lc[0])
        endlab, endref = 'LABEL_' + str(lc[0] + 1), 'REF_' + str(lc[0] + 1)
        lc[0] += 2
        return [beglab] + f + ['NOT', endref, 'JUMPI'] + g + [begref, 'JUMP', endlab]
    # Seq
    elif ast[0] == 'seq':
        o = []
        statements = ast[1:]
        last_index = len(statements) - 1
        for index, arg in enumerate(statements):
            o.extend(compile_expr(arg, varhash, lc))
            # Discard the value of every statement except the last one.
            # Compare by position: the old `arg != ast[-1]` compared by value,
            # so an earlier statement equal to the last one kept its value on
            # the stack.
            if arity(arg) == 1 and index != last_index:
                o.append('POP')
        return o
    # Functions and operations
    for f in funtable:
        if ast[0] == f[0] and len(ast[1:]) == f[1]:
            # Every argument must produce exactly one value.
            if all(arity(a) == 1 for a in ast[1:]):
                oq = []
                for tok in f[3]:
                    if isinstance(tok, (str, unicode)) and tok[0] == '<' and tok[-1] == '>':
                        # '<k>' placeholder: splice in compiled argument k.
                        oq.extend(
                            compile_expr(ast[1 + int(tok[1:-1])], varhash, lc))
                    else:
                        oq.append(tok)
                return oq
            else:
                raise Exception(
                    "Arity of argument mismatches for %s: %s" % (f[0], ast))
    raise Exception("invalid op: " + ast[0])
|||
|
|||
|
|||
# Stuff to add once to each program |
|||
def add_wrappers(c, varhash):
    """Prefix the program with its one-time prologue when needed.

    If any variables exist and the code uses MSIZE, a zero byte is stored at
    the last byte of the variable area (offset len(varhash) * 32 - 1) before
    the program; otherwise `c` is returned unchanged.
    """
    needs_prologue = len(varhash) and 'MSIZE' in c
    if not needs_prologue:
        return c
    return [0, len(varhash) * 32 - 1, 'MSTORE8'] + c
|||
|
|||
|
|||
# Optimizations |
|||
# Constant-folding table used by optimize(): opcode name -> function of the
# two operands (x is the top stack item, y the one below it).
ops = {
    'ADD': lambda x, y: (x + y) % 2 ** 256,
    'MUL': lambda x, y: (x * y) % 2 ** 256,
    'SUB': lambda x, y: (x - y) % 2 ** 256,
    # Explicit floor division. A zero divisor folds to 0 instead of raising
    # ZeroDivisionError inside the optimizer.
    # NOTE(review): 0 is the EVM's result for DIV by zero -- confirm for the
    # VM revision this compiler targets.
    'DIV': lambda x, y: 0 if y == 0 else x // y,
    'EXP': lambda x, y: pow(x, y, 2 ** 256),
    'AND': lambda x, y: x & y,
    'OR': lambda x, y: x | y,
    'XOR': lambda x, y: x ^ y
}
|||
|
|||
|
|||
def multipop(li, n):
    """Pop the last `n` items off `li` in place and return `li`."""
    remaining = n
    while remaining > 0:
        li.pop()
        remaining -= 1
    return li
|||
|
|||
|
|||
def optimize(c):
    """Return a peephole-optimized copy of the token list `c`.

    Tokens are appended one at a time and, after each append, the tail of the
    output is checked for:
      * two integer operands followed by an opcode in `ops` -> folded constant
      * NOT NOT                                              -> removed
      * two ADD/SUB-with-zero patterns guarded by is_numberlike()

    `c` itself is not modified.
    """
    oq = []
    # Iterate directly instead of repeatedly pop(0)-ing a copy (quadratic).
    for tok in c:
        oq.append(tok)
        # Constant folding.
        if oq[-1] in ops and len(oq) >= 3:
            if isinstance(oq[-2], (int, long)) and isinstance(oq[-3], (int, long)):
                ntok = ops[oq[-1]](oq[-2], oq[-3])
                multipop(oq, 3).append(ntok)
        # Length checks come first in the rules below: removing a NOT NOT pair
        # can leave `oq` empty, and the old code then indexed oq[-1].
        if len(oq) >= 2 and oq[-1] == 'NOT' and oq[-2] == 'NOT':
            multipop(oq, 2)
        if len(oq) >= 3 and oq[-1] == 'ADD' and oq[-2] == 0 and is_numberlike(oq[-3]):
            multipop(oq, 2)
        if len(oq) >= 3 and oq[-1] in ['SUB', 'ADD'] and oq[-3] == 0 and is_numberlike(oq[-2]):
            ntok = oq[-2]
            multipop(oq, 3).append(ntok)
    return oq
|||
|
|||
|
|||
def compile_to_assembly(source, optimize_flag=1):
    """Compile source text (or an already parsed AST) to an assembly token list.

    Pipeline: parse (strings only) -> rewrite -> compile_expr -> add_wrappers
    -> optimize (skipped when `optimize_flag` is falsy).
    """
    tree = parse(source) if isinstance(source, (str, unicode)) else source
    variables = {}
    rewritten = rewrite(tree)
    raw_code = compile_expr(rewritten, variables, [0])
    wrapped = add_wrappers(raw_code, variables)
    if optimize_flag:
        return optimize(wrapped)
    return wrapped
|||
|
|||
|
|||
def get_vars(source):
    """Return the variable table (name -> memory offset) for a program.

    `source` may be source text or an already parsed AST. The program is
    compiled only for its side effect of filling the table.
    """
    tree = parse(source) if isinstance(source, (str, unicode)) else source
    variables = {}
    # compile_expr registers each variable in `variables` as it meets it.
    compile_expr(rewrite(tree), variables, [0])
    return variables
|||
|
|||
|
|||
def log256(n):
    """Return the number of base-256 digits (bytes) needed for `n`; 0 for 0.

    Raises:
        ValueError: for negative `n`, which previously recursed without end
            because floor division never brings a negative number to 0.
    """
    if n < 0:
        raise ValueError("log256 is undefined for negative values: " + str(n))
    digits = 0
    while n != 0:
        n //= 256  # explicit floor division
        digits += 1
    return digits
|||
|
|||
|
|||
def tobytearr(n, L):
    """Return the low `L` base-256 digits of `n` as a big-endian list of ints.

    Higher-order digits beyond `L` are dropped; L == 0 yields [].
    """
    digits = []
    for _ in range(L):
        digits.append(n % 256)
        n //= 256  # explicit floor division
    digits.reverse()
    return digits
|||
|
|||
|
|||
# Dereference labels |
|||
def dereference(c):
    """Resolve labels: replace 'REF_x' and integer tokens with PUSH sequences.

    First pass: drop every 'LABEL_x' marker, recording the byte position it
    stood at. Second pass: emit PUSH4 plus a 4-byte position for each 'REF_x',
    and PUSHn plus n bytes for each integer (n is at least 1). All other
    tokens are copied through and counted as one byte.
    """
    kept = []
    label_positions = {}
    position = 0
    for token in c:
        is_text = isinstance(token, str)
        if is_text and token[:6] == 'LABEL_':
            label_positions[token[6:]] = position
            continue
        kept.append(token)
        if is_text and token[:4] == 'REF_':
            position += 5  # PUSH4 opcode + 4 data bytes
        elif isinstance(token, (int, long)):
            position += 1 + max(1, log256(token))  # PUSHn opcode + n bytes
        else:
            position += 1

    resolved = []
    for token in kept:
        if isinstance(token, str) and token[:4] == 'REF_':
            resolved.append('PUSH4')
            resolved.extend(tobytearr(label_positions[token[4:]], 4))
        elif isinstance(token, (int, long)):
            width = max(1, log256(token))
            resolved.append('PUSH' + str(width))
            resolved.extend(tobytearr(token, width))
        else:
            resolved.append(token)
    return resolved
|||
|
|||
|
|||
def serialize(source):
    """Turn a dereferenced token list into a byte string.

    Each token becomes one byte: ints as-is, opcode names via reverse_opcodes,
    'PUSHn' as 95 + n, and all-digit strings via int().

    Raises:
        Exception: for a token that matches none of those forms.
    """
    def to_code(arg):
        if isinstance(arg, (int, long)):
            return arg
        if arg in reverse_opcodes:
            return reverse_opcodes[arg]
        if arg[:4] == 'PUSH':
            return 95 + int(arg[4:])
        if re.match('^[0-9]*$', arg):
            return int(arg)
        raise Exception("Cannot serialize: " + str(arg))

    # Convert every token first, then build the string.
    codes = [to_code(token) for token in source]
    return ''.join([chr(code) for code in codes])
|||
|
|||
|
|||
def deserialize(source):
    """Decode a byte string into a list of opcode names and data bytes.

    Bytes 96..127 decode to 'PUSH1'..'PUSH32', and the next 1..32 bytes are
    emitted as plain integers. Every other byte is looked up in `opcodes`.

    Fix: a data byte whose value happened to lie in 96..127 used to restart
    the data counter as if it were another PUSH, so the bytes after it were
    decoded as data instead of opcodes.
    """
    o = []
    pending = 0  # data bytes still owed to the most recent PUSHn
    for ch in source:
        p = ord(ch)
        if pending > 0:
            o.append(p)
            pending -= 1
        elif p >= 96 and p <= 127:
            o.append('PUSH' + str(p - 95))
            pending = p - 95
        else:
            o.append(opcodes[p][0])
    return o
|||
|
|||
|
|||
def assemble(asm):
    """Resolve labels in `asm` with dereference(), then serialize() the result."""
    resolved = dereference(asm)
    return serialize(resolved)
|||
|
|||
|
|||
def compile(source):
    """Compile source text to bytes: parse -> compile_to_assembly -> assemble."""
    tree = parse(source)
    assembly = compile_to_assembly(tree)
    return assemble(assembly)
|||
|
|||
|
|||
def encode_datalist(vals):
    """Encode a list of values as a string of consecutive 32-byte words.

    `vals` may be a list/tuple of values, the empty string (encodes to ''),
    or a space-separated string of literals, each converted with numberize().

    Per-value encoding:
      * int / long / bool -> 32-byte big-endian number
      * None              -> 32 zero bytes
      * 40-char string    -> hex-decoded and left-padded with 12 zero bytes
      * other string      -> left-padded with zero bytes to 32 characters

    Raises:
        TypeError: for a value of any other type.

    Fix: the old True/False/None branches returned the ints 1/0, which made
    the final ''.join fail; the bool branches were also unreachable because
    bool is a subclass of int.
    """
    def enc(n):
        if n is None:
            return '\x00' * 32
        if isinstance(n, (int, long)):
            return ''.join(map(chr, tobytearr(n, 32)))
        if isinstance(n, (str, unicode)):
            if len(n) == 40:
                return '\x00' * 12 + n.decode('hex')
            return '\x00' * (32 - len(n)) + n
        raise TypeError("Cannot encode datalist value: " + repr(n))

    if isinstance(vals, (tuple, list)):
        return ''.join(map(enc, vals))
    elif vals == '':
        return ''
    else:
        # Assume you're getting in numbers or 0x...
        return ''.join(map(enc, map(numberize, vals.split(' '))))
|||
|
|||
|
|||
def decode_datalist(arr):
    """Split `arr` into 32-character chunks and decode each with frombytes().

    `arr` may be a string or a list of byte values (joined via chr() first).
    """
    data = ''.join(map(chr, arr)) if isinstance(arr, list) else arr
    return [frombytes(data[start:start + 32])
            for start in range(0, len(data), 32)]
@ -1,56 +0,0 @@ |
|||
# Opcode table: byte value -> [name, <int>, <int>].
# NOTE(review): the two integers look like stack input/output counts --
# confirm; nothing in this view reads them.
opcodes = {
    # 0x00 - 0x0d
    0x00: ['STOP', 0, 0],
    0x01: ['ADD', 2, 1],
    0x02: ['MUL', 2, 1],
    0x03: ['SUB', 2, 1],
    0x04: ['DIV', 2, 1],
    0x05: ['SDIV', 2, 1],
    0x06: ['MOD', 2, 1],
    0x07: ['SMOD', 2, 1],
    0x08: ['EXP', 2, 1],
    0x09: ['NEG', 2, 1],
    0x0a: ['LT', 2, 1],
    0x0b: ['GT', 2, 1],
    0x0c: ['EQ', 2, 1],
    0x0d: ['NOT', 1, 1],
    # 0x10 - 0x13
    0x10: ['AND', 2, 1],
    0x11: ['OR', 2, 1],
    0x12: ['XOR', 2, 1],
    0x13: ['BYTE', 2, 1],
    # 0x20
    0x20: ['SHA3', 2, 1],
    # 0x30 - 0x37
    0x30: ['ADDRESS', 0, 1],
    0x31: ['BALANCE', 0, 1],
    0x32: ['ORIGIN', 0, 1],
    0x33: ['CALLER', 0, 1],
    0x34: ['CALLVALUE', 0, 1],
    0x35: ['CALLDATALOAD', 1, 1],
    0x36: ['CALLDATASIZE', 0, 1],
    0x37: ['GASPRICE', 0, 1],
    # 0x40 - 0x45
    0x40: ['PREVHASH', 0, 1],
    0x41: ['COINBASE', 0, 1],
    0x42: ['TIMESTAMP', 0, 1],
    0x43: ['NUMBER', 0, 1],
    0x44: ['DIFFICULTY', 0, 1],
    0x45: ['GASLIMIT', 0, 1],
    # 0x50 - 0x5c
    0x50: ['POP', 1, 0],
    0x51: ['DUP', 1, 2],
    0x52: ['SWAP', 2, 2],
    0x53: ['MLOAD', 1, 1],
    0x54: ['MSTORE', 2, 0],
    0x55: ['MSTORE8', 2, 0],
    0x56: ['SLOAD', 1, 1],
    0x57: ['SSTORE', 2, 0],
    0x58: ['JUMP', 1, 0],
    0x59: ['JUMPI', 2, 0],
    0x5a: ['PC', 0, 1],
    0x5b: ['MSIZE', 0, 1],
    0x5c: ['GAS', 0, 1],
    # 0x60: encompasses 96...127
    0x60: ['PUSH', 0, 1],
    # 0xf0 - 0xff
    0xf0: ['CREATE', 4, 1],
    0xf1: ['CALL', 7, 1],
    0xf2: ['RETURN', 2, 1],
    0xff: ['SUICIDE', 1, 1],
}

# Inverse lookup: opcode name -> byte value.
reverse_opcodes = {}
for code, info in opcodes.items():
    reverse_opcodes[info[0]] = code
@ -1,299 +0,0 @@ |
|||
import re |
|||
|
|||
# Number of spaces at the beginning of a line |
|||
def spaces(ln):
    """Return the number of space characters at the start of `ln`.

    Only ' ' is counted; tabs are not.
    """
    return len(ln) - len(ln.lstrip(' '))
|||
|
|||
# Main parse function |
|||
def parse(document):
    """Main entry point: split `document` on newlines and parse the lines."""
    lines = document.split('\n')
    return parse_lines(lines)
|||
|
|||
def strip_line(ln):
    """Strip surrounding whitespace from `ln` and drop everything from the
    first '//' onwards (text before the '//' is kept as-is)."""
    trimmed = ln.strip()
    cut = trimmed.find('//')
    if cut < 0:
        return trimmed
    return trimmed[:cut]
|||
|
|||
# Parse the statement-level structure, including if and while statements |
|||
def parse_lines(lns):
    """Parse the statement-level structure of a list of source lines.

    Each unindented line is parsed with parse_line(); the indented lines that
    follow it form its child block, which is parsed recursively. 'if',
    'else if', 'else' and 'while' must have a child block; no other statement
    may have one. 'else if' / 'else' clauses are attached to the preceding
    'if' node.

    Returns the single parsed statement, or ['seq', ...] when there is not
    exactly one.

    Raises:
        Exception: on unexpected indentation, a missing or unexpected child
            block, or an 'else' / 'else if' with no preceding statement.
    """
    o = []
    i = 0
    while i < len(lns):
        main = lns[i]
        # Skip lines that are empty once whitespace and comments are removed.
        # The child-block scan below already ignores such lines; the top level
        # previously skipped only blank lines and handed comment-only lines
        # to parse_line() as ''.
        if len(strip_line(main)) == 0:
            i += 1
            continue
        if spaces(main) > 0:
            raise Exception("Line "+str(i)+" indented too much!")
        main = strip_line(main)
        # Grab the child block: every following line up to the next
        # unindented, non-empty one.
        indent = 99999999
        i += 1
        child_lns = []
        while i < len(lns):
            if len(strip_line(lns[i])) > 0:
                sp = spaces(lns[i])
                if sp == 0:
                    break
                indent = min(sp, indent)
                child_lns.append(lns[i])
            i += 1
        # Remove the common indentation so the block can be parsed recursively.
        child_block = [x[indent:] for x in child_lns]
        # Calls parse_line to parse the individual line
        out = parse_line(main)
        # Include the child block into the parsed expression
        if out[0] in ['if', 'else', 'while', 'else if']:
            if len(child_block) == 0:
                raise Exception("If/else/while statement must have sub-clause! (%d)" % i)
            else:
                out.append(parse_lines(child_block))
        else:
            if len(child_block) > 0:
                raise Exception("Not an if/else/while statement, can't have sub-clause! (%d)" % i)
        # This is somewhat complicated. Essentially, it converts something like
        # "if c1 then s1 elif c2 then s2 elif c3 then s3 else s4" (with appropriate
        # indenting) to [ if c1 s1 [ if c2 s2 [ if c3 s3 s4 ] ] ]
        if out[0] == 'else if':
            if len(o) == 0:
                raise Exception("Cannot start with else if! (%d)" % i)
            u = o[-1]
            # A 4-item node already has an else part; descend into it.
            while len(u) == 4:
                u = u[-1]
            u.append(['if'] + out[1:])
        elif out[0] == 'else':
            if len(o) == 0:
                raise Exception("Cannot start with else! (%d)" % i)
            u = o[-1]
            while len(u) == 4:
                u = u[-1]
            u.append(out[1])
        else:
            # Normal case: just add the parsed line to the output
            o.append(out)
    return o[0] if len(o) == 1 else ['seq'] + o
|||
|
|||
# Tokens contain one or more chars of the same type, with a few exceptions |
|||
def chartype(c):
    """Classify a single character for the tokenizer.

    Returns one of 'alphanum', 'space', 'brack', 'dquote', 'squote', 'symb'.
    Tokens contain one or more chars of the same type, with a few exceptions.
    """
    if c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.':
        return 'alphanum'
    elif c in '\t ':
        return 'space'
    elif c in '()[]':
        return 'brack'
    elif c == '"':
        return 'dquote'
    elif c == "'":
        return 'squote'
    else:
        return 'symb'


# Converts something like "b[4] = x+2 > y*-3" to
# [ 'b', '[', '4', ']', '=', 'x', '+', '2', '>', 'y', '*', '-', '3' ]
def tokenize(ln):
    """Split one source line into a list of token strings.

    Quoted strings become single tokens including their quotes; inside them
    the escapes \\xHH, \\n and \\<char> are resolved. A trailing ':' token is
    dropped. An empty or all-whitespace line yields [].

    Raises:
        Exception: if a quoted string is not closed by the end of the line.

    Fixes: the pending token lived in a module-level global `cur`; the final
    `o[-1]` check crashed when no tokens were produced; a backslash as the
    last character inside a string raised IndexError instead of the
    unclosed-string error.
    """
    tp = 'space'
    i = 0
    o = []
    # Pending token, held in a one-element list so the nested function can
    # rebind it (Python 2 has no `nonlocal`).
    cur = ['']

    # Finish a token and start a new one
    def nxt():
        text = cur[0]
        if len(text) >= 2 and text[-1] == '-':
            # Split a trailing '-' off a symbol run such as '*-'.
            o.extend([text[:-1], '-'])
        elif len(text.strip()) >= 1:
            o.append(text)
        cur[0] = ''

    # Main loop
    while i < len(ln):
        c = chartype(ln[i])
        # Inside a string
        if tp == 'squote' or tp == "dquote":
            if c == tp:
                cur[0] += ln[i]
                nxt()
                i += 1
                tp = 'space'
            elif ln[i:i+2] == '\\x':
                cur[0] += ln[i+2:i+4].decode('hex')
                i += 4
            elif ln[i:i+2] == '\\n':
                cur[0] += '\x0a'
                i += 2
            elif ln[i] == '\\':
                if i + 1 >= len(ln):
                    raise Exception("Unclosed string: "+ln)
                cur[0] += ln[i+1]
                i += 2
            else:
                cur[0] += ln[i]
                i += 1
        # Not inside a string
        else:
            if c == 'brack' or tp == 'brack':
                nxt()
            elif c == 'space':
                nxt()
            elif c != 'space' and tp == 'space':
                nxt()
            elif c == 'symb' and tp != 'symb':
                nxt()
            elif c == 'alphanum' and tp == 'symb':
                nxt()
            elif c == 'squote' or c == "dquote":
                nxt()
            cur[0] += ln[i]
            tp = c
            i += 1
    nxt()
    if o and o[-1] in [':', ':\n', '\n']:
        o.pop()
    if tp in ['squote', 'dquote']:
        raise Exception("Unclosed string: "+ln)
    return o
|||
|
|||
# This is the part where we turn a token list into an abstract syntax tree |
|||
# Operator -> precedence level.  shunting_yard applies an operator already on
# its stack before an incoming one only when the stacked operator's number is
# strictly smaller, so a smaller number binds tighter.
# NOTE(review): '/' (3) and '%' (4) sit at different levels from '*', '#/'
# and '#%' (2); this looks inconsistent -- confirm it is intended.
precedence = dict(
    (op, level)
    for level, ops in (
        (0, ('!',)),
        (1, ('^',)),
        (2, ('*', '#/', '#%')),
        (3, ('/', '+', '-')),
        (4, ('%', '<', '<=', '>', '>=')),
        (5, ('==',)),
        (6, ('and', '&&')),
        (7, ('or', '||')),
    )
    for op in ops
)
|||
|
|||
def toktype(token):
    """Classify a token, or an already-built expression node, for the parser.

    token -- a token string, a nested list produced by the parser, or None.
    Returns None for None, otherwise one of 'left_paren', 'right_paren',
    'comma', 'colon', 'unary_operation', 'compound' (anything that is not a
    str), 'binary_operation' (a key of `precedence`) or 'alphanum' (names,
    numbers and quoted strings).
    Raises Exception("Invalid token: ...") for any other string.
    """
    if token is None:
        return None
    elif token in ['(', '[']:
        return 'left_paren'
    elif token in [')', ']']:
        return 'right_paren'
    elif token == ',':
        return 'comma'
    elif token == ':':
        return 'colon'
    elif token in ['!']:
        # Checked before the precedence lookup: '!' is also a key of
        # `precedence` but must be reported as unary
        return 'unary_operation'
    elif not isinstance(token, str):
        # Must precede the dict lookup below: list nodes are unhashable
        return 'compound'
    elif token in precedence:
        return 'binary_operation'
    elif re.match(r'^[0-9a-zA-Z\-\.]*$', token):
        # Raw string: same pattern as before, without relying on invalid
        # escape sequences in a normal string literal.  The '*' means the
        # empty string is also classified as 'alphanum'.
        return 'alphanum'
    elif token[0] in ['"', "'"] and token[0] == token[-1]:
        # Quoted string literal
        return 'alphanum'
    else:
        raise Exception("Invalid token: "+token)
|||
|
|||
# https://en.wikipedia.org/wiki/Shunting-yard_algorithm |
|||
# |
|||
# The algorithm works by maintaining three stacks: iq, stack, oq. Initially, |
|||
# the tokens are placed in order on the iq. Then, one by one, the tokens are |
|||
# processed. Values are moved immediately to the output queue. Operators are |
|||
# pushed onto the stack, but if an operator comes along with lower precedence |
|||
# then all operators on the stack with higher precedence are applied first. |
|||
# For example: |
|||
# iq = 2 + 3 * 5 + 7, stack = \, oq = \ |
|||
# iq = + 3 * 5 + 7, stack = \, oq = 2 |
|||
# iq = 3 * 5 + 7, stack = +, oq = 2 |
|||
# iq = * 5 + 7, stack = +, oq = 2 3 |
|||
# iq = 5 + 7, stack = + *, oq = 2 3 (since * > + in precedence) |
|||
# iq = + 7, stack = + *, oq = 2 3 5 |
|||
# iq = 7, stack = + +, oq = 2 [* 3 5] (since + < * in precedence, * is applied first) |
|||
# iq = \, stack = + +, oq = 2 [* 3 5] 7 |
|||
# iq = \, stack = +, oq = 2 [+ [* 3 5] 7] |
|||
# iq = \, stack = \, oq = [+ 2 [+ [* 3 5] 7] ] |
|||
# |
|||
# Functions, where function arguments begin with a left bracket preceded by |
|||
# the function name, are separated by commas, and end with a right bracket, |
|||
# are also included in this algorithm, though in a different way |
|||
def shunting_yard(tokens):
    """Build an expression tree from a flat list of tokens.

    This is a shunting-yard variant that assembles nested lists directly on
    the output queue instead of emitting reverse Polish notation:
      * a binary operator becomes [op, left, right], a unary one [op, operand];
      * f(a, b) becomes ['f', a, b];
      * a[i] becomes ['access', 'a', i];
      * a bare [x, y] becomes ['array_lit', x, y];
      * a parenthesised sub-expression is replaced by its contents;
      * a '-' that does not follow a value is rewritten as 0 - x.

    tokens -- iterable of tokens; it is copied, not modified.
    Returns the single item left on the output queue.
    Raises Exception("Wrong number of items left on stack: ...") when the
    output queue does not end up holding exactly one item.

    NOTE(review): stacked operators are reduced only when their precedence
    number is strictly smaller than the incoming one, so operators of equal
    precedence group right-to-left (a - b - c parses as a - (b - c)) --
    confirm this is intended.
    """
    pending = list(tokens)
    output = []
    ops = []
    last, current = None, None

    def reduce_top(ops, output):
        # Pop one entry off the operator stack and fold it into the output
        # queue, e.g. output = [2, 5, 3] with '+' gives [2, ['+', 5, 3]]
        op = ops.pop()
        kind = toktype(op)
        if kind == 'binary_operation':
            rhs = output.pop()
            lhs = output.pop()
            output.append([op, lhs, rhs])
        elif kind == 'unary_operation':
            output.append([op, output.pop()])
        elif kind == 'right_paren':
            # Gather everything back to the matching separator on the output
            # queue, keeping the original left-to-right order
            args = []
            while toktype(output[-1]) != 'left_paren':
                args.append(output.pop())
            args.reverse()
            output.pop()
            if op == ']':
                if args[0] != 'id':
                    output.append(['access'] + args)
                else:
                    output.append(['array_lit'] + args[1:])
            elif op == ')' and len(args) and args[0] != 'id':
                # Function call: args is already [name, arg1, arg2, ...]
                output.append(args)
            else:
                # Plain parentheses: drop the 'id' placeholder
                output.append(args[1])

    # The main loop
    while pending:
        last = current
        current = pending.pop(0)
        kind = toktype(current)
        if kind == 'alphanum':
            output.append(current)
        elif kind == 'left_paren':
            # A bracket that does not follow a value has no function name in
            # front of it, e.g. 3 * (2 + 5); use 'id' as a placeholder name
            if toktype(last) not in ('alphanum', 'right_paren'):
                output.append('id')
            # For "... f(45..." the name f is the last item on the output
            # queue.  Re-insert it after the bracket so the queue ends in
            # "( f", and push the bracket on the operator stack too, giving
            # a separator on both.
            callee = output.pop()
            output.append(current)
            output.append(callee)
            ops.append(current)
        elif kind == 'right_paren':
            # e.g. f(27, 3 * 5 + 4): finish the arithmetic of the last
            # argument, then collapse all arguments into a single list
            while len(ops) and toktype(ops[-1]) != 'left_paren':
                reduce_top(ops, output)
            if len(ops):
                ops.pop()
            ops.append(current)
            reduce_top(ops, output)
        elif kind == 'unary_operation' or kind == 'binary_operation':
            # -5 -> 0 - 5
            if current == '-' and toktype(last) not in ['alphanum', 'right_paren']:
                output.append('0')
            # Apply stacked binary operators that bind tighter first
            level = precedence[current]
            while (len(ops) and toktype(ops[-1]) == 'binary_operation'
                   and precedence[ops[-1]] < level):
                reduce_top(ops, output)
            ops.append(current)
        elif kind == 'comma':
            # Finish evaluating all arithmetic before the comma
            while len(ops) and toktype(ops[-1]) != 'left_paren':
                reduce_top(ops, output)
        elif kind == 'colon':
            # Colon is like a comma except it stays in the argument list.
            # NOTE(review): this stops at 'right_paren' whereas the comma
            # case stops at 'left_paren' -- confirm this is intended.
            while len(ops) and toktype(ops[-1]) != 'right_paren':
                reduce_top(ops, output)
            output.append(current)

    # Fold whatever operators remain
    while ops:
        reduce_top(ops, output)
    if len(output) != 1:
        raise Exception("Wrong number of items left on stack: "+str(output))
    return output[0]
|||
|
|||
def parse_line(ln):
    """Parse a single source line into a nested-list expression.

    ln -- the line; surrounding whitespace is stripped before tokenizing.
    Returns:
      ['if', cond] or ['while', cond]  for an if / while header,
      ['else if', cond]                for "else if ..." and "elif ...",
      ['else']                         for a lone else,
      ['set', target, value]           when the line contains an '=' token,
      otherwise the tree built by shunting_yard for the whole line.
    """
    tokens = tokenize(ln.strip())
    head, rest = tokens[0], tokens[1:]

    if head in ('if', 'while'):
        return [head, shunting_yard(rest)]
    if head == 'elif':
        return ['else if', shunting_yard(rest)]
    if head == 'else':
        if rest[:1] == ['if']:
            return ['else if', shunting_yard(rest[1:])]
        if not rest:
            return ['else']
        # "else <something>" falls through to the generic cases below

    if '=' in tokens:
        # Split at the first '=' token: left side is the target, right the value
        split = tokens.index('=')
        target = shunting_yard(tokens[:split])
        value = shunting_yard(tokens[split+1:])
        return ['set', target, value]

    return shunting_yard(tokens)
Loading…
Reference in new issue