/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see .
*/
/** @file Instruction.cpp
* @author Gav Wood
* @date 2014
*/
#include "Instruction.h"
#include
#include "CommonEth.h"
#include "Log.h"
using namespace std;
using namespace eth;
const std::map eth::c_instructions =
{
{ "STOP", Instruction::STOP },
{ "ADD", Instruction::ADD },
{ "SUB", Instruction::SUB },
{ "MUL", Instruction::MUL },
{ "DIV", Instruction::DIV },
{ "SDIV", Instruction::SDIV },
{ "MOD", Instruction::MOD },
{ "SMOD", Instruction::SMOD },
{ "EXP", Instruction::EXP },
{ "NEG", Instruction::NEG },
{ "LT", Instruction::LT },
{ "LE", Instruction::LE },
{ "GT", Instruction::GT },
{ "GE", Instruction::GE },
{ "EQ", Instruction::EQ },
{ "NOT", Instruction::NOT },
{ "MYADDRESS", Instruction::MYADDRESS },
{ "TXSENDER", Instruction::TXSENDER },
{ "TXVALUE", Instruction::TXVALUE },
{ "TXDATAN", Instruction::TXDATAN },
{ "TXDATA", Instruction::TXDATA },
{ "BLK_PREVHASH", Instruction::BLK_PREVHASH },
{ "BLK_COINBASE", Instruction::BLK_COINBASE },
{ "BLK_TIMESTAMP", Instruction::BLK_TIMESTAMP },
{ "BLK_NUMBER", Instruction::BLK_NUMBER },
{ "BLK_DIFFICULTY", Instruction::BLK_DIFFICULTY },
{ "BLK_NONCE", Instruction::BLK_NONCE },
{ "BASEFEE", Instruction::BASEFEE },
{ "SHA256", Instruction::SHA256 },
{ "RIPEMD160", Instruction::RIPEMD160 },
{ "ECMUL", Instruction::ECMUL },
{ "ECADD", Instruction::ECADD },
{ "ECSIGN", Instruction::ECSIGN },
{ "ECRECOVER", Instruction::ECRECOVER },
{ "ECVALID", Instruction::ECVALID },
{ "SHA3", Instruction::SHA3 },
{ "PUSH", Instruction::PUSH },
{ "POP", Instruction::POP },
{ "DUP", Instruction::DUP },
{ "SWAP", Instruction::SWAP },
{ "MLOAD", Instruction::MLOAD },
{ "MSTORE", Instruction::MSTORE },
{ "SLOAD", Instruction::SLOAD },
{ "SSTORE", Instruction::SSTORE },
{ "JMP", Instruction::JMP },
{ "JMPI", Instruction::JMPI },
{ "IND", Instruction::IND },
{ "EXTRO", Instruction::EXTRO },
{ "BALANCE", Instruction::BALANCE },
{ "MKTX", Instruction::MKTX },
{ "SUICIDE", Instruction::SUICIDE }
};
const std::map eth::c_instructionInfo =
{
{ Instruction::STOP, { "STOP", 0, 0, 0 } },
{ Instruction::ADD, { "ADD", 0, 2, 1 } },
{ Instruction::SUB, { "SUB", 0, 2, 1 } },
{ Instruction::MUL, { "MUL", 0, 2, 1 } },
{ Instruction::DIV, { "DIV", 0, 2, 1 } },
{ Instruction::SDIV, { "SDIV", 0, 2, 1 } },
{ Instruction::MOD, { "MOD", 0, 2, 1 } },
{ Instruction::SMOD, { "SMOD", 0, 2, 1 } },
{ Instruction::EXP, { "EXP", 0, 2, 1 } },
{ Instruction::NEG, { "NEG", 0, 1, 1 } },
{ Instruction::LT, { "LT", 0, 2, 1 } },
{ Instruction::LE, { "LE", 0, 2, 1 } },
{ Instruction::GT, { "GT", 0, 2, 1 } },
{ Instruction::GE, { "GE", 0, 2, 1 } },
{ Instruction::EQ, { "EQ", 0, 2, 1 } },
{ Instruction::NOT, { "NOT", 0, 1, 1 } },
{ Instruction::MYADDRESS, { "MYADDRESS", 0, 0, 1 } },
{ Instruction::TXSENDER, { "TXSENDER", 0, 0, 1 } },
{ Instruction::TXVALUE, { "TXVALUE", 0, 0, 1 } },
{ Instruction::TXDATAN, { "TXDATAN", 0, 0, 1 } },
{ Instruction::TXDATA, { "TXDATA", 0, 1, 1 } },
{ Instruction::BLK_PREVHASH, { "BLK_PREVHASH", 0, 0, 1 } },
{ Instruction::BLK_COINBASE, { "BLK_COINBASE", 0, 0, 1 } },
{ Instruction::BLK_TIMESTAMP, { "BLK_TIMESTAMP", 0, 0, 1 } },
{ Instruction::BLK_NUMBER, { "BLK_NUMBER", 0, 0, 1 } },
{ Instruction::BLK_DIFFICULTY, { "BLK_DIFFICULTY", 0, 0, 1 } },
{ Instruction::BLK_NONCE, { "BLK_NONCE", 0, 0, 1 } },
{ Instruction::BASEFEE, { "BASEFEE", 0, 0, 1 } },
{ Instruction::SHA256, { "SHA256", 0, -1, 1 } },
{ Instruction::RIPEMD160, { "RIPEMD160", 0, -1, 1 } },
{ Instruction::ECMUL, { "ECMUL", 0, 3, 2 } },
{ Instruction::ECADD, { "ECADD", 0, 4, 2 } },
{ Instruction::ECSIGN, { "ECSIGN", 0, 2, 3 } },
{ Instruction::ECRECOVER, { "ECRECOVER", 0, 4, 2 } },
{ Instruction::ECVALID, { "ECVALID", 0, 2, 1 } },
{ Instruction::SHA3, { "SHA3", 0, -1, 1 } },
{ Instruction::PUSH, { "PUSH", 1, 0, 1 } },
{ Instruction::POP, { "POP", 0, 1, 0 } },
{ Instruction::DUP, { "DUP", 0, 1, 2 } },
{ Instruction::SWAP, { "SWAP", 0, 2, 2 } },
{ Instruction::MLOAD, { "MLOAD", 0, 1, 1 } },
{ Instruction::MSTORE, { "MSTORE", 0, 2, 0 } },
{ Instruction::SLOAD, { "SLOAD", 0, 1, 1 } },
{ Instruction::SSTORE, { "SSTORE", 0, 2, 0 } },
{ Instruction::JMP, { "JMP", 0, 1, 0 } },
{ Instruction::JMPI, { "JMPI", 0, 2, 0 } },
{ Instruction::IND, { "IND", 0, 0, 1 } },
{ Instruction::EXTRO, { "EXTRO", 0, 2, 1 } },
{ Instruction::BALANCE, { "BALANCE", 0, 1, 1 } },
{ Instruction::MKTX, { "MKTX", 0, -3, 0 } },
{ Instruction::SUICIDE, { "SUICIDE", 0, 1, 0} }
};
static string readQuoted(char const*& o_d, char const* _e)
{
string ret;
bool escaped = 0;
for (++o_d; o_d != _e && (escaped || *o_d != '"'); ++o_d)
if (!escaped && *o_d == '\\')
escaped = true;
else
ret.push_back(*o_d);
if (o_d != _e)
++o_d; // skip last "
return ret;
}
static u256 readNumeric(string _v, bool _quiet)
{
u256 x = 1;
for (auto const& i: units())
if (boost::algorithm::ends_with(_v, i.second))
{
_v = _v.substr(0, _v.size() - i.second.size());
x = i.first;
break;
}
try
{
return x * u256(_v);
}
catch (...)
{
if (!_quiet)
cwarn << "Invalid numeric" << _v;
}
return 0;
}
u256s eth::assemble(std::string const& _code, bool _quiet)
{
u256s ret;
map known;
map req;
char const* d = _code.data();
char const* e = _code.data() + _code.size();
while (d != e)
{
// skip to next token
for (; d != e && !isalnum(*d) && *d != '_' && *d != ':' && *d != '"'; ++d) {}
if (d == e)
break;
if (*d == '"')
{
string s = readQuoted(d, e);
if (s.size() > 32)
{
if (!_quiet)
cwarn << "String literal > 32 characters. Cropping.";
s.resize(32);
}
h256 valHash;
memcpy(valHash.data(), s.data(), s.size());
memset(valHash.data() + s.size(), 0, 32 - s.size());
ret.push_back((u256)valHash);
}
else
{
char const* s = d;
for (; d != e && (isalnum(*d) || *d == '_' || *d == ':' || *d == '"'); ++d) {}
string t = string(s, d - s);
if (isdigit(t[0]))
ret.push_back(readNumeric(t, _quiet));
else if (t.back() == ':')
known[t.substr(0, t.size() - 1)] = (unsigned)ret.size();
else
{
auto it = c_instructions.find(boost::algorithm::to_upper_copy(t));
if (it != c_instructions.end())
ret.push_back((u256)it->second);
else
{
req[(unsigned)ret.size()] = t;
ret.push_back(0);
}
}
}
}
for (auto i: req)
if (known.count(i.second))
ret[i.first] = known[i.second];
else
if (!_quiet)
cwarn << "Unknown assembler token" << i.second << "at address" << i.first;
return ret;
}
static void appendCode(u256s& o_code, vector& o_locs, u256s _code, vector& _locs)
{
o_locs.reserve(o_locs.size() + _locs.size());
for (auto i: _locs)
{
_code[i] += (u256)o_code.size();
o_locs.push_back(i + (unsigned)o_code.size());
}
o_code.reserve(o_code.size() + _code.size());
for (auto i: _code)
o_code.push_back(i);
}
static int compileLispFragment(char const*& d, char const* e, bool _quiet, u256s& o_code, vector& o_locs)
{
std::map const c_arith = { { "+", Instruction::ADD }, { "-", Instruction::SUB }, { "*", Instruction::MUL }, { "/", Instruction::DIV }, { "%", Instruction::MOD } };
std::map const c_binary = { { "<", Instruction::LT }, { "<=", Instruction::LE }, { ">", Instruction::GT }, { ">=", Instruction::GE }, { "=", Instruction::EQ }, { "!=", Instruction::NOT } };
std::map const c_unary = { { "!", Instruction::NOT } };
std::set const c_allowed = { '+', '-', '*', '/', '%', '<', '>', '=', '!' };
bool exec = false;
int outs = 1;
while (d != e)
{
// skip to next token
for (; d != e && !isalnum(*d) && *d != '(' && *d != ')' && *d != '_' && *d != '"' && !c_allowed.count(*d) && *d != ';'; ++d) {}
if (d == e)
break;
switch (*d)
{
case ';':
for (; d != e && *d != '\n'; ++d) {}
break;
case '(':
exec = true;
++d;
break;
case ')':
if (exec)
{
++d;
return outs;
}
else
// unexpected - return false as we don't know what to do with it.
return -1;
default:
{
bool haveLiteral = false;
u256 literalValue = 0;
string t;
if (*d == '"')
{
string s = readQuoted(d, e);
if (s.size() > 32)
{
if (!_quiet)
cwarn << "String literal > 32 characters. Cropping.";
s.resize(32);
}
h256 valHash;
memcpy(valHash.data(), s.data(), s.size());
memset(valHash.data() + s.size(), 0, 32 - s.size());
literalValue = (u256)valHash;
haveLiteral = true;
}
else
{
char const* s = d;
for (; d != e && (isalnum(*d) || *d == '_' || c_allowed.count(*d)); ++d) {}
t = string(s, d - s);
if (isdigit(t[0]))
{
literalValue = readNumeric(t, _quiet);
haveLiteral = true;
}
}
if (haveLiteral)
{
bool bareLoad = true;
if (exec)
{
u256s codes;
vector locs;
if (compileLispFragment(d, e, _quiet, codes, locs))
{
appendCode(o_code, o_locs, codes, locs);
while (compileLispFragment(d, e, _quiet, codes, locs) != -1)
if (!_quiet)
cwarn << "Additional items in bare store. Ignoring.";
bareLoad = false;
}
}
o_code.push_back(Instruction::PUSH);
o_code.push_back(literalValue);
if (exec)
o_code.push_back(bareLoad ? Instruction::SLOAD : Instruction::SSTORE);
}
else
{
boost::algorithm::to_upper(t);
if (t == "IF")
{
// Compile all the code...
u256s codes[4];
vector locs[4];
for (int i = 0; i < 3; ++i)
{
int o = compileLispFragment(d, e, _quiet, codes[i], locs[i]);
if (i == 1)
outs = o;
if ((i == 0 && o != 1) || o == -1 || (i == 2 && o != outs))
return -1;
}
if (compileLispFragment(d, e, _quiet, codes[3], locs[3]) != -1)
return false;
// Push the positive location.
o_code.push_back(Instruction::PUSH);
unsigned posLocation = (unsigned)o_code.size();
o_locs.push_back(posLocation);
o_code.push_back(0);
// First fragment - predicate
appendCode(o_code, o_locs, codes[0], locs[0]);
// Jump to positive if true.
o_code.push_back(Instruction::JMPI);
// Second fragment - negative.
appendCode(o_code, o_locs, codes[2], locs[2]);
// Jump to end after negative.
o_code.push_back(Instruction::PUSH);
unsigned endLocation = (unsigned)o_code.size();
o_locs.push_back(endLocation);
o_code.push_back(0);
o_code.push_back(Instruction::JMP);
// Third fragment - positive.
o_code[posLocation] = o_code.size();
appendCode(o_code, o_locs, codes[1], locs[1]);
// At end now.
o_code[endLocation] = o_code.size();
}
else if (t == "WHEN" || t == "UNLESS")
{
outs = 0;
// Compile all the code...
u256s codes[3];
vector locs[3];
for (int i = 0; i < 2; ++i)
{
int o = compileLispFragment(d, e, _quiet, codes[i], locs[i]);
if (o == -1 || (i == 0 && o != 1))
return false;
if (i == 1)
for (int j = 0; j < o; ++j)
codes[i].push_back(Instruction::POP); // pop additional items off stack for the previous item (final item's returns get left on).
}
if (compileLispFragment(d, e, _quiet, codes[2], locs[2]) != -1)
return false;
// Push the positive location.
o_code.push_back(Instruction::PUSH);
unsigned endLocation = (unsigned)o_code.size();
o_locs.push_back(endLocation);
o_code.push_back(0);
// First fragment - predicate
appendCode(o_code, o_locs, codes[0], locs[0]);
// Jump to end...
if (t == "WHEN")
o_code.push_back(Instruction::NOT);
o_code.push_back(Instruction::JMPI);
// Second fragment - negative.
appendCode(o_code, o_locs, codes[1], locs[1]);
// At end now.
o_code[endLocation] = o_code.size();
}
else if (t == "FOR")
{
outs = 0;
// Compile all the code...
u256s codes[3];
vector locs[3];
for (int i = 0; i < 2; ++i)
{
int o = compileLispFragment(d, e, _quiet, codes[i], locs[i]);
if (o == -1 || (i == 0 && o != 1))
return false;
if (i == 1)
for (int j = 0; j < o; ++j)
codes[i].push_back(Instruction::POP); // pop additional items off stack for the previous item (final item's returns get left on).
}
if (compileLispFragment(d, e, _quiet, codes[2], locs[2]) != -1)
return false;
unsigned startLocation = (unsigned)o_code.size();
// Push the positive location.
o_code.push_back(Instruction::PUSH);
unsigned endInsertion = (unsigned)o_code.size();
o_locs.push_back(endInsertion);
o_code.push_back(0);
// First fragment - predicate
appendCode(o_code, o_locs, codes[0], locs[0]);
// Jump to positive if true.
o_code.push_back(Instruction::NOT);
o_code.push_back(Instruction::JMPI);
// Second fragment - negative.
appendCode(o_code, o_locs, codes[1], locs[1]);
// Jump to end after negative.
o_code.push_back(Instruction::PUSH);
o_locs.push_back((unsigned)o_code.size());
o_code.push_back(startLocation);
o_code.push_back(Instruction::JMP);
// At end now.
o_code[endInsertion] = o_code.size();
}
else if (t == "SEQ")
{
while (d != e)
{
u256s codes;
vector locs;
outs = 0;
int o;
if ((o = compileLispFragment(d, e, _quiet, codes, locs)) > -1)
{
for (int i = 0; i < outs; ++i)
o_code.push_back(Instruction::POP); // pop additional items off stack for the previous item (final item's returns get left on).
outs = o;
appendCode(o_code, o_locs, codes, locs);
}
else
break;
}
}
else if (t == "CALL")
{
if (exec)
{
vector>> codes(1);
int totalArgs = 0;
while (d != e)
{
int o = compileLispFragment(d, e, _quiet, codes.back().first, codes.back().second);
if (o < 1)
break;
codes.push_back(pair>());
totalArgs += o;
}
if (totalArgs < 2)
{
cwarn << "Expected at least 2 arguments to CALL; got" << totalArgs << ".";
break;
}
unsigned datan = codes.size() - 3;
unsigned i = 0;
for (auto it = codes.rbegin(); it != codes.rend(); ++it, ++i)
{
appendCode(o_code, o_locs, it->first, it->second);
if (i == datan)
{
o_code.push_back(Instruction::PUSH);
o_code.push_back(datan);
}
}
o_code.push_back(Instruction::MKTX);
outs = 0;
}
}
else if (t == "MULTI")
{
while (d != e)
{
u256s codes;
vector locs;
outs = 0;
int o;
if ((o = compileLispFragment(d, e, _quiet, codes, locs)) > -1)
{
outs += o;
appendCode(o_code, o_locs, codes, locs);
}
else
break;
}
}
else if (t == "AND")
{
vector codes;
vector> locs;
while (d != e)
{
codes.resize(codes.size() + 1);
locs.resize(locs.size() + 1);
{
int o = compileLispFragment(d, e, _quiet, codes.back(), locs.back());
if (o != 1)
return false;
}
if (compileLispFragment(d, e, _quiet, codes.back(), locs.back()) != -1)
break;
}
// last one is empty.
if (codes.size() < 2)
return false;
codes.pop_back();
locs.pop_back();
vector ends;
if (codes.size() > 1)
{
o_code.push_back(Instruction::PUSH);
o_code.push_back(0);
for (unsigned i = 1; i < codes.size(); ++i)
{
// Push the false location.
o_code.push_back(Instruction::PUSH);
ends.push_back((unsigned)o_code.size());
o_locs.push_back(ends.back());
o_code.push_back(0);
// Check if true - predicate
appendCode(o_code, o_locs, codes[i - 1], locs[i - 1]);
// Jump to end...
o_code.push_back(Instruction::NOT);
o_code.push_back(Instruction::JMPI);
}
o_code.push_back(Instruction::POP);
}
// Check if true - predicate
appendCode(o_code, o_locs, codes.back(), locs.back());
// At end now.
for (auto i: ends)
o_code[i] = o_code.size();
}
else if (t == "OR")
{
vector codes;
vector> locs;
while (d != e)
{
codes.resize(codes.size() + 1);
locs.resize(locs.size() + 1);
{
int o = compileLispFragment(d, e, _quiet, codes.back(), locs.back());
if (o != 1)
return false;
}
}
// last one is empty.
if (codes.size() < 2)
return false;
codes.pop_back();
locs.pop_back();
vector ends;
if (codes.size() > 1)
{
o_code.push_back(Instruction::PUSH);
o_code.push_back(1);
for (unsigned i = 1; i < codes.size(); ++i)
{
// Push the false location.
o_code.push_back(Instruction::PUSH);
ends.push_back((unsigned)o_code.size());
o_locs.push_back(ends.back());
o_code.push_back(0);
// Check if true - predicate
appendCode(o_code, o_locs, codes[i - 1], locs[i - 1]);
// Jump to end...
o_code.push_back(Instruction::JMPI);
}
o_code.push_back(Instruction::POP);
}
// Check if true - predicate
appendCode(o_code, o_locs, codes.back(), locs.back());
// At end now.
for (auto i: ends)
o_code[i] = o_code.size();
}
else
{
auto it = c_instructions.find(t);
if (it != c_instructions.end())
{
if (exec)
{
vector>> codes(1);
int totalArgs = 0;
while (d != e)
{
int o = compileLispFragment(d, e, _quiet, codes.back().first, codes.back().second);
if (o < 1)
break;
codes.push_back(pair>());
totalArgs += o;
}
int ea = c_instructionInfo.at(it->second).args;
if ((ea >= 0 && totalArgs != ea) || (ea < 0 && totalArgs < -ea))
{
cwarn << "Expected " << (ea < 0 ? "at least" : "exactly") << abs(ea) << "arguments to operation" << t << "; got" << totalArgs << ".";
break;
}
for (auto it = codes.rbegin(); it != codes.rend(); ++it)
appendCode(o_code, o_locs, it->first, it->second);
o_code.push_back((u256)it->second);
outs = c_instructionInfo.at(it->second).ret;
}
else
{
o_code.push_back(Instruction::PUSH);
o_code.push_back(it->second);
}
}
else
{
auto it = c_arith.find(t);
if (it != c_arith.end())
{
int i = 0;
while (d != e)
{
u256s codes;
vector locs;
int o = compileLispFragment(d, e, _quiet, codes, locs);
if (o == -1)
{
if (!i)
cwarn << "Expected at least one argument to operation" << t;
break;
}
if (o == 0)
{
cwarn << "null operation given to " << t;
break;
}
appendCode(o_code, o_locs, codes, locs);
i += o;
for (; i > 1; --i)
o_code.push_back((u256)it->second);
}
}
else
{
auto it = c_binary.find(t);
if (it != c_binary.end())
{
vector>> codes(1);
int totalArgs = 0;
while (d != e)
{
int o = compileLispFragment(d, e, _quiet, codes.back().first, codes.back().second);
if (o < 1)
break;
codes.push_back(pair>());
totalArgs += o;
}
codes.pop_back();
// int i = (int)codes.size();
if (totalArgs != 2)
{
cwarn << "Expected two arguments to binary operator" << t << "; got" << totalArgs << ".";
break;
}
for (auto it = codes.rbegin(); it != codes.rend(); ++it)
appendCode(o_code, o_locs, it->first, it->second);
if (it->second == Instruction::NOT)
o_code.push_back(Instruction::EQ);
o_code.push_back((u256)it->second);
}
else
{
auto it = c_unary.find(t);
if (it != c_unary.end())
{
vector>> codes(1);
int totalArgs = 0;
while (d != e)
{
int o = compileLispFragment(d, e, _quiet, codes.back().first, codes.back().second);
if (o == -1)
break;
totalArgs += o;
codes.push_back(pair>());
}
codes.pop_back();
// int i = (int)codes.size();
if (totalArgs != 1)
{
cwarn << "Expected one argument to unary operator" << t << "; got" << totalArgs << ".";
break;
}
for (auto it = codes.rbegin(); it != codes.rend(); ++it)
appendCode(o_code, o_locs, it->first, it->second);
o_code.push_back(it->second);
}
else if (!_quiet)
cwarn << "Unknown assembler token" << t;
}
}
}
}
}
if (!exec)
return outs;
}
}
}
return -1;
}
u256s eth::compileLisp(std::string const& _code, bool _quiet)
{
char const* d = _code.data();
char const* e = _code.data() + _code.size();
u256s ret;
vector locs;
compileLispFragment(d, e, _quiet, ret, locs);
return ret;
}
string eth::disassemble(u256s const& _mem)
{
stringstream ret;
uint numerics = 0;
for (auto it = _mem.begin(); it != _mem.end(); ++it)
{
u256 n = *it;
auto iit = c_instructionInfo.find((Instruction)(uint)n);
if (numerics || iit == c_instructionInfo.end() || (u256)(uint)iit->first != n) // not an instruction or expecting an argument...
{
if (numerics)
numerics--;
ret << "0x" << hex << n << " ";
}
else
{
auto const& ii = iit->second;
ret << ii.name << " ";
numerics = ii.additional;
}
}
return ret.str();
}