Browse Source

Full Trie I/O implementation with tests and cross-checker with

alternative implementation.
cl-refactor
Gav Wood 11 years ago
parent
commit
8c6cadeddf
  1. 4
      Common.h
  2. 90
      PatriciaTree.cpp
  3. 380
      PatriciaTree.h
  4. 3
      RLP.cpp
  5. 43
      RLP.h
  6. 1
      foreign.h
  7. 91
      main.cpp

4
Common.h

@ -23,11 +23,11 @@ using sint = int64_t;
/// Renders @a _t as text by streaming it through operator<< into a string buffer.
/// @returns whatever operator<< wrote for @a _t.
template <class _T> std::string toString(_T const& _t)
{
	std::ostringstream buffer;
	buffer << _t;
	return buffer.str();
}
template <class _T> inline std::string asHex(_T const& _data)
template <class _T> inline std::string asHex(_T const& _data, int _w = 2)
{
std::ostringstream ret;
for (auto i: _data)
ret << std::hex << std::setfill('0') << std::setw(2) << (int)i;
ret << std::hex << std::setfill('0') << std::setw(_w) << (int)i;
return ret.str();
}

90
PatriciaTree.cpp

@ -2,6 +2,9 @@
#include "PatriciaTree.h"
using namespace std;
using namespace eth;
bool eth::g_hashDebug = false;
/*
PatriciaTree::PatriciaTree(RLP const& _data)
{
@ -21,3 +24,90 @@ PatriciaTree::PatriciaTree(RLP const& _data)
}
}
*/
/// Builds the smallest sub-trie that holds two distinct key/value pairs.
/// The keys are split at their common prefix: a branch node is created at the
/// first position where they diverge (or where one of them ends), and, when the
/// common prefix is non-empty, that branch is wrapped in an infix node carrying it.
/// @returns the root of the newly allocated sub-trie.
TrieNode* TrieNode::newBranch(fConstBytes _k1, std::string const& _v1, fConstBytes _k2, std::string const& _v2)
{
	uint const shared = commonPrefix(_k1, _k2);

	TrieNode* fork = nullptr;
	if (_k1.size() == shared)
	{
		// first key ends at the fork: its value lives in the branch node itself.
		TrieNode* tail = new TrieLeafNode(_k2.cropped(shared + 1), _v2);
		fork = new TrieBranchNode(_k2[shared], tail, _v1);
	}
	else if (_k2.size() == shared)
	{
		// second key ends at the fork.
		TrieNode* tail = new TrieLeafNode(_k1.cropped(shared + 1), _v1);
		fork = new TrieBranchNode(_k1[shared], tail, _v2);
	}
	else
	{
		// both keys carry on past the fork, each into its own leaf.
		TrieNode* firstTail = new TrieLeafNode(_k1.cropped(shared + 1), _v1);
		TrieNode* secondTail = new TrieLeafNode(_k2.cropped(shared + 1), _v2);
		fork = new TrieBranchNode(_k1[shared], firstTail, _k2[shared], secondTail);
	}

	if (!shared)
		return fork;
	// a shared prefix exists - hang the fork beneath an infix node that holds it.
	return new TrieInfixNode(_k1.cropped(0, shared), fork);
}
/// Stores @a _value under @a _key relative to this branch.
/// An empty key addresses the branch's own value; otherwise the first key
/// element selects the child slot and the rest of the key is handed down.
/// @returns this node (a branch is never replaced by an insertion).
TrieNode* TrieBranchNode::insert(fConstBytes _key, std::string const& _value)
{
	assert(_value.size());
	mark();

	if (_key.empty())
	{
		m_value = _value;
		return this;
	}

	TrieNode*& slot = m_nodes[_key[0]];
	if (slot)
		// the child may replace itself, so keep whatever it hands back.
		slot = slot->insert(_key.cropped(1), _value);
	else
		slot = new TrieLeafNode(_key.cropped(1), _value);
	return this;
}
/// Removes the entry stored under @a _key relative to this branch.
/// @returns the node that should take this node's place: this node when
/// nothing matched, otherwise whatever rejig() decides after the removal.
TrieNode* TrieBranchNode::remove(fConstBytes _key)
{
	if (_key.empty())
	{
		// the key addresses the branch's own value.
		if (m_value.empty())
			return this;
		m_value.clear();
		return rejig();
	}

	TrieNode*& slot = m_nodes[_key[0]];
	if (slot == nullptr)
		return this;

	slot = slot->remove(_key.cropped(1));
	return rejig();
}
/// Re-normalises this branch after an entry has been removed from it.
/// Depending on what is left, the branch may delete itself and hand back a
/// replacement node; callers must use the returned pointer in place of `this`.
/// @returns a new leaf (value only, no children), a new infix node or the
/// surviving child (exactly one child, no value), or this node otherwise.
TrieNode* TrieBranchNode::rejig()
{
// the contents have changed, so reset the stored hash.
mark();
// (byte)-1: no children; 16: more than one child; otherwise the index of the only child.
byte n = activeBranch();
if (n == (byte)-1 && m_value.size())
{
// switch to leaf
// r is built from m_value before this node (and m_value with it) is destroyed.
auto r = new TrieLeafNode(fConstBytes(), m_value);
delete this;
return r;
}
else if (n < 16 && m_value.empty())
{
// only branching to n...
if (auto b = dynamic_cast<TrieBranchNode*>(m_nodes[n]))
{
// switch to infix
// detach the child first so that ~TrieBranchNode() does not delete it.
m_nodes[n] = nullptr;
delete this;
// n and b are locals, so they remain valid after the delete above.
return new TrieInfixNode(fConstBytes(&n, 1), b);
}
else
{
// any non-branch child is expected to be a TrieExtNode (leaf or infix).
auto x = dynamic_cast<TrieExtNode*>(m_nodes[n]);
assert(x);
// include in child
// the slot index becomes the first element of the child's key fragment.
pushFront(x->m_ext, n);
// detach the child so that the destructor leaves it alive.
m_nodes[n] = nullptr;
delete this;
return x;
}
}
// multiple children, or one child plus a value: still a legitimate branch.
// NOTE(review): a branch left with neither children nor value also lands here and is kept as-is.
return this;
}

380
PatriciaTree.h

@ -4,12 +4,16 @@
#include "RLP.h"
#include "sha256.h"
#define ENABLE_DEBUG_PRINT 1
namespace eth
{
using StringMap = std::map<std::string, std::string>;
using HexMap = std::map<bytes, std::string>;
extern bool g_hashDebug;
/*
* Hex-prefix Notation. First nibble has flags: oddness = 2^0 & termination = 2^1
* [0,0,1,2,3,4,5] 0x10012345
@ -66,6 +70,10 @@ inline std::string toBigEndianString(u256 _val)
inline u256 hash256aux(HexMap const& _s, HexMap::const_iterator _begin, HexMap::const_iterator _end, unsigned _preLen)
{
static std::string s_indent;
if (_preLen)
s_indent += " ";
RLPStream rlp;
if (_begin == _end)
{
@ -75,6 +83,8 @@ inline u256 hash256aux(HexMap const& _s, HexMap::const_iterator _begin, HexMap::
{
// only one left - terminate with the pair.
rlp << RLPList(2) << hexPrefixEncode(_begin->first, true, _preLen) << _begin->second;
if (g_hashDebug)
std::cerr << s_indent << asHex(fConstBytes(_begin->first.data() + _preLen, _begin->first.size() - _preLen), 1) << ": " << _begin->second << " = " << sha256(rlp.out()) << std::endl;
}
else
{
@ -92,7 +102,11 @@ inline u256 hash256aux(HexMap const& _s, HexMap::const_iterator _begin, HexMap::
if (sharedPre > _preLen)
{
// if they all have the same next nibble, we also want a pair.
if (g_hashDebug)
std::cerr << s_indent << asHex(fConstBytes(_begin->first.data() + _preLen, sharedPre), 1) << ": " << std::endl;
rlp << RLPList(2) << hexPrefixEncode(_begin->first, false, _preLen, sharedPre) << toBigEndianString(hash256aux(_s, _begin, _end, sharedPre));
if (g_hashDebug)
std::cerr << s_indent << "= " << sha256(rlp.out()) << std::endl;
}
else
{
@ -100,7 +114,11 @@ inline u256 hash256aux(HexMap const& _s, HexMap::const_iterator _begin, HexMap::
rlp << RLPList(17);
auto b = _begin;
if (_preLen == b->first.size())
{
if (g_hashDebug)
std::cerr << s_indent << "@: " << b->second << std::endl;
++b;
}
for (auto i = 0; i < 16; ++i)
{
auto n = b;
@ -108,16 +126,26 @@ inline u256 hash256aux(HexMap const& _s, HexMap::const_iterator _begin, HexMap::
if (b == n)
rlp << "";
else
{
if (g_hashDebug)
std::cerr << s_indent << std::hex << i << ": " << std::endl;
rlp << toBigEndianString(hash256aux(_s, b, n, _preLen + 1));
}
b = n;
}
if (_preLen == _begin->first.size())
rlp << _begin->second;
else
rlp << "";
if (g_hashDebug)
std::cerr << s_indent << "= " << sha256(rlp.out()) << std::endl;
}
}
// std::cout << std::hex << sha256(rlp.out()) << ": " << asHex(rlp.out()) << ": " << RLP(rlp.out()) << std::endl;
// if (g_hashDebug)
// std::cerr << std::hex << sha256(rlp.out()) << ": " << asHex(rlp.out()) << ": " << RLP(rlp.out()) << std::endl;
if (_preLen)
s_indent.resize(s_indent.size() - 2);
return sha256(rlp.out());
}
@ -125,7 +153,7 @@ inline u256 hash256(StringMap const& _s)
{
// build patricia tree.
if (_s.empty())
return 0;
return sha256(RLPNull);
HexMap hexMap;
for (auto i = _s.rbegin(); i != _s.rend(); ++i)
hexMap[toHex(i->first)] = i->second;
@ -134,20 +162,358 @@ inline u256 hash256(StringMap const& _s)
return hash256aux(hexMap, hexMap.cbegin(), hexMap.cend(), 0);
}
/// @returns the number of leading elements on which @a _t and @a _u agree,
/// i.e. the index of the first mismatch, capped at the shorter length.
template <class _T, class _U> uint commonPrefix(_T const& _t, _U const& _u)
{
	uint const limit = std::min<uint>(_t.size(), _u.size());
	for (uint i = 0; i < limit; ++i)
		if (_t[i] != _u[i])
			return i;
	return limit;
}
/**
* @brief Merkle Patricia Tree: a modified base-16 Radix tree.
*/
class PatriciaTree
/// Abstract base of all trie nodes.
/// insert() and remove() return the node that must replace the callee in its
/// parent (the implementations in this file may delete the callee and return
/// a different node, or nullptr), so callers always store the returned pointer.
class TrieNode
{
public:
TrieNode() {}
virtual ~TrieNode() {}
/// @returns the value stored under @a _key relative to this node.
virtual std::string const& at(fConstBytes _key) const = 0;
/// Stores @a _value under @a _key; @returns the node to use in place of this one.
virtual TrieNode* insert(fConstBytes _key, std::string const& _value) = 0;
/// Removes the entry under @a _key; @returns the node to use in place of this one.
virtual TrieNode* remove(fConstBytes _key) = 0;
/// @returns the serialised form of this node; sha256() hashes exactly these bytes.
virtual bytes rlp() const = 0;
#if ENABLE_DEBUG_PRINT
/// Writes this node's hash followed by ":" to std::cerr, then the node-specific body.
void debugPrint(std::string const& _indent = "") const { std::cerr << std::hex << sha256() << ":" << std::endl; debugPrintBody(_indent); }
#endif
/// @returns the hash of rlp(). The cache check is commented out, so the hash is recomputed on every call.
u256 sha256() const { /*if (!m_sha256)*/ m_sha256 = eth::sha256(rlp()); return m_sha256; }
/// Resets the stored hash to zero; the node implementations call this when they are modified.
void mark() { m_sha256 = 0; }
protected:
#if ENABLE_DEBUG_PRINT
/// Writes the node-specific part of the debug dump to std::cerr.
virtual void debugPrintBody(std::string const& _indent = "") const = 0;
#endif
/// Builds a new sub-trie holding the two given key/value pairs (defined in PatriciaTree.cpp).
static TrieNode* newBranch(fConstBytes _k1, std::string const& _v1, fConstBytes _k2, std::string const& _v2);
private:
/// Last hash computed by sha256(); zero after mark().
mutable u256 m_sha256 = 0;
};
static const std::string c_nullString;
/// Base for the node kinds that carry a key fragment of their own
/// (TrieLeafNode and TrieInfixNode derive from it).
class TrieExtNode: public TrieNode
{
public:
/// Copies the elements referenced by @a _bytes into m_ext.
TrieExtNode(fConstBytes _bytes): m_ext(_bytes.begin(), _bytes.end()) {}
/// Key fragment held by this node. Public: TrieBranchNode::rejig() and
/// TrieInfixNode::remove() edit it directly when merging nodes.
bytes m_ext;
};
class TrieBranchNode: public TrieNode
{
public:
TrieBranchNode(std::string const& _value): m_value(_value)
{
memset(m_nodes.data(), 0, sizeof(TrieNode*) * 16);
}
TrieBranchNode(byte _i1, TrieNode* _n1, std::string const& _value = std::string()): m_value(_value)
{
memset(m_nodes.data(), 0, sizeof(TrieNode*) * 16);
m_nodes[_i1] = _n1;
}
TrieBranchNode(byte _i1, TrieNode* _n1, byte _i2, TrieNode* _n2)
{
memset(m_nodes.data(), 0, sizeof(TrieNode*) * 16);
m_nodes[_i1] = _n1;
m_nodes[_i2] = _n2;
}
virtual ~TrieBranchNode()
{
for (auto i: m_nodes)
delete i;
}
#if ENABLE_DEBUG_PRINT
virtual void debugPrintBody(std::string const& _indent) const
{
if (m_value.size())
std::cerr << _indent << "@: " << m_value << std::endl;
for (auto i = 0; i < 16; ++i)
if (m_nodes[i])
{
std::cerr << _indent << std::hex << i << ": ";
m_nodes[i]->debugPrint(_indent + " ");
}
}
#endif
virtual std::string const& at(fConstBytes _key) const override
{
if (_key.empty())
return m_value;
else if (m_nodes[_key[0]] != nullptr)
return m_nodes[_key[0]]->at(_key.cropped(1));
return c_nullString;
}
virtual TrieNode* insert(fConstBytes _key, std::string const& _value) override;
virtual TrieNode* remove(fConstBytes _key) override;
virtual bytes rlp() const override
{
RLPStream s;
s << RLPList(17);
for (auto i: m_nodes)
s << (i ? toBigEndianString(i->sha256()) : "");
s << m_value;
return s.out();
}
private:
/// @returns (byte)-1 when no active branches, 16 when multiple active and the index of the active branch otherwise.
byte activeBranch() const
{
byte n = (byte)-1;
for (int i = 0; i < 16; ++i)
if (m_nodes[i] != nullptr)
{
if (n == (byte)-1)
n = i;
else
return 16;
}
return n;
}
TrieNode* rejig();
std::array<TrieNode*, 16> m_nodes;
std::string m_value;
};
/// Terminal trie node: holds the remaining key fragment (m_ext) together with
/// the value stored under it.
class TrieLeafNode: public TrieExtNode
{
public:
	TrieLeafNode(fConstBytes _key, std::string const& _value): TrieExtNode(_key), m_value(_value) {}

#if ENABLE_DEBUG_PRINT
	/// Prints "<fragment>: <value>" to std::cerr, using "@" when the fragment is empty.
	virtual void debugPrintBody(std::string const& _indent) const
	{
		assert(m_value.size());
		std::cerr << _indent;
		if (m_ext.empty())
			std::cerr << "@: ";
		else
			std::cerr << asHex(m_ext, 1) << ": ";
		std::cerr << m_value << std::endl;
	}
#endif

	/// @returns the stored value when @a _key equals this leaf's fragment, else c_nullString.
	virtual std::string const& at(fConstBytes _key) const override
	{
		if (contains(_key))
			return m_value;
		return c_nullString;
	}

	/// Overwrites the value on an exact key match; otherwise this leaf is
	/// replaced by a freshly built sub-trie holding both entries and deleted.
	/// @returns the node to use in place of this one.
	virtual TrieNode* insert(fConstBytes _key, std::string const& _value) override
	{
		assert(_value.size());
		mark();
		if (!contains(_key))
		{
			// build the replacement first - it copies from m_ext and m_value.
			TrieNode* replacement = TrieNode::newBranch(_key, _value, fConstBytes(&m_ext), m_value);
			delete this;
			return replacement;
		}
		m_value = _value;
		return this;
	}

	/// Deletes this leaf on an exact key match.
	/// @returns nullptr when the leaf was removed, this node otherwise.
	virtual TrieNode* remove(fConstBytes _key) override
	{
		if (!contains(_key))
			return this;
		delete this;
		return nullptr;
	}

	/// @returns a two-item list: the hex-prefix encoded fragment (terminated) and the value.
	virtual bytes rlp() const override
	{
		RLPStream stream;
		stream << RLPList(2) << hexPrefixEncode(m_ext, true) << m_value;
		return stream.out();
	}

private:
	/// @returns true when @a _key is element-for-element identical to m_ext.
	bool contains(fConstBytes _key) const
	{
		if (_key.size() != m_ext.size())
			return false;
		return !memcmp(_key.data(), m_ext.data(), _key.size());
	}

	std::string m_value;
};
/// Removes the first @a _elements elements of the sequence container @a _t.
/// Asking for more elements than the container holds empties it (the previous
/// memmove-based version underflowed its unsigned length in that case), and
/// element types no longer need to be trivially copyable.
template <class _T> void trimFront(_T& _t, uint _elements)
{
	if (_elements >= _t.size())
	{
		_t.clear();
		return;
	}
	_t.erase(_t.begin(), _t.begin() + _elements);
}
/// Inserts @a _e as the new first element of the sequence container @a _t.
/// @a _e is converted to the container's element type; the previous version
/// shifted the contents with a byte count based on sizeof(_e), which corrupted
/// the container whenever _U and the element type differed in size.
template <class _T, class _U> void pushFront(_T& _t, _U _e)
{
	_t.insert(_t.begin(), _e);
}
class TrieInfixNode: public TrieExtNode
{
public:
PatriciaTree() {}
~PatriciaTree() {}
TrieInfixNode(fConstBytes _key, TrieNode* _next): TrieExtNode(_key), m_next(_next) {}
virtual ~TrieInfixNode() { delete m_next; }
void fromRLP(RLP const& _data);
std::string toRLP();
#if ENABLE_DEBUG_PRINT
/// Prints this node's key fragment to std::cerr, then the dump of the node it leads to.
virtual void debugPrintBody(std::string const& _indent) const
{
	std::cerr << _indent;
	std::cerr << asHex(m_ext, 1) << ": ";
	std::string const childIndent = _indent + " ";
	m_next->debugPrint(childIndent);
}
#endif
/// @returns the value found by following @a _key past this node's fragment,
/// or c_nullString when @a _key does not start with that fragment.
virtual std::string const& at(fConstBytes _key) const override
{
	assert(m_next);
	if (!contains(_key))
		return c_nullString;
	return m_next->at(_key.cropped(m_ext.size()));
}
/// Stores @a _value under @a _key relative to this infix node.
/// If @a _key starts with this node's fragment the insertion is delegated to
/// m_next; otherwise the fragment is split at the point of divergence.
/// @returns the node to use in place of this one (this node may have been deleted).
virtual TrieNode* insert(fConstBytes _key, std::string const& _value) override
{
assert(_value.size());
mark();
if (contains(_key))
{
// key continues through this fragment: pass the remainder on to the child.
m_next = m_next->insert(_key.cropped(m_ext.size()), _value);
return this;
}
else
{
// number of leading elements the key and this fragment have in common.
int prefix = commonPrefix(_key, m_ext);
if (prefix)
{
// one infix becomes two infixes, then insert into the second
// instead of pop_front()...
// this node keeps only the part of its fragment after the shared prefix...
trimFront(m_ext, prefix);
// ...and the recursive call (now with no shared prefix) takes the split path below;
// its result is placed beneath a new infix node holding the shared prefix.
return new TrieInfixNode(_key.cropped(0, prefix), insert(_key.cropped(prefix), _value));
}
else
{
// split here.
// f is the slot of the new branch that the existing content goes into.
auto f = m_ext[0];
trimFront(m_ext, 1);
// if nothing is left of the fragment this infix is redundant: link the child directly.
TrieNode* n = m_ext.empty() ? m_next : this;
if (n != this)
{
// detach the child first so that ~TrieInfixNode() does not delete it.
m_next = nullptr;
delete this;
}
TrieBranchNode* ret = new TrieBranchNode(f, n);
// the return value is not needed: TrieBranchNode::insert() always returns the branch itself.
ret->insert(_key, _value);
return ret;
}
}
}
/// Removes the entry under @a _key relative to this infix node.
/// Keys that do not start with this node's fragment are ignored.
/// @returns the node to use in place of this one: the merged child, nullptr
/// when the whole subtree has gone, or this node.
virtual TrieNode* remove(fConstBytes _key) override
{
if (contains(_key))
{
mark();
m_next = m_next->remove(_key.cropped(m_ext.size()));
// a child that is (or has become) a leaf or infix can absorb this node's fragment.
if (auto p = dynamic_cast<TrieExtNode*>(m_next))
{
// merge with child...
// build "own fragment + child fragment" in m_ext, then hand the result to the child.
m_ext.reserve(m_ext.size() + p->m_ext.size());
for (auto i: p->m_ext)
m_ext.push_back(i);
p->m_ext = m_ext;
p->mark();
// detach the child so that the destructor leaves it alive.
m_next = nullptr;
delete this;
return p;
}
// the child removed itself entirely: this infix has nothing left to point at.
if (!m_next)
{
delete this;
return nullptr;
}
}
return this;
}
/// @returns a two-item list: the hex-prefix encoded fragment (not terminated)
/// and the hash of the node this infix leads to.
virtual bytes rlp() const override
{
	assert(m_next);
	RLPStream stream;
	stream << RLPList(2);
	stream << hexPrefixEncode(m_ext, false);
	stream << toBigEndianString(m_next->sha256());
	return stream.out();
}
private:
/// @returns true when @a _key begins with this node's fragment.
bool contains(fConstBytes _key) const
{
	if (_key.size() < m_ext.size())
		return false;
	return !memcmp(_key.data(), m_ext.data(), m_ext.size());
}
TrieNode* m_next;
};
/// String-keyed front end to the node classes: converts keys with toHex() and
/// keeps track of the root node, which insert()/remove() may replace.
/// The trie owns its root node and is therefore non-copyable.
class Trie
{
public:
	Trie(): m_root(nullptr) {}
	// m_root is an owning raw pointer; a member-wise copy would delete it twice.
	Trie(Trie const&) = delete;
	Trie& operator=(Trie const&) = delete;
	~Trie() { delete m_root; }

	/// @returns the root node's hash, or the hash of RLPNull for an empty trie.
	u256 sha256() const { return m_root ? m_root->sha256() : eth::sha256(RLPNull); }

	/// @returns the root node's serialisation, or RLPNull for an empty trie.
	bytes rlp() const { return m_root ? m_root->rlp() : RLPNull; }

	/// Dumps the trie to std::cerr; does nothing when ENABLE_DEBUG_PRINT is off.
	void debugPrint()
	{
#if ENABLE_DEBUG_PRINT
		if (m_root)
			m_root->debugPrint();
#endif
	}

	/// @returns the value stored under @a _key, or c_nullString when there is none.
	std::string const& at(std::string const& _key) const
	{
		if (!m_root)
			return c_nullString;
		auto h = toHex(_key);
		return m_root->at(fConstBytes(&h));
	}

	/// Stores @a _value under @a _key. An empty @a _value means "remove the key".
	void insert(std::string const& _key, std::string const& _value)
	{
		if (_value.empty())
		{
			// must stop here: the node classes assert that values are non-empty,
			// and an empty value would otherwise be stored as a regular entry.
			remove(_key);
			return;
		}
		auto h = toHex(_key);
		m_root = m_root ? m_root->insert(&h, _value) : new TrieLeafNode(fConstBytes(&h), _value);
	}

	/// Removes @a _key if present; the root may be replaced or become null.
	void remove(std::string const& _key)
	{
		if (m_root)
		{
			auto h = toHex(_key);
			m_root = m_root->remove(&h);
		}
	}

private:
	TrieNode* m_root;	///< Owned root node; nullptr when the trie is empty.
};
}

3
RLP.cpp

@ -1,2 +1,5 @@
#include "RLP.h"
using namespace std;
using namespace eth;
bytes eth::RLPNull = rlpBytes("");

43
RLP.h

@ -293,6 +293,49 @@ private:
bytes m_out;
};
/// Recursion terminator: streams the last remaining item into @a _out.
template <class _T> void rlpListAux(RLPStream& _out, _T _t)
{
	_out << _t;
}

/// Streams @a _t into @a _out, then recurses on the remaining items in order.
template <class _T, class ... _Ts> void rlpListAux(RLPStream& _out, _T _t, _Ts ... _ts)
{
	_out << _t;
	rlpListAux(_out, _ts...);
}
template <class _T> std::string rlp(_T _t)
{
RLPStream out;
out << _t;
return out.str();
}
/// @returns the result of streaming the single item @a _t into an RLPStream, as bytes.
template <class _T> bytes rlpBytes(_T _t)
{
	RLPStream stream;
	stream << _t;
	return stream.out();
}
template <class ... _Ts> std::string rlpList(_Ts ... _ts)
{
RLPStream out;
out << RLPList(sizeof ...(_Ts));
rlpListAux(out, _ts...);
return out.str();
}
/// Streams a list header sized to the number of arguments, then each argument in order.
/// @returns the stream contents as bytes.
template <class ... _Ts> bytes rlpListBytes(_Ts ... _ts)
{
	RLPStream stream;
	stream << RLPList(sizeof ...(_Ts));
	rlpListAux(stream, _ts...);
	return stream.out();
}
extern bytes RLPNull;
}
inline std::string escaped(std::string const& _s, bool _all = true)

1
foreign.h

@ -27,6 +27,7 @@ public:
/// @returns the pointer to the first referenced element (cropped() treats a null pointer as "no data").
_T* data() const { return m_data; }
/// @returns the number of referenced elements.
unsigned count() const { return m_count; }
/// @returns the number of referenced elements; same value as count().
unsigned size() const { return m_count; }
unsigned empty() const { return !m_count; }
/// @returns a view of the same length that starts directly after the last element of this one.
foreign<_T> next() const { return foreign<_T>(m_data + m_count, m_count); }
/// @returns the sub-view of @a _count elements starting at index @a _begin; a negative
/// @a _count means "up to the end". A default-constructed view is returned when there is
/// no data or the requested range does not fit inside this view.
foreign<_T> cropped(unsigned _begin, int _count = -1) const { if (m_data && _begin + std::max(0, _count) <= m_count) return foreign<_T>(m_data + _begin, _count < 0 ? m_count - _begin : _count); else return foreign<_T>(); }
/// Re-points this view at @a _s elements starting at @a _d.
void retarget(_T const* _d, size_t _s) { m_data = _d; m_count = _s; }

91
main.cpp

@ -1,3 +1,4 @@
#include <random>
#include "Common.h"
#include "RLP.h"
#include "PatriciaTree.h"
@ -5,38 +6,74 @@
using namespace std;
using namespace eth;
template <class _T> void rlpListAux(RLPStream& _out, _T _t)
std::string randomWord()
{
_out << _t;
}
template <class _T, class ... _Ts> void rlpListAux(RLPStream& _out, _T _t, _Ts ... _ts)
{
_out << _t;
rlpListAux(_out, _ts...);
}
template <class _T> std::string rlp(_T _t)
{
RLPStream out;
out << _t;
return out.str();
}
template <class ... _Ts> std::string rlpList(_Ts ... _ts)
{
RLPStream out;
out << RLPList(sizeof ...(_Ts));
rlpListAux(out, _ts...);
return out.str();
static std::mt19937_64 s_eng(0);
std::string ret(uniform_int_distribution<int>(4, 10)(s_eng), ' ');
char const n[] = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM1234567890";
uniform_int_distribution<int> d(0, sizeof(n) - 2);
for (char& c: ret)
c = n[d(s_eng)];
return ret;
}
int main()
{
cout << hex << hash256({{"dog", "puppy"}}) << endl;
cout << hex << hash256({{"dog", "puppy"}, {"doe", "reindeer"}}) << endl;
cout << hex << hash256({{"doe", "reindeer"}, {"dog", "puppy"}, {"dogglesworth", "cat"}}) << endl;
cout << hex << hash256({{"dog", "puppy"}, {"horse", "stallion"}, {"do", "verb"}, {"doge", "coin"}}) << endl;
{
Trie t;
t.insert("dog", "puppy");
assert(t.sha256() == hash256({{"dog", "puppy"}}));
assert(t.at("dog") == "puppy");
t.insert("doe", "reindeer");
assert(t.sha256() == hash256({{"dog", "puppy"}, {"doe", "reindeer"}}));
assert(t.at("doe") == "reindeer");
assert(t.at("dog") == "puppy");
t.insert("dogglesworth", "cat");
assert(t.sha256() == hash256({{"doe", "reindeer"}, {"dog", "puppy"}, {"dogglesworth", "cat"}}));
assert(t.at("doe") == "reindeer");
assert(t.at("dog") == "puppy");
assert(t.at("dogglesworth") == "cat");
t.remove("dogglesworth");
t.remove("doe");
assert(t.at("doe").empty());
assert(t.at("dogglesworth").empty());
assert(t.at("dog") == "puppy");
assert(t.sha256() == hash256({{"dog", "puppy"}}));
t.insert("horse", "stallion");
t.insert("do", "verb");
t.insert("doge", "coin");
assert(t.sha256() == hash256({{"dog", "puppy"}, {"horse", "stallion"}, {"do", "verb"}, {"doge", "coin"}}));
assert(t.at("doge") == "coin");
assert(t.at("do") == "verb");
assert(t.at("horse") == "stallion");
assert(t.at("dog") == "puppy");
t.remove("horse");
t.remove("do");
t.remove("doge");
assert(t.sha256() == hash256({{"dog", "puppy"}}));
assert(t.at("dog") == "puppy");
t.remove("dog");
for (int a = 0; a < 20; ++a)
{
StringMap m;
for (int i = 0; i < 20; ++i)
{
auto k = randomWord();
auto v = toString(i);
m.insert(make_pair(k, v));
t.insert(k, v);
assert(hash256(m) == t.sha256());
}
while (!m.empty())
{
auto k = m.begin()->first;
t.remove(k);
m.erase(k);
assert(hash256(m) == t.sha256());
}
}
}
// int of value 15
assert(RLP("\x0f") == 15);

Loading…
Cancel
Save