Neil Booth
8 years ago
3 changed files with 247 additions and 282 deletions
@ -1,257 +0,0 @@ |
|||||
# Copyright (c) 2016, Neil Booth |
|
||||
# |
|
||||
# All rights reserved. |
|
||||
# |
|
||||
# See the file "LICENCE" for information about the copyright |
|
||||
# and warranty status of this software. |
|
||||
|
|
||||
'''UTXO and file cache.

During initial sync these caches hold data in memory and are flushed
only occasionally.  Once synced, a flush is performed after processing
each block.
'''
|
||||
|
|
||||
|
|
||||
import struct |
|
||||
|
|
||||
from lib.util import LoggedClass |
|
||||
from lib.hash import hash_to_str |
|
||||
|
|
||||
|
|
||||
# History can hold approx. 65536 * HIST_ENTRIES_PER_KEY entries
HIST_ENTRIES_PER_KEY = 1024
# Each history entry occupies 4 bytes
HIST_VALUE_BYTES = 4 * HIST_ENTRIES_PER_KEY
# Number of leading TX hash bytes kept in the compressed b'h' keys
ADDR_TX_HASH_LEN = 4
# Number of leading TX hash bytes kept in the compressed b'u' keys
UTXO_TX_HASH_LEN = 4
# A 21-byte placeholder of all-ones bytes standing in for "no hash168"
NO_HASH_168 = b'\xff' * 21
# 33-byte placeholder cache value: the placeholder hash168 followed by
# 12 zero bytes
NO_CACHE_ENTRY = NO_HASH_168 + b'\x00' * 12
|
||||
|
|
||||
|
|
||||
class UTXOCache(LoggedClass):
    '''An in-memory UTXO cache, representing all changes to UTXO state
    since the last DB flush.

    We want to store millions, perhaps 10s of millions of these in
    memory for optimal performance during initial sync, because then
    it is possible to spend UTXOs without ever going to the database
    (other than as an entry in the address history, and there is only
    one such entry per TX not per UTXO).  So store them in a Python
    dictionary with binary keys and values.

      Key:    TX_HASH + TX_IDX            (32 + 2 = 34 bytes)
      Value:  HASH168 + TX_NUM + VALUE    (21 + 4 + 8 = 33 bytes)

    That's 67 bytes of raw data.  Python dictionary overhead means
    each entry actually uses about 187 bytes of memory.  So almost
    11.5 million UTXOs can fit in 2GB of RAM.  There are approximately
    42 million UTXOs on bitcoin mainnet at height 433,000.

    Semantics:

      add:   Add it to the cache dictionary.
      spend: Remove it if in the cache dictionary.
             Otherwise it's been flushed to the DB.  Each UTXO
             is responsible for two entries in the DB stored using
             compressed keys.  Mark both for deletion in the next
             flush of the in-memory UTXO cache.

    A UTXO is stored in the DB in 2 "tables":

      1.  The output value and tx number.  Must be keyed with a
          hash168 prefix so the unspent outputs and balance of an
          arbitrary address can be looked up with a simple key
          traversal.
          Key: b'u' + hash168 + compressed_tx_hash + tx_idx
          Value: a (tx_num, value) pair

      2.  Given a prevout, we need to be able to look up the UTXO key
          to remove it.  As table 1 is keyed by hash168 and that is
          not part of the prevout, we need a hash168 lookup.
          Key: b'h' + compressed tx_hash + tx_idx
          Value: (hash168, tx_num) pair

    The compressed TX hash is just the first few bytes of the hash of
    the TX the UTXO is in (and needn't be the same number of bytes in
    each table).  As this is not unique there will be collisions;
    tx_num is stored to resolve them.  The collision rate is around
    0.02% for the hash168 table, and almost zero for the UTXO table
    (there are around 100 collisions in the whole bitcoin blockchain).

    Colliding entries are stored concatenated under the one DB key, so
    a table 1 value is a multiple of 12 bytes and a table 2 value is a
    multiple of 25 bytes.
    '''

    def __init__(self, get_tx_hash, db, coin):
        '''Create an empty cache.

        get_tx_hash: callable taking a tx_num and returning a
                     (tx_hash, height) pair; used to resolve
                     compressed key collisions.
        db:          object providing get(key); presumably returns
                     None for a missing key - confirm against the DB
                     wrapper in use.
        coin:        stored on the instance; not used by this class.
        '''
        super().__init__()
        self.get_tx_hash = get_tx_hash
        self.coin = coin
        # The in-memory UTXO set; see the class docstring for layout
        self.cache = {}
        self.put = self.cache.__setitem__
        self.db = db
        # Pending DB writes: key -> new value, or None for a deletion
        self.db_cache = {}
        # Statistics
        self.cache_spends = 0
        self.db_deletes = 0

    def lookup(self, prev_hash, prev_idx):
        '''Given a prevout, return its cache value without spending it.

        The result is the 33-byte HASH168 + TX_NUM + VALUE string.
        If the UTXO is neither in memory nor in the DB the result is
        NO_CACHE_ENTRY.
        '''
        # Fast track is it being in the cache
        idx_packed = struct.pack('<H', prev_idx)
        value = self.cache.get(prev_hash + idx_packed, None)
        if value:
            return value
        return self.db_lookup(prev_hash, idx_packed, False)

    def db_lookup(self, tx_hash, idx_packed, delete=True):
        '''Return a UTXO from the DB.  Remove it if delete is True.

        Return NO_CACHE_ENTRY if it is not in the DB.  Raise Exception
        if several UTXOs share the compressed key and none of them
        belongs to tx_hash.
        '''
        hash168 = self.hash168(tx_hash, idx_packed, delete)
        if not hash168:
            return NO_CACHE_ENTRY

        # Read the UTXO through the cache from the disk.  We have to
        # go through the cache because compressed keys can collide.
        key = b'u' + hash168 + tx_hash[:UTXO_TX_HASH_LEN] + idx_packed
        data = self.cache_get(key)
        if data is None:
            # Uh-oh, this should not happen...
            # unpack() returns a 1-tuple; '{:d}' needs the int itself
            (tx_idx, ) = struct.unpack('<H', idx_packed)
            self.logger.error('found no UTXO for {} / {:d} key {}'
                              .format(hash_to_str(tx_hash), tx_idx,
                                      bytes(key).hex()))
            return NO_CACHE_ENTRY

        # A single entry is assumed to be ours without checking the hash
        if len(data) == 12:
            if delete:
                self.db_deletes += 1
                self.cache_delete(key)
            return hash168 + data

        # Resolve the compressed key collision.  These should be
        # extremely rare.
        assert len(data) % 12 == 0
        for n in range(0, len(data), 12):
            (tx_num, ) = struct.unpack('<I', data[n:n+4])
            this_tx_hash, height = self.get_tx_hash(tx_num)
            if tx_hash == this_tx_hash:
                result = hash168 + data[n:n+12]
                if delete:
                    self.db_deletes += 1
                    # Keep the other entries sharing this key
                    self.cache_write(key, data[:n] + data[n+12:])
                return result

        raise Exception('could not resolve UTXO key collision')

    def spend(self, prev_hash, prev_idx):
        '''Spend a UTXO and return the cache's value.

        If the UTXO is not in the cache it must be on disk, in which
        case it is marked for deletion from the DB at the next flush.
        '''
        # Fast track is it being in the cache
        idx_packed = struct.pack('<H', prev_idx)
        value = self.cache.pop(prev_hash + idx_packed, None)
        if value:
            self.cache_spends += 1
            return value

        return self.db_lookup(prev_hash, idx_packed)

    def hash168(self, tx_hash, idx_packed, delete=True):
        '''Return the hash168 paid to by the given TXO.

        Look it up in the DB and remove it if delete is True.  Return
        None if not found.  Raise Exception if several entries share
        the compressed key and none of them belongs to tx_hash.
        '''
        key = b'h' + tx_hash[:ADDR_TX_HASH_LEN] + idx_packed
        data = self.cache_get(key)
        if data is None:
            # Assuming the DB is not corrupt, if delete is True this
            # indicates a successful spend of a non-standard script
            # as we don't currently record those
            return None

        # A single entry is assumed to be ours without checking the hash
        if len(data) == 25:
            if delete:
                self.cache_delete(key)
            return data[:21]

        assert len(data) % 25 == 0

        # Resolve the compressed key collision using the TX number
        for n in range(0, len(data), 25):
            (tx_num, ) = struct.unpack('<I', data[n+21:n+25])
            my_hash, height = self.get_tx_hash(tx_num)
            if my_hash == tx_hash:
                if delete:
                    # Keep the other entries sharing this key
                    self.cache_write(key, data[:n] + data[n+25:])
                return data[n:n+21]

        raise Exception('could not resolve hash168 collision')

    def cache_write(self, key, value):
        '''Cache write of a (key, value) pair to the DB.

        The value must be non-empty; use cache_delete() to remove a
        key.
        '''
        assert value
        self.db_cache[key] = value

    def cache_delete(self, key):
        '''Cache deletion of a key from the DB.'''
        self.db_cache[key] = None

    def cache_get(self, key):
        '''Fetch a value from the DB through our write cache.

        Return the pending value if the key has an unflushed write,
        None if it has an unflushed deletion, and otherwise whatever
        the DB returns for the key.
        '''
        if key in self.db_cache:
            # A pending deletion must hide the value still on disk;
            # otherwise spent entries would be read back and, on a key
            # collision in flush(), written out again.
            return self.db_cache[key] or None
        return self.db.get(key)

    def flush(self, batch):
        '''Flush the cached DB writes and UTXO set to the batch.

        batch must provide put(key, value) and delete(key).  On return
        the in-memory UTXO set, the write cache and the statistics
        counters have all been reset.
        '''
        # Care is needed because the writes generated by flushing the
        # UTXO state may have keys in common with our write cache or
        # may be in the DB already.
        hcolls = ucolls = 0
        new_utxos = len(self.cache)

        for cache_key, cache_value in self.cache.items():
            # First write to the hash168 lookup table
            key = b'h' + cache_key[:ADDR_TX_HASH_LEN] + cache_key[-2:]
            value = cache_value[:25]
            prior_value = self.cache_get(key)
            if prior_value:   # Should rarely happen
                hcolls += 1
                value += prior_value
            self.cache_write(key, value)

            # Next write the UTXO table
            key = (b'u' + cache_value[:21] + cache_key[:UTXO_TX_HASH_LEN]
                   + cache_key[-2:])
            value = cache_value[-12:]
            prior_value = self.cache_get(key)
            if prior_value:   # Should almost never happen
                ucolls += 1
                value += prior_value
            self.cache_write(key, value)

        # GC-ing this now can only help the levelDB write.
        self.cache = {}
        self.put = self.cache.__setitem__

        # Now we can update to the batch.
        for key, value in self.db_cache.items():
            if value:
                batch.put(key, value)
            else:
                batch.delete(key)

        self.db_cache = {}

        # Everything spent from memory was also added since the last
        # flush, so count it among the adds.
        adds = new_utxos + self.cache_spends

        self.logger.info('UTXO cache adds: {:,d} spends: {:,d} '
                         .format(adds, self.cache_spends))
        self.logger.info('UTXO DB adds: {:,d} spends: {:,d}. '
                         'Collisions: hash168: {:,d} UTXO: {:,d}'
                         .format(new_utxos, self.db_deletes,
                                 hcolls, ucolls))
        self.cache_spends = self.db_deletes = 0
|
Loading…
Reference in new issue