# Copyright (c) 2016, Neil Booth # # All rights reserved. # # See the file "LICENCE" for information about the copyright # and warranty status of this software. '''UTXO and file cache. During initial sync these cache data and only flush occasionally. Once synced flushes are performed after processing each block. ''' import struct from lib.util import LoggedClass from lib.hash import hash_to_str # History can hold approx. 65536 * HIST_ENTRIES_PER_KEY entries HIST_ENTRIES_PER_KEY = 1024 HIST_VALUE_BYTES = HIST_ENTRIES_PER_KEY * 4 ADDR_TX_HASH_LEN = 4 UTXO_TX_HASH_LEN = 4 NO_HASH_168 = bytes([255]) * 21 NO_CACHE_ENTRY = NO_HASH_168 + bytes(12) class UTXOCache(LoggedClass): '''An in-memory UTXO cache, representing all changes to UTXO state since the last DB flush. We want to store millions, perhaps 10s of millions of these in memory for optimal performance during initial sync, because then it is possible to spend UTXOs without ever going to the database (other than as an entry in the address history, and there is only one such entry per TX not per UTXO). So store them in a Python dictionary with binary keys and values. Key: TX_HASH + TX_IDX (32 + 2 = 34 bytes) Value: HASH168 + TX_NUM + VALUE (21 + 4 + 8 = 33 bytes) That's 67 bytes of raw data. Python dictionary overhead means each entry actually uses about 187 bytes of memory. So almost 11.5 million UTXOs can fit in 2GB of RAM. There are approximately 42 million UTXOs on bitcoin mainnet at height 433,000. Semantics: add: Add it to the cache dictionary. spend: Remove it if in the cache dictionary. Otherwise it's been flushed to the DB. Each UTXO is responsible for two entries in the DB stored using compressed keys. Mark both for deletion in the next flush of the in-memory UTXO cache. A UTXO is stored in the DB in 2 "tables": 1. The output value and tx number. Must be keyed with a hash168 prefix so the unspent outputs and balance of an arbitrary address can be looked up with a simple key traversal. Key: b'u' + hash168 + compressed_tx_hash + tx_idx Value: a (tx_num, value) pair 2. Given a prevout, we need to be able to look up the UTXO key to remove it. As is keyed by hash168 and that is not part of the prevout, we need a hash168 lookup. Key: b'h' + compressed tx_hash + tx_idx Value: (hash168, tx_num) pair The compressed TX hash is just the first few bytes of the hash of the TX the UTXO is in (and needn't be the same number of bytes in each table). As this is not unique there will be collisions; tx_num is stored to resolve them. The collision rate is around 0.02% for the hash168 table, and almost zero for the UTXO table (there are around 100 collisions in the whole bitcoin blockchain). ''' def __init__(self, get_tx_hash, db, coin): super().__init__() self.get_tx_hash = get_tx_hash self.coin = coin self.cache = {} self.put = self.cache.__setitem__ self.db = db self.db_cache = {} # Statistics self.cache_spends = 0 self.db_deletes = 0 def lookup(self, prev_hash, prev_idx): '''Given a prevout, return a pair (hash168, value). If the UTXO is not found, returns (None, None).''' # Fast track is it being in the cache idx_packed = struct.pack('