# See the file "COPYING" for information about the copyright
# and warranty status of this software.

import array
import itertools
import logging
import os
import struct
import time
from binascii import hexlify, unhexlify
from bisect import bisect_right
from collections import defaultdict, namedtuple
from functools import partial

import plyvel

from lib.coins import Bitcoin
from lib.script import ScriptPubKey

ADDR_TX_HASH_LEN = 6
UTXO_TX_HASH_LEN = 4
HIST_ENTRY_LEN = 256 * 4    # Admits 65536 * HIST_ENTRY_LEN/4 entries
UTXO = namedtuple("UTXO", "tx_num tx_pos tx_hash height value")

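# A sketch of the LevelDB key layout, as inferred from the methods
# below (byte counts follow from the constants above):
#
#   b'h' + tx_hash[:6] + tx_idx ('<H')    -> hash160 + tx_num
#       9-byte key; one or more 24-byte entries (put_utxo / get_hash160)
#   b'u' + hash160 + tx_hash[:4] + tx_idx -> tx_num + value ('<I' + '<Q')
#       27-byte key; one or more 12-byte entries (put_utxo / spend_utxo)
#   b'H' + hash160 + page ('>H')          -> packed '<I' tx numbers
#       23-byte key; up to HIST_ENTRY_LEN bytes (flush_history)
#
# Truncated tx hashes keep keys small at the cost of rare collisions,
# which readers resolve by checking full hashes via get_tx_hash().
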

def to_4_bytes(value):
    return struct.pack('<I', value)


def from_4_bytes(b):
    return struct.unpack('<I', b)[0]

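# For example, to_4_bytes(1000) == b'\xe8\x03\x00\x00' and
# from_4_bytes(b'\xe8\x03\x00\x00') == 1000 (4-byte little-endian).
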

class DB(object):

    HEIGHT_KEY = b'height'
    TIP_KEY = b'tip'
    GENESIS_KEY = b'genesis'
    TX_COUNT_KEY = b'tx_count'
    WALL_TIME_KEY = b'wall_time'

    class Error(Exception):
        pass

    def __init__(self, env):
        self.logger = logging.getLogger('DB')
        self.logger.setLevel(logging.INFO)

        self.coin = env.coin
        self.flush_size = env.flush_size
        self.logger.info('using flush size of {:,d} entries'
                         .format(self.flush_size))

        self.tx_counts = array.array('I')
        self.tx_hash_file_size = 4*1024*1024
        # Unflushed items.  Headers and tx_hashes have one entry per block
        self.headers = []
        self.tx_hashes = []
        self.history = defaultdict(list)
        self.writes_avoided = 0
        self.read_cache_hits = 0
        self.write_cache_hits = 0
        self.last_writes = 0
        self.last_time = time.time()

        # Things put in a batch are not visible until the batch is written,
        # so use a cache.
        # Semantics: a key/value pair in this dictionary represents the
        # in-memory state of the DB.  Anything in this dictionary will be
        # written at the next flush.
        self.write_cache = {}
        # Read cache: a key/value pair in this dictionary represents
        # something read from the DB; it is on-disk as of the prior
        # flush.  If a key is in write_cache that value is more
        # recent.  Any key in write_cache and not in read_cache has
        # never hit the disk.
        self.read_cache = {}

        db_name = '{}-{}'.format(self.coin.NAME, self.coin.NET)
        try:
            self.db = self.open_db(db_name, False)
        except:
            self.db = self.open_db(db_name, True)
            self.headers_file = self.open_file('headers', True)
            self.txcount_file = self.open_file('txcount', True)
            self.init_db()
            self.logger.info('created new database {}'.format(db_name))
        else:
            self.logger.info('successfully opened database {}'.format(db_name))
            self.headers_file = self.open_file('headers')
            self.txcount_file = self.open_file('txcount')
            self.read_db()

        # Note that self.db_height is the height of the next block to be
        # written.  So an empty DB has a db_height of 0, not -1.
        self.tx_count = self.db_tx_count
        self.height = self.db_height - 1
        self.tx_counts.fromfile(self.txcount_file, self.db_height)
        if self.tx_count == 0:
            self.flush()

    def open_db(self, db_name, create):
        return plyvel.DB(db_name, create_if_missing=create,
                         error_if_exists=create,
                         compression=None)
        # lru_cache_size=256*1024*1024)

    def init_db(self):
        self.db_height = 0
        self.db_tx_count = 0
        self.wall_time = 0
        self.tip = self.coin.GENESIS_HASH
        self.put(self.GENESIS_KEY, unhexlify(self.tip))

    def read_db(self):
        genesis_hash = hexlify(self.get(self.GENESIS_KEY))
        if genesis_hash != self.coin.GENESIS_HASH:
            raise self.Error('DB genesis hash {} does not match coin {}'
                             .format(genesis_hash, self.coin.GENESIS_HASH))
        self.db_height = from_4_bytes(self.get(self.HEIGHT_KEY))
        self.db_tx_count = from_4_bytes(self.get(self.TX_COUNT_KEY))
        self.wall_time = from_4_bytes(self.get(self.WALL_TIME_KEY))
        self.tip = hexlify(self.get(self.TIP_KEY))
        self.logger.info('{}/{} height: {:,d} tx count: {:,d} sync time: {}'
                         .format(self.coin.NAME, self.coin.NET,
                                 self.db_height - 1, self.db_tx_count,
                                 self.formatted_wall_time()))

    def formatted_wall_time(self):
        wall_time = int(self.wall_time)
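        # e.g. 90061 seconds (86400 + 3600 + 60 + 1) -> '1d 01h 01m 01s'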
        return '{:d}d {:02d}h {:02d}m {:02d}s'.format(
            wall_time // 86400, (wall_time % 86400) // 3600,
            (wall_time % 3600) // 60, wall_time % 60)

    def get(self, key):
        # Get a key from write_cache, then read_cache, then the DB
        value = self.write_cache.get(key)
        if not value:
            value = self.read_cache.get(key)
            if not value:
                value = self.db.get(key)
                self.read_cache[key] = value
            else:
                self.read_cache_hits += 1
        else:
            self.write_cache_hits += 1
        return value

    def put(self, key, value):
        assert value
        self.write_cache[key] = value

    def delete(self, key):
        # Deleting an on-disk key requires a later physical delete.
        # If it's not on-disk we can just drop it entirely.
        if self.read_cache.get(key) is None:
            self.writes_avoided += 1
            self.write_cache.pop(key, None)
        else:
            self.write_cache[key] = None
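            # A None value acts as a tombstone: flush() later turns it
            # into a batch.delete() against LevelDB.
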
    def put_state(self):
        now = time.time()
        self.wall_time += now - self.last_time
        self.last_time = now
        self.db_tx_count = self.tx_count
        self.db_height = self.height + 1
        self.put(self.HEIGHT_KEY, to_4_bytes(self.db_height))
        self.put(self.TX_COUNT_KEY, to_4_bytes(self.db_tx_count))
        self.put(self.TIP_KEY, unhexlify(self.tip))
        self.put(self.WALL_TIME_KEY, to_4_bytes(int(self.wall_time)))

    def flush(self):
        # Write out the files to the FS before flushing to the DB.  If
        # the DB transaction fails, the files being too long doesn't
        # matter.  But if writing the files fails we do not want to
        # have updated the DB.  This disk flush is fast.
        self.write_headers()
        self.write_tx_counts()
        self.write_tx_hashes()

        tx_diff = self.tx_count - self.db_tx_count
        height_diff = self.height + 1 - self.db_height
        self.logger.info('flushing to LevelDB {:,d} txs and {:,d} blocks '
                         'to height {:,d} tx count: {:,d}'
                         .format(tx_diff, height_diff, self.height,
                                 self.tx_count))

        # This LevelDB flush is slow
        deletes = 0
        writes = 0
        with self.db.write_batch(transaction=True) as batch:
            # Flush the state, then the cache, then the history
            self.put_state()
            for key, value in self.write_cache.items():
                if value is None:
                    batch.delete(key)
                    deletes += 1
                else:
                    batch.put(key, value)
                    writes += 1

        self.flush_history()

        self.logger.info('flushed. Cache hits: {:,d}/{:,d} writes: {:,d} '
                         'deletes: {:,d} elided: {:,d} sync: {}'
                         .format(self.write_cache_hits,
                                 self.read_cache_hits, writes, deletes,
                                 self.writes_avoided,
                                 self.formatted_wall_time()))

        # Note this preserves semantics and hopefully saves time
        self.read_cache = self.write_cache
        self.write_cache = {}
        self.writes_avoided = 0
        self.read_cache_hits = 0
        self.write_cache_hits = 0
        self.last_writes = writes

    def flush_history(self):
        # Drop any None entry
        self.history.pop(None, None)

        for hash160, hist in self.history.items():
            prefix = b'H' + hash160
            for key, v in self.db.iterator(reverse=True, prefix=prefix,
                                           fill_cache=False):
                assert len(key) == 23
                v += array.array('I', hist).tobytes()
                break
            else:
                key = prefix + bytes(2)
                v = array.array('I', hist).tobytes()
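
            # History pages are keyed b'H' + hash160 + a 2-byte
            # big-endian page index, so LevelDB iterates them in page
            # order: struct.pack('>H', 1) == b'\x00\x01'.  Each page
            # holds up to HIST_ENTRY_LEN bytes, i.e. 256 4-byte tx
            # numbers.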
            # db.put doesn't accept a memoryview!
            self.db.put(key, v[:HIST_ENTRY_LEN])
            if len(v) > HIST_ENTRY_LEN:
                # must be big-endian
                (idx, ) = struct.unpack('>H', key[-2:])
                for n in range(HIST_ENTRY_LEN, len(v), HIST_ENTRY_LEN):
                    idx += 1
                    key = prefix + struct.pack('>H', idx)
                    if idx % 500 == 0:
                        addr = self.coin.P2PKH_address_from_hash160(hash160)
                        self.logger.info('address {} hist moving to idx {:d}'
                                         .format(addr, idx))
                    self.db.put(key, v[n:n + HIST_ENTRY_LEN])

        self.history = defaultdict(list)

    def get_hash160(self, tx_hash, idx, delete=True):
        key = b'h' + tx_hash[:ADDR_TX_HASH_LEN] + struct.pack('<H', idx)
        data = self.get(key)
        if data is None:
            return None

        if len(data) == 24:
            if delete:
                self.delete(key)
            return data[:20]
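
        # Each 24-byte entry is hash160 (20 bytes) + '<I' tx_num (4
        # bytes).  When truncated tx hashes collide, entries are
        # concatenated; the loop below picks the right one by comparing
        # full tx hashes via get_tx_hash().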
        # This should almost never happen
        assert len(data) % 24 == 0
        self.logger.info('hash160 compressed key collision {}'
                         .format(key.hex()))
        for n in range(0, len(data), 24):
            (tx_num, ) = struct.unpack('<I', data[n+20:n+24])
            my_hash, height = self.get_tx_hash(tx_num)
            if my_hash == tx_hash:
                if delete:
                    self.put(key, data[:n] + data[n + 24:])
                return data[n:n+20]
        else:
            raise Exception('could not resolve hash160 collision')

    def spend_utxo(self, prevout):
        hash160 = self.get_hash160(prevout.hash, prevout.n)
        if hash160 is None:
            # This indicates a successful spend of a non-standard script
            # self.logger.info('ignoring spend of non-standard UTXO {}/{:d} '
            #                  'at height {:d}'
            #                  .format(bytes(reversed(prevout.hash)).hex(),
            #                          prevout.n, self.height))
            return None

        key = (b'u' + hash160 + prevout.hash[:UTXO_TX_HASH_LEN]
               + struct.pack('<H', prevout.n))
        data = self.get(key)
        if len(data) == 12:
            (tx_num, ) = struct.unpack('<I', data[:4])
            self.delete(key)
        else:
            # This should almost never happen
            assert len(data) % (4 + 8) == 0
            self.logger.info('UTXO compressed key collision at height {:d}, '
                             'utxo {} / {:d}'
                             .format(self.height, bytes(reversed(prevout.hash))
                                     .hex(), prevout.n))
            for n in range(0, len(data), 12):
                (tx_num, ) = struct.unpack('<I', data[n:n+4])
                tx_hash, height = self.get_tx_hash(tx_num)
                if prevout.hash == tx_hash:
                    break
            else:
                raise Exception('could not resolve UTXO key collision')

            data = data[:n] + data[n + 12:]
            self.put(key, data)

        return hash160

    def put_utxo(self, tx_hash, idx, txout):
        pk = ScriptPubKey.from_script(txout.pk_script, self.coin)
        if not pk.hash160:
            return None

        pack = struct.pack
        idxb = pack('<H', idx)
        txcb = pack('<I', self.tx_count)
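
        # Both writes below append to any prior value under the same
        # truncated key, so rare collisions accumulate entries instead
        # of overwriting them; readers disambiguate by full tx hash.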
        # First write the hash160 lookup
        key = b'h' + tx_hash[:ADDR_TX_HASH_LEN] + idxb
        # b'' avoids this annoyance: https://bugs.python.org/issue13298
        value = b''.join([pk.hash160, txcb])
        prior_value = self.get(key)
        if prior_value:   # Should almost never happen
            value += prior_value
        self.put(key, value)

        # Next write the UTXO
        key = b'u' + pk.hash160 + tx_hash[:UTXO_TX_HASH_LEN] + idxb
        value = txcb + pack('<Q', txout.value)
        prior_value = self.get(key)
        if prior_value:   # Should almost never happen
            value += prior_value
        self.put(key, value)

        return pk.hash160

    def open_file(self, filename, truncate=False, create=False):
        try:
            return open(filename, 'wb+' if truncate else 'rb+')
        except FileNotFoundError:
            if create:
                return open(filename, 'wb+')
            raise

    def read_headers(self, height, count):
        header_len = self.coin.HEADER_LEN
        self.headers_file.seek(height * header_len)
        return self.headers_file.read(count * header_len)

    def write_headers(self):
        headers = b''.join(self.headers)
        header_len = self.coin.HEADER_LEN
        assert len(headers) % header_len == 0
        self.headers_file.seek(self.db_height * header_len)
        self.headers_file.write(headers)
        self.headers_file.flush()
        self.headers = []

    def write_tx_counts(self):
        self.txcount_file.seek(self.db_height * self.tx_counts.itemsize)
        self.txcount_file.write(self.tx_counts[self.db_height: self.height + 1])
        self.txcount_file.flush()

    def write_tx_hashes(self):
        hash_blob = b''.join(itertools.chain(*self.tx_hashes))
        assert len(hash_blob) % 32 == 0
        assert self.tx_hash_file_size % 32 == 0
        hashes = memoryview(hash_blob)
        cursor = 0
        file_pos = self.db_tx_count * 32
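        # 32-byte hashes are appended across fixed-size files
        # ('hashes00000', 'hashes00001', ...) of tx_hash_file_size
        # bytes each: 4 MiB holds 131,072 hashes, and divmod() below
        # yields the file number and byte offset for any position.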
        while cursor < len(hashes):
            file_num, offset = divmod(file_pos, self.tx_hash_file_size)
            size = min(len(hashes) - cursor, self.tx_hash_file_size - offset)
            filename = 'hashes{:05d}'.format(file_num)
            with self.open_file(filename, create=True) as f:
                f.seek(offset)
                f.write(hashes[cursor:cursor + size])
            cursor += size
            file_pos += size
        self.tx_hashes = []

    def process_block(self, block):
        self.headers.append(block[:self.coin.HEADER_LEN])

        tx_hashes, txs = self.coin.read_block(block)
        self.height += 1

        assert len(self.tx_counts) == self.height

        # These both need to be updated before calling process_tx().
        # It uses them for tx hash lookup
        self.tx_hashes.append(tx_hashes)
        self.tx_counts.append(self.tx_count + len(txs))

        for tx_hash, tx in zip(tx_hashes, txs):
            self.process_tx(tx_hash, tx)

        # Flush if we're getting full
        if len(self.write_cache) + len(self.history) > self.flush_size:
            self.flush()

    def process_tx(self, tx_hash, tx):
        hash160s = set()
        if not tx.is_coinbase:
            for txin in tx.inputs:
                hash160s.add(self.spend_utxo(txin.prevout))

        for idx, txout in enumerate(tx.outputs):
            hash160s.add(self.put_utxo(tx_hash, idx, txout))
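
        # Non-standard scripts yield None from spend_utxo / put_utxo;
        # the None key accumulates history harmlessly and is dropped by
        # flush_history().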
        for hash160 in hash160s:
            self.history[hash160].append(self.tx_count)

        self.tx_count += 1

    def get_tx_hash(self, tx_num):
        '''Returns the tx_hash and height of a tx number.'''
        height = bisect_right(self.tx_counts, tx_num)
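
        # tx_counts is cumulative: with tx_counts == [3, 5, 9] (blocks
        # 0-2), tx_num 4 gives bisect_right(...) == 1, i.e. block 1,
        # and its index within that block is 4 - tx_counts[0] == 1.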
        # Is this on disk or unflushed?
        if height >= self.db_height:
            tx_hashes = self.tx_hashes[height - self.db_height]
            tx_hash = tx_hashes[tx_num - self.tx_counts[height - 1]]
        else:
            file_pos = tx_num * 32
            file_num, offset = divmod(file_pos, self.tx_hash_file_size)
            filename = 'hashes{:05d}'.format(file_num)
            with self.open_file(filename) as f:
                f.seek(offset)
                tx_hash = f.read(32)

        return tx_hash, height

    def get_balance(self, hash160):
        '''Returns the confirmed balance of an address.'''
        utxos = self.get_utxos(hash160)
        return sum(utxo.value for utxo in utxos)

    def get_history(self, hash160):
        '''Returns a sorted list of (tx_hash, height) tuples of transactions
        that touched the address, earliest in the blockchain first.
        Only includes outputs that have been spent.  Other
        transactions will be in the UTXO set.
        '''
        prefix = b'H' + hash160
        a = array.array('I')
        for key, hist in self.db.iterator(prefix=prefix):
            a.frombytes(hist)
        return [self.get_tx_hash(tx_num) for tx_num in a]

    def get_utxos(self, hash160):
        '''Returns all UTXOs for an address sorted such that the earliest
        in the blockchain comes first.
        '''
        unpack = struct.unpack
        prefix = b'u' + hash160
        utxos = []
        for k, v in self.db.iterator(prefix=prefix):
            (tx_pos, ) = unpack('<H', k[-2:])

            for n in range(0, len(v), 12):
                (tx_num, ) = unpack('<I', v[n:n+4])
                (value, ) = unpack('<Q', v[n+4:n+12])
                tx_hash, height = self.get_tx_hash(tx_num)
                utxos.append(UTXO(tx_num, tx_pos, tx_hash, height, value))

        # Sorted by height and block position.
        return sorted(utxos)