Browse Source

Merge branch 'utxo_cache' into develop

master
Neil Booth 8 years ago
parent
commit
9f9db0c7bd
  1. 1
      lib/coins.py
  2. 43
      lib/util.py
  3. 50
      server/db.py
  4. 11
      server/server.py

1
lib/coins.py

@ -137,7 +137,6 @@ class Coin(object):
@classmethod @classmethod
def read_block(cls, block): def read_block(cls, block):
assert isinstance(block, memoryview)
d = Deserializer(block[cls.HEADER_LEN:]) d = Deserializer(block[cls.HEADER_LEN:])
return d.read_block() return d.read_block()

43
lib/util.py

@ -1,8 +1,9 @@
# See the file "LICENSE" for information about the copyright # See the file "LICENSE" for information about the copyright
# and warranty status of this software. # and warranty status of this software.
import array
import sys import sys
from collections import Container, Mapping
# Method decorator. To be used for calculations that will always # Method decorator. To be used for calculations that will always
@ -25,6 +26,46 @@ class cachedproperty(object):
.format(self.f.__name__, obj)) .format(self.f.__name__, obj))
def deep_getsizeof(obj):
    """Return the approximate deep memory footprint of *obj*, in bytes.

    Based on code from code.tutsplus.com: http://goo.gl/fZ0DXK

    sys.getsizeof() is shallow: it counts every object held by a
    container as a bare pointer, regardless of how big that object
    really is.  This function walks the object graph instead, adding
    up sys.getsizeof() for the object and for everything reachable
    from it through mappings (keys and values) and other containers
    (their elements).

    Each distinct object, identified by id(), is counted once, so
    shared references and reference cycles are handled.  str, bytes,
    bytearray and array.array objects are treated as leaves.  Objects
    that are not containers are counted shallowly; their attributes
    are not followed.

    The walk uses an explicit stack rather than recursion, so deeply
    nested structures do not hit the interpreter's recursion limit.

    :param obj: the object to measure
    :return: the total size in bytes, as an int
    """
    # Imported locally: the ABC aliases in the top-level ``collections``
    # module were removed in Python 3.10.
    from collections.abc import Container, Mapping
    from itertools import chain

    leaf_types = (str, bytes, bytearray, array.array)
    seen_ids = set()
    total = 0
    # Stack of partially-consumed iterators, one per nesting level
    # currently being walked.
    pending = [iter((obj,))]
    while pending:
        for item in pending[-1]:
            if id(item) in seen_ids:
                continue
            seen_ids.add(id(item))
            total += sys.getsizeof(item)
            if isinstance(item, leaf_types):
                continue
            if isinstance(item, Mapping):
                # Flatten (key, value) pairs into key, value, key, ...
                children = chain.from_iterable(item.items())
            elif isinstance(item, Container):
                try:
                    children = iter(item)
                except TypeError:
                    # A Container only promises __contains__; if it
                    # cannot be iterated, count it shallowly.
                    continue
            else:
                continue
            # Descend into the children now; the parent iterator stays
            # on the stack and is resumed once the children are done.
            pending.append(children)
            break
        else:
            # Top-of-stack iterator is exhausted.
            pending.pop()
    return total
def chunks(items, size): def chunks(items, size):
for i in range(0, len(items), size): for i in range(0, len(items), size):
yield items[i: i + size] yield items[i: i + size]

50
server/db.py

@ -45,10 +45,10 @@ class UTXOCache(object):
Key: TX_HASH + TX_IDX (32 + 2 = 34 bytes) Key: TX_HASH + TX_IDX (32 + 2 = 34 bytes)
Value: HASH168 + TX_NUM + VALUE (21 + 4 + 8 = 33 bytes) Value: HASH168 + TX_NUM + VALUE (21 + 4 + 8 = 33 bytes)
That's 67 bytes of raw data. Assume 100 bytes per UTXO accounting That's 67 bytes of raw data. Python dictionary overhead means
for Python datastructure overhead, then perhaps 20 million UTXOs each entry actually uses about 187 bytes of memory. So almost
can fit in 2GB of RAM. There are approximately 42 million UTXOs 11.5 million UTXOs can fit in 2GB of RAM. There are approximately
on bitcoin mainnet at height 433,000. 42 million UTXOs on bitcoin mainnet at height 433,000.
Semantics: Semantics:
@ -80,6 +80,7 @@ class UTXOCache(object):
tx_num is stored to resolve them. The collision rate is around tx_num is stored to resolve them. The collision rate is around
0.02% for the hash168 table, and almost zero for the UTXO table 0.02% for the hash168 table, and almost zero for the UTXO table
(there are around 100 collisions in the whole bitcoin blockchain). (there are around 100 collisions in the whole bitcoin blockchain).
''' '''
def __init__(self, parent, db, coin): def __init__(self, parent, db, coin):
@ -290,6 +291,7 @@ class DB(object):
self.coin = env.coin self.coin = env.coin
self.flush_MB = env.flush_MB self.flush_MB = env.flush_MB
self.next_cache_check = 0
self.logger.info('flushing after cache reaches {:,d} MB' self.logger.info('flushing after cache reaches {:,d} MB'
.format(self.flush_MB)) .format(self.flush_MB))
@ -298,7 +300,7 @@ class DB(object):
# Unflushed items. Headers and tx_hashes have one entry per block # Unflushed items. Headers and tx_hashes have one entry per block
self.headers = [] self.headers = []
self.tx_hashes = [] self.tx_hashes = []
self.history = defaultdict(list) self.history = defaultdict(partial(array.array, 'I'))
self.history_size = 0 self.history_size = 0
db_name = '{}-{}'.format(self.coin.NAME, self.coin.NET) db_name = '{}-{}'.format(self.coin.NAME, self.coin.NET)
@ -432,13 +434,12 @@ class DB(object):
flush_id = struct.pack('>H', self.flush_count) flush_id = struct.pack('>H', self.flush_count)
for hash168, hist in self.history.items(): for hash168, hist in self.history.items():
key = b'H' + hash168 + flush_id key = b'H' + hash168 + flush_id
batch.put(key, array.array('I', hist).tobytes()) batch.put(key, hist.tobytes())
self.logger.info('flushed {:,d} history entries ({:,d} MB)...' self.logger.info('flushed {:,d} history entries in {:,d} addrs...'
.format(self.history_size, .format(self.history_size, len(self.history)))
self.history_size * 4 // 1048576))
self.history = defaultdict(list) self.history = defaultdict(partial(array.array, 'I'))
self.history_size = 0 self.history_size = 0
def open_file(self, filename, truncate=False, create=False): def open_file(self, filename, truncate=False, create=False):
@ -488,20 +489,24 @@ class DB(object):
def cache_MB(self): def cache_MB(self):
'''Returns the approximate size of the cache, in MB.''' '''Returns the approximate size of the cache, in MB.'''
utxo_MB = ((len(self.utxo_cache.cache) + len(self.utxo_cache.db_cache)) # Good average estimates
* 100 // 1048576) utxo_cache_size = len(self.utxo_cache.cache) * 187
hist_MB = (len(self.history) * 48 + self.history_size * 20) // 1048576 db_cache_size = len(self.utxo_cache.db_cache) * 105
if self.height % 200 == 0: hist_cache_size = len(self.history) * 180 + self.history_size * 4
self.logger.info('cache size at height {:,d}: ' utxo_MB = (db_cache_size + utxo_cache_size) // 1048576
'UTXOs: {:,d} MB history: {:,d} MB' hist_MB = hist_cache_size // 1048576
.format(self.height, utxo_MB, hist_MB)) cache_MB = utxo_MB + hist_MB
self.logger.info('cache entries: UTXOs: {:,d}/{:,d} '
'history: {:,d}/{:,d}' self.logger.info('cache entries: UTXO: {:,d} DB: {:,d} '
'hist count: {:,d} hist size: {:,d}'
.format(len(self.utxo_cache.cache), .format(len(self.utxo_cache.cache),
len(self.utxo_cache.db_cache), len(self.utxo_cache.db_cache),
len(self.history), len(self.history),
self.history_size)) self.history_size))
return utxo_MB + hist_MB self.logger.info('cache size at height {:,d}: {:,d}MB '
'(UTXOs {:,d}MB hist {:,d}MB)'
.format(self.height, cache_MB, utxo_MB, hist_MB))
return cache_MB
def process_block(self, block): def process_block(self, block):
self.headers.append(block[:self.coin.HEADER_LEN]) self.headers.append(block[:self.coin.HEADER_LEN])
@ -519,7 +524,10 @@ class DB(object):
for tx_hash, tx in zip(tx_hashes, txs): for tx_hash, tx in zip(tx_hashes, txs):
self.process_tx(tx_hash, tx) self.process_tx(tx_hash, tx)
# Flush if we're getting full # Check if we're getting full and time to flush?
now = time.time()
if now > self.next_cache_check:
self.next_cache_check = now + 60
if self.cache_MB() > self.flush_MB: if self.cache_MB() > self.flush_MB:
self.flush() self.flush()

11
server/server.py

@ -74,18 +74,7 @@ class BlockCache(object):
self.logger.info('catching up, block cache limit {:d}MB...' self.logger.info('catching up, block cache limit {:d}MB...'
.format(self.cache_limit)) .format(self.cache_limit))
last_log = 0
prior_height = self.db.height
while await self.maybe_prefill(): while await self.maybe_prefill():
now = time.time()
count = self.fetched_height - prior_height
if now > last_log + 15 and count:
last_log = now
prior_height = self.fetched_height
self.logger.info('prefilled {:,d} blocks to height {:,d} '
'daemon height: {:,d}'
.format(count, self.fetched_height,
self.daemon_height))
await asyncio.sleep(1) await asyncio.sleep(1)
if not self.stop: if not self.stop:

Loading…
Cancel
Save