def deep_getsizeof(obj):
    """Find the memory footprint of a Python object.

    Based on code from code.tutsplus.com: http://goo.gl/fZ0DXK

    sys.getsizeof only reports a shallow size: it counts each object
    inside a container as a pointer, regardless of how big that object
    really is.  This function instead walks the object graph (for
    example a dictionary holding nested dictionaries with lists of
    lists, tuples and sets) and adds up the shallow size of every
    distinct object it reaches.

    Objects are identified by id(), so an object referenced several
    times is counted once and reference cycles terminate.  The walk
    uses an explicit stack rather than recursion, so deeply nested
    structures do not hit the interpreter's recursion limit.

    :param obj: the object to measure

    :return: the total size in bytes of obj and of everything reachable
             from it through mappings and iterable containers
    """
    # Imported locally from collections.abc: the ABC aliases in the
    # top-level collections namespace were removed in Python 3.10.
    from collections.abc import Container, Iterable, Mapping

    # Types that are never descended into: either their shallow size
    # already covers the payload, or their items are created on the fly
    # when iterated rather than stored.
    leaf_types = (str, bytes, bytearray, array.array, memoryview, range)

    seen_ids = set()
    total = 0
    pending = [obj]

    while pending:
        o = pending.pop()
        if id(o) in seen_ids:
            continue
        seen_ids.add(id(o))
        total += sys.getsizeof(o)

        if isinstance(o, leaf_types):
            continue

        if isinstance(o, Mapping):
            for k, v in o.items():
                pending.append(k)
                pending.append(v)
        elif isinstance(o, Container) and isinstance(o, Iterable):
            # Container alone only promises __contains__; require
            # Iterable too before looping over the members.
            pending.extend(o)

    return total
Assume 100 bytes per UTXO accounting - for Python datastructure overhead, then perhaps 20 million UTXOs - can fit in 2GB of RAM. There are approximately 42 million UTXOs - on bitcoin mainnet at height 433,000. + That's 67 bytes of raw data. Python dictionary overhead means + each entry actually uses about 187 bytes of memory. So almost + 11.5 million UTXOs can fit in 2GB of RAM. There are approximately + 42 million UTXOs on bitcoin mainnet at height 433,000. Semantics: @@ -80,6 +80,7 @@ class UTXOCache(object): tx_num is stored to resolve them. The collision rate is around 0.02% for the hash168 table, and almost zero for the UTXO table (there are around 100 collisions in the whole bitcoin blockchain). + ''' def __init__(self, parent, db, coin): @@ -290,6 +291,7 @@ class DB(object): self.coin = env.coin self.flush_MB = env.flush_MB + self.next_cache_check = 0 self.logger.info('flushing after cache reaches {:,d} MB' .format(self.flush_MB)) @@ -298,7 +300,7 @@ class DB(object): # Unflushed items. Headers and tx_hashes have one entry per block self.headers = [] self.tx_hashes = [] - self.history = defaultdict(list) + self.history = defaultdict(partial(array.array, 'I')) self.history_size = 0 db_name = '{}-{}'.format(self.coin.NAME, self.coin.NET) @@ -432,13 +434,12 @@ class DB(object): flush_id = struct.pack('>H', self.flush_count) for hash168, hist in self.history.items(): key = b'H' + hash168 + flush_id - batch.put(key, array.array('I', hist).tobytes()) + batch.put(key, hist.tobytes()) - self.logger.info('flushed {:,d} history entries ({:,d} MB)...' - .format(self.history_size, - self.history_size * 4 // 1048576)) + self.logger.info('flushed {:,d} history entries in {:,d} addrs...' 
def cache_MB(self):
    '''Return an estimate of the unflushed cache size, in whole MB.

    The estimate multiplies entry counts by average per-entry byte
    costs: 187 per UTXO cache entry, 105 per UTXO DB-cache entry, and
    180 per history key plus 4 per history entry.  The UTXO and
    history figures are each rounded down to whole megabytes before
    being added, so the returned total matches the logged breakdown.

    Both the entry counts and the resulting sizes are logged at info
    level on every call.
    '''
    utxo_entries = len(self.utxo_cache.cache)
    db_entries = len(self.utxo_cache.db_cache)
    hist_addrs = len(self.history)
    hist_entries = self.history_size

    # Average-cost estimates; 1048576 bytes per MB.
    utxo_MB = (db_entries * 105 + utxo_entries * 187) // 1048576
    hist_MB = (hist_addrs * 180 + hist_entries * 4) // 1048576
    total_MB = utxo_MB + hist_MB

    self.logger.info('cache entries: UTXO: {:,d} DB: {:,d} '
                     'hist count: {:,d} hist size: {:,d}'
                     .format(utxo_entries, db_entries,
                             hist_addrs, hist_entries))
    self.logger.info('cache size at height {:,d}: {:,d}MB '
                     '(UTXOs {:,d}MB hist {:,d}MB)'
                     .format(self.height, total_MB, utxo_MB, hist_MB))
    return total_MB
+ now = time.time() + if now > self.next_cache_check: + self.next_cache_check = now + 60 + if self.cache_MB() > self.flush_MB: + self.flush() def process_tx(self, tx_hash, tx): cache = self.utxo_cache diff --git a/server/server.py b/server/server.py index e58f457..91117f9 100644 --- a/server/server.py +++ b/server/server.py @@ -74,18 +74,7 @@ class BlockCache(object): self.logger.info('catching up, block cache limit {:d}MB...' .format(self.cache_limit)) - last_log = 0 - prior_height = self.db.height while await self.maybe_prefill(): - now = time.time() - count = self.fetched_height - prior_height - if now > last_log + 15 and count: - last_log = now - prior_height = self.fetched_height - self.logger.info('prefilled {:,d} blocks to height {:,d} ' - 'daemon height: {:,d}' - .format(count, self.fetched_height, - self.daemon_height)) await asyncio.sleep(1) if not self.stop: