Make mempool processing more properly asynchronous

7 years ago · 9bd9476a54
3 changed files with 91 additions and 100 deletions
--- a/electrumx/server/controller.py
+++ b/electrumx/server/controller.py
@ -99,7 +99,7 @@ class Controller(ServerBase):
        BlockProcessor = env.coin.BLOCK_PROCESSOR
        self.bp = BlockProcessor(env, self.tasks, daemon, notifications)
        self.mempool = MemPool(env.coin, self.tasks, daemon, notifications,
-                               self.bp.db_utxo_lookup)
+                               self.bp.lookup_utxos)
        self.chain_state = ChainState(env, self.tasks, daemon, self.bp,
                                      notifications)
        self.peer_mgr = PeerManager(env, self.tasks, self.chain_state)
--- a/electrumx/server/db.py
+++ b/electrumx/server/db.py
@ -35,9 +35,6 @@ class DB(object):

    DB_VERSIONS = [6]

-    class MissingUTXOError(Exception):
-        '''Raised if a mempool tx input UTXO couldn't be found.'''
-
    class DBError(Exception):
        '''Raised on general DB errors generally indicating corruption.'''

@ -399,43 +396,52 @@ class DB(object):
            tx_hash, height = self.fs_tx_hash(tx_num)
            yield UTXO(tx_num, tx_pos, tx_hash, height, value)

-    def db_utxo_lookup(self, tx_hash, tx_idx):
-        '''Given a prevout return a (hashX, value) pair.
+    async def lookup_utxos(self, prevouts):
+        '''For each prevout, lookup it up in the DB and return a (hashX,
+        value) pair or None if not found.

-        Raises MissingUTXOError if the UTXO is not found.  Used by the
-        mempool code.
+        Used by the mempool code.
        '''
-        idx_packed = pack('<H', tx_idx)
-        hashX, tx_num_packed = self._db_hashX(tx_hash, idx_packed)
-        if not hashX:
-            # This can happen when the daemon is a block ahead of us
-            # and has mempool txs spending outputs from that new block
-            raise self.MissingUTXOError
-
-        # Key: b'u' + address_hashX + tx_idx + tx_num
-        # Value: the UTXO value as a 64-bit unsigned integer
-        key = b'u' + hashX + idx_packed + tx_num_packed
-        db_value = self.utxo_db.get(key)
-        if not db_value:
-            raise self.DBError('UTXO {} / {:,d} in one table only'
-                               .format(hash_to_hex_str(tx_hash), tx_idx))
-        value, = unpack('<Q', db_value)
-        return hashX, value
-
-    def _db_hashX(self, tx_hash, idx_packed):
-        '''Return (hashX, tx_num_packed) for the given TXO.
-
-        Both are None if not found.'''
-        # Key: b'h' + compressed_tx_hash + tx_idx + tx_num
-        # Value: hashX
-        prefix = b'h' + tx_hash[:4] + idx_packed
-
-        # Find which entry, if any, the TX_HASH matches.
-        for db_key, hashX in self.utxo_db.iterator(prefix=prefix):
-            tx_num_packed = db_key[-4:]
-            tx_num, = unpack('<I', tx_num_packed)
-            hash, height = self.fs_tx_hash(tx_num)
-            if hash == tx_hash:
-                return hashX, tx_num_packed
-
-        return None, None
+        def lookup_hashXs():
+            '''Return (hashX, suffix) pairs, or None if not found,
+            for each prevout.
+            '''
+            def lookup_hashX(tx_hash, tx_idx):
+                idx_packed = pack('<H', tx_idx)
+
+                # Key: b'h' + compressed_tx_hash + tx_idx + tx_num
+                # Value: hashX
+                prefix = b'h' + tx_hash[:4] + idx_packed
+
+                # Find which entry, if any, the TX_HASH matches.
+                for db_key, hashX in self.utxo_db.iterator(prefix=prefix):
+                    tx_num_packed = db_key[-4:]
+                    tx_num, = unpack('<I', tx_num_packed)
+                    hash, height = self.fs_tx_hash(tx_num)
+                    if hash == tx_hash:
+                        return hashX, idx_packed + tx_num_packed
+                return None, None
+            return [lookup_hashX(*prevout) for prevout in prevouts]
+
+        def lookup_utxos(hashX_pairs):
+            def lookup_utxo(hashX, suffix):
+                if not hashX:
+                    # This can happen when the daemon is a block ahead
+                    # of us and has mempool txs spending outputs from
+                    # that new block
+                    return None
+                # Key: b'u' + address_hashX + tx_idx + tx_num
+                # Value: the UTXO value as a 64-bit unsigned integer
+                key = b'u' + hashX + suffix
+                db_value = self.utxo_db.get(key)
+                if not db_value:
+                    # This can happen if the DB was updated between
+                    # getting the hashXs and getting the UTXOs
+                    return None
+                value, = unpack('<Q', db_value)
+                return hashX, value
+            return [lookup_utxo(*hashX_pair) for hashX_pair in hashX_pairs]
+
+        run_in_thread = self.tasks.run_in_thread
+        hashX_pairs = await run_in_thread(lookup_hashXs)
+        return await run_in_thread(lookup_utxos, hashX_pairs)
--- a/electrumx/server/mempool.py
+++ b/electrumx/server/mempool.py
@ -14,7 +14,7 @@ from collections import defaultdict

 from electrumx.lib.hash import hash_to_hex_str, hex_str_to_hash
 from electrumx.lib.util import class_logger
-from electrumx.server.db import UTXO, DB
+from electrumx.server.db import UTXO


 class MemPool(object):
@ -31,10 +31,10 @@ class MemPool(object):
    A pair is a (hashX, value) tuple.  tx hashes are hex strings.
    '''

-    def __init__(self, coin, tasks, daemon, notifications, utxo_lookup):
+    def __init__(self, coin, tasks, daemon, notifications, lookup_utxos):
        self.logger = class_logger(__name__, self.__class__.__name__)
        self.coin = coin
-        self.utxo_lookup = utxo_lookup
+        self.lookup_utxos = lookup_utxos
        self.tasks = tasks
        self.daemon = daemon
        self.notifications = notifications
@ -142,7 +142,6 @@ class MemPool(object):
    def _async_process_some(self, limit):
        pending = []
        txs = self.txs
-        fee_hist = self.fee_histogram

        async def process(unprocessed, touched):
            nonlocal pending
@ -159,21 +158,8 @@ class MemPool(object):
                deferred = pending
                pending = []

-            result, deferred = await self.tasks.run_in_thread(
-                self._process_raw_txs, raw_txs, deferred)
-
+            deferred = await self._process_raw_txs(raw_txs, deferred, touched)
            pending.extend(deferred)
-            hashXs = self.hashXs
-            for hex_hash, item in result.items():
-                if hex_hash in txs:
-                    txs[hex_hash] = item
-                    txin_pairs, txout_pairs, tx_fee, tx_size = item
-                    fee_rate = tx_fee // tx_size
-                    fee_hist[fee_rate] += tx_size
-                    for hashX, value in itertools.chain(txin_pairs,
-                                                        txout_pairs):
-                        touched.add(hashX)
-                        hashXs[hashX].add(hex_hash)

        return process

@ -185,22 +171,15 @@ class MemPool(object):
        # evicted or they got in a block.
        return {hh: raw for hh, raw in zip(hex_hashes, raw_txs) if raw}

-    def _process_raw_txs(self, raw_tx_map, pending):
+    async def _process_raw_txs(self, raw_tx_map, pending, touched):
        '''Process the dictionary of raw transactions and return a dictionary
        of updates to apply to self.txs.
-
-        This runs in the executor so should not update any member
-        variables it doesn't own.  Atomic reads of self.txs that do
-        not depend on the result remaining the same are fine.
        '''
        script_hashX = self.coin.hashX_from_script
        deserializer = self.coin.DESERIALIZER
-        txs = self.txs

        # Deserialize each tx and put it in a pending list
        for tx_hash, raw_tx in raw_tx_map.items():
-            if tx_hash not in txs:
-                continue
            tx, tx_size = deserializer(raw_tx).read_tx_and_vsize()

            # Convert the tx outputs into (hashX, value) pairs
@ -213,48 +192,54 @@ class MemPool(object):

            pending.append((tx_hash, txin_pairs, txout_pairs, tx_size))

-        # Now process what we can
-        result = {}
+        # The transaction inputs can be from other mempool
+        # transactions (which may or may not be processed yet) or are
+        # otherwise presumably in the DB.
+        txs = self.txs
+        db_prevouts = [(hex_str_to_hash(prev_hash), prev_idx)
+                       for item in pending
+                       for (prev_hash, prev_idx) in item[1]
+                       if prev_hash not in txs]
+
+        # If a lookup fails, it returns a None entry
+        db_utxos = await self.lookup_utxos(db_prevouts)
+        db_utxo_map = {(hash_to_hex_str(prev_hash), prev_idx): db_utxo
+                       for (prev_hash, prev_idx), db_utxo
+                       in zip(db_prevouts, db_utxos)}
+
        deferred = []
-        utxo_lookup = self.utxo_lookup
+        hashXs = self.hashXs
+        fee_hist = self.fee_histogram

        for item in pending:
-            tx_hash, old_txin_pairs, txout_pairs, tx_size = item
+            tx_hash, previns, txout_pairs, tx_size = item
            if tx_hash not in txs:
                continue

-            mempool_missing = False
            txin_pairs = []
-
            try:
-                for prev_hex_hash, prev_idx in old_txin_pairs:
-                    tx_info = txs.get(prev_hex_hash, 0)
-                    if tx_info is None:
-                        tx_info = result.get(prev_hex_hash)
-                        if not tx_info:
-                            mempool_missing = True
-                            continue
-                    if tx_info:
-                        txin_pairs.append(tx_info[1][prev_idx])
-                    elif not mempool_missing:
-                        prev_hash = hex_str_to_hash(prev_hex_hash)
-                        txin_pairs.append(utxo_lookup(prev_hash, prev_idx))
-            except (DB.MissingUTXOError, DB.DBError):
-                # DBError can happen when flushing a newly processed
-                # block.  MissingUTXOError typically happens just
-                # after the daemon has accepted a new block and the
-                # new mempool has deps on new txs in that block.
-                continue
-
-            if mempool_missing:
+                for previn in previns:
+                    utxo = db_utxo_map.get(previn)
+                    if not utxo:
+                        prev_hash, prev_index = previn
+                        # This can raise a KeyError or TypeError
+                        utxo = txs[prev_hash][1][prev_index]
+                    txin_pairs.append(utxo)
+            except (KeyError, TypeError):
                deferred.append(item)
-            else:
-                # Compute fee
-                tx_fee = (sum(v for hashX, v in txin_pairs) -
-                          sum(v for hashX, v in txout_pairs))
-                result[tx_hash] = (txin_pairs, txout_pairs, tx_fee, tx_size)
+                continue

-        return result, deferred
+            # Compute fee
+            tx_fee = (sum(v for hashX, v in txin_pairs) -
+                      sum(v for hashX, v in txout_pairs))
+            fee_rate = tx_fee // tx_size
+            fee_hist[fee_rate] += tx_size
+            txs[tx_hash] = (txin_pairs, txout_pairs, tx_fee, tx_size)
+            for hashX, value in itertools.chain(txin_pairs, txout_pairs):
+                touched.add(hashX)
+                hashXs[hashX].add(tx_hash)
+
+        return deferred

    async def _raw_transactions(self, hashX):
        '''Returns an iterable of (hex_hash, raw_tx) pairs for all