diff --git a/PERFORMANCE-NOTES b/PERFORMANCE-NOTES new file mode 100644 index 0000000..c65f4e6 --- /dev/null +++ b/PERFORMANCE-NOTES @@ -0,0 +1,23 @@ +Just some notes on performance with Python 3.5. I am taking this into +account in the code. + +- 60% faster to create lists with [] list comprehensions than tuples + or lists with tuple(), list(). Of those list is 10% faster than + tuple. + +- an implicit default argument is ~5% faster than passing the default + explicitly + +- using a local variable x rather than self.x in loops and list + comprehensions is over 50% faster + +- struct.pack, struct.unpack are over 60% faster than int.to_bytes and + int.from_bytes. They are faster little endian (presumably because + it matches the host) than big endian regardless of length. + +- single-item list and tuple unpacking. Suppose b = (1, ) + + a, = b is about 0.4% faster than (a,) = b + and about 45% faster than a = b[0] + +- multiple assignment is faster using tuples only for 3 or more items \ No newline at end of file diff --git a/lib/tx.py b/lib/tx.py index 7ea35f4..2fb86cd 100644 --- a/lib/tx.py +++ b/lib/tx.py @@ -9,7 +9,7 @@ from collections import namedtuple -import struct +from struct import unpack_from from lib.util import cachedproperty from lib.hash import double_sha256, hash_to_str @@ -58,7 +58,13 @@ class TxOutput(namedtuple("TxOutput", "value pk_script")): class Deserializer(object): - '''Deserializes blocks into transactions.''' + '''Deserializes blocks into transactions. + + External entry points are read_tx() and read_block(). + + This code is performance sensitive as it is executed 100s of + millions of times during sync. 
+ ''' def __init__(self, binary): assert isinstance(binary, bytes) @@ -67,82 +73,91 @@ class Deserializer(object): def read_tx(self): return Tx( - self.read_le_int32(), # version - self.read_inputs(), # inputs - self.read_outputs(), # outputs - self.read_le_uint32() # locktime + self._read_le_int32(), # version + self._read_inputs(), # inputs + self._read_outputs(), # outputs + self._read_le_uint32() # locktime ) def read_block(self): tx_hashes = [] txs = [] - tx_count = self.read_varint() - for n in range(tx_count): + binary = self.binary + hash = double_sha256 + read_tx = self.read_tx + append_hash = tx_hashes.append + for n in range(self._read_varint()): start = self.cursor - tx = self.read_tx() + txs.append(read_tx()) # Note this hash needs to be reversed for human display # For efficiency we store it in the natural serialized order - tx_hash = double_sha256(self.binary[start:self.cursor]) - tx_hashes.append(tx_hash) - txs.append(tx) + append_hash(hash(binary[start:self.cursor])) + assert self.cursor == len(binary) return tx_hashes, txs - def read_inputs(self): - n = self.read_varint() - return [self.read_input() for i in range(n)] + def _read_inputs(self): + read_input = self._read_input + return [read_input() for i in range(self._read_varint())] - def read_input(self): + def _read_input(self): return TxInput( - self.read_nbytes(32), # prev_hash - self.read_le_uint32(), # prev_idx - self.read_varbytes(), # script - self.read_le_uint32() # sequence + self._read_nbytes(32), # prev_hash + self._read_le_uint32(), # prev_idx + self._read_varbytes(), # script + self._read_le_uint32() # sequence ) - def read_outputs(self): - n = self.read_varint() - return [self.read_output() for i in range(n)] + def _read_outputs(self): + read_output = self._read_output + return [read_output() for i in range(self._read_varint())] - def read_output(self): - value = self.read_le_int64() - pk_script = self.read_varbytes() - return TxOutput(value, pk_script) + def _read_output(self): + 
return TxOutput( + self._read_le_int64(), # value + self._read_varbytes(), # pk_script + ) - def read_nbytes(self, n): - result = self.binary[self.cursor:self.cursor + n] - self.cursor += n - return result + def _read_nbytes(self, n): + cursor = self.cursor + self.cursor = end = cursor + n + assert len(self.binary) >= end + return self.binary[cursor:end] - def read_varbytes(self): - return self.read_nbytes(self.read_varint()) + def _read_varbytes(self): + return self._read_nbytes(self._read_varint()) - def read_varint(self): - b = self.binary[self.cursor] + def _read_varint(self): + n = self.binary[self.cursor] self.cursor += 1 - if b < 253: - return b - if b == 253: - return self.read_le_uint16() - if b == 254: - return self.read_le_uint32() - return self.read_le_uint64() - - def read_le_int32(self): - return self.read_format('