Browse Source

Speed up tx.py; giving ~ 5% higher tx/s in memory

master
Neil Booth 8 years ago
parent
commit
0ff579604c
  1. 23
      PERFORMANCE-NOTES
  2. 129
      lib/tx.py

23
PERFORMANCE-NOTES

@ -0,0 +1,23 @@
Just some notes on performance with Python 3.5. I am taking this into
account in the code.
- it is 60% faster to create lists with [] list comprehensions than
to create tuples or lists with tuple() or list(). Of those two,
list() is 10% faster than tuple().
- an implicit default argument is ~5% faster than passing the default
explicitly
- using a local variable x rather than self.x in loops and list
comprehensions is over 50% faster
- struct.pack, struct.unpack are over 60% faster than int.to_bytes and
int.from_bytes. They are faster little endian (presumably because
it matches the host) than big endian regardless of length.
- single-item list and tuple unpacking. Suppose b = (1, )
a, = b is about 0.4% faster than (a,) = b
and about 45% faster than a = b[0]
- multiple assignment is faster using tuples only for 3 or more items

129
lib/tx.py

@ -9,7 +9,7 @@
from collections import namedtuple from collections import namedtuple
import struct from struct import unpack_from
from lib.util import cachedproperty from lib.util import cachedproperty
from lib.hash import double_sha256, hash_to_str from lib.hash import double_sha256, hash_to_str
@ -58,7 +58,13 @@ class TxOutput(namedtuple("TxOutput", "value pk_script")):
class Deserializer(object): class Deserializer(object):
'''Deserializes blocks into transactions.''' '''Deserializes blocks into transactions.
External entry points are read_tx() and read_block().
This code is performance sensitive as it is executed 100s of
millions of times during sync.
'''
def __init__(self, binary): def __init__(self, binary):
assert isinstance(binary, bytes) assert isinstance(binary, bytes)
@ -67,82 +73,91 @@ class Deserializer(object):
def read_tx(self): def read_tx(self):
return Tx( return Tx(
self.read_le_int32(), # version self._read_le_int32(), # version
self.read_inputs(), # inputs self._read_inputs(), # inputs
self.read_outputs(), # outputs self._read_outputs(), # outputs
self.read_le_uint32() # locktime self._read_le_uint32() # locktime
) )
def read_block(self): def read_block(self):
tx_hashes = [] tx_hashes = []
txs = [] txs = []
tx_count = self.read_varint() binary = self.binary
for n in range(tx_count): hash = double_sha256
read_tx = self.read_tx
append_hash = tx_hashes.append
for n in range(self._read_varint()):
start = self.cursor start = self.cursor
tx = self.read_tx() txs.append(read_tx())
# Note this hash needs to be reversed for human display # Note this hash needs to be reversed for human display
# For efficiency we store it in the natural serialized order # For efficiency we store it in the natural serialized order
tx_hash = double_sha256(self.binary[start:self.cursor]) append_hash(hash(binary[start:self.cursor]))
tx_hashes.append(tx_hash) assert self.cursor == len(binary)
txs.append(tx)
return tx_hashes, txs return tx_hashes, txs
def read_inputs(self): def _read_inputs(self):
n = self.read_varint() read_input = self._read_input
return [self.read_input() for i in range(n)] return [read_input() for i in range(self._read_varint())]
def read_input(self): def _read_input(self):
return TxInput( return TxInput(
self.read_nbytes(32), # prev_hash self._read_nbytes(32), # prev_hash
self.read_le_uint32(), # prev_idx self._read_le_uint32(), # prev_idx
self.read_varbytes(), # script self._read_varbytes(), # script
self.read_le_uint32() # sequence self._read_le_uint32() # sequence
) )
def read_outputs(self): def _read_outputs(self):
n = self.read_varint() read_output = self._read_output
return [self.read_output() for i in range(n)] return [read_output() for i in range(self._read_varint())]
def read_output(self): def _read_output(self):
value = self.read_le_int64() return TxOutput(
pk_script = self.read_varbytes() self._read_le_int64(), # value
return TxOutput(value, pk_script) self._read_varbytes(), # pk_script
)
def read_nbytes(self, n): def _read_nbytes(self, n):
result = self.binary[self.cursor:self.cursor + n] cursor = self.cursor
self.cursor += n self.cursor = end = cursor + n
return result assert len(self.binary) >= end
return self.binary[cursor:end]
def read_varbytes(self): def _read_varbytes(self):
return self.read_nbytes(self.read_varint()) return self._read_nbytes(self._read_varint())
def read_varint(self): def _read_varint(self):
b = self.binary[self.cursor] n = self.binary[self.cursor]
self.cursor += 1 self.cursor += 1
if b < 253: if n < 253:
return b return n
if b == 253: if n == 253:
return self.read_le_uint16() return self._read_le_uint16()
if b == 254: if n == 254:
return self.read_le_uint32() return self._read_le_uint32()
return self.read_le_uint64() return self._read_le_uint64()
def read_le_int32(self): def _read_le_int32(self):
return self.read_format('<i') result, = unpack_from('<i', self.binary, self.cursor)
self.cursor += 4
def read_le_int64(self): return result
return self.read_format('<q')
def read_le_uint16(self): def _read_le_int64(self):
return self.read_format('<H') result, = unpack_from('<q', self.binary, self.cursor)
self.cursor += 8
return result
def read_le_uint32(self): def _read_le_uint16(self):
return self.read_format('<I') result, = unpack_from('<H', self.binary, self.cursor)
self.cursor += 2
return result
def read_le_uint64(self): def _read_le_uint32(self):
return self.read_format('<Q') result, = unpack_from('<I', self.binary, self.cursor)
self.cursor += 4
return result
def read_format(self, fmt): def _read_le_uint64(self):
(result,) = struct.unpack_from(fmt, self.binary, self.cursor) result, = unpack_from('<Q', self.binary, self.cursor)
self.cursor += struct.calcsize(fmt) self.cursor += 8
return result return result

Loading…
Cancel
Save