Browse Source

Minor optimizations to Bitcoin tx processing (#214)

* Construct unpacking functions for each byte format up front to save the time
otherwise wasted on parsing the format string on every call.

* Store the length of the binary ahead of time, as the overhead of repeated len() calls adds up.

* Reduce object attribute lookups in hash functions.

* Clean up lib pkg API changes. Unit test new "public" API members.
Use an underscore prefix for internal hash module funcs. Expose bytes.fromhex
as the public function hex_to_bytes.

* Document recent performance findings.
master
Justin Turner Arthur 7 years ago
committed by Neil
parent
commit
0c2e5c6368
  1. 16
      docs/PERFORMANCE-NOTES
  2. 15
      lib/hash.py
  3. 18
      lib/tx.py
  4. 10
      lib/util.py
  5. 4
      server/controller.py
  6. 10
      server/daemon.py
  7. 20
      tests/lib/test_util.py

16
docs/PERFORMANCE-NOTES

@ -1,4 +1,4 @@
Just some notes on performance with Python 3.5. I am taking this into Just some notes on performance with Python 3.5. We are taking these into
account in the code. account in the code.
- 60% faster to create lists with [] list comprehensions than tuples - 60% faster to create lists with [] list comprehensions than tuples
@ -16,11 +16,21 @@ account in the code.
- struct.pack, struct.unpack are over 60% faster than int.to_bytes and - struct.pack, struct.unpack are over 60% faster than int.to_bytes and
int.from_bytes. They are faster little endian (presumably because int.from_bytes. They are faster little endian (presumably because
it matches the host) than big endian regardless of length. it matches the host) than big endian regardless of length. Furthermore,
using stored packing and unpacking methods from Struct instances is faster
than using the flexible-format struct.[un]pack equivalents.
After storing the Struct('<Q').unpack_from function as unpack_uint64_from,
later calls to unpack_uint64_from(b, 0) are about 30% faster than calls to
unpack_from('<Q', b, 0).
- single-item list and tuple unpacking. Suppose b = (1, ) - single-item list and tuple unpacking. Suppose b = (1, )
a, = b is about 0.4% faster than (a,) = b a, = b is about 0.4% faster than (a,) = b
and about 45% faster than a = b[0] and about 45% faster than a = b[0]
- multiple assignment is faster using tuples only for 3 or more items - multiple assignment is faster using tuples only for 3 or more items
- retrieving a previously stored length of a bytes object can be over 200%
faster than a new call to len(b)

15
lib/hash.py

@ -29,17 +29,22 @@
import hashlib import hashlib
import hmac import hmac
from lib.util import bytes_to_int, int_to_bytes from lib.util import bytes_to_int, int_to_bytes, hex_to_bytes
_sha256 = hashlib.sha256
_sha512 = hashlib.sha512
_new_hash = hashlib.new
_new_hmac = hmac.new
def sha256(x): def sha256(x):
'''Simple wrapper of hashlib sha256.''' '''Simple wrapper of hashlib sha256.'''
return hashlib.sha256(x).digest() return _sha256(x).digest()
def ripemd160(x): def ripemd160(x):
'''Simple wrapper of hashlib ripemd160.''' '''Simple wrapper of hashlib ripemd160.'''
h = hashlib.new('ripemd160') h = _new_hash('ripemd160')
h.update(x) h.update(x)
return h.digest() return h.digest()
@ -51,7 +56,7 @@ def double_sha256(x):
def hmac_sha512(key, msg): def hmac_sha512(key, msg):
'''Use SHA-512 to provide an HMAC.''' '''Use SHA-512 to provide an HMAC.'''
return hmac.new(key, msg, hashlib.sha512).digest() return _new_hmac(key, msg, _sha512).digest()
def hash160(x): def hash160(x):
@ -73,7 +78,7 @@ hash_to_str = hash_to_hex_str
def hex_str_to_hash(x): def hex_str_to_hash(x):
'''Convert a displayed hex string to a binary hash.''' '''Convert a displayed hex string to a binary hash.'''
return bytes(reversed(bytes.fromhex(x))) return bytes(reversed(hex_to_bytes(x)))
class Base58Error(Exception): class Base58Error(Exception):

18
lib/tx.py

@ -29,10 +29,11 @@
from collections import namedtuple from collections import namedtuple
from struct import unpack_from
from lib.util import cachedproperty
from lib.hash import double_sha256, hash_to_str from lib.hash import double_sha256, hash_to_str
from lib.util import (cachedproperty, unpack_int32_from, unpack_int64_from,
unpack_uint16_from, unpack_uint32_from,
unpack_uint64_from)
class Tx(namedtuple("Tx", "version inputs outputs locktime")): class Tx(namedtuple("Tx", "version inputs outputs locktime")):
@ -78,6 +79,7 @@ class Deserializer(object):
def __init__(self, binary, start=0): def __init__(self, binary, start=0):
assert isinstance(binary, bytes) assert isinstance(binary, bytes)
self.binary = binary self.binary = binary
self.binary_length = len(binary)
self.cursor = start self.cursor = start
def read_tx(self): def read_tx(self):
@ -131,7 +133,7 @@ class Deserializer(object):
def _read_nbytes(self, n): def _read_nbytes(self, n):
cursor = self.cursor cursor = self.cursor
self.cursor = end = cursor + n self.cursor = end = cursor + n
assert len(self.binary) >= end assert self.binary_length >= end
return self.binary[cursor:end] return self.binary[cursor:end]
def _read_varbytes(self): def _read_varbytes(self):
@ -149,27 +151,27 @@ class Deserializer(object):
return self._read_le_uint64() return self._read_le_uint64()
def _read_le_int32(self): def _read_le_int32(self):
result, = unpack_from('<i', self.binary, self.cursor) result, = unpack_int32_from(self.binary, self.cursor)
self.cursor += 4 self.cursor += 4
return result return result
def _read_le_int64(self): def _read_le_int64(self):
result, = unpack_from('<q', self.binary, self.cursor) result, = unpack_int64_from(self.binary, self.cursor)
self.cursor += 8 self.cursor += 8
return result return result
def _read_le_uint16(self): def _read_le_uint16(self):
result, = unpack_from('<H', self.binary, self.cursor) result, = unpack_uint16_from(self.binary, self.cursor)
self.cursor += 2 self.cursor += 2
return result return result
def _read_le_uint32(self): def _read_le_uint32(self):
result, = unpack_from('<I', self.binary, self.cursor) result, = unpack_uint32_from(self.binary, self.cursor)
self.cursor += 4 self.cursor += 4
return result return result
def _read_le_uint64(self): def _read_le_uint64(self):
result, = unpack_from('<Q', self.binary, self.cursor) result, = unpack_uint64_from(self.binary, self.cursor)
self.cursor += 8 self.cursor += 8
return result return result

10
lib/util.py

@ -34,7 +34,7 @@ import logging
import re import re
import sys import sys
from collections import Container, Mapping from collections import Container, Mapping
from struct import pack from struct import pack, Struct
class LoggedClass(object): class LoggedClass(object):
@ -309,3 +309,11 @@ def protocol_version(client_req, server_min, server_max):
result = None result = None
return result return result
unpack_int32_from = Struct('<i').unpack_from
unpack_int64_from = Struct('<q').unpack_from
unpack_uint16_from = Struct('<H').unpack_from
unpack_uint32_from = Struct('<I').unpack_from
unpack_uint64_from = Struct('<Q').unpack_from
hex_to_bytes = bytes.fromhex

4
server/controller.py

@ -675,7 +675,7 @@ class Controller(util.LoggedClass):
'''Raise an RPCError if the value is not a valid transaction '''Raise an RPCError if the value is not a valid transaction
hash.''' hash.'''
try: try:
if len(bytes.fromhex(value)) == 32: if len(util.hex_to_bytes(value)) == 32:
return return
except Exception: except Exception:
pass pass
@ -898,7 +898,7 @@ class Controller(util.LoggedClass):
raw_tx = await self.daemon_request('getrawtransaction', tx_hash) raw_tx = await self.daemon_request('getrawtransaction', tx_hash)
if not raw_tx: if not raw_tx:
return None return None
raw_tx = bytes.fromhex(raw_tx) raw_tx = util.hex_to_bytes(raw_tx)
tx, tx_hash = self.coin.DESERIALIZER(raw_tx).read_tx() tx, tx_hash = self.coin.DESERIALIZER(raw_tx).read_tx()
if index >= len(tx.outputs): if index >= len(tx.outputs):
return None return None

10
server/daemon.py

@ -18,7 +18,7 @@ from time import strptime
import aiohttp import aiohttp
import lib.util as util from lib.util import LoggedClass, int_to_varint, hex_to_bytes
from lib.hash import hex_str_to_hash from lib.hash import hex_str_to_hash
@ -26,7 +26,7 @@ class DaemonError(Exception):
'''Raised when the daemon returns an error in its results.''' '''Raised when the daemon returns an error in its results.'''
class Daemon(util.LoggedClass): class Daemon(LoggedClass):
'''Handles connections to a daemon at the given URL.''' '''Handles connections to a daemon at the given URL.'''
WARMING_UP = -28 WARMING_UP = -28
@ -208,7 +208,7 @@ class Daemon(util.LoggedClass):
params_iterable = ((h, False) for h in hex_hashes) params_iterable = ((h, False) for h in hex_hashes)
blocks = await self._send_vector('getblock', params_iterable) blocks = await self._send_vector('getblock', params_iterable)
# Convert hex string to bytes # Convert hex string to bytes
return [bytes.fromhex(block) for block in blocks] return [hex_to_bytes(block) for block in blocks]
async def mempool_hashes(self): async def mempool_hashes(self):
'''Update our record of the daemon's mempool hashes.''' '''Update our record of the daemon's mempool hashes.'''
@ -240,7 +240,7 @@ class Daemon(util.LoggedClass):
txs = await self._send_vector('getrawtransaction', params_iterable, txs = await self._send_vector('getrawtransaction', params_iterable,
replace_errs=replace_errs) replace_errs=replace_errs)
# Convert hex strings to bytes # Convert hex strings to bytes
return [bytes.fromhex(tx) if tx else None for tx in txs] return [hex_to_bytes(tx) if tx else None for tx in txs]
async def sendrawtransaction(self, params): async def sendrawtransaction(self, params):
'''Broadcast a transaction to the network.''' '''Broadcast a transaction to the network.'''
@ -336,7 +336,7 @@ class LegacyRPCDaemon(Daemon):
raw_block = header raw_block = header
num_txs = len(transactions) num_txs = len(transactions)
if num_txs > 0: if num_txs > 0:
raw_block += util.int_to_varint(num_txs) raw_block += int_to_varint(num_txs)
raw_block += b''.join(transactions) raw_block += b''.join(transactions)
else: else:
raw_block += b'\x00' raw_block += b'\x00'

20
tests/lib/test_util.py

@ -57,6 +57,7 @@ def test_increment_byte_string():
assert util.increment_byte_string(b'\x01\x01') == b'\x01\x02' assert util.increment_byte_string(b'\x01\x01') == b'\x01\x02'
assert util.increment_byte_string(b'\xff\xff') is None assert util.increment_byte_string(b'\xff\xff') is None
def test_is_valid_hostname(): def test_is_valid_hostname():
is_valid_hostname = util.is_valid_hostname is_valid_hostname = util.is_valid_hostname
assert not is_valid_hostname('') assert not is_valid_hostname('')
@ -116,3 +117,22 @@ def test_protocol_version():
assert util.protocol_version(["0.8", "0.9"], "1.0", "1.1") is None assert util.protocol_version(["0.8", "0.9"], "1.0", "1.1") is None
assert util.protocol_version(["1.1", "1.2"], "1.0", "1.1") == (1, 1) assert util.protocol_version(["1.1", "1.2"], "1.0", "1.1") == (1, 1)
assert util.protocol_version(["1.2", "1.3"], "1.0", "1.1") is None assert util.protocol_version(["1.2", "1.3"], "1.0", "1.1") is None
def test_unpackers():
b = bytes(range(256))
assert util.unpack_int32_from(b, 0) == (50462976,)
assert util.unpack_int32_from(b, 42) == (757869354,)
assert util.unpack_int64_from(b, 0) == (506097522914230528,)
assert util.unpack_int64_from(b, 42) == (3544384782113450794,)
assert util.unpack_uint16_from(b, 0) == (256,)
assert util.unpack_uint16_from(b, 42) == (11050,)
assert util.unpack_uint32_from(b, 0) == (50462976,)
assert util.unpack_uint32_from(b, 42) == (757869354,)
assert util.unpack_uint64_from(b, 0) == (506097522914230528,)
assert util.unpack_uint64_from(b, 42) == (3544384782113450794,)
def test_hex_transforms():
h = "AABBCCDDEEFF"
assert util.hex_to_bytes(h) == b'\xaa\xbb\xcc\xdd\xee\xff'
Loading…
Cancel
Save