Browse Source

gossipd: use exponential backoff on reconnect for important peers.

We start at 1 second, back off to 5 minutes.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
ppa-0.6.1
Rusty Russell 7 years ago
parent
commit
a134ca9659
  1. 22
      gossipd/gossip.c
  2. 12
      tests/test_lightningd.py

22
gossipd/gossip.c

@ -53,11 +53,17 @@
#define HSM_FD 3
#define INITIAL_WAIT_SECONDS 1
#define MAX_WAIT_SECONDS 300
/* We put everything in this struct (redundantly) to pass it to timer cb.
 * An instance is allocated and added to the daemon's important_peerids
 * map for a peer id we want to keep reaching. */
struct important_peerid {
/* Owning daemon; stored here so a callback that receives only this
 * struct can still get at it. */
struct daemon *daemon;
/* Public key of the peer; entries in important_peerids are looked up
 * by this id. */
struct pubkey id;
/* How long to wait after failed connect.  Starts at
 * INITIAL_WAIT_SECONDS, is doubled on each failed connect (capped at
 * MAX_WAIT_SECONDS), and is reset to INITIAL_WAIT_SECONDS when the
 * peer is destroyed and we retry. */
unsigned int wait_seconds;
};
/* We keep a set of peer ids we're always trying to reach. */
@ -217,9 +223,11 @@ static void destroy_peer(struct peer *peer)
list_del_from(&peer->daemon->peers, &peer->list);
imp = important_peerid_map_get(&peer->daemon->important_peerids,
&peer->id);
if (imp)
if (imp) {
imp->wait_seconds = INITIAL_WAIT_SECONDS;
retry_important(imp);
}
}
static struct peer *find_peer(struct daemon *daemon, const struct pubkey *id)
{
@ -1707,11 +1715,16 @@ static void connect_failed(struct io_conn *conn, struct reaching *reach)
imp = important_peerid_map_get(&reach->daemon->important_peerids,
&reach->id);
if (imp) {
/* FIXME: Exponential backoff! */
status_trace("...will try again in %u seconds", 5);
imp->wait_seconds *= 2;
if (imp->wait_seconds > MAX_WAIT_SECONDS)
imp->wait_seconds = MAX_WAIT_SECONDS;
status_trace("...will try again in %u seconds",
imp->wait_seconds);
/* If important_id freed, this will be removed too */
new_reltimer(&reach->daemon->timers, imp,
time_from_sec(5), retry_important, imp);
time_from_sec(imp->wait_seconds),
retry_important, imp);
}
tal_free(reach);
return;
@ -1928,6 +1941,7 @@ static struct io_plan *peer_important(struct io_conn *conn,
imp = tal(daemon, struct important_peerid);
imp->id = id;
imp->daemon = daemon;
imp->wait_seconds = INITIAL_WAIT_SECONDS;
important_peerid_map_add(&daemon->important_peerids,
imp);
/* Start trying to reach it now. */

12
tests/test_lightningd.py

@ -666,9 +666,19 @@ class LightningDTests(BaseLightningDTests):
"Connection refused",
l1.rpc.connect, l2.info['id'], 'localhost', l2.info['port'])
# Wait for exponential backoff to give us a 2 second window.
l1.daemon.wait_for_log('...will try again in 2 seconds')
# It should now succeed when it restarts.
l2.daemon.start()
l1.rpc.connect(l2.info['id'], 'localhost', l2.info['port'])
# Multiples should be fine!
fut1 = self.executor.submit(l1.rpc.connect, l2.info['id'], 'localhost', l2.info['port'])
fut2 = self.executor.submit(l1.rpc.connect, l2.info['id'], 'localhost', l2.info['port'])
fut3 = self.executor.submit(l1.rpc.connect, l2.info['id'], 'localhost', l2.info['port'])
fut1.result(10)
fut2.result(10)
fut3.result(10)
def test_balance(self):
l1, l2 = self.connect()

Loading…
Cancel
Save