Browse Source

gossipd: use exponential backoff on reconnect for important peers.

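The per-peer wait starts at 1 second and is doubled on every failed connect before the retry timer is armed (so the first retry happens after 2 seconds), up to a cap of 5 minutes. It is reset to 1 second in destroy_peer.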

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
ppa-0.6.1
Rusty Russell 7 years ago
parent
commit
a134ca9659
  1. 22
      gossipd/gossip.c
  2. 12
      tests/test_lightningd.py

22
gossipd/gossip.c

@ -53,11 +53,17 @@
#define HSM_FD 3 #define HSM_FD 3
#define INITIAL_WAIT_SECONDS 1
#define MAX_WAIT_SECONDS 300
/* We put everything in this struct (redundantly) to pass it to timer cb */ /* We put everything in this struct (redundantly) to pass it to timer cb */
struct important_peerid { struct important_peerid {
struct daemon *daemon; struct daemon *daemon;
struct pubkey id; struct pubkey id;
/* How long to wait after failed connect */
unsigned int wait_seconds;
}; };
/* We keep a set of peer ids we're always trying to reach. */ /* We keep a set of peer ids we're always trying to reach. */
@ -217,8 +223,10 @@ static void destroy_peer(struct peer *peer)
list_del_from(&peer->daemon->peers, &peer->list); list_del_from(&peer->daemon->peers, &peer->list);
imp = important_peerid_map_get(&peer->daemon->important_peerids, imp = important_peerid_map_get(&peer->daemon->important_peerids,
&peer->id); &peer->id);
if (imp) if (imp) {
imp->wait_seconds = INITIAL_WAIT_SECONDS;
retry_important(imp); retry_important(imp);
}
} }
static struct peer *find_peer(struct daemon *daemon, const struct pubkey *id) static struct peer *find_peer(struct daemon *daemon, const struct pubkey *id)
@ -1707,11 +1715,16 @@ static void connect_failed(struct io_conn *conn, struct reaching *reach)
imp = important_peerid_map_get(&reach->daemon->important_peerids, imp = important_peerid_map_get(&reach->daemon->important_peerids,
&reach->id); &reach->id);
if (imp) { if (imp) {
/* FIXME: Exponential backoff! */ imp->wait_seconds *= 2;
status_trace("...will try again in %u seconds", 5); if (imp->wait_seconds > MAX_WAIT_SECONDS)
imp->wait_seconds = MAX_WAIT_SECONDS;
status_trace("...will try again in %u seconds",
imp->wait_seconds);
/* If important_id freed, this will be removed too */ /* If important_id freed, this will be removed too */
new_reltimer(&reach->daemon->timers, imp, new_reltimer(&reach->daemon->timers, imp,
time_from_sec(5), retry_important, imp); time_from_sec(imp->wait_seconds),
retry_important, imp);
} }
tal_free(reach); tal_free(reach);
return; return;
@ -1928,6 +1941,7 @@ static struct io_plan *peer_important(struct io_conn *conn,
imp = tal(daemon, struct important_peerid); imp = tal(daemon, struct important_peerid);
imp->id = id; imp->id = id;
imp->daemon = daemon; imp->daemon = daemon;
imp->wait_seconds = INITIAL_WAIT_SECONDS;
important_peerid_map_add(&daemon->important_peerids, important_peerid_map_add(&daemon->important_peerids,
imp); imp);
/* Start trying to reach it now. */ /* Start trying to reach it now. */

12
tests/test_lightningd.py

@ -666,9 +666,19 @@ class LightningDTests(BaseLightningDTests):
"Connection refused", "Connection refused",
l1.rpc.connect, l2.info['id'], 'localhost', l2.info['port']) l1.rpc.connect, l2.info['id'], 'localhost', l2.info['port'])
# Wait for exponential backoff to give us a 2 second window.
l1.daemon.wait_for_log('...will try again in 2 seconds')
# It should now succeed when it restarts. # It should now succeed when it restarts.
l2.daemon.start() l2.daemon.start()
l1.rpc.connect(l2.info['id'], 'localhost', l2.info['port'])
# Multiples should be fine!
fut1 = self.executor.submit(l1.rpc.connect, l2.info['id'], 'localhost', l2.info['port'])
fut2 = self.executor.submit(l1.rpc.connect, l2.info['id'], 'localhost', l2.info['port'])
fut3 = self.executor.submit(l1.rpc.connect, l2.info['id'], 'localhost', l2.info['port'])
fut1.result(10)
fut2.result(10)
fut3.result(10)
def test_balance(self): def test_balance(self):
l1, l2 = self.connect() l1, l2 = self.connect()

Loading…
Cancel
Save