From 521c7f71211653dc7ebb58562d2db13450655aa3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 8 Oct 2019 11:46:24 +1030 Subject: [PATCH] seeker: take over gossip control. The seeker starts by asking a peer (the first peer!) for all gossip since a minute before the modified time of the gossip store. This algorithm is enhanced in successive patches. Signed-off-by: Rusty Russell --- gossipd/gossipd.c | 101 ++---------- gossipd/gossipd.h | 23 +-- gossipd/seeker.c | 258 ++++++++++++++++++++++++------- gossipd/seeker.h | 10 +- gossipd/test/run-extended-info.c | 3 + 5 files changed, 224 insertions(+), 171 deletions(-) diff --git a/gossipd/gossipd.c b/gossipd/gossipd.c index 14f6cb61d..a1b7386f9 100644 --- a/gossipd/gossipd.c +++ b/gossipd/gossipd.c @@ -72,13 +72,6 @@ static bool suppress_gossip = false; #endif -/* What are our targets for each gossip level? (including levels above). - * - * If we're missing gossip: 3 high. - * Otherwise, 2 medium, and 8 low. Rest no limit.. - */ -static const size_t gossip_level_targets[] = { 3, 2, 8, SIZE_MAX }; - /*~ A channel consists of a `struct half_chan` for each direction, each of * which has a `flags` word from the `channel_update`; bit 1 is * ROUTING_FLAGS_DISABLED in the `channel_update`. But we also keep a local @@ -156,55 +149,6 @@ void queue_peer_from_store(struct peer *peer, queue_peer_msg(peer, take(gossip_store_get(NULL, gs, bcast->index))); } -/*~ We have different levels of gossipiness, depending on our needs. */ -static u32 gossip_start(const struct routing_state *rstate, - enum gossip_level gossip_level) -{ - switch (gossip_level) { - case GOSSIP_HIGH: - return 0; - case GOSSIP_MEDIUM: - return gossip_time_now(rstate).ts.tv_sec - 24 * 3600; - case GOSSIP_LOW: - return gossip_time_now(rstate).ts.tv_sec; - case GOSSIP_NONE: - return UINT32_MAX; - } - abort(); -} - -/* BOLT #7: - * - * A node: - * - if the `gossip_queries` feature is negotiated: - * - MUST NOT relay any gossip messages unless explicitly requested. - */ -void setup_gossip_range(struct peer *peer) -{ - u8 *msg; - - /*~ Without the `gossip_queries` feature, gossip flows automatically. */ - if (!peer->gossip_queries_feature) { - /* This peer is gossipy whether we want it or not! */ - return; - } - - status_debug("Setting peer %s to gossip level %s", - type_to_string(tmpctx, struct node_id, &peer->id), - peer->gossip_level == GOSSIP_HIGH ? "HIGH" - : peer->gossip_level == GOSSIP_MEDIUM ? "MEDIUM" - : peer->gossip_level == GOSSIP_LOW ? "LOW" - : peer->gossip_level == GOSSIP_NONE ? "NONE" - : "INVALID"); - /*~ We need to ask for something to start the gossip flowing. */ - msg = towire_gossip_timestamp_filter(peer, - &peer->daemon->chain_hash, - gossip_start(peer->daemon->rstate, - peer->gossip_level), - UINT32_MAX); - queue_peer_msg(peer, take(msg)); -} - /*~ We don't actually keep node_announcements in memory; we keep them in * a file called `gossip_store`. If we need some node details, we reload * and reparse. It's slow, but generally rare. */ @@ -557,39 +501,22 @@ done: return daemon_conn_read_next(conn, peer->dc); } -/* What gossip level do we set for this to meet our target? */ -enum gossip_level peer_gossip_level(const struct daemon *daemon, - bool gossip_queries_feature) +/* If we have many peers, we don't ask them all to gossip. */ +static bool peer_should_gossip(const struct daemon *daemon) { struct peer *peer; - size_t gossip_levels[ARRAY_SIZE(gossip_level_targets)]; - enum gossip_level glevel; - - /* Old peers always give us a flood. */ - if (!gossip_queries_feature) - return GOSSIP_HIGH; + size_t n_gossipers = 0; #if DEVELOPER /* Don't ask new peers for new gossip is dev-suppress-gossip has been set*/ if (suppress_gossip) - return GOSSIP_NONE; + return false; #endif - /* Figure out how many we have at each level. */ - memset(gossip_levels, 0, sizeof(gossip_levels)); list_for_each(&daemon->peers, peer, list) - gossip_levels[peer->gossip_level]++; - - /* If we're missing gossip, try to fill GOSSIP_HIGH */ - if (seeker_gossip(daemon->seeker)) - glevel = GOSSIP_HIGH; - else - glevel = GOSSIP_MEDIUM; + n_gossipers += peer->gossip_enabled; - while (gossip_levels[glevel] >= gossip_level_targets[glevel]) - glevel++; - - return glevel; + return n_gossipers < 8; } /*~ This is where connectd tells us about a new peer, and we hand back an fd for @@ -648,8 +575,9 @@ static struct io_plan *connectd_new_peer(struct io_conn *conn, peer->query_channel_blocks = NULL; peer->query_channel_range_cb = NULL; peer->num_pings_outstanding = 0; - peer->gossip_level = peer_gossip_level(daemon, - peer->gossip_queries_feature); + /* We can't disable gossip if it doesn't support queries! */ + peer->gossip_enabled = peer_should_gossip(daemon) + || !peer->gossip_queries_feature; /* We keep a list so we can find peer by id */ list_add_tail(&peer->daemon->peers, &peer->list); @@ -665,7 +593,7 @@ static struct io_plan *connectd_new_peer(struct io_conn *conn, /* This sends the initial timestamp filter (wait until we're synced!). */ if (daemon->current_blockheight) - setup_gossip_range(peer); + seeker_setup_peer_gossip(daemon->seeker, peer); /* BOLT #7: * @@ -911,9 +839,6 @@ static struct io_plan *gossip_init(struct io_conn *conn, /* Load stored gossip messages */ timestamp = gossip_store_load(daemon->rstate, daemon->rstate->gs); - /* If gossip_store less than 24 hours old, say we're OK. */ - if (timestamp < gossip_time_now(daemon->rstate).ts.tv_sec - 24*3600) - gossip_missing(daemon, daemon->seeker); /* Now disable all local channels, they can't be connected yet. */ gossip_disable_local_channels(daemon); @@ -927,6 +852,9 @@ static struct io_plan *gossip_init(struct io_conn *conn, time_from_sec(GOSSIP_PRUNE_INTERVAL(daemon->rstate->dev_fast_gossip_prune) / 4), gossip_refresh_network, daemon)); + /* Fire up the seeker! */ + daemon->seeker = new_seeker(daemon, timestamp); + return daemon_conn_read_next(conn, daemon->master); } @@ -1354,7 +1282,7 @@ static struct io_plan *new_blockheight(struct io_conn *conn, struct peer *peer; list_for_each(&daemon->peers, peer, list) - setup_gossip_range(peer); + seeker_setup_peer_gossip(daemon->seeker, peer); } return daemon_conn_read_next(conn, daemon->master); @@ -1724,7 +1652,6 @@ int main(int argc, char *argv[]) daemon = tal(NULL, struct daemon); list_head_init(&daemon->peers); - daemon->seeker = new_seeker(daemon); daemon->deferred_txouts = tal_arr(daemon, struct short_channel_id, 0); daemon->node_announce_timer = NULL; daemon->current_blockheight = 0; /* i.e. unknown */ diff --git a/gossipd/gossipd.h b/gossipd/gossipd.h index 393276b93..0e5d102a2 100644 --- a/gossipd/gossipd.h +++ b/gossipd/gossipd.h @@ -64,18 +64,6 @@ struct daemon { struct seeker *seeker; }; -/*~ How gossipy do we ask a peer to be? */ -enum gossip_level { - /* Give us everything since epoch */ - GOSSIP_HIGH, - /* Give us everything from 24 hours ago. */ - GOSSIP_MEDIUM, - /* Give us everything from now. */ - GOSSIP_LOW, - /* Give us nothing. */ - GOSSIP_NONE, -}; - /* This represents each peer we're gossiping with */ struct peer { /* daemon->peers */ @@ -120,8 +108,8 @@ struct peer { const struct short_channel_id *scids, bool complete); - /* Are we asking this peer to give us lot of gossip? */ - enum gossip_level gossip_level; + /* Are we asking this peer to give us gossip? */ + bool gossip_enabled; /* The daemon_conn used to queue messages to/from the peer. */ struct daemon_conn *dc; @@ -137,10 +125,6 @@ void peer_supplied_good_gossip(struct peer *peer); struct peer *random_peer(struct daemon *daemon, bool (*check_peer)(const struct peer *peer)); -/* Extract gossip level for this peer */ -enum gossip_level peer_gossip_level(const struct daemon *daemon, - bool gossip_queries_feature); - /* Queue a gossip message for the peer: the subdaemon on the other end simply * forwards it to the peer. */ void queue_peer_msg(struct peer *peer, const u8 *msg TAKES); @@ -153,4 +137,7 @@ void queue_peer_from_store(struct peer *peer, /* Reset gossip range for this peer. */ void setup_gossip_range(struct peer *peer); +/* A peer has given us these short channel ids: see if we need to catch up */ +void process_scids(struct daemon *daemon, const struct short_channel_id *scids); + #endif /* LIGHTNING_GOSSIPD_GOSSIPD_H */ diff --git a/gossipd/seeker.c b/gossipd/seeker.c index a3d6991f5..37261e5d4 100644 --- a/gossipd/seeker.c +++ b/gossipd/seeker.c @@ -7,88 +7,231 @@ #include #include #include +#include #include +#include + +#define GOSSIP_SEEKER_INTERVAL(seeker) \ + DEV_FAST_GOSSIP((seeker)->daemon->rstate->dev_fast_gossip, 5, 60) + +enum seeker_state { + /* First initialized, no peers. */ + STARTING_UP_NEED_PEER, + + /* Still streaming gossip from single peer. */ + STARTING_UP_FIRSTPEER, + + /* Normal running. */ + NORMAL, +}; /* Gossip we're seeking at the moment. */ struct seeker { - /* Do we think we're missing gossip? Contains timer to re-check */ - struct oneshot *gossip_missing; + struct daemon *daemon; + + enum seeker_state state; + + /* Timer which checks on progress every minute */ + struct oneshot *check_timer; /* Channels we've heard about, but don't know. */ struct short_channel_id *unknown_scids; + + /* Timestamp of gossip store (or 0). */ + u32 last_gossip_timestamp; + + /* During startup, we ask a single peer for gossip. */ + struct peer *random_peer_softref; + + /* This checks progress of our random peer during startup */ + size_t prev_gossip_count; }; -struct seeker *new_seeker(struct daemon *daemon) +/* Mutual recursion */ +static void seeker_check(struct seeker *seeker); + +static void begin_check_timer(struct seeker *seeker) +{ + const u32 polltime = GOSSIP_SEEKER_INTERVAL(seeker); + + seeker->check_timer = new_reltimer(&seeker->daemon->timers, + seeker, + time_from_sec(polltime), + seeker_check, seeker); +} + +struct seeker *new_seeker(struct daemon *daemon, u32 timestamp) { struct seeker *seeker = tal(daemon, struct seeker); - seeker->gossip_missing = NULL; - seeker->unknown_scids = tal_arr(seeker, struct short_channel_id, 0); + seeker->daemon = daemon; + seeker->unknown_scids = tal_arr(seeker, struct short_channel_id, 0); + seeker->last_gossip_timestamp = timestamp; + seeker->state = STARTING_UP_NEED_PEER; + begin_check_timer(seeker); return seeker; } +/* Set this peer as our random peer; return false if NULL. */ +static bool selected_peer(struct seeker *seeker, struct peer *peer) +{ + if (!peer) + return false; + + set_softref(seeker, &seeker->random_peer_softref, peer); + + /* Give it some grace in case we immediately hit timer */ + seeker->prev_gossip_count + = peer->gossip_counter - GOSSIP_SEEKER_INTERVAL(seeker); + return true; +} -/*~ This is a timer, which goes off 10 minutes after the last time we noticed - * that gossip was missing. */ -static void gossip_not_missing(struct daemon *daemon) +static bool peer_made_progress(struct seeker *seeker) { - struct seeker *seeker = daemon->seeker; - - /* Corner case: no peers, try again! */ - if (list_empty(&daemon->peers)) - gossip_missing(daemon, daemon->seeker); - else { - struct peer *peer; - - seeker->gossip_missing = tal_free(seeker->gossip_missing); - status_info("We seem to be caught up on gossip messages"); - /* Free any lagging/stale unknown scids. */ - seeker->unknown_scids = tal_free(seeker->unknown_scids); - - /* Reset peers we marked as HIGH */ - list_for_each(&daemon->peers, peer, list) { - if (peer->gossip_level != GOSSIP_HIGH) - continue; - if (!peer->gossip_queries_feature) - continue; - peer->gossip_level = peer_gossip_level(daemon, true); - setup_gossip_range(peer); - } + const struct peer *peer = seeker->random_peer_softref; + + /* Has it made progress (at least one valid update per second)? If + * not, we assume it's finished, and if it hasn't, we'll end up + * querying backwards in next steps. */ + if (peer->gossip_counter + >= seeker->prev_gossip_count + GOSSIP_SEEKER_INTERVAL(seeker)) { + seeker->prev_gossip_count = peer->gossip_counter; + return true; } + + return false; } -static bool peer_is_not_gossip_high(const struct peer *peer) +static void normal_gossip_start(struct seeker *seeker, struct peer *peer) { - return peer->gossip_level != GOSSIP_HIGH; + u32 start; + u8 *msg; + + /* FIXME: gets the last minute of gossip, works around our current + * lack of discovery if we're missing gossip. */ + if (peer->gossip_enabled) + start = time_now().ts.tv_sec - 60; + else + start = UINT32_MAX; + + status_debug("seeker: starting %s from %s", + peer->gossip_enabled ? "gossip" : "disabled gossip", + type_to_string(tmpctx, struct node_id, &peer->id)); + + /* This is allowed even if they don't understand it (odd) */ + msg = towire_gossip_timestamp_filter(NULL, + &seeker->daemon->chain_hash, + start, + UINT32_MAX); + queue_peer_msg(peer, take(msg)); } -/* We've found gossip is missing. */ -void gossip_missing(struct daemon *daemon, struct seeker *seeker) +/* We have selected this peer to stream us startup gossip */ +static void peer_gossip_startup(struct seeker *seeker, struct peer *peer) +{ + const u32 polltime = GOSSIP_SEEKER_INTERVAL(seeker); + u8 *msg; + u32 start; + + if (seeker->last_gossip_timestamp < polltime) + start = 0; + else + start = seeker->last_gossip_timestamp - polltime; + + selected_peer(seeker, peer); + + status_debug("seeker: startup gossip from t=%u from %s", + start, type_to_string(tmpctx, struct node_id, &peer->id)); + msg = towire_gossip_timestamp_filter(NULL, + &peer->daemon->chain_hash, + start, UINT32_MAX); + queue_peer_msg(peer, take(msg)); +} + +static bool peer_has_gossip_queries(const struct peer *peer) { - if (!seeker->gossip_missing) { - status_info("We seem to be missing gossip messages"); - /* FIXME: we could use query_channel_range. */ - /* Make some peers gossip harder. */ - for (size_t i = 0; i < 3; i++) { - struct peer *peer = random_peer(daemon, - peer_is_not_gossip_high); - - if (!peer) - break; - - status_info("%s: gossip harder!", - type_to_string(tmpctx, struct node_id, - &peer->id)); - peer->gossip_level = GOSSIP_HIGH; - setup_gossip_range(peer); + return peer->gossip_queries_feature; +} + +static void check_firstpeer(struct seeker *seeker) +{ + struct peer *peer = seeker->random_peer_softref, *p; + + /* It might have died, pick another. */ + if (!peer) { + status_debug("seeker: startup peer died, re-choosing"); + peer = random_peer(seeker->daemon, peer_has_gossip_queries); + /* No peer? Wait for a new one to join. */ + if (!peer) { + status_debug("seeker: no peers, waiting"); + seeker->state = STARTING_UP_NEED_PEER; + return; } + + peer_gossip_startup(seeker, peer); + return; + } + + /* If no progress, we assume it's finished, and if it hasn't, + * we'll end up querying backwards in next steps. */ + if (peer_made_progress(seeker)) + return; + + /* Begin normal gossip regime */ + status_debug("seeker: startup peer finished"); + clear_softref(seeker, &seeker->random_peer_softref); + seeker->state = NORMAL; + list_for_each(&seeker->daemon->peers, p, list) { + if (p == peer) + continue; + + normal_gossip_start(seeker, p); + } +} + +/* Periodic timer to see how our gossip is going. */ +static void seeker_check(struct seeker *seeker) +{ + switch (seeker->state) { + case STARTING_UP_NEED_PEER: + break; + case STARTING_UP_FIRSTPEER: + check_firstpeer(seeker); + break; + case NORMAL: + /* FIXME: Check! */ + break; } - tal_free(seeker->gossip_missing); - /* Check again in 10 minutes. */ - seeker->gossip_missing = new_reltimer(&daemon->timers, daemon, - time_from_sec(600), - gossip_not_missing, daemon); + begin_check_timer(seeker); +} + +/* We get this when we have a new peer. */ +void seeker_setup_peer_gossip(struct seeker *seeker, struct peer *peer) +{ + /* Can't do anything useful with these peers. */ + if (!peer->gossip_queries_feature) + return; + + switch (seeker->state) { + case STARTING_UP_NEED_PEER: + peer_gossip_startup(seeker, peer); + seeker->state = STARTING_UP_FIRSTPEER; + return; + case STARTING_UP_FIRSTPEER: + /* Waiting for seeker_check to release us */ + return; + case NORMAL: + normal_gossip_start(seeker, peer); + return; + } + abort(); +} + +/* We've found gossip is missing. */ +void gossip_missing(struct daemon *daemon, struct seeker *seeker) +{ + /* FIXME */ } bool remove_unknown_scid(struct seeker *seeker, @@ -103,11 +246,6 @@ bool remove_unknown_scid(struct seeker *seeker, return false; } -bool seeker_gossip(const struct seeker *seeker) -{ - return seeker->gossip_missing != NULL; -} - bool add_unknown_scid(struct seeker *seeker, const struct short_channel_id *scid) { diff --git a/gossipd/seeker.h b/gossipd/seeker.h index 3f25208d4..c597a4543 100644 --- a/gossipd/seeker.h +++ b/gossipd/seeker.h @@ -6,19 +6,17 @@ struct daemon; struct peer; struct short_channel_id; -struct seeker *new_seeker(struct daemon *daemon); - -void gossip_missing(struct daemon *daemon, struct seeker *seeker); +struct seeker *new_seeker(struct daemon *daemon, u32 timestamp); void query_unknown_channel(struct daemon *daemon, struct peer *peer, const struct short_channel_id *id); +void seeker_setup_peer_gossip(struct seeker *seeker, struct peer *peer); + +void gossip_missing(struct daemon *daemon, struct seeker *seeker); bool remove_unknown_scid(struct seeker *seeker, const struct short_channel_id *scid); bool add_unknown_scid(struct seeker *seeker, const struct short_channel_id *scid); - -bool seeker_gossip(const struct seeker *seeker); - #endif /* LIGHTNING_GOSSIPD_SEEKER_H */ diff --git a/gossipd/test/run-extended-info.c b/gossipd/test/run-extended-info.c index d125870a8..b71dfd682 100644 --- a/gossipd/test/run-extended-info.c +++ b/gossipd/test/run-extended-info.c @@ -47,6 +47,9 @@ const u8 *gossip_store_get(const tal_t *ctx UNNEEDED, /* Generated stub for master_badmsg */ void master_badmsg(u32 type_expected UNNEEDED, const u8 *msg) { fprintf(stderr, "master_badmsg called!\n"); abort(); } +/* Generated stub for process_scids */ +void process_scids(struct daemon *daemon UNNEEDED, const struct short_channel_id *scids UNNEEDED) +{ fprintf(stderr, "process_scids called!\n"); abort(); } /* Generated stub for queue_peer_from_store */ void queue_peer_from_store(struct peer *peer UNNEEDED, const struct broadcastable *bcast UNNEEDED)