Browse Source

gossipd: really fix peer handoff.

954a3990fa had two errors:
1) We created the handoff message *before* we sent the final packet, meaning
   that the cryptostate was out-of-sync.
2) We called io_wait() on the output side of a duplex connection: it has
   to be io_wait_out().

This time, stress testing for 2 hours revealed no more problems.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
ppa-0.6.1
Rusty Russell 7 years ago
committed by Christian Decker
parent
commit
3f84ca1052
  1. 77
      gossipd/gossip.c

77
gossipd/gossip.c

@ -128,7 +128,10 @@ struct peer {
bool reach_again; bool reach_again;
/* Waiting to send_peer_with_fds to master? */ /* Waiting to send_peer_with_fds to master? */
const u8 *send_to_master; bool return_to_master;
/* If we're exiting due to non-gossip msg, otherwise release */
u8 *nongossip_msg;
}; };
struct addrhint { struct addrhint {
@ -194,7 +197,7 @@ static struct peer *new_peer(const tal_t *ctx,
peer->daemon = daemon; peer->daemon = daemon;
peer->local = true; peer->local = true;
peer->reach_again = false; peer->reach_again = false;
peer->send_to_master = NULL; peer->return_to_master = false;
peer->num_pings_outstanding = 0; peer->num_pings_outstanding = 0;
peer->broadcast_index = 0; peer->broadcast_index = 0;
msg_queue_init(&peer->peer_out, peer); msg_queue_init(&peer->peer_out, peer);
@ -412,23 +415,28 @@ static void fail_release(struct peer *peer)
daemon_conn_send(&peer->daemon->master, take(msg)); daemon_conn_send(&peer->daemon->master, take(msg));
} }
static struct io_plan *wait_until_ready_for_master(struct io_conn *conn, static struct io_plan *ready_for_master(struct io_conn *conn, struct peer *peer)
struct peer *peer)
{ {
/* One of these is always true, since we've just finished read/write */ u8 *msg;
if (!peer_in_started(conn, &peer->pcs) if (peer->nongossip_msg)
&& !peer_out_started(conn, &peer->pcs)) { msg = towire_gossip_peer_nongossip(peer, &peer->id,
if (send_peer_with_fds(peer, take(peer->send_to_master))) { &peer->pcs.cs,
/* In case we set this earlier. */ peer->gfeatures,
tal_del_destructor(peer, fail_release); peer->lfeatures,
peer->send_to_master = NULL; peer->nongossip_msg);
return io_close_taken_fd(conn); else
} else msg = towire_gossipctl_release_peer_reply(peer,
return io_close(conn); &peer->pcs.cs,
} peer->gfeatures,
peer->lfeatures);
/* Don't do any more I/O. */ if (send_peer_with_fds(peer, take(msg))) {
return io_wait(conn, peer, wait_until_ready_for_master, peer); /* In case we set this earlier. */
tal_del_destructor(peer, fail_release);
peer->return_to_master = false;
return io_close_taken_fd(conn);
} else
return io_close(conn);
} }
static struct io_plan *peer_msgin(struct io_conn *conn, static struct io_plan *peer_msgin(struct io_conn *conn,
@ -438,8 +446,12 @@ static struct io_plan *peer_msgin(struct io_conn *conn,
* pass up to master */ * pass up to master */
static struct io_plan *peer_next_in(struct io_conn *conn, struct peer *peer) static struct io_plan *peer_next_in(struct io_conn *conn, struct peer *peer)
{ {
if (peer->send_to_master) if (peer->return_to_master) {
return wait_until_ready_for_master(conn, peer); assert(!peer_in_started(conn, &peer->pcs));
if (!peer_out_started(conn, &peer->pcs))
return ready_for_master(conn, peer);
return io_wait(conn, peer, peer_next_in, peer);
}
return peer_read_message(conn, &peer->pcs, peer_msgin); return peer_read_message(conn, &peer->pcs, peer_msgin);
} }
@ -488,12 +500,8 @@ static struct io_plan *peer_msgin(struct io_conn *conn,
case WIRE_REVOKE_AND_ACK: case WIRE_REVOKE_AND_ACK:
case WIRE_INIT: case WIRE_INIT:
/* Not our place to handle this, so we punt */ /* Not our place to handle this, so we punt */
peer->send_to_master peer->return_to_master = true;
= towire_gossip_peer_nongossip(peer, &peer->id, peer->nongossip_msg = tal_steal(peer, msg);
&peer->pcs.cs,
peer->gfeatures,
peer->lfeatures,
msg);
/* This will wait. */ /* This will wait. */
return peer_next_in(conn, peer); return peer_next_in(conn, peer);
@ -543,8 +551,12 @@ static struct io_plan *peer_pkt_out(struct io_conn *conn, struct peer *peer)
} }
/* Do we want to send this peer to the master daemon? */ /* Do we want to send this peer to the master daemon? */
if (peer->send_to_master) if (peer->return_to_master) {
return wait_until_ready_for_master(conn, peer); assert(!peer_out_started(conn, &peer->pcs));
if (!peer_in_started(conn, &peer->pcs))
return ready_for_master(conn, peer);
return io_out_wait(conn, peer, peer_pkt_out, peer);
}
/* If we're supposed to be sending gossip, do so now. */ /* If we're supposed to be sending gossip, do so now. */
if (peer->gossip_sync) { if (peer->gossip_sync) {
@ -738,21 +750,18 @@ static struct io_plan *release_peer(struct io_conn *conn, struct daemon *daemon,
master_badmsg(WIRE_GOSSIPCTL_RELEASE_PEER, msg); master_badmsg(WIRE_GOSSIPCTL_RELEASE_PEER, msg);
peer = find_peer(daemon, &id); peer = find_peer(daemon, &id);
if (!peer || !peer->local || peer->send_to_master) { if (!peer || !peer->local || peer->return_to_master) {
/* This can happen with dying peers, or reconnect */ /* This can happen with dying peers, or reconnect */
status_trace("release_peer: peer %s %s", status_trace("release_peer: peer %s %s",
type_to_string(trc, struct pubkey, &id), type_to_string(trc, struct pubkey, &id),
!peer ? "not found" !peer ? "not found"
: !peer->send_to_master ? "already releasing" : peer->return_to_master ? "already releasing"
: "not local"); : "not local");
msg = towire_gossipctl_release_peer_replyfail(msg); msg = towire_gossipctl_release_peer_replyfail(msg);
daemon_conn_send(&daemon->master, take(msg)); daemon_conn_send(&daemon->master, take(msg));
} else { } else {
msg = towire_gossipctl_release_peer_reply(peer, peer->return_to_master = true;
&peer->pcs.cs, peer->nongossip_msg = NULL;
peer->gfeatures,
peer->lfeatures);
peer->send_to_master = msg;
/* Wake output, in case it's idle. */ /* Wake output, in case it's idle. */
msg_wake(&peer->peer_out); msg_wake(&peer->peer_out);

Loading…
Cancel
Save