Browse Source

gossipd: don't send a peer to master with half-written or half-read packet.

In this case, it was a gossip message half-sent, when we asked the peer
to be released.  Fix the problem in general by making send_peer_with_fds()
wait until after the next packet.

test_routing_gossip/lightning-4/log:
	b'lightning_openingd(8738): TRACE: First per_commit_point = 02e2ff759ed70c71f154695eade1983664a72546ebc552861f844bff5ea5b933bf'
	b'lightning_openingd(8738): TRACE: Failed hdr decrypt with rn=11'
	b'lightning_openingd(8738): STATUS_FAIL_PEER_IO: Reading accept_channel: Success'

test_routing_gossip/lightning-5/log:

	b'lightning_gossipd(8461): UPDATE WIRE_GOSSIP_PEER_NONGOSSIP'
	b'lightning_gossipd(8461): UPDATE WIRE_GOSSIP_PEER_NONGOSSIP'
	b'lightningd(8308): Failed to get netaddr for outgoing: Transport endpoint is not connected'

The problem occurs here on release, but could be on any place where we hand
a peer over when using ccan/io.  Note the other case (channel.c).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
ppa-0.6.1
Rusty Russell 7 years ago
committed by Christian Decker
parent
commit
954a3990fa
  1. 5
      channeld/channel.c
  2. 98
      gossipd/gossip.c

5
channeld/channel.c

@ -2107,6 +2107,11 @@ static void gossip_gone(struct io_conn *unused, struct daemon_conn *dc)
"Gossip connection closed"); "Gossip connection closed");
} }
/* FIXME: This doesn't cover partly read packets! We could be halfway
* through receiving a gossip msg, for example. We'll simply reconnect
* in this case, but the real fix is to wean off ccan/io here, as it doesn't
* buy us anything: a poll for read on gossipfd, masterfd and peerfd then acting
* synchronous would be a simpler model. */
static void send_shutdown_complete(struct peer *peer) static void send_shutdown_complete(struct peer *peer)
{ {
const u8 *msg; const u8 *msg;

98
gossipd/gossip.c

@ -126,6 +126,9 @@ struct peer {
/* If we die, should we reach again? */ /* If we die, should we reach again? */
bool reach_again; bool reach_again;
/* Waiting to send_peer_with_fds to master? */
const u8 *send_to_master;
}; };
struct addrhint { struct addrhint {
@ -140,7 +143,7 @@ struct addrhint {
/* FIXME: Reorder */ /* FIXME: Reorder */
static struct io_plan *peer_start_gossip(struct io_conn *conn, static struct io_plan *peer_start_gossip(struct io_conn *conn,
struct peer *peer); struct peer *peer);
static void send_peer_with_fds(struct peer *peer, const u8 *msg); static bool send_peer_with_fds(struct peer *peer, const u8 *msg);
static void wake_pkt_out(struct peer *peer); static void wake_pkt_out(struct peer *peer);
static void try_reach_peer(struct daemon *daemon, const struct pubkey *id); static void try_reach_peer(struct daemon *daemon, const struct pubkey *id);
@ -191,6 +194,7 @@ static struct peer *new_peer(const tal_t *ctx,
peer->daemon = daemon; peer->daemon = daemon;
peer->local = true; peer->local = true;
peer->reach_again = false; peer->reach_again = false;
peer->send_to_master = NULL;
peer->num_pings_outstanding = 0; peer->num_pings_outstanding = 0;
peer->broadcast_index = 0; peer->broadcast_index = 0;
msg_queue_init(&peer->peer_out, peer); msg_queue_init(&peer->peer_out, peer);
@ -295,9 +299,11 @@ static struct io_plan *peer_init_received(struct io_conn *conn,
* gossipfd closed (forget_peer) or reconnect. */ * gossipfd closed (forget_peer) or reconnect. */
peer_finalized(peer); peer_finalized(peer);
/* We will not have anything queued, since we're not duplex. */
msg = towire_gossip_peer_connected(peer, &peer->id, &peer->pcs.cs, msg = towire_gossip_peer_connected(peer, &peer->id, &peer->pcs.cs,
peer->gfeatures, peer->lfeatures); peer->gfeatures, peer->lfeatures);
send_peer_with_fds(peer, msg); if (!send_peer_with_fds(peer, msg))
return io_close(conn);
/* Start the gossip flowing. */ /* Start the gossip flowing. */
/* FIXME: This is a bit wasteful in the common case where master /* FIXME: This is a bit wasteful in the common case where master
@ -398,10 +404,49 @@ static void handle_pong(struct peer *peer, const u8 *pong)
tal_len(pong)))); tal_len(pong))));
} }
/* If master asks us to release peer, we attach this destructor in case it
* dies while we're waiting for it to finish IO */
static void fail_release(struct peer *peer)
{
u8 *msg = towire_gossipctl_release_peer_replyfail(peer);
daemon_conn_send(&peer->daemon->master, take(msg));
}
static struct io_plan *wait_until_ready_for_master(struct io_conn *conn,
struct peer *peer)
{
/* One of these is always true, since we've just finished read/write */
if (!peer_in_started(conn, &peer->pcs)
&& !peer_out_started(conn, &peer->pcs)) {
if (send_peer_with_fds(peer, take(peer->send_to_master))) {
/* In case we set this earlier. */
tal_del_destructor(peer, fail_release);
peer->send_to_master = NULL;
return io_close_taken_fd(conn);
} else
return io_close(conn);
}
/* Don't do any more I/O. */
return io_wait(conn, peer, wait_until_ready_for_master, peer);
}
static struct io_plan *peer_msgin(struct io_conn *conn,
struct peer *peer, u8 *msg);
/* Wrapper around peer_read_message: don't read another if we want to
* pass up to master */
static struct io_plan *peer_next_in(struct io_conn *conn, struct peer *peer)
{
if (peer->send_to_master)
return wait_until_ready_for_master(conn, peer);
return peer_read_message(conn, &peer->pcs, peer_msgin);
}
static struct io_plan *peer_msgin(struct io_conn *conn, static struct io_plan *peer_msgin(struct io_conn *conn,
struct peer *peer, u8 *msg) struct peer *peer, u8 *msg)
{ {
u8 *s;
enum wire_type t = fromwire_peektype(msg); enum wire_type t = fromwire_peektype(msg);
switch (t) { switch (t) {
@ -415,15 +460,15 @@ static struct io_plan *peer_msgin(struct io_conn *conn,
case WIRE_NODE_ANNOUNCEMENT: case WIRE_NODE_ANNOUNCEMENT:
case WIRE_CHANNEL_UPDATE: case WIRE_CHANNEL_UPDATE:
handle_gossip_msg(peer->daemon->rstate, msg); handle_gossip_msg(peer->daemon->rstate, msg);
return peer_read_message(conn, &peer->pcs, peer_msgin); return peer_next_in(conn, peer);
case WIRE_PING: case WIRE_PING:
handle_ping(peer, msg); handle_ping(peer, msg);
return peer_read_message(conn, &peer->pcs, peer_msgin); return peer_next_in(conn, peer);
case WIRE_PONG: case WIRE_PONG:
handle_pong(peer, msg); handle_pong(peer, msg);
return peer_read_message(conn, &peer->pcs, peer_msgin); return peer_next_in(conn, peer);
case WIRE_OPEN_CHANNEL: case WIRE_OPEN_CHANNEL:
case WIRE_CHANNEL_REESTABLISH: case WIRE_CHANNEL_REESTABLISH:
@ -443,13 +488,15 @@ static struct io_plan *peer_msgin(struct io_conn *conn,
case WIRE_REVOKE_AND_ACK: case WIRE_REVOKE_AND_ACK:
case WIRE_INIT: case WIRE_INIT:
/* Not our place to handle this, so we punt */ /* Not our place to handle this, so we punt */
s = towire_gossip_peer_nongossip(msg, &peer->id, peer->send_to_master
= towire_gossip_peer_nongossip(peer, &peer->id,
&peer->pcs.cs, &peer->pcs.cs,
peer->gfeatures, peer->gfeatures,
peer->lfeatures, peer->lfeatures,
msg); msg);
send_peer_with_fds(peer, take(s));
return io_close_taken_fd(conn); /* This will wait. */
return peer_next_in(conn, peer);
} }
/* BOLT #1: /* BOLT #1:
@ -463,7 +510,7 @@ static struct io_plan *peer_msgin(struct io_conn *conn,
} else } else
peer_error(peer, "Unknown packet %u", t); peer_error(peer, "Unknown packet %u", t);
return peer_read_message(conn, &peer->pcs, peer_msgin); return peer_next_in(conn, peer);
} }
/* Wake up the outgoing direction of the connection and write any /* Wake up the outgoing direction of the connection and write any
@ -495,6 +542,10 @@ static struct io_plan *peer_pkt_out(struct io_conn *conn, struct peer *peer)
peer_pkt_out); peer_pkt_out);
} }
/* Do we want to send this peer to the master daemon? */
if (peer->send_to_master)
return wait_until_ready_for_master(conn, peer);
/* If we're supposed to be sending gossip, do so now. */ /* If we're supposed to be sending gossip, do so now. */
if (peer->gossip_sync) { if (peer->gossip_sync) {
struct queued_message *next; struct queued_message *next;
@ -518,7 +569,7 @@ static struct io_plan *peer_start_gossip(struct io_conn *conn, struct peer *peer
{ {
wake_pkt_out(peer); wake_pkt_out(peer);
return io_duplex(conn, return io_duplex(conn,
peer_read_message(conn, &peer->pcs, peer_msgin), peer_next_in(conn, peer),
peer_pkt_out(conn, peer)); peer_pkt_out(conn, peer));
} }
@ -554,7 +605,7 @@ static void forget_peer(struct io_conn *conn, struct daemon_conn *dc)
/* When a peer is to be owned by another daemon, we create a socket /* When a peer is to be owned by another daemon, we create a socket
* pair to send/receive gossip from it */ * pair to send/receive gossip from it */
static void send_peer_with_fds(struct peer *peer, const u8 *msg) static bool send_peer_with_fds(struct peer *peer, const u8 *msg)
{ {
int fds[2]; int fds[2];
@ -564,7 +615,7 @@ static void send_peer_with_fds(struct peer *peer, const u8 *msg)
/* FIXME: Send error to peer? */ /* FIXME: Send error to peer? */
/* Peer will be freed when caller closes conn. */ /* Peer will be freed when caller closes conn. */
return; return false;
} }
/* Now we talk to socket to get to peer's owner daemon. */ /* Now we talk to socket to get to peer's owner daemon. */
@ -583,6 +634,7 @@ static void send_peer_with_fds(struct peer *peer, const u8 *msg)
/* Don't get confused: we can't use this any more. */ /* Don't get confused: we can't use this any more. */
peer->fd = -1; peer->fd = -1;
return true;
} }
/** /**
@ -678,7 +730,7 @@ static struct io_plan *handle_peer(struct io_conn *conn, struct daemon *daemon,
static struct io_plan *release_peer(struct io_conn *conn, struct daemon *daemon, static struct io_plan *release_peer(struct io_conn *conn, struct daemon *daemon,
const u8 *msg) const u8 *msg)
{ {
struct pubkey id; struct pubkey id;
struct peer *peer; struct peer *peer;
@ -686,20 +738,24 @@ static struct io_plan *release_peer(struct io_conn *conn, struct daemon *daemon,
master_badmsg(WIRE_GOSSIPCTL_RELEASE_PEER, msg); master_badmsg(WIRE_GOSSIPCTL_RELEASE_PEER, msg);
peer = find_peer(daemon, &id); peer = find_peer(daemon, &id);
if (!peer || !peer->local) { if (!peer || !peer->local || peer->send_to_master) {
status_trace("release_peer: peer %s not %s",
type_to_string(trc, struct pubkey, &id),
peer ? "local" : "found");
/* This can happen with dying peers, or reconnect */ /* This can happen with dying peers, or reconnect */
status_trace("release_peer: peer %s %s",
type_to_string(trc, struct pubkey, &id),
!peer ? "not found"
: !peer->send_to_master ? "already releasing"
: "not local");
msg = towire_gossipctl_release_peer_replyfail(msg); msg = towire_gossipctl_release_peer_replyfail(msg);
daemon_conn_send(&daemon->master, take(msg)); daemon_conn_send(&daemon->master, take(msg));
} else { } else {
msg = towire_gossipctl_release_peer_reply(msg, msg = towire_gossipctl_release_peer_reply(peer,
&peer->pcs.cs, &peer->pcs.cs,
peer->gfeatures, peer->gfeatures,
peer->lfeatures); peer->lfeatures);
send_peer_with_fds(peer, take(msg)); peer->send_to_master = msg;
io_close_taken_fd(peer->conn);
/* Wake output, in case it's idle. */
msg_wake(&peer->peer_out);
} }
return daemon_conn_read_next(conn, &daemon->master); return daemon_conn_read_next(conn, &daemon->master);
} }

Loading…
Cancel
Save