Browse Source

connectd: always tell master when connection fails/succeeded.

We used to separate implicit connection requests (ie. timed retries
for important peers) and explicit ones, and send a
WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT for the latter.

In the success case, that's now redundant, since we hand the connected
peer to the master using WIRE_CONNECT_PEER_CONNECTED; we just need a
message for the failure case.  And we might as well tell the master
every failure, so we don't have to distinguish internally.

This also solves a race we had before: connectd would send
WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT which completes the incoming
JSON connect command, then send WIRE_CONNECT_PEER_CONNECTED.  So
there's a window where the JSON command can return, but the peer isn't
known to lightningd yet.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
ppa-0.6.1
Rusty Russell 7 years ago
committed by Christian Decker
parent
commit
30f08cc2b0
  1. 115
      connectd/connect.c
  2. 12
      connectd/connect_wire.csv
  3. 68
      lightningd/connect_control.c
  4. 1
      lightningd/connect_control.h
  5. 3
      lightningd/peer_control.c
  6. 3
      wallet/test/run-wallet.c

115
connectd/connect.c

@ -47,6 +47,7 @@
#include <netinet/in.h> #include <netinet/in.h>
#include <secp256k1_ecdh.h> #include <secp256k1_ecdh.h>
#include <sodium/randombytes.h> #include <sodium/randombytes.h>
#include <stdarg.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
@ -189,9 +190,6 @@ struct reaching {
/* FIXME: Support multiple address. */ /* FIXME: Support multiple address. */
struct wireaddr_internal addr; struct wireaddr_internal addr;
/* Whether connect command is waiting for the result. */
bool master_needs_response;
/* How far did we get? */ /* How far did we get? */
const char *connstate; const char *connstate;
}; };
@ -343,24 +341,15 @@ static void reached_peer(struct peer *peer, struct io_conn *conn)
{ {
/* OK, we've reached the peer successfully, tell everyone. */ /* OK, we've reached the peer successfully, tell everyone. */
struct reaching *r = find_reaching(peer->daemon, &peer->id); struct reaching *r = find_reaching(peer->daemon, &peer->id);
u8 *msg;
if (!r) if (!r)
return; return;
/* Don't call connect_failed */ /* Don't call destroy_io_conn */
io_set_finish(conn, NULL, NULL); io_set_finish(conn, NULL, NULL);
/* Don't free conn with reach */ /* Don't free conn with reach */
tal_steal(peer->daemon, conn); tal_steal(peer->daemon, conn);
/* Tell any connect command what happened. */
if (r->master_needs_response) {
msg = towire_connectctl_connect_to_peer_result(NULL, &r->id,
true, "");
daemon_conn_send(&peer->daemon->master, take(msg));
}
tal_free(r); tal_free(r);
} }
@ -966,27 +955,30 @@ struct io_plan *connection_out(struct io_conn *conn, struct reaching *reach)
handshake_out_success, reach); handshake_out_success, reach);
} }
static void connect_failed(struct io_conn *conn, struct reaching *reach) static void PRINTF_FMT(3,4)
connect_failed(struct daemon *daemon,
const struct pubkey *id,
const char *errfmt, ...)
{ {
u8 *msg; u8 *msg;
struct important_peerid *imp; struct important_peerid *imp;
const char *err = tal_fmt(tmpctx, "%s: %s", va_list ap;
reach->connstate, char *err;
strerror(errno));
va_start(ap, errfmt);
err = tal_vfmt(tmpctx, errfmt, ap);
va_end(ap);
/* Tell any connect command what happened. */ /* Tell any connect command what happened. */
if (reach->master_needs_response) { msg = towire_connectctl_connect_failed(NULL, id, err);
msg = towire_connectctl_connect_to_peer_result(NULL, &reach->id, daemon_conn_send(&daemon->master, take(msg));
false, err);
daemon_conn_send(&reach->daemon->master, take(msg));
}
status_trace("Failed connected out for %s", status_trace("Failed connected out for %s: %s",
type_to_string(tmpctx, struct pubkey, &reach->id)); type_to_string(tmpctx, struct pubkey, id),
err);
/* If we want to keep trying, do so. */ /* If we want to keep trying, do so. */
imp = important_peerid_map_get(&reach->daemon->important_peerids, imp = important_peerid_map_get(&daemon->important_peerids, id);
&reach->id);
if (imp) { if (imp) {
imp->wait_seconds *= 2; imp->wait_seconds *= 2;
if (imp->wait_seconds > MAX_WAIT_SECONDS) if (imp->wait_seconds > MAX_WAIT_SECONDS)
@ -996,10 +988,16 @@ static void connect_failed(struct io_conn *conn, struct reaching *reach)
imp->wait_seconds); imp->wait_seconds);
/* If important_id freed, this will be removed too */ /* If important_id freed, this will be removed too */
imp->reconnect_timer imp->reconnect_timer
= new_reltimer(&reach->daemon->timers, imp, = new_reltimer(&daemon->timers, imp,
time_from_sec(imp->wait_seconds), time_from_sec(imp->wait_seconds),
retry_important, imp); retry_important, imp);
} }
}
static void destroy_io_conn(struct io_conn *conn, struct reaching *reach)
{
connect_failed(reach->daemon, &reach->id,
"%s: %s", reach->connstate, strerror(errno));
tal_free(reach); tal_free(reach);
} }
@ -1030,7 +1028,7 @@ static struct io_plan *conn_init(struct io_conn *conn, struct reaching *reach)
} }
assert(ai); assert(ai);
io_set_finish(conn, connect_failed, reach); io_set_finish(conn, destroy_io_conn, reach);
return io_connect(conn, ai, connection_out, reach); return io_connect(conn, ai, connection_out, reach);
} }
@ -1060,7 +1058,7 @@ static struct io_plan *conn_proxy_init(struct io_conn *conn,
status_failed(STATUS_FAIL_INTERNAL_ERROR, status_failed(STATUS_FAIL_INTERNAL_ERROR,
"Can't reach to %u address", reach->addr.itype); "Can't reach to %u address", reach->addr.itype);
io_set_finish(conn, connect_failed, reach); io_set_finish(conn, destroy_io_conn, reach);
return io_tor_connect(conn, reach->daemon->proxyaddr, host, port, reach); return io_tor_connect(conn, reach->daemon->proxyaddr, host, port, reach);
} }
@ -1130,40 +1128,21 @@ gossip_resolve_addr(const tal_t *ctx, const struct pubkey *id)
return addr; return addr;
} }
static void try_reach_peer(struct daemon *daemon, const struct pubkey *id, static void try_reach_peer(struct daemon *daemon, const struct pubkey *id)
bool master_needs_response)
{ {
struct wireaddr_internal *a; struct wireaddr_internal *a;
struct addrhint *hint; struct addrhint *hint;
int fd, af; int fd, af;
struct reaching *reach; struct reaching *reach;
u8 *msg;
bool use_proxy = daemon->use_proxy_always; bool use_proxy = daemon->use_proxy_always;
if (pubkey_set_get(&daemon->peers, id)) { /* Already done? May happen with timer. */
status_debug("try_reach_peer: have peer %s", if (pubkey_set_get(&daemon->peers, id))
type_to_string(tmpctx, struct pubkey, id));
if (master_needs_response) {
msg = towire_connectctl_connect_to_peer_result(NULL, id,
true,
"");
daemon_conn_send(&daemon->master, take(msg));
}
return; return;
}
/* If we're trying to reach it right now, that's OK. */ /* If we're trying to reach it right now, that's OK. */
reach = find_reaching(daemon, id); if (find_reaching(daemon, id))
if (reach) {
/* Please tell us too. Master should not ask twice (we'll
* only respond once, and so one request will get stuck) */
if (reach->master_needs_response)
status_failed(STATUS_FAIL_MASTER_IO,
"Already reaching %s",
type_to_string(tmpctx, struct pubkey, id));
reach->master_needs_response = master_needs_response;
return; return;
}
hint = find_addrhint(daemon, id); hint = find_addrhint(daemon, id);
if (hint) if (hint)
@ -1187,14 +1166,7 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
} }
if (!a) { if (!a) {
status_debug("No address known for %s, giving up", connect_failed(daemon, id, "No address known");
type_to_string(tmpctx, struct pubkey, id));
if (master_needs_response) {
msg = towire_connectctl_connect_to_peer_result(NULL, id,
false,
"No address known, giving up");
daemon_conn_send(&daemon->master, take(msg));
}
return; return;
} }
@ -1249,17 +1221,11 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
fd = socket(af, SOCK_STREAM, 0); fd = socket(af, SOCK_STREAM, 0);
if (fd < 0) { if (fd < 0) {
char *err = tal_fmt(tmpctx, connect_failed(daemon, id,
"Can't open %i socket for %s (%s), giving up", "Can't open %i socket for %s (%s)",
af, af,
type_to_string(tmpctx, struct pubkey, id), type_to_string(tmpctx, struct pubkey, id),
strerror(errno)); strerror(errno));
status_debug("%s", err);
if (master_needs_response) {
msg = towire_connectctl_connect_to_peer_result(NULL, id,
false, err);
daemon_conn_send(&daemon->master, take(msg));
}
return; return;
} }
@ -1268,7 +1234,6 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
reach->daemon = daemon; reach->daemon = daemon;
reach->id = *id; reach->id = *id;
reach->addr = *a; reach->addr = *a;
reach->master_needs_response = master_needs_response;
reach->connstate = "Connection establishment"; reach->connstate = "Connection establishment";
list_add_tail(&daemon->reaching, &reach->list); list_add_tail(&daemon->reaching, &reach->list);
tal_add_destructor(reach, destroy_reaching); tal_add_destructor(reach, destroy_reaching);
@ -1290,7 +1255,7 @@ static void retry_important(struct important_peerid *imp)
if (!imp->daemon->reconnect) if (!imp->daemon->reconnect)
return; return;
try_reach_peer(imp->daemon, &imp->id, false); try_reach_peer(imp->daemon, &imp->id);
} }
static struct io_plan *connect_to_peer(struct io_conn *conn, static struct io_plan *connect_to_peer(struct io_conn *conn,
@ -1306,7 +1271,7 @@ static struct io_plan *connect_to_peer(struct io_conn *conn,
imp = important_peerid_map_get(&daemon->important_peerids, &id); imp = important_peerid_map_get(&daemon->important_peerids, &id);
if (imp) if (imp)
imp->reconnect_timer = tal_free(imp->reconnect_timer); imp->reconnect_timer = tal_free(imp->reconnect_timer);
try_reach_peer(daemon, &id, true); try_reach_peer(daemon, &id);
return daemon_conn_read_next(conn, &daemon->master); return daemon_conn_read_next(conn, &daemon->master);
} }
@ -1423,8 +1388,8 @@ static struct io_plan *recv_req(struct io_conn *conn, struct daemon_conn *master
case WIRE_CONNECTCTL_INIT_REPLY: case WIRE_CONNECTCTL_INIT_REPLY:
case WIRE_CONNECTCTL_ACTIVATE_REPLY: case WIRE_CONNECTCTL_ACTIVATE_REPLY:
case WIRE_CONNECT_PEER_CONNECTED: case WIRE_CONNECT_PEER_CONNECTED:
case WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT:
case WIRE_CONNECT_RECONNECTED: case WIRE_CONNECT_RECONNECTED:
case WIRE_CONNECTCTL_CONNECT_FAILED:
break; break;
} }

12
connectd/connect_wire.csv

@ -46,14 +46,10 @@ connectctl_peer_addrhint,,addr,struct wireaddr_internal
connectctl_connect_to_peer,2001 connectctl_connect_to_peer,2001
connectctl_connect_to_peer,,id,struct pubkey connectctl_connect_to_peer,,id,struct pubkey
# Connectd->master: result (not a reply since it can be out-of-order, but # Connectd->master: connect failed.
# you will get one reply for every request). connectctl_connect_failed,2020
connectctl_connect_to_peer_result,2020 connectctl_connect_failed,,id,struct pubkey
connectctl_connect_to_peer_result,,id,struct pubkey connectctl_connect_failed,,failreason,wirestring
# True it connected.
connectctl_connect_to_peer_result,,connected,bool
# Otherwise, why we can't reach them.
connectctl_connect_to_peer_result,,failreason,wirestring
# Master -> connectd: try to always maintain connection to this peer (or not) # Master -> connectd: try to always maintain connection to this peer (or not)
connectctl_peer_important,2010 connectctl_peer_important,2010

Can't render this file because it has a wrong number of fields in line 5.

68
lightningd/connect_control.c

@ -68,32 +68,6 @@ static void connect_cmd_succeed(struct command *cmd, const struct pubkey *id)
command_success(cmd, response); command_success(cmd, response);
} }
static void connectd_connect_result(struct lightningd *ld, const u8 *msg)
{
struct pubkey id;
bool connected;
char *err;
struct connect *c;
if (!fromwire_connectctl_connect_to_peer_result(tmpctx, msg,
&id,
&connected,
&err))
fatal("Connect gave bad CONNECTCTL_CONNECT_TO_PEER_RESULT message %s",
tal_hex(msg, msg));
/* We can have multiple connect commands: complete them all */
while ((c = find_connect(ld, &id)) != NULL) {
if (connected) {
connect_cmd_succeed(c->cmd, &id);
} else {
command_fail(c->cmd, LIGHTNINGD, "%s", err);
}
/* They delete themselves from list */
}
}
static void json_connect(struct command *cmd, static void json_connect(struct command *cmd,
const char *buffer, const jsmntok_t *params) const char *buffer, const jsmntok_t *params)
{ {
@ -200,12 +174,10 @@ static void json_connect(struct command *cmd,
subd_send_msg(cmd->ld->connectd, take(msg)); subd_send_msg(cmd->ld->connectd, take(msg));
} }
/* If there isn't already a connect command, tell connectd */ msg = towire_connectctl_connect_to_peer(NULL, &id);
if (!find_connect(cmd->ld, &id)) { subd_send_msg(cmd->ld->connectd, take(msg));
msg = towire_connectctl_connect_to_peer(NULL, &id);
subd_send_msg(cmd->ld->connectd, take(msg)); /* Leave this here for peer_connected or connect_failed. */
}
/* Leave this here for connect_connect_result */
new_connect(cmd->ld, &id, cmd); new_connect(cmd->ld, &id, cmd);
command_still_pending(cmd); command_still_pending(cmd);
} }
@ -218,6 +190,34 @@ static const struct json_command connect_command = {
}; };
AUTODATA(json_command, &connect_command); AUTODATA(json_command, &connect_command);
static void connect_failed(struct lightningd *ld, const u8 *msg)
{
struct pubkey id;
char *err;
struct connect *c;
if (!fromwire_connectctl_connect_failed(tmpctx, msg, &id, &err))
fatal("Connect gave bad CONNECTCTL_CONNECT_FAILED message %s",
tal_hex(msg, msg));
/* We can have multiple connect commands: fail them all */
while ((c = find_connect(ld, &id)) != NULL) {
/* They delete themselves from list */
command_fail(c->cmd, LIGHTNINGD, "%s", err);
}
}
void connect_succeeded(struct lightningd *ld, const struct pubkey *id)
{
struct connect *c;
/* We can have multiple connect commands: fail them all */
while ((c = find_connect(ld, id)) != NULL) {
/* They delete themselves from list */
connect_cmd_succeed(c->cmd, id);
}
}
static void peer_please_disconnect(struct lightningd *ld, const u8 *msg) static void peer_please_disconnect(struct lightningd *ld, const u8 *msg)
{ {
struct pubkey id; struct pubkey id;
@ -261,8 +261,8 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd
peer_connected(connectd->ld, msg, fds[0], fds[1]); peer_connected(connectd->ld, msg, fds[0], fds[1]);
break; break;
case WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT: case WIRE_CONNECTCTL_CONNECT_FAILED:
connectd_connect_result(connectd->ld, msg); connect_failed(connectd->ld, msg);
break; break;
} }
return 0; return 0;

1
lightningd/connect_control.h

@ -9,6 +9,7 @@ struct pubkey;
int connectd_init(struct lightningd *ld); int connectd_init(struct lightningd *ld);
void connectd_activate(struct lightningd *ld); void connectd_activate(struct lightningd *ld);
void connect_succeeded(struct lightningd *ld, const struct pubkey *id);
void gossip_connect_result(struct lightningd *ld, const u8 *msg); void gossip_connect_result(struct lightningd *ld, const u8 *msg);
#endif /* LIGHTNING_LIGHTNINGD_CONNECT_CONTROL_H */ #endif /* LIGHTNING_LIGHTNINGD_CONNECT_CONTROL_H */

3
lightningd/peer_control.c

@ -448,6 +448,9 @@ void peer_connected(struct lightningd *ld, const u8 *msg,
fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s", fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s",
tal_hex(msg, msg)); tal_hex(msg, msg));
/* Complete any outstanding connect commands. */
connect_succeeded(ld, &id);
/* If we're already dealing with this peer, hand off to correct /* If we're already dealing with this peer, hand off to correct
* subdaemon. Otherwise, we'll hand to openingd to wait there. */ * subdaemon. Otherwise, we'll hand to openingd to wait there. */
peer = peer_by_id(ld, &id); peer = peer_by_id(ld, &id);

3
wallet/test/run-wallet.c

@ -59,6 +59,9 @@ void command_still_pending(struct command *cmd UNNEEDED)
/* Generated stub for command_success */ /* Generated stub for command_success */
void command_success(struct command *cmd UNNEEDED, struct json_result *response UNNEEDED) void command_success(struct command *cmd UNNEEDED, struct json_result *response UNNEEDED)
{ fprintf(stderr, "command_success called!\n"); abort(); } { fprintf(stderr, "command_success called!\n"); abort(); }
/* Generated stub for connect_succeeded */
void connect_succeeded(struct lightningd *ld UNNEEDED, const struct pubkey *id UNNEEDED)
{ fprintf(stderr, "connect_succeeded called!\n"); abort(); }
/* Generated stub for fromwire_connect_peer_connected */ /* Generated stub for fromwire_connect_peer_connected */
bool fromwire_connect_peer_connected(const tal_t *ctx UNNEEDED, const void *p UNNEEDED, struct pubkey *id UNNEEDED, struct wireaddr_internal *addr UNNEEDED, struct crypto_state *crypto_state UNNEEDED, u8 **gfeatures UNNEEDED, u8 **lfeatures UNNEEDED) bool fromwire_connect_peer_connected(const tal_t *ctx UNNEEDED, const void *p UNNEEDED, struct pubkey *id UNNEEDED, struct wireaddr_internal *addr UNNEEDED, struct crypto_state *crypto_state UNNEEDED, u8 **gfeatures UNNEEDED, u8 **lfeatures UNNEEDED)
{ fprintf(stderr, "fromwire_connect_peer_connected called!\n"); abort(); } { fprintf(stderr, "fromwire_connect_peer_connected called!\n"); abort(); }

Loading…
Cancel
Save