Browse Source

lightningd: add slow_reconnect flag for transient failure.

We normally reconnect after 1 second: add a flag to say wait for
60 seconds instead.  This will be used in the next patch, which handles "soft" errors.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>


Header from folded patch 'channel_fail_transient_slowretry.patch':

fixup! lightningd: add slow_reconnect flag for transient failure.

@ZmnSCPxj points out that the function is unsafe, since omitting the bool
parameter still compiles.  Make it two separate functions, each
with a distinctive name so every caller has to be fixed.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
pull/2938/head
Rusty Russell 6 years ago
parent
commit
f7a890ca35
  1. 34
      lightningd/channel.c
  2. 6
      lightningd/channel.h
  3. 3
      lightningd/channel_control.c
  4. 3
      lightningd/closing_control.c
  5. 2
      lightningd/connect_control.c
  6. 6
      lightningd/peer_control.c

34
lightningd/channel.c

@ -420,17 +420,12 @@ void channel_set_billboard(struct channel *channel, bool perm, const char *str)
} }
} }
void channel_fail_transient(struct channel *channel, const char *fmt, ...) static void err_and_reconnect(struct channel *channel,
const char *why,
u32 seconds_before_reconnect)
{ {
va_list ap;
const char *why;
va_start(ap, fmt);
why = tal_vfmt(channel, fmt, ap);
va_end(ap);
log_info(channel->log, "Peer transient failure in %s: %s", log_info(channel->log, "Peer transient failure in %s: %s",
channel_state_name(channel), why); channel_state_name(channel), why);
tal_free(why);
#if DEVELOPER #if DEVELOPER
if (dev_disconnect_permanent(channel->peer->ld)) { if (dev_disconnect_permanent(channel->peer->ld)) {
@ -441,7 +436,24 @@ void channel_fail_transient(struct channel *channel, const char *fmt, ...)
channel_set_owner(channel, NULL); channel_set_owner(channel, NULL);
/* Reconnect after 1 second: prevents some spurious reconnects delay_then_reconnect(channel, seconds_before_reconnect,
* during tests. */ &channel->peer->addr);
delay_then_reconnect(channel, 1, &channel->peer->addr); }
/* Channel has failed, but can try again after a minute: format the
 * printf-style reason onto tmpctx, then hand it to err_and_reconnect
 * with a 60 second delay before reconnecting. */
void channel_fail_reconnect_later(struct channel *channel, const char *fmt, ...)
{
	va_list args;
	const char *reason;

	/* Build the message first so the va_list is closed before we act. */
	va_start(args, fmt);
	reason = tal_vfmt(tmpctx, fmt, args);
	va_end(args);

	err_and_reconnect(channel, reason, 60);
}
void channel_fail_reconnect(struct channel *channel, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
err_and_reconnect(channel, tal_vfmt(tmpctx, fmt, ap), 1);
va_end(ap);
} }

6
lightningd/channel.h

@ -175,8 +175,12 @@ const char *channel_state_str(enum channel_state state);
void channel_set_owner(struct channel *channel, struct subd *owner); void channel_set_owner(struct channel *channel, struct subd *owner);
/* Channel has failed, but can try again. */ /* Channel has failed, but can try again. */
PRINTF_FMT(2,3) void channel_fail_transient(struct channel *channel, PRINTF_FMT(2,3) void channel_fail_reconnect(struct channel *channel,
const char *fmt, ...); const char *fmt, ...);
/* Channel has failed, but can try again after a minute. */
PRINTF_FMT(2,3) void channel_fail_reconnect_later(struct channel *channel,
const char *fmt,...);
/* Channel has failed, give up on it. */ /* Channel has failed, give up on it. */
void channel_fail_permanent(struct channel *channel, const char *fmt, ...); void channel_fail_permanent(struct channel *channel, const char *fmt, ...);
/* Permanent error, but due to internal problems, not peer. */ /* Permanent error, but due to internal problems, not peer. */

3
lightningd/channel_control.c

@ -317,7 +317,8 @@ void peer_start_channeld(struct channel *channel,
if (!channel->owner) { if (!channel->owner) {
log_unusual(channel->log, "Could not subdaemon channel: %s", log_unusual(channel->log, "Could not subdaemon channel: %s",
strerror(errno)); strerror(errno));
channel_fail_transient(channel, "Failed to subdaemon channel"); channel_fail_reconnect_later(channel,
"Failed to subdaemon channel");
return; return;
} }

3
lightningd/closing_control.c

@ -191,7 +191,8 @@ void peer_start_closingd(struct channel *channel,
if (!channel->owner) { if (!channel->owner) {
log_unusual(channel->log, "Could not subdaemon closing: %s", log_unusual(channel->log, "Could not subdaemon closing: %s",
strerror(errno)); strerror(errno));
channel_fail_transient(channel, "Failed to subdaemon closing"); channel_fail_reconnect_later(channel,
"Failed to subdaemon closing");
return; return;
} }

2
lightningd/connect_control.c

@ -278,7 +278,7 @@ static void peer_please_disconnect(struct lightningd *ld, const u8 *msg)
if (uc) if (uc)
kill_uncommitted_channel(uc, "Reconnected"); kill_uncommitted_channel(uc, "Reconnected");
else if (c) else if (c)
channel_fail_transient(c, "Reconnected"); channel_fail_reconnect(c, "Reconnected");
} }
static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fds) static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fds)

6
lightningd/peer_control.c

@ -396,7 +396,7 @@ void channel_errmsg(struct channel *channel,
/* No per_peer_state means a subd crash or disconnection. */ /* No per_peer_state means a subd crash or disconnection. */
if (!pps) { if (!pps) {
channel_fail_transient(channel, "%s: %s", channel_fail_reconnect(channel, "%s: %s",
channel->owner->name, desc); channel->owner->name, desc);
return; return;
} }
@ -1022,7 +1022,7 @@ static enum watch_result funding_depth_cb(struct lightningd *ld,
} else if (!short_channel_id_eq(channel->scid, &scid)) { } else if (!short_channel_id_eq(channel->scid, &scid)) {
/* This normally restarts channeld, initialized with updated scid /* This normally restarts channeld, initialized with updated scid
* and also adds it (at least our halve_chan) to rtable. */ * and also adds it (at least our halve_chan) to rtable. */
channel_fail_transient(channel, channel_fail_reconnect(channel,
"short_channel_id changed to %s (was %s)", "short_channel_id changed to %s (was %s)",
short_channel_id_to_str(tmpctx, &scid), short_channel_id_to_str(tmpctx, &scid),
short_channel_id_to_str(tmpctx, channel->scid)); short_channel_id_to_str(tmpctx, channel->scid));
@ -1373,7 +1373,7 @@ static struct command_result *json_disconnect(struct command *cmd,
channel = peer_active_channel(peer); channel = peer_active_channel(peer);
if (channel) { if (channel) {
if (*force) { if (*force) {
channel_fail_transient(channel, channel_fail_reconnect(channel,
"disconnect command force=true"); "disconnect command force=true");
return command_success(cmd, json_stream_success(cmd)); return command_success(cmd, json_stream_success(cmd));
} }

Loading…
Cancel
Save