Browse Source

lightningd: add slow_reconnect flag for transient failure.

We normally reconnect after 1 second: add a flag to say wait for
60 seconds instead.  This will be used in the next patch, which handles "soft" errors.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>


Header from folded patch 'channel_fail_transient_slowretry.patch':

fixup! lightningd: add slow_reconnect flag for transient failure.

@ZmnSCPxj points out that the function is unsafe, since a call omitting
the bool parameter still compiles.  Make it two separate functions, each
with a distinctive name so every caller has to be fixed.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
pull/2938/head
Rusty Russell 5 years ago
parent
commit
f7a890ca35
  1. 34
      lightningd/channel.c
  2. 8
      lightningd/channel.h
  3. 3
      lightningd/channel_control.c
  4. 3
      lightningd/closing_control.c
  5. 2
      lightningd/connect_control.c
  6. 12
      lightningd/peer_control.c

34
lightningd/channel.c

@ -420,17 +420,12 @@ void channel_set_billboard(struct channel *channel, bool perm, const char *str)
}
}
void channel_fail_transient(struct channel *channel, const char *fmt, ...)
static void err_and_reconnect(struct channel *channel,
const char *why,
u32 seconds_before_reconnect)
{
va_list ap;
const char *why;
va_start(ap, fmt);
why = tal_vfmt(channel, fmt, ap);
va_end(ap);
log_info(channel->log, "Peer transient failure in %s: %s",
channel_state_name(channel), why);
tal_free(why);
#if DEVELOPER
if (dev_disconnect_permanent(channel->peer->ld)) {
@ -441,7 +436,24 @@ void channel_fail_transient(struct channel *channel, const char *fmt, ...)
channel_set_owner(channel, NULL);
/* Reconnect after 1 second: prevents some spurious reconnects
* during tests. */
delay_then_reconnect(channel, 1, &channel->peer->addr);
delay_then_reconnect(channel, seconds_before_reconnect,
&channel->peer->addr);
}
/* Channel has failed, but can be retried after a minute.
 *
 * Formats the printf-style reason (allocated off tmpctx) and passes it to
 * err_and_reconnect() with a 60-second delay before reconnecting. */
void channel_fail_reconnect_later(struct channel *channel, const char *fmt, ...)
{
	const char *reason;
	va_list args;

	/* Build the message up front so the va_list is closed before we act. */
	va_start(args, fmt);
	reason = tal_vfmt(tmpctx, fmt, args);
	va_end(args);

	err_and_reconnect(channel, reason, 60);
}
/* Channel has failed, but can be retried almost immediately.
 *
 * Formats the printf-style reason (allocated off tmpctx) and passes it to
 * err_and_reconnect() with a 1-second delay before reconnecting. */
void channel_fail_reconnect(struct channel *channel, const char *fmt, ...)
{
	const char *reason;
	va_list args;

	/* Build the message up front so the va_list is closed before we act. */
	va_start(args, fmt);
	reason = tal_vfmt(tmpctx, fmt, args);
	va_end(args);

	err_and_reconnect(channel, reason, 1);
}

8
lightningd/channel.h

@ -175,8 +175,12 @@ const char *channel_state_str(enum channel_state state);
void channel_set_owner(struct channel *channel, struct subd *owner);
/* Channel has failed, but can try again. */
PRINTF_FMT(2,3) void channel_fail_transient(struct channel *channel,
const char *fmt,...);
PRINTF_FMT(2,3) void channel_fail_reconnect(struct channel *channel,
const char *fmt, ...);
/* Channel has failed, but can try again after a minute. */
PRINTF_FMT(2,3) void channel_fail_reconnect_later(struct channel *channel,
const char *fmt,...);
/* Channel has failed, give up on it. */
void channel_fail_permanent(struct channel *channel, const char *fmt, ...);
/* Permanent error, but due to internal problems, not peer. */

3
lightningd/channel_control.c

@ -317,7 +317,8 @@ void peer_start_channeld(struct channel *channel,
if (!channel->owner) {
log_unusual(channel->log, "Could not subdaemon channel: %s",
strerror(errno));
channel_fail_transient(channel, "Failed to subdaemon channel");
channel_fail_reconnect_later(channel,
"Failed to subdaemon channel");
return;
}

3
lightningd/closing_control.c

@ -191,7 +191,8 @@ void peer_start_closingd(struct channel *channel,
if (!channel->owner) {
log_unusual(channel->log, "Could not subdaemon closing: %s",
strerror(errno));
channel_fail_transient(channel, "Failed to subdaemon closing");
channel_fail_reconnect_later(channel,
"Failed to subdaemon closing");
return;
}

2
lightningd/connect_control.c

@ -278,7 +278,7 @@ static void peer_please_disconnect(struct lightningd *ld, const u8 *msg)
if (uc)
kill_uncommitted_channel(uc, "Reconnected");
else if (c)
channel_fail_transient(c, "Reconnected");
channel_fail_reconnect(c, "Reconnected");
}
static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fds)

12
lightningd/peer_control.c

@ -396,7 +396,7 @@ void channel_errmsg(struct channel *channel,
/* No per_peer_state means a subd crash or disconnection. */
if (!pps) {
channel_fail_transient(channel, "%s: %s",
channel_fail_reconnect(channel, "%s: %s",
channel->owner->name, desc);
return;
}
@ -1022,10 +1022,10 @@ static enum watch_result funding_depth_cb(struct lightningd *ld,
} else if (!short_channel_id_eq(channel->scid, &scid)) {
/* This normally restarts channeld, initialized with updated scid
* and also adds it (at least our halve_chan) to rtable. */
channel_fail_transient(channel,
"short_channel_id changed to %s (was %s)",
short_channel_id_to_str(tmpctx, &scid),
short_channel_id_to_str(tmpctx, channel->scid));
channel_fail_reconnect(channel,
"short_channel_id changed to %s (was %s)",
short_channel_id_to_str(tmpctx, &scid),
short_channel_id_to_str(tmpctx, channel->scid));
*channel->scid = scid;
wallet_channel_save(ld->wallet, channel);
@ -1373,7 +1373,7 @@ static struct command_result *json_disconnect(struct command *cmd,
channel = peer_active_channel(peer);
if (channel) {
if (*force) {
channel_fail_transient(channel,
channel_fail_reconnect(channel,
"disconnect command force=true");
return command_success(cmd, json_stream_success(cmd));
}

Loading…
Cancel
Save