diff --git a/gossipd/gossip_wire.csv b/gossipd/gossip_wire.csv index ff8ceff93..658f54681 100644 --- a/gossipd/gossip_wire.csv +++ b/gossipd/gossip_wire.csv @@ -118,13 +118,13 @@ gossip_get_txout_reply,,satoshis,u64 gossip_get_txout_reply,,len,u16 gossip_get_txout_reply,,outscript,len*u8 -# master->gossipd a routing failure occurred -gossip_routing_failure,3021 -gossip_routing_failure,,erring_node,struct pubkey -gossip_routing_failure,,erring_channel,struct short_channel_id -gossip_routing_failure,,failcode,u16 -gossip_routing_failure,,len,u16 -gossip_routing_failure,,channel_update,len*u8 +# master->gossipd an htlc failed with this onion error. +gossip_payment_failure,3021 +gossip_payment_failure,,erring_node,struct pubkey +gossip_payment_failure,,erring_channel,struct short_channel_id +gossip_payment_failure,,erring_channel_direction,u8 +gossip_payment_failure,,len,u16 +gossip_payment_failure,,error,len*u8 # master -> gossipd: a potential funding outpoint was spent, please forget the eventual channel gossip_outpoint_spent,3024 diff --git a/gossipd/gossipd.c b/gossipd/gossipd.c index fa69cdfb7..9289f6a57 100644 --- a/gossipd/gossipd.c +++ b/gossipd/gossipd.c @@ -2511,30 +2511,94 @@ static struct io_plan *handle_txout_reply(struct io_conn *conn, return daemon_conn_read_next(conn, daemon->master); } +/* Fix up the channel_update to include the type if it doesn't currently have + * one. See ElementsProject/lightning#1730 and lightningnetwork/lnd#1599 for the + * in-depth discussion on why we break message parsing here... */ +static u8 *patch_channel_update(const tal_t *ctx, u8 *channel_update TAKES) +{ + u8 *fixed; + if (channel_update != NULL && + fromwire_peektype(channel_update) != WIRE_CHANNEL_UPDATE) { + /* This should be a channel_update, prefix with the + * WIRE_CHANNEL_UPDATE type, but isn't. Let's prefix it. */ + fixed = tal_arr(ctx, u8, 0); + towire_u16(&fixed, WIRE_CHANNEL_UPDATE); + towire(&fixed, channel_update, tal_bytelen(channel_update)); + if (taken(channel_update)) + tal_free(channel_update); + return fixed; + } else { + return tal_dup_arr(ctx, u8, + channel_update, tal_count(channel_update), 0); + } +} + +/* Return NULL if the wrapped onion error message has no channel_update field, + * or return the embedded channel_update message otherwise. */ +static u8 *channel_update_from_onion_error(const tal_t *ctx, + const u8 *onion_message) +{ + u8 *channel_update = NULL; + u64 unused64; + u32 unused32; + + /* Identify failcodes that have some channel_update. + * + * TODO > BOLT 1.0: Add new failcodes when updating to a + * new BOLT version. */ + if (!fromwire_temporary_channel_failure(ctx, + onion_message, + &channel_update) && + !fromwire_amount_below_minimum(ctx, + onion_message, &unused64, + &channel_update) && + !fromwire_fee_insufficient(ctx, + onion_message, &unused64, + &channel_update) && + !fromwire_incorrect_cltv_expiry(ctx, + onion_message, &unused32, + &channel_update) && + !fromwire_expiry_too_soon(ctx, + onion_message, + &channel_update)) + /* No channel update. */ + return NULL; + + return patch_channel_update(ctx, take(channel_update)); +} + /*~ lightningd tells us when a payment has failed; we mark the channel (or - * node) unusable here (maybe temporarily), and unpack and channel_update - * contained in the error. */ -static struct io_plan *handle_routing_failure(struct io_conn *conn, + * node) unusable here if it's a permanent failure, and unpack any + * channel_update contained in the error. */ +static struct io_plan *handle_payment_failure(struct io_conn *conn, struct daemon *daemon, const u8 *msg) { struct pubkey erring_node; struct short_channel_id erring_channel; - u16 failcode; + u8 erring_channel_direction; + u8 *error; + enum onion_type failcode; u8 *channel_update; - if (!fromwire_gossip_routing_failure(msg, - msg, + if (!fromwire_gossip_payment_failure(msg, msg, &erring_node, &erring_channel, - &failcode, - &channel_update)) - master_badmsg(WIRE_GOSSIP_ROUTING_FAILURE, msg); - + &erring_channel_direction, + &error)) + master_badmsg(WIRE_GOSSIP_PAYMENT_FAILURE, msg); + + failcode = fromwire_peektype(error); + channel_update = channel_update_from_onion_error(tmpctx, error); + if (channel_update) + status_debug("Extracted channel_update %s from onionreply %s", + tal_hex(tmpctx, channel_update), + tal_hex(tmpctx, error)); routing_failure(daemon->rstate, &erring_node, &erring_channel, - (enum onion_type) failcode, + erring_channel_direction, + failcode, channel_update); return daemon_conn_read_next(conn, daemon->master); @@ -2550,7 +2614,7 @@ static struct io_plan *handle_outpoint_spent(struct io_conn *conn, struct chan *chan; struct routing_state *rstate = daemon->rstate; if (!fromwire_gossip_outpoint_spent(msg, &scid)) - master_badmsg(WIRE_GOSSIP_ROUTING_FAILURE, msg); + master_badmsg(WIRE_GOSSIP_OUTPOINT_SPENT, msg); chan = get_channel(rstate, &scid); if (chan) { @@ -2587,7 +2651,7 @@ static struct io_plan *handle_local_channel_close(struct io_conn *conn, struct chan *chan; struct routing_state *rstate = daemon->rstate; if (!fromwire_gossip_local_channel_close(msg, &scid)) - master_badmsg(WIRE_GOSSIP_ROUTING_FAILURE, msg); + master_badmsg(WIRE_GOSSIP_LOCAL_CHANNEL_CLOSE, msg); chan = get_channel(rstate, &scid); if (chan) @@ -2621,8 +2685,8 @@ static struct io_plan *recv_req(struct io_conn *conn, case WIRE_GOSSIP_GET_TXOUT_REPLY: return handle_txout_reply(conn, daemon, msg); - case WIRE_GOSSIP_ROUTING_FAILURE: - return handle_routing_failure(conn, daemon, msg); + case WIRE_GOSSIP_PAYMENT_FAILURE: + return handle_payment_failure(conn, daemon, msg); case WIRE_GOSSIP_OUTPOINT_SPENT: return handle_outpoint_spent(conn, daemon, msg); diff --git a/gossipd/routing.c b/gossipd/routing.c index a2bc483cc..c9ce2ad99 100644 --- a/gossipd/routing.c +++ b/gossipd/routing.c @@ -1563,65 +1563,57 @@ struct route_hop *get_route(const tal_t *ctx, struct routing_state *rstate, return hops; } -/** - * routing_failure_channel_out - Handle routing failure on a specific channel - * - * If we want to delete the channel, we reparent it to disposal_context. - */ -static void routing_failure_channel_out(const tal_t *disposal_context, - struct node *node, - enum onion_type failcode, - struct chan *chan, - time_t now) -{ - /* BOLT #4: - * - * - if the PERM bit is NOT set: - * - SHOULD restore the channels as it receives new `channel_update`s. - */ - if (failcode & PERM) - /* Set it up to be pruned. */ - tal_steal(disposal_context, chan); -} - void routing_failure(struct routing_state *rstate, const struct pubkey *erring_node_pubkey, const struct short_channel_id *scid, + int erring_direction, enum onion_type failcode, const u8 *channel_update) { - struct node *node; - time_t now = time_now().ts.tv_sec; - status_trace("Received routing failure 0x%04x (%s), " "erring node %s, " - "channel %s", + "channel %s/%u", (int) failcode, onion_type_name(failcode), type_to_string(tmpctx, struct pubkey, erring_node_pubkey), - type_to_string(tmpctx, struct short_channel_id, scid)); - - node = get_node(rstate, erring_node_pubkey); - if (!node) { - status_unusual("routing_failure: Erring node %s not in map", - type_to_string(tmpctx, struct pubkey, - erring_node_pubkey)); - /* No node, so no channel, so any channel_update - * can also be ignored. */ - return; + type_to_string(tmpctx, struct short_channel_id, scid), + erring_direction); + + /* lightningd will only extract this if UPDATE is set. */ + if (channel_update) { + u8 *err = handle_channel_update(rstate, channel_update, "error"); + if (err) { + status_unusual("routing_failure: " + "bad channel_update %s", + sanitize_error(err, err, NULL)); + tal_free(err); + } + } else if (failcode & UPDATE) { + status_unusual("routing_failure: " + "UPDATE bit set, no channel_update. " + "failcode: 0x%04x", + (int) failcode); } - /* BOLT #4: - * - * - if the NODE bit is set: - * - SHOULD remove all channels connected with the erring node from - * consideration. - * - */ + /* We respond to permanent errors, ignore the rest: they're + * for the pay command to worry about. */ + if (!(failcode & PERM)) + return; + if (failcode & NODE) { - for (int i = 0; i < tal_count(node->chans); ++i) { - routing_failure_channel_out(tmpctx, node, failcode, - node->chans[i], - now); + struct node *node = get_node(rstate, erring_node_pubkey); + if (!node) { + status_unusual("routing_failure: Erring node %s not in map", + type_to_string(tmpctx, struct pubkey, + erring_node_pubkey)); + } else { + status_trace("Deleting node %s", + type_to_string(tmpctx, + struct pubkey, + &node->id)); + for (size_t i = 0; i < tal_count(node->chans); ++i) { + /* Set it up to be pruned. */ + tal_steal(tmpctx, node->chans[i]); + } } } else { struct chan *chan = get_channel(rstate, scid); @@ -1632,49 +1624,21 @@ void routing_failure(struct routing_state *rstate, type_to_string(tmpctx, struct short_channel_id, scid)); - else if (chan->nodes[0] != node && chan->nodes[1] != node) - status_unusual("routing_failure: " - "Channel %s does not connect to %s", - type_to_string(tmpctx, - struct short_channel_id, - scid), - type_to_string(tmpctx, struct pubkey, - erring_node_pubkey)); - else - routing_failure_channel_out(tmpctx, - node, failcode, chan, now); - } - - /* Update the channel if UPDATE failcode. Do - * this after deactivating, so that if the - * channel_update is newer it will be - * reactivated. */ - if (failcode & UPDATE) { - u8 *err; - if (tal_count(channel_update) == 0) { - /* Suppress UNUSUAL log if local failure */ - if (pubkey_eq(erring_node_pubkey, &rstate->local_id)) + else { + /* This error can be triggered by sendpay if caller + * uses the wrong key for dest. */ + if (failcode == WIRE_INVALID_ONION_HMAC + && !pubkey_eq(&chan->nodes[!erring_direction]->id, + erring_node_pubkey)) return; - status_unusual("routing_failure: " - "UPDATE bit set, no channel_update. " - "failcode: 0x%04x", - (int) failcode); - return; - } - err = handle_channel_update(rstate, channel_update, "error"); - if (err) { - status_unusual("routing_failure: " - "bad channel_update %s", - sanitize_error(err, err, NULL)); - tal_free(err); - return; + + status_trace("Deleting channel %s", + type_to_string(tmpctx, + struct short_channel_id, + scid)); + /* Set it up to be deleted. */ + tal_steal(tmpctx, chan); } - } else { - if (tal_count(channel_update) != 0) - status_unusual("routing_failure: " - "UPDATE bit clear, channel_update given. " - "failcode: 0x%04x", - (int) failcode); } } diff --git a/gossipd/routing.h b/gossipd/routing.h index b29551f94..80c762243 100644 --- a/gossipd/routing.h +++ b/gossipd/routing.h @@ -271,6 +271,7 @@ struct route_hop *get_route(const tal_t *ctx, struct routing_state *rstate, void routing_failure(struct routing_state *rstate, const struct pubkey *erring_node, const struct short_channel_id *erring_channel, + int erring_direction, enum onion_type failcode, const u8 *channel_update); diff --git a/lightningd/gossip_control.c b/lightningd/gossip_control.c index 392b5d295..3fc0a18ff 100644 --- a/lightningd/gossip_control.c +++ b/lightningd/gossip_control.c @@ -111,7 +111,7 @@ static unsigned gossip_msg(struct subd *gossip, const u8 *msg, const int *fds) case WIRE_GOSSIP_GET_CHANNEL_PEER: case WIRE_GOSSIP_GET_TXOUT_REPLY: case WIRE_GOSSIP_OUTPOINT_SPENT: - case WIRE_GOSSIP_ROUTING_FAILURE: + case WIRE_GOSSIP_PAYMENT_FAILURE: case WIRE_GOSSIP_QUERY_SCIDS: case WIRE_GOSSIP_QUERY_CHANNEL_RANGE: case WIRE_GOSSIP_SEND_TIMESTAMP_FILTER: diff --git a/lightningd/pay.c b/lightningd/pay.c index 198985341..1c2ac32cd 100644 --- a/lightningd/pay.c +++ b/lightningd/pay.c @@ -188,63 +188,6 @@ void payment_succeeded(struct lightningd *ld, struct htlc_out *hout, payment_trigger_success(ld, &hout->payment_hash); } -/* Fix up the channel_update to include the type if it doesn't currently have - * one. See ElementsProject/lightning#1730 and lightningnetwork/lnd#1599 for the - * in-depth discussion on why we break message parsing here... */ -static u8 *patch_channel_update(const tal_t *ctx, u8 *channel_update TAKES) -{ - u8 *fixed; - if (channel_update != NULL && - fromwire_peektype(channel_update) != WIRE_CHANNEL_UPDATE) { - /* This should be a channel_update, prefix with the - * WIRE_CHANNEL_UPDATE type, but isn't. Let's prefix it. */ - fixed = tal_arr(ctx, u8, 0); - towire_u16(&fixed, WIRE_CHANNEL_UPDATE); - towire(&fixed, channel_update, tal_bytelen(channel_update)); - if (taken(channel_update)) - tal_free(channel_update); - return fixed; - } else { - return tal_dup_arr(ctx, u8, - channel_update, tal_count(channel_update), 0); - } -} - -/* Return NULL if the wrapped onion error message has no - * channel_update field, or return the embedded - * channel_update message otherwise. */ -static u8 *channel_update_from_onion_error(const tal_t *ctx, - const u8 *onion_message) -{ - u8 *channel_update = NULL; - u64 unused64; - u32 unused32; - - /* Identify failcodes that have some channel_update. - * - * TODO > BOLT 1.0: Add new failcodes when updating to a - * new BOLT version. */ - if (!fromwire_temporary_channel_failure(ctx, - onion_message, - &channel_update) && - !fromwire_amount_below_minimum(ctx, - onion_message, &unused64, - &channel_update) && - !fromwire_fee_insufficient(ctx, - onion_message, &unused64, - &channel_update) && - !fromwire_incorrect_cltv_expiry(ctx, - onion_message, &unused32, - &channel_update) && - !fromwire_expiry_too_soon(ctx, - onion_message, - &channel_update)) - /* No channel update. */ - return NULL; - - return patch_channel_update(ctx, take(channel_update)); -} - /* Return a struct routing_failure for an immediate failure * (returned directly from send_htlc_out). The returned * failure is allocated from the given context. */ @@ -269,7 +212,6 @@ immediate_routing_failure(const tal_t *ctx, /* FIXME: Don't set at all unless we know. */ else routing_failure->channel_dir = 0; - routing_failure->channel_update = NULL; return routing_failure; } @@ -293,8 +235,9 @@ local_routing_failure(const tal_t *ctx, routing_failure->erring_channel = payment->route_channels[0]; routing_failure->channel_dir = pubkey_idx(&ld->id, &payment->route_nodes[0]); - routing_failure->channel_update = NULL; + log_debug(hout->key.channel->log, "local_routing_failure: %u (%s)", + hout->failcode, onion_type_name(hout->failcode)); return routing_failure; } @@ -304,14 +247,13 @@ local_routing_failure(const tal_t *ctx, * failure to report (allocated from ctx) otherwise. */ static struct routing_failure* remote_routing_failure(const tal_t *ctx, + struct lightningd *ld, bool *p_retry_plausible, - bool *p_report_to_gossipd, const struct wallet_payment *payment, const struct onionreply *failure, struct log *log) { enum onion_type failcode = fromwire_peektype(failure->msg); - u8 *channel_update; struct routing_failure *routing_failure; const struct pubkey *route_nodes; const struct pubkey *erring_node; @@ -320,23 +262,13 @@ remote_routing_failure(const tal_t *ctx, static const struct short_channel_id dummy_channel = { 0 }; int origin_index; bool retry_plausible; - bool report_to_gossipd; int dir; routing_failure = tal(ctx, struct routing_failure); route_nodes = payment->route_nodes; route_channels = payment->route_channels; origin_index = failure->origin_index; - channel_update - = channel_update_from_onion_error(routing_failure, - failure->msg); - if (channel_update) - log_debug(log, "Extracted channel_update %s from onionreply %s", - tal_hex(tmpctx, channel_update), - tal_hex(tmpctx, failure->msg)); - retry_plausible = true; - report_to_gossipd = true; assert(origin_index < tal_count(route_nodes)); @@ -356,15 +288,10 @@ remote_routing_failure(const tal_t *ctx, retry_plausible = false; else retry_plausible = true; - /* Only send message to gossipd if NODE error; - * there is no "next" channel to report as - * failing if this is the last node. */ - if (failcode & NODE) - report_to_gossipd = true; - else - report_to_gossipd = false; erring_node = &route_nodes[origin_index]; } else { + u8 *gossip_msg; + /* Report the *next* channel as failing. */ erring_channel = &route_channels[origin_index + 1]; @@ -379,46 +306,29 @@ remote_routing_failure(const tal_t *ctx, erring_node = &route_nodes[origin_index + 1]; } else erring_node = &route_nodes[origin_index]; + + /* Tell gossipd: it may want to remove channels or even nodes + * in response to this, and there may be a channel_update + * embedded too */ + gossip_msg = towire_gossip_payment_failure(NULL, + erring_node, + erring_channel, + dir, + failure->msg); + subd_send_msg(ld->gossip, take(gossip_msg)); } routing_failure->erring_index = (unsigned int) (origin_index + 1); routing_failure->failcode = failcode; routing_failure->erring_node = *erring_node; routing_failure->erring_channel = *erring_channel; - routing_failure->channel_update = channel_update; routing_failure->channel_dir = dir; *p_retry_plausible = retry_plausible; - *p_report_to_gossipd = report_to_gossipd; return routing_failure; } -static void report_routing_failure(struct log *log, - struct subd *gossip, - struct routing_failure *fail) -{ - u8 *gossip_msg; - assert(fail); - - log_debug(log, - "Reporting route failure to gossipd: 0x%04x (%s) " - "node %s channel %s update %s", - fail->failcode, onion_type_name(fail->failcode), - type_to_string(tmpctx, struct pubkey, - &fail->erring_node), - type_to_string(tmpctx, struct short_channel_id, - &fail->erring_channel), - tal_hex(tmpctx, fail->channel_update)); - - gossip_msg = towire_gossip_routing_failure(tmpctx, - &fail->erring_node, - &fail->erring_channel, - (u16) fail->failcode, - fail->channel_update); - subd_send_msg(gossip, gossip_msg); -} - void payment_store(struct lightningd *ld, const struct sha256 *payment_hash) { @@ -453,7 +363,6 @@ void payment_failed(struct lightningd *ld, const struct htlc_out *hout, struct routing_failure* fail = NULL; const char *failmsg; bool retry_plausible; - bool report_to_gossipd; payment = wallet_payment_by_hash(tmpctx, ld->wallet, &hout->payment_hash); @@ -493,14 +402,15 @@ void payment_failed(struct lightningd *ld, const struct htlc_out *hout, fail = local_routing_failure(tmpctx, ld, hout, payment); failmsg = localfail; retry_plausible = true; - report_to_gossipd = true; } else { /* Must be remote fail. */ assert(!hout->failcode); failmsg = "reply from remote"; /* Try to parse reply. */ struct secret *path_secrets = payment->path_secrets; - struct onionreply *reply = unwrap_onionreply(tmpctx, path_secrets, + struct onionreply *reply; + + reply = unwrap_onionreply(tmpctx, path_secrets, tal_count(path_secrets), hout->failuremsg); if (!reply) { @@ -508,14 +418,9 @@ void payment_failed(struct lightningd *ld, const struct htlc_out *hout, "htlc %"PRIu64" failed with bad reply (%s)", hout->key.id, tal_hex(tmpctx, hout->failuremsg)); - /* Cannot report failure. */ + /* Cannot record failure. */ fail = NULL; - /* Can now retry; we selected a channel to mark - * unroutable by random */ retry_plausible = true; - /* Already reported something to gossipd, do not - * report anything else */ - report_to_gossipd = false; } else { enum onion_type failcode = fromwire_peektype(reply->msg); log_info(hout->key.channel->log, @@ -525,9 +430,8 @@ void payment_failed(struct lightningd *ld, const struct htlc_out *hout, hout->key.id, reply->origin_index, failcode, onion_type_name(failcode)); - fail = remote_routing_failure(tmpctx, + fail = remote_routing_failure(tmpctx, ld, &retry_plausible, - &report_to_gossipd, payment, reply, hout->key.channel->log); } @@ -545,15 +449,10 @@ void payment_failed(struct lightningd *ld, const struct htlc_out *hout, fail ? fail->failcode : 0, fail ? &fail->erring_node : NULL, fail ? &fail->erring_channel : NULL, - fail ? fail->channel_update : NULL, + NULL, failmsg, fail ? fail->channel_dir : 0); - /* Report to gossipd if we decided we should. */ - if (report_to_gossipd) - report_routing_failure(ld->log, ld->gossip, fail); - - /* Report to client. */ payment_route_failure(ld, &hout->payment_hash, retry_plausible, fail, hout->failuremsg, @@ -642,7 +541,6 @@ bool wait_payment(const tal_t *cxt, fail->failcode = failcode; fail->erring_node = *failnode; fail->erring_channel = *failchannel; - fail->channel_update = failupdate; fail->channel_dir = faildirection; result = sendpay_result_route_failure(tmpctx, !faildestperm, fail, NULL, faildetail); } @@ -763,7 +661,6 @@ send_payment(const tal_t *ctx, WIRE_UNKNOWN_NEXT_PEER, &route[0].channel_id, 0); - report_routing_failure(ld->log, ld->gossip, fail); /* Report routing failure to caller */ result = sendpay_result_route_failure(tmpctx, true, fail, NULL, @@ -792,7 +689,6 @@ send_payment(const tal_t *ctx, failcode, &route[0].channel_id, &channel->peer->id); - report_routing_failure(ld->log, ld->gossip, fail); /* Report routing failure to caller */ result = sendpay_result_route_failure(tmpctx, true, fail, NULL, @@ -917,9 +813,6 @@ static void json_waitsendpay_on_resolve(const struct sendpay_result *r, &fail->erring_channel); json_add_num(data, "erring_direction", fail->channel_dir); - if (fail->channel_update) - json_add_hex_talarr(data, "channel_update", - fail->channel_update); json_object_end(data); was_pending(command_failed(cmd, data)); return; diff --git a/lightningd/pay.h b/lightningd/pay.h index bb586ee2c..2630f7f7a 100644 --- a/lightningd/pay.h +++ b/lightningd/pay.h @@ -20,7 +20,6 @@ struct routing_failure { struct pubkey erring_node; struct short_channel_id erring_channel; int channel_dir; - u8 *channel_update; }; /* Result of send_payment */