Browse Source

pay: handle returned errors more gracefully.

The code had incorrect assertions, partially because it didn't clearly
distinguish errors from the final node (which, barring blockheight issues,
mean a complete failre) and intermediate nodes.

In particular, we can't trust the *values*, so we need to distinguish
these by the *sender*.

If a route is of length 2 (A, B):
- erring_index == 0 means us complaining about channel A.
- erring_index == 1 means A.node complaining about channel B.
- erring_index == 2 means the final destination node B.node.

This is particularly of note because Travis does NOT run test_pay_routeboost!

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
travis-test
Rusty Russell 5 years ago
committed by Christian Decker
parent
commit
baf3b9c460
  1. 339
      plugins/libplugin-pay.c

339
plugins/libplugin-pay.c

@ -592,7 +592,8 @@ fail:
} }
static void channel_hints_update(struct payment *root, static void channel_hints_update(struct payment *root,
struct short_channel_id *scid, int direction, const struct short_channel_id *scid,
int direction,
bool enabled, bool enabled,
struct amount_msat estimated_capacity) struct amount_msat estimated_capacity)
{ {
@ -660,129 +661,299 @@ static void payment_result_infer(struct route_hop *route,
r->erring_direction = &route[i].direction; r->erring_direction = &route[i].direction;
} }
static struct command_result * /* If a node takes too much fee or cltv, the next one reports it. We don't
payment_waitsendpay_finished(struct command *cmd, const char *buffer, * know who to believe, but log it */
const jsmntok_t *toks, struct payment *p) static void report_tampering(struct payment *p,
size_t report_pos,
const char *style)
{ {
struct payment *root; const struct node_id *id = &p->route[report_pos].nodeid;
struct route_hop *hop;
assert(p->route != NULL);
p->end_time = time_now(); if (report_pos == 0) {
p->result = tal_sendpay_result_from_json(p, buffer, toks);
if (p->result == NULL) {
plugin_log(p->plugin, LOG_UNUSUAL, plugin_log(p->plugin, LOG_UNUSUAL,
"Unable to parse `waitsendpay` result: %.*s", "Node #%zu (%s) claimed we sent them invalid %s",
json_tok_full_len(toks), report_pos + 1,
json_tok_full(buffer, toks)); type_to_string(tmpctx, struct node_id, id),
payment_set_step(p, PAYMENT_STEP_FAILED); style);
payment_continue(p); } else {
return command_still_pending(cmd); plugin_log(p->plugin, LOG_UNUSUAL,
"Node #%zu (%s) claimed #%zu (%s) sent them invalid %s",
report_pos + 1,
type_to_string(tmpctx, struct node_id, id),
report_pos,
type_to_string(tmpctx, struct node_id,
&p->route[report_pos-1].nodeid),
style);
}
} }
payment_result_infer(p->route, p->result); static struct command_result *
handle_final_failure(struct command *cmd,
struct payment *p,
const struct node_id *final_id,
enum onion_type failcode)
{
/* We use an exhaustive switch statement here so you get a compile
* warning when new ones are added, and can think about where they go */
switch (failcode) {
case WIRE_FINAL_INCORRECT_CLTV_EXPIRY:
report_tampering(p, tal_count(p->route)-1, "cltv");
goto error;
case WIRE_FINAL_INCORRECT_HTLC_AMOUNT:
report_tampering(p, tal_count(p->route)-1, "amount");
goto error;
if (p->result->state == PAYMENT_COMPLETE) { /* BOLT #4:
payment_set_step(p, PAYMENT_STEP_SUCCESS); *
payment_continue(p); * A _forwarding node_ MAY, but a _final node_ MUST NOT:
*...
* - return an `invalid_onion_version` error.
*...
* - return an `invalid_onion_hmac` error.
*...
* - return an `invalid_onion_key` error.
*...
* - return a `temporary_channel_failure` error.
*...
* - return a `permanent_channel_failure` error.
*...
* - return a `required_channel_feature_missing` error.
*...
* - return an `unknown_next_peer` error.
*...
* - return an `amount_below_minimum` error.
*...
* - return a `fee_insufficient` error.
*...
* - return an `incorrect_cltv_expiry` error.
*...
* - return an `expiry_too_soon` error.
*...
* - return an `expiry_too_far` error.
*...
* - return a `channel_disabled` error.
*/
case WIRE_INVALID_ONION_VERSION:
case WIRE_INVALID_ONION_HMAC:
case WIRE_INVALID_ONION_KEY:
case WIRE_TEMPORARY_CHANNEL_FAILURE:
case WIRE_PERMANENT_CHANNEL_FAILURE:
case WIRE_REQUIRED_CHANNEL_FEATURE_MISSING:
case WIRE_UNKNOWN_NEXT_PEER:
case WIRE_AMOUNT_BELOW_MINIMUM:
case WIRE_FEE_INSUFFICIENT:
case WIRE_INCORRECT_CLTV_EXPIRY:
case WIRE_EXPIRY_TOO_FAR:
case WIRE_EXPIRY_TOO_SOON:
case WIRE_CHANNEL_DISABLED:
goto strange_error;
case WIRE_INVALID_ONION_PAYLOAD:
case WIRE_INVALID_REALM:
case WIRE_PERMANENT_NODE_FAILURE:
case WIRE_TEMPORARY_NODE_FAILURE:
case WIRE_REQUIRED_NODE_FEATURE_MISSING:
#if EXPERIMENTAL_FEATURES
case WIRE_INVALID_ONION_BLINDING:
#endif
case WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS:
case WIRE_MPP_TIMEOUT:
goto error;
}
strange_error:
plugin_log(p->plugin, LOG_UNUSUAL,
"Final node %s reported strange error code %u",
type_to_string(tmpctx, struct node_id, final_id),
failcode);
error:
p->result->code = PAY_DESTINATION_PERM_FAIL;
payment_root(p)->abort = true;
payment_fail(p, "%s", p->result->message);
return command_still_pending(cmd); return command_still_pending(cmd);
} }
root = payment_root(p);
payment_chanhints_apply_route(p, true);
/* Either we have no erring_index, or it must be a correct index into static struct command_result *
* p->route. From the docs: handle_intermediate_failure(struct command *cmd,
* struct payment *p,
* - *erring_index*: The index of the node along the route that const struct node_id *errnode,
* reported the error. 0 for the local node, 1 for the first hop, const struct route_hop *errchan,
* and so on. enum onion_type failcode)
{
struct payment *root = payment_root(p);
/* We use an exhaustive switch statement here so you get a compile
* warning when new ones are added, and can think about where they go */
switch (failcode) {
/* BOLT #4:
* *
* The only difficulty is mapping the erring_index to the correct hop, * An _intermediate hop_ MUST NOT, but the _final node_:
* since the hop consists of the processing node, but the payload for *...
* the next hop. In addition there is a class of onion-related errors * - MUST return an `incorrect_or_unknown_payment_details` error.
* that are reported by the previous hop to the one erring, since the *...
* erring node couldn't read the onion in the first place. * - MUST return `final_incorrect_cltv_expiry` error.
*...
* - MUST return a `final_incorrect_htlc_amount` error.
*/ */
assert(p->result->erring_index == NULL || case WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS:
*p->result->erring_index - 1 < tal_count(p->route)); case WIRE_FINAL_INCORRECT_CLTV_EXPIRY:
case WIRE_FINAL_INCORRECT_HTLC_AMOUNT:
/* FIXME: Document in BOLT that intermediates must not return this! */
case WIRE_MPP_TIMEOUT:
goto strange_error;
switch (p->result->failcode) {
case WIRE_PERMANENT_CHANNEL_FAILURE: case WIRE_PERMANENT_CHANNEL_FAILURE:
case WIRE_CHANNEL_DISABLED: case WIRE_CHANNEL_DISABLED:
case WIRE_UNKNOWN_NEXT_PEER: case WIRE_UNKNOWN_NEXT_PEER:
case WIRE_REQUIRED_CHANNEL_FEATURE_MISSING: case WIRE_REQUIRED_CHANNEL_FEATURE_MISSING:
/* All of these result in the channel being marked as disabled. */ /* All of these result in the channel being marked as disabled. */
assert(*p->result->erring_index < tal_count(p->route)); channel_hints_update(root,
hop = &p->route[*p->result->erring_index]; &errchan->channel_id, errchan->direction,
channel_hints_update(root, &hop->channel_id, hop->direction,
false, AMOUNT_MSAT(0)); false, AMOUNT_MSAT(0));
break; break;
case WIRE_TEMPORARY_CHANNEL_FAILURE:
case WIRE_TEMPORARY_CHANNEL_FAILURE: {
/* These are an indication that the capacity was insufficient, /* These are an indication that the capacity was insufficient,
* remember the amount we tried as an estimate. */ * remember the amount we tried as an estimate. */
assert(*p->result->erring_index < tal_count(p->route)); struct amount_msat est = errchan->amount;
hop = &p->route[*p->result->erring_index]; est.millisatoshis *= 0.75; /* Raw: Multiplication */
struct amount_msat est = { channel_hints_update(root,
.millisatoshis = hop->amount.millisatoshis * 0.75}; /* Raw: Multiplication */ &errchan->channel_id, errchan->direction,
channel_hints_update(root, &hop->channel_id, hop->direction,
true, est); true, est);
break; goto error;
}
case WIRE_INCORRECT_CLTV_EXPIRY:
report_tampering(p, errchan - p->route, "cltv");
goto error;
case WIRE_INVALID_ONION_PAYLOAD:
case WIRE_INVALID_REALM:
case WIRE_PERMANENT_NODE_FAILURE:
case WIRE_TEMPORARY_NODE_FAILURE:
case WIRE_REQUIRED_NODE_FEATURE_MISSING:
case WIRE_INVALID_ONION_VERSION: case WIRE_INVALID_ONION_VERSION:
case WIRE_INVALID_ONION_HMAC: case WIRE_INVALID_ONION_HMAC:
case WIRE_INVALID_ONION_KEY: case WIRE_INVALID_ONION_KEY:
case WIRE_PERMANENT_NODE_FAILURE:
case WIRE_TEMPORARY_NODE_FAILURE:
case WIRE_REQUIRED_NODE_FEATURE_MISSING:
case WIRE_INVALID_ONION_PAYLOAD:
case WIRE_INVALID_REALM:
#if EXPERIMENTAL_FEATURES #if EXPERIMENTAL_FEATURES
case WIRE_INVALID_ONION_BLINDING: case WIRE_INVALID_ONION_BLINDING:
#endif #endif
/* These are reported by the last hop, i.e., the destination of hop i-1. */ tal_arr_expand(&root->excluded_nodes, *errnode);
assert(*p->result->erring_index - 1 < tal_count(p->route)); goto error;
hop = &p->route[*p->result->erring_index - 1];
tal_arr_expand(&root->excluded_nodes, hop->nodeid);
break;
case WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS:
p->result->code = PAY_DESTINATION_PERM_FAIL;
root->abort = true;
case WIRE_MPP_TIMEOUT:
/* These are permanent failures that should abort all of our
* attempts right away. We'll still track pending partial
* payments correctly, just not start new ones. */
root->abort = true;
break;
case WIRE_AMOUNT_BELOW_MINIMUM: case WIRE_AMOUNT_BELOW_MINIMUM:
case WIRE_FEE_INSUFFICIENT:
case WIRE_EXPIRY_TOO_FAR: case WIRE_EXPIRY_TOO_FAR:
case WIRE_EXPIRY_TOO_SOON: case WIRE_EXPIRY_TOO_SOON:
case WIRE_FEE_INSUFFICIENT: goto error;
case WIRE_INCORRECT_CLTV_EXPIRY:
case WIRE_FINAL_INCORRECT_CLTV_EXPIRY:
/* These are issues that are due to gossipd being out of date,
* we ignore them here, and wait for gossipd to adjust
* instead. */
break;
case WIRE_FINAL_INCORRECT_HTLC_AMOUNT:
/* These are symptoms of intermediate hops tampering with the
* payment. */
hop = &p->route[*p->result->erring_index];
plugin_log(
p->plugin, LOG_UNUSUAL,
"Node %s reported an incorrect HTLC amount, this could be "
"a prior hop messing with the amounts.",
type_to_string(tmpctx, struct node_id, &hop->nodeid));
break;
} }
strange_error:
plugin_log(p->plugin, LOG_UNUSUAL,
"Intermediate node %s reported strange error code %u",
type_to_string(tmpctx, struct node_id, errnode),
failcode);
error:
payment_fail(p, "%s", p->result->message); payment_fail(p, "%s", p->result->message);
return command_still_pending(cmd); return command_still_pending(cmd);
} }
/* From the docs:
*
* - *erring_index*: The index of the node along the route that
* reported the error. 0 for the local node, 1 for the first hop,
* and so on.
*
* The only difficulty is mapping the erring_index to the correct hop.
* We split into the erring node, and the error channel, since they're
* used in different contexts. NULL error_channel means it's the final
* node, whose errors are treated differently.
*/
static bool assign_blame(const struct payment *p,
const struct node_id **errnode,
const struct route_hop **errchan)
{
int index;
if (p->result->erring_index == NULL)
return false;
index = *p->result->erring_index;
/* BADONION errors are reported on behalf of the next node. */
if (p->result->failcode & BADONION)
index++;
/* Final node *shouldn't* report BADONION, but don't assume. */
if (index >= tal_count(p->route)) {
*errchan = NULL;
*errnode = &p->route[tal_count(p->route) - 1].nodeid;
return true;
}
*errchan = &p->route[index];
if (index == 0)
*errnode = p->local_id;
else
*errnode = &p->route[index - 1].nodeid;
return true;
}
static struct command_result *
payment_waitsendpay_finished(struct command *cmd, const char *buffer,
const jsmntok_t *toks, struct payment *p)
{
const struct node_id *errnode;
const struct route_hop *errchan;
assert(p->route != NULL);
p->end_time = time_now();
p->result = tal_sendpay_result_from_json(p, buffer, toks);
if (p->result == NULL) {
plugin_log(p->plugin, LOG_UNUSUAL,
"Unable to parse `waitsendpay` result: %.*s",
json_tok_full_len(toks),
json_tok_full(buffer, toks));
payment_set_step(p, PAYMENT_STEP_FAILED);
payment_continue(p);
return command_still_pending(cmd);
}
payment_result_infer(p->route, p->result);
if (p->result->state == PAYMENT_COMPLETE) {
payment_set_step(p, PAYMENT_STEP_SUCCESS);
payment_continue(p);
return command_still_pending(cmd);
}
payment_chanhints_apply_route(p, true);
if (!assign_blame(p, &errnode, &errchan)) {
plugin_log(p->plugin, LOG_UNUSUAL,
"No erring_index set in `waitsendpay` result: %.*s",
json_tok_full_len(toks),
json_tok_full(buffer, toks));
/* FIXME: Pick a random channel to fail? */
payment_set_step(p, PAYMENT_STEP_FAILED);
payment_continue(p);
return command_still_pending(cmd);
}
if (!errchan)
return handle_final_failure(cmd, p, errnode,
p->result->failcode);
return handle_intermediate_failure(cmd, p, errnode, errchan,
p->result->failcode);
}
static struct command_result *payment_sendonion_success(struct command *cmd, static struct command_result *payment_sendonion_success(struct command *cmd,
const char *buffer, const char *buffer,
const jsmntok_t *toks, const jsmntok_t *toks,

Loading…
Cancel
Save