
channeld: check option_data_loss_protect fields.

Firstly, if they claim to know a future value, we ask the HSM; if
they're right, we tell the master the per-commitment point they gave
us (we have no way to validate this, though), and it will not broadcast
a unilateral (knowing it would cause them to use a penalty tx!).

Otherwise, we check that the fields they sent are valid.  The spec says
to do this (and to fail the channel if they're wrong!), because otherwise
they could continually lie and give us a bad per-commitment secret when
we actually need it.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Author:    Rusty Russell (6 years ago)
Committer: Christian Decker
Parent:    6aed936799
Tag:       ppa-0.6.1

 channeld/channel.c       | 201
 tests/test_connection.py |  25
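Background on the first paragraph of the commit message: the secret the peer echoes back in `your_last_per_commitment_secret` is one of *ours*, and all of our per-commitment secrets derive from a single per-channel seed held by the HSM (BOLT #3), so the HSM can simply regenerate the secret for the claimed index and compare. Below is a minimal sketch of that derivation using CCAN's sha256; the function name is ours, not the HSM's API, and note that the tree maps commitment number N to shachain index 2^48 - 1 - N before deriving.

#include <ccan/crypto/sha256/sha256.h>
#include <stdint.h>

/* Sketch of BOLT #3 per-commitment secret generation: start from the seed
 * and, for each set bit of the index (bit 47 down to 0), flip that bit and
 * hash.  Because the HSM holds the seed, it can derive the secret for the
 * peer's claimed (future) index and check their claim against it. */
static void per_commitment_secret_from_seed(const struct sha256 *seed,
                                            uint64_t index,
                                            struct sha256 *secret)
{
        *secret = *seed;
        for (int bit = 47; bit >= 0; bit--) {
                if ((index >> bit) & 1) {
                        secret->u.u8[bit / 8] ^= (1 << (bit % 8));
                        sha256(secret, secret->u.u8, sizeof(secret->u.u8));
                }
        }
}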

--- a/channeld/channel.c
+++ b/channeld/channel.c

@@ -1830,6 +1830,155 @@ static void resend_commitment(struct peer *peer, const struct changed_htlc *last
                              peer->revocations_received);
 }
 
+/* BOLT #2:
+ *
+ * A receiving node:
+ *  - if it supports `option_data_loss_protect`, AND the
+ *    `option_data_loss_protect` fields are present:
+ *    - if `next_remote_revocation_number` is greater than expected above,
+ *      AND `your_last_per_commitment_secret` is correct for that
+ *      `next_remote_revocation_number` minus 1:
+ */
+static void check_future_dataloss_fields(struct peer *peer,
+                        u64 next_remote_revocation_number,
+                        const struct secret *last_local_per_commit_secret,
+                        const struct pubkey *remote_current_per_commitment_point)
+{
+        const u8 *msg;
+        bool correct;
+
+        assert(next_remote_revocation_number > peer->next_index[LOCAL] - 1);
+
+        /* They don't support option_data_loss_protect, we fail it due to
+         * unexpected number */
+        if (!last_local_per_commit_secret)
+                peer_failed(&peer->cs,
+                            &peer->channel_id,
+                            "bad reestablish revocation_number: %"PRIu64
+                            " vs %"PRIu64,
+                            next_remote_revocation_number,
+                            peer->next_index[LOCAL] - 1);
+
+        msg = towire_hsm_check_future_secret(NULL,
+                                             next_remote_revocation_number - 1,
+                                             last_local_per_commit_secret);
+        msg = hsm_req(tmpctx, take(msg));
+        if (!fromwire_hsm_check_future_secret_reply(msg, &correct))
+                status_failed(STATUS_FAIL_HSM_IO,
+                              "Bad hsm_check_future_secret_reply: %s",
+                              tal_hex(tmpctx, msg));
+
+        if (!correct)
+                peer_failed(&peer->cs,
+                            &peer->channel_id,
+                            "bad future last_local_per_commit_secret: %"PRIu64
+                            " vs %"PRIu64,
+                            next_remote_revocation_number,
+                            peer->next_index[LOCAL] - 1);
+
+        /* Oh shit, they really are from the future! */
+        peer_billboard(true, "They have future commitment number %"PRIu64
+                       " vs our %"PRIu64". We must wait for them to close!",
+                       next_remote_revocation_number,
+                       peer->next_index[LOCAL] - 1);
+
+        /* BOLT #2:
+         *   - MUST NOT broadcast its commitment transaction.
+         *   - SHOULD fail the channel.
+         *   - SHOULD store `my_current_per_commitment_point` to
+         *     retrieve funds should the sending node broadcast its
+         *     commitment transaction on-chain.
+         */
+        wire_sync_write(MASTER_FD,
+                        take(towire_channel_fail_fallen_behind(NULL,
+                                remote_current_per_commitment_point)));
+
+        /* We have to send them an error to trigger dropping to chain. */
+        peer_failed(&peer->cs, &peer->channel_id,
+                    "Catastrophic failure: please close channel");
+}
+
+/* BOLT #2:
+ *
+ * A receiving node:
+ *  - if it supports `option_data_loss_protect`, AND the
+ *    `option_data_loss_protect` fields are present:
+ *...
+ *    - otherwise (`your_last_per_commitment_secret` or
+ *      `my_current_per_commitment_point` do not match the expected values):
+ *      - SHOULD fail the channel.
+ */
+static void check_current_dataloss_fields(struct peer *peer,
+                        u64 next_remote_revocation_number,
+                        const struct secret *last_local_per_commit_secret,
+                        const struct pubkey *remote_current_per_commitment_point)
+{
+        struct secret old_commit_secret;
+
+        /* By the time we're called, we've ensured this is a valid revocation
+         * number. */
+        assert(next_remote_revocation_number == peer->next_index[LOCAL] - 2
+               || next_remote_revocation_number == peer->next_index[LOCAL] - 1);
+
+        if (!last_local_per_commit_secret)
+                return;
+
+        /* BOLT #2:
+         *    - if `next_remote_revocation_number` equals 0:
+         *      - MUST set `your_last_per_commitment_secret` to all zeroes
+         */
+        status_trace("next_remote_revocation_number = %"PRIu64,
+                     next_remote_revocation_number);
+        if (next_remote_revocation_number == 0)
+                memset(&old_commit_secret, 0, sizeof(old_commit_secret));
+        else {
+                struct pubkey unused;
+                /* This gets previous revocation number, since asking for
+                 * commitment point N gives secret for N-2 */
+                get_per_commitment_point(next_remote_revocation_number+1,
+                                         &unused, &old_commit_secret);
+        }
+
+        if (!secret_eq_consttime(&old_commit_secret,
+                                 last_local_per_commit_secret))
+                peer_failed(&peer->cs,
+                            &peer->channel_id,
+                            "bad reestablish: your_last_per_commitment_secret %"PRIu64
+                            ": %s should be %s",
+                            next_remote_revocation_number,
+                            type_to_string(tmpctx, struct secret,
+                                           last_local_per_commit_secret),
+                            type_to_string(tmpctx, struct secret,
+                                           &old_commit_secret));
+
+        /* FIXME: We don't keep really old per_commit numbers, so we can't
+         * check this 'needs retransmit' case! */
+        if (next_remote_revocation_number == peer->next_index[REMOTE]) {
+                if (!pubkey_eq(remote_current_per_commitment_point,
+                               &peer->old_remote_per_commit)) {
+                        peer_failed(&peer->cs,
+                                    &peer->channel_id,
+                                    "bad reestablish: my_current_per_commitment_point %"PRIu64
+                                    ": %s should be %s (new is %s)",
+                                    next_remote_revocation_number,
+                                    type_to_string(tmpctx, struct pubkey,
+                                                   remote_current_per_commitment_point),
+                                    type_to_string(tmpctx, struct pubkey,
+                                                   &peer->old_remote_per_commit),
+                                    type_to_string(tmpctx, struct pubkey,
+                                                   &peer->remote_per_commit));
+                }
+        } else
+                status_trace("option_data_loss_protect: can't check their claimed per_commitment_point %s #%"PRIu64"-1 as we're at %"PRIu64,
+                             type_to_string(tmpctx, struct pubkey,
+                                            remote_current_per_commitment_point),
+                             next_remote_revocation_number,
+                             peer->next_index[REMOTE]);
+
+        status_trace("option_data_loss_protect: fields are correct");
+}
+
 static void peer_reconnect(struct peer *peer,
                            const struct secret *last_remote_per_commit_secret)
 {
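A note on check_current_dataloss_fields() above: it compares secrets with secret_eq_consttime() rather than memcmp(), since a comparison that bails out at the first differing byte can leak, through timing, how many leading bytes of the expected secret a probing peer got right. A rough sketch of such a comparison (struct secret is redeclared here only to keep the sketch self-contained; in the tree it is the same plain 32-byte array):

#include <stdbool.h>
#include <stddef.h>

struct secret {
        unsigned char data[32];
};

/* Sketch only: OR together the XOR of every byte pair so the loop always
 * runs to the end, regardless of where (or whether) the secrets differ. */
static bool secret_eq_ct(const struct secret *a, const struct secret *b)
{
        unsigned char diff = 0;
        for (size_t i = 0; i < sizeof(a->data); i++)
                diff |= a->data[i] ^ b->data[i];
        return diff == 0;
}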
@@ -1840,7 +1989,9 @@ static void peer_reconnect(struct peer *peer,
         struct htlc_map_iter it;
         const struct htlc *htlc;
         u8 *msg;
-        struct pubkey my_current_per_commitment_point;
+        struct pubkey my_current_per_commitment_point,
+                *remote_current_per_commitment_point;
+        struct secret *last_local_per_commitment_secret;
 
         get_per_commitment_point(peer->next_index[LOCAL]-1,
                                  &my_current_per_commitment_point, NULL);
@@ -1887,14 +2038,31 @@ static void peer_reconnect(struct peer *peer,
         } while (handle_peer_gossip_or_error(PEER_FD, GOSSIP_FD, &peer->cs,
                                              &peer->channel_id, msg));
 
-        if (!fromwire_channel_reestablish(msg, &channel_id,
-                                          &next_local_commitment_number,
-                                          &next_remote_revocation_number)) {
-                peer_failed(&peer->cs,
-                            &peer->channel_id,
-                            "bad reestablish msg: %s %s",
-                            wire_type_name(fromwire_peektype(msg)),
-                            tal_hex(msg, msg));
+        remote_current_per_commitment_point = tal(peer, struct pubkey);
+        last_local_per_commitment_secret = tal(peer, struct secret);
+
+        /* We support option, so check for theirs. */
+        if (!fromwire_channel_reestablish_option_data_loss_protect(msg,
+                                        &channel_id,
+                                        &next_local_commitment_number,
+                                        &next_remote_revocation_number,
+                                        last_local_per_commitment_secret,
+                                        remote_current_per_commitment_point)) {
+                /* We don't have these, so free and NULL them */
+                remote_current_per_commitment_point
+                        = tal_free(remote_current_per_commitment_point);
+                last_local_per_commitment_secret
+                        = tal_free(last_local_per_commitment_secret);
+
+                if (!fromwire_channel_reestablish(msg, &channel_id,
+                                          &next_local_commitment_number,
+                                          &next_remote_revocation_number)) {
+                        peer_failed(&peer->cs,
+                                    &peer->channel_id,
+                                    "bad reestablish msg: %s %s",
+                                    wire_type_name(fromwire_peektype(msg)),
+                                    tal_hex(msg, msg));
+                }
         }
 
         status_trace("Got reestablish commit=%"PRIu64" revoke=%"PRIu64,
@@ -1950,15 +2118,22 @@ static void peer_reconnect(struct peer *peer,
                                     next_remote_revocation_number);
                 }
                 retransmit_revoke_and_ack = true;
-        } else if (next_remote_revocation_number != peer->next_index[LOCAL] - 1) {
+        } else if (next_remote_revocation_number < peer->next_index[LOCAL] - 1) {
                 peer_failed(&peer->cs,
                             &peer->channel_id,
                             "bad reestablish revocation_number: %"PRIu64
                             " vs %"PRIu64,
                             next_remote_revocation_number,
                             peer->next_index[LOCAL]);
-        } else
-                retransmit_revoke_and_ack = false;
+        } else if (next_remote_revocation_number > peer->next_index[LOCAL] - 1) {
+                /* Remote claims it's ahead of us: can it prove it?
+                 * Does not return. */
+                check_future_dataloss_fields(peer,
+                                             next_remote_revocation_number,
+                                             last_local_per_commitment_secret,
+                                             remote_current_per_commitment_point);
+        } else
+                retransmit_revoke_and_ack = false;
 
         /* BOLT #2:
          *
@@ -1997,6 +2172,12 @@ static void peer_reconnect(struct peer *peer,
         else
                 retransmit_commitment_signed = false;
 
+        /* After we checked basic sanity, we check dataloss fields if any */
+        check_current_dataloss_fields(peer,
+                                      next_remote_revocation_number,
+                                      last_local_per_commitment_secret,
+                                      remote_current_per_commitment_point);
+
         /* We have to re-send in the same order we sent originally:
          * revoke_and_ack (usually) alters our next commitment. */
         if (retransmit_revoke_and_ack && !peer->last_was_revoke)
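Taken together with the existing retransmit check, the reestablish handling now classifies the peer's claimed revocation number four ways. A condensed, simplified illustration (the names and enum are ours, not the commit's, and the real code also guards the edge cases around the very first commitments):

#include <stdint.h>

enum reestablish_action {
        RETRANSMIT_REVOKE_AND_ACK, /* they missed our last revoke_and_ack */
        FAIL_CHANNEL,              /* they went backwards: unrecoverable */
        CHECK_FUTURE_DATALOSS,     /* they claim to be ahead of us */
        IN_SYNC,                   /* nothing to retransmit */
};

/* Illustration of the decision peer_reconnect now makes when comparing the
 * peer's next_remote_revocation_number with our local commitment index. */
static enum reestablish_action
classify_revocation_number(uint64_t next_remote_revocation_number,
                           uint64_t next_index_local)
{
        if (next_remote_revocation_number == next_index_local - 2)
                return RETRANSMIT_REVOKE_AND_ACK;
        if (next_remote_revocation_number < next_index_local - 1)
                return FAIL_CHANNEL;
        if (next_remote_revocation_number > next_index_local - 1)
                /* check_future_dataloss_fields() in the diff above
                 * never returns. */
                return CHECK_FUTURE_DATALOSS;
        return IN_SYNC;
}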

--- a/tests/test_connection.py
+++ b/tests/test_connection.py

@@ -1156,7 +1156,12 @@ def test_dataloss_protection(node_factory):
                            "8a")
     l1.fund_channel(l2, 10**6)
 
-    l2.restart()
+    l2.stop()
+
+    # Save copy of the db.
+    dbpath = os.path.join(l2.daemon.lightning_dir, "lightningd.sqlite3")
+    orig_db = open(dbpath, "rb").read()
+    l2.start()
 
     # l1 should have sent WIRE_CHANNEL_REESTABLISH with option_data_loss_protect.
     l1.daemon.wait_for_log("\[OUT\] 0088"
@@ -1192,3 +1197,21 @@ def test_dataloss_protection(node_factory):
                            "[0-9a-f]{64}"
                            # my_current_per_commitment_point
                            "0[23][0-9a-f]{64}")
+
+    # Now, move l2 back in time.
+    l2.stop()
+    # Overwrite with OLD db.
+    open(dbpath, "wb").write(orig_db)
+    l2.start()
+
+    # l2 should freak out!
+    l2.daemon.wait_for_log("Catastrophic failure: please close channel")
+
+    # l1 should drop to chain.
+    l1.daemon.wait_for_log('sendrawtx exit 0')
+
+    # l2 must NOT drop to chain.
+    l2.daemon.wait_for_log("Cannot broadcast our commitment tx: they have a future one")
+    assert not l2.daemon.is_in_log('sendrawtx exit 0')
+
+    # FIXME: l2 should still be able to collect onchain.
