From c45d034bc0ac8e71f0d37392f5d4aab6161a1e3b Mon Sep 17 00:00:00 2001
From: lisa neigut
Date: Thu, 17 Jan 2019 17:21:19 -0800
Subject: [PATCH] option_data_loss_protect: fixup commitment point check

Spurious errors were occurring when we checked the current commitment
point that a peer provides on reconnect with option_data_loss_protect
enabled. The problem was that we used the wrong counter to screen
which of our two stored commitment points to compare the peer's
against: we keyed off of their next_remote_revocation_number and our
next commitment index, instead of their next_local_commitment_number
and the number of revocations we've received.

This fixes the screening, plus makes our data_loss test a teensy bit
more robust.
---
 channeld/channeld.c      | 56 ++++++++++++++++++++++++++++++----------
 tests/test_connection.py | 12 +++++----
 2 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/channeld/channeld.c b/channeld/channeld.c
index c0e357e9c..18fe40482 100644
--- a/channeld/channeld.c
+++ b/channeld/channeld.c
@@ -1979,6 +1979,7 @@ static void check_future_dataloss_fields(struct peer *peer,
  */
 static void check_current_dataloss_fields(struct peer *peer,
                                           u64 next_remote_revocation_number,
+                                          u64 next_local_commitment_number,
                                           const struct secret *last_local_per_commit_secret,
                                           const struct pubkey *remote_current_per_commitment_point)
 {
@@ -1989,6 +1990,11 @@
         assert(next_remote_revocation_number == peer->next_index[LOCAL] - 2 ||
                next_remote_revocation_number == peer->next_index[LOCAL] - 1);
 
+        /* By the time we're called, we've ensured we're within 1 of
+         * their commitment chain */
+        assert(next_local_commitment_number == peer->next_index[REMOTE] ||
+               next_local_commitment_number == peer->next_index[REMOTE] - 1);
+
         if (!last_local_per_commit_secret)
                 return;
 
@@ -2021,16 +2027,23 @@
                              type_to_string(tmpctx, struct secret,
                                             &old_commit_secret));
 
-        /* FIXME: We don't keep really old per_commit numbers, so we can't
-         * check this 'needs retransmit' case! */
-        if (next_remote_revocation_number == peer->next_index[REMOTE]) {
+        status_trace("Reestablish, comparing commitments. Remote's next local commitment number"
Our next remote is %"PRIu64" with %"PRIu64 + " revocations received", + next_local_commitment_number, + peer->next_index[REMOTE], + peer->revocations_received); + + /* Either they haven't received our commitment yet, or we're up to date */ + if (next_local_commitment_number == peer->revocations_received + 1) { if (!pubkey_eq(remote_current_per_commitment_point, - &peer->old_remote_per_commit)) { + &peer->old_remote_per_commit)) { peer_failed(&peer->cs, &peer->channel_id, - "bad reestablish: my_current_per_commitment_point %"PRIu64 - ": %s should be %s (new is %s)", - next_remote_revocation_number, + "bad reestablish: remote's " + "my_current_per_commitment_point %"PRIu64 + "is %s; expected %s (new is %s).", + next_local_commitment_number - 1, type_to_string(tmpctx, struct pubkey, remote_current_per_commitment_point), type_to_string(tmpctx, struct pubkey, @@ -2038,12 +2051,24 @@ static void check_current_dataloss_fields(struct peer *peer, type_to_string(tmpctx, struct pubkey, &peer->remote_per_commit)); } - } else - status_trace("option_data_loss_protect: can't check their claimed per_commitment_point %s #%"PRIu64"-1 as we're at %"PRIu64, - type_to_string(tmpctx, struct pubkey, - remote_current_per_commitment_point), - next_remote_revocation_number, - peer->next_index[REMOTE]); + } else { + /* We've sent a commit sig but haven't gotten a revoke+ack back */ + if (!pubkey_eq(remote_current_per_commitment_point, + &peer->remote_per_commit)) { + peer_failed(&peer->cs, + &peer->channel_id, + "bad reestablish: remote's " + "my_current_per_commitment_point %"PRIu64 + "is %s; expected %s (old is %s).", + next_local_commitment_number - 1, + type_to_string(tmpctx, struct pubkey, + remote_current_per_commitment_point), + type_to_string(tmpctx, struct pubkey, + &peer->remote_per_commit), + type_to_string(tmpctx, struct pubkey, + &peer->old_remote_per_commit)); + } + } status_trace("option_data_loss_protect: fields are correct"); } @@ -2066,7 +2091,9 @@ static void peer_reconnect(struct peer *peer, dataloss_protect = local_feature_negotiated(peer->localfeatures, LOCAL_DATA_LOSS_PROTECT); - get_per_commitment_point(peer->next_index[LOCAL]-1, + /* Our current per-commitment point is the commitment point in the last + * received signed commitment */ + get_per_commitment_point(peer->next_index[LOCAL] - 1, &my_current_per_commitment_point, NULL); /* BOLT #2: @@ -2265,6 +2292,7 @@ static void peer_reconnect(struct peer *peer, if (dataloss_protect) check_current_dataloss_fields(peer, next_remote_revocation_number, + next_local_commitment_number, &last_local_per_commitment_secret, &remote_current_per_commitment_point); diff --git a/tests/test_connection.py b/tests/test_connection.py index 9539638b3..cbf8bd375 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -1255,10 +1255,12 @@ def test_funder_simple_reconnect(node_factory, bitcoind): @unittest.skipIf(not DEVELOPER, "needs LIGHTNINGD_DEV_LOG_IO") -@unittest.skipIf(not EXPERIMENTAL_FEATURES, "needs option_dataloss_protect") +@unittest.skipIf(not EXPERIMENTAL_FEATURES, "needs option_data_loss_protect") def test_dataloss_protection(node_factory, bitcoind): - l1 = node_factory.get_node(may_reconnect=True, log_all_io=True) - l2 = node_factory.get_node(may_reconnect=True, log_all_io=True) + l1 = node_factory.get_node(may_reconnect=True, log_all_io=True, + feerates=(7500, 7500, 7500)) + l2 = node_factory.get_node(may_reconnect=True, log_all_io=True, + feerates=(7500, 7500, 7500)) l1.rpc.connect(l2.info['id'], 'localhost', l2.port) # l1 should 
     # l1 should send out WIRE_INIT (0010)
@@ -1283,9 +1285,9 @@ def test_dataloss_protection(node_factory, bitcoind):
                            # channel_id
                            "[0-9a-f]{64}"
                            # next_local_commitment_number
-                           "000000000000000[1-9]"
+                           "0000000000000001"
                            # next_remote_revocation_number
-                           "000000000000000[0-9]"
+                           "0000000000000000"
                            # your_last_per_commitment_secret (funding_depth may
                            # trigger a fee-update and commit, hence this may not
                            # be zero)
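
To make the new screening rule easier to review: below is a minimal,
standalone sketch of just that decision, compiled apart from channeld.
The struct peer and expected_per_commitment_point() here are invented
for illustration (channeld's real types and helpers differ); only the
field names and the two comparisons mirror the patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

/* Stand-in for channeld's pubkey; the contents don't matter here. */
struct pubkey { unsigned char key[33]; };

/* Stand-in for the relevant slice of channeld's struct peer. */
struct peer {
        u64 next_index_remote;       /* peer->next_index[REMOTE] */
        u64 revocations_received;    /* revoke_and_acks we've received */
        struct pubkey old_remote_per_commit;
        struct pubkey remote_per_commit;
};

/* Which stored remote per-commitment point should match the
 * my_current_per_commitment_point the peer sends on reestablish? */
static const struct pubkey *
expected_per_commitment_point(const struct peer *peer,
                              u64 next_local_commitment_number)
{
        /* Precondition from the patch: we're within one commitment
         * of their chain. */
        assert(next_local_commitment_number == peer->next_index_remote ||
               next_local_commitment_number == peer->next_index_remote - 1);

        /* Either they haven't received our latest commitment_signed,
         * or we're fully up to date: they're still on the point we
         * remember as "old". */
        if (next_local_commitment_number == peer->revocations_received + 1)
                return &peer->old_remote_per_commit;

        /* We sent a commitment_signed but their revoke_and_ack never
         * reached us: their current point is the newer one. */
        return &peer->remote_per_commit;
}

int main(void)
{
        struct peer peer = {
                .next_index_remote = 5,   /* we'd send commitment #5 next */
                .revocations_received = 3,
        };

        /* Our commitment_signed #4 never arrived: they expect #4, and
         * 4 == 3 + 1, so compare against the old point. */
        assert(expected_per_commitment_point(&peer, 4) ==
               &peer.old_remote_per_commit);

        /* They got commitment #4 but we lost their revoke_and_ack:
         * 5 != 3 + 1, so compare against the newer point. */
        assert(expected_per_commitment_point(&peer, 5) ==
               &peer.remote_per_commit);

        printf("screening rule behaves as described\n");
        return 0;
}

Note the asymmetry this encodes: the old check keyed off our own
next_index[REMOTE], which can't distinguish "they never saw our commit
sig" from "we never saw their revocation"; counting revocations we've
actually received can.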