From 9d26c6b38ea118523937b83a5d1fe3fe4eceeb05 Mon Sep 17 00:00:00 2001
From: niftynei <niftynei@gmail.com>
Date: Wed, 10 Mar 2021 20:23:03 -0600
Subject: [PATCH] mfc: check that we can retry when things go haywire

There's a version of this that keeps the PSBT in memory and does some
fancy addition/subtraction of unuseable parts for the v2's, however
it's much easier and simpler to simply error on the peer and re-start
from the very beginning.

This only works if we haven't gotten commitments from the peer yet (in
fact either method would only work if we haven't got commitments from
the peer yet), so if we've got commitments from them we simply mark them
as failed an go again.

In a perfect world, we'd remember what inputs we used last time, and
reuse those again on the re-attempt, which would pefectly guarantee both
that the failed opens (ones w/ commitments exchanged) would be canceled
after this completes (and we could re-try the failed again).

As it is, this is not perfect. It is, however, servicable.
---
 plugins/spender/multifundchannel.c | 123 +++++++++++++++++++++++++----
 tests/test_opening.py              |  86 +++++++++++++++++++-
 2 files changed, 194 insertions(+), 15 deletions(-)

diff --git a/plugins/spender/multifundchannel.c b/plugins/spender/multifundchannel.c
index d30e6fb2de0b..61892145185c 100644
--- a/plugins/spender/multifundchannel.c
+++ b/plugins/spender/multifundchannel.c
@@ -73,6 +73,33 @@ bool is_v2(const struct multifundchannel_destination *dest)
 	return dest->protocol == OPEN_CHANNEL;
 }
 
+static bool
+has_commitments_secured(const struct multifundchannel_destination *dest)
+{
+	/* If it failed, make sure we hadn't gotten
+	 * commitments yet */
+	enum multifundchannel_state state = dest->state;
+	if (state == MULTIFUNDCHANNEL_FAILED)
+		state = dest->fail_state;
+
+	switch (state) {
+	case MULTIFUNDCHANNEL_START_NOT_YET:
+	case MULTIFUNDCHANNEL_CONNECTED:
+	case MULTIFUNDCHANNEL_STARTED:
+	case MULTIFUNDCHANNEL_UPDATED:
+		return false;
+	case MULTIFUNDCHANNEL_COMPLETED:
+	case MULTIFUNDCHANNEL_SECURED:
+	case MULTIFUNDCHANNEL_SIGNED:
+	case MULTIFUNDCHANNEL_DONE:
+		return true;
+	case MULTIFUNDCHANNEL_FAILED:
+		/* Shouldn't be FAILED */
+		break;
+	}
+	abort();
+}
+
 /*-----------------------------------------------------------------------------
 Command Cleanup
 -----------------------------------------------------------------------------*/
@@ -148,6 +175,22 @@ mfc_cleanup_psbt(struct command *cmd,
 	send_outreq(cmd->plugin, req);
 }
 
+/* Cleans up a `openchannel_init` by doing `openchannel_abort` for the channel*/
+static void
+mfc_cleanup_oc(struct command *cmd,
+	       struct multifundchannel_cleanup *cleanup,
+	       struct multifundchannel_destination *dest)
+{
+	struct out_req *req = jsonrpc_request_start(cmd->plugin,
+						     cmd,
+						     "openchannel_abort",
+						     &mfc_cleanup_done,
+						     &mfc_cleanup_done,
+						     cleanup);
+	json_add_channel_id(req->js, "channel_id", &dest->channel_id);
+	send_outreq(cmd->plugin, req);
+}
+
 /* Cleans up a `fundchannel_start` by doing `fundchannel_cancel` on
 the node.
 */
@@ -184,6 +227,8 @@ mfc_cleanup_(struct multifundchannel_command *mfc,
 	cleanup->cb = cb;
 	cleanup->arg = arg;
 
+	/* If there's commitments, anywhere, we have to fail/restart.
+	 * We also cleanup the PSBT if there's no v2's around */
 	if (mfc->psbt) {
 		plugin_log(mfc->cmd->plugin, LOG_DBG,
 			   "mfc %"PRIu64": unreserveinputs task.", mfc->id);
@@ -195,22 +240,60 @@ mfc_cleanup_(struct multifundchannel_command *mfc,
 		struct multifundchannel_destination *dest;
 		dest = &mfc->destinations[i];
 
-		// FIXME: openchannel_abort??
-		if (dest->protocol == OPEN_CHANNEL)
+		switch (dest->state) {
+		case MULTIFUNDCHANNEL_STARTED:
+			/* v1 handling */
+			if (!is_v2(dest)) {
+				plugin_log(mfc->cmd->plugin, LOG_DBG,
+					   "mfc %"PRIu64", dest %u: "
+					   "fundchannel_cancel task.",
+					   mfc->id, dest->index);
+				++cleanup->pending;
+				mfc_cleanup_fc(mfc->cmd, cleanup, dest);
+			} else { /* v2 handling */
+				plugin_log(mfc->cmd->plugin, LOG_DBG,
+					   "mfc %"PRIu64", dest %u:"
+					   " openchannel_abort task.",
+					   mfc->id, dest->index);
+				++cleanup->pending;
+				mfc_cleanup_oc(mfc->cmd, cleanup, dest);
+			}
 			continue;
-
-		/* If not started/completed, nothing to clean up.  */
-		if (dest->state != MULTIFUNDCHANNEL_STARTED &&
-			dest->state != MULTIFUNDCHANNEL_COMPLETED)
+		case MULTIFUNDCHANNEL_COMPLETED:
+			/* Definitely a v1 */
+			plugin_log(mfc->cmd->plugin, LOG_DBG,
+				   "mfc %"PRIu64", dest %u: "
+				   "fundchannel_cancel task.",
+				   mfc->id, dest->index);
+			++cleanup->pending;
+			mfc_cleanup_fc(mfc->cmd, cleanup, dest);
 			continue;
-
-		plugin_log(mfc->cmd->plugin, LOG_DBG,
-			   "mfc %"PRIu64", dest %u: "
-			   "fundchannel_cancel task.",
-			   mfc->id, dest->index);
-
-		++cleanup->pending;
-		mfc_cleanup_fc(mfc->cmd, cleanup, dest);
+		case MULTIFUNDCHANNEL_UPDATED:
+			/* Definitely a v2 */
+			plugin_log(mfc->cmd->plugin, LOG_DBG,
+				   "mfc %"PRIu64", dest %u:"
+				   " openchannel_abort task.",
+				   mfc->id, dest->index);
+			++cleanup->pending;
+			mfc_cleanup_oc(mfc->cmd, cleanup, dest);
+			continue;
+		case MULTIFUNDCHANNEL_SECURED:
+		case MULTIFUNDCHANNEL_SIGNED:
+			/* We don't actually *send* the
+			 * transaction until here,
+			 * but peer isnt going to forget. This
+			 * open is borked until an input is
+			 * spent or it times out */
+			continue;
+		case MULTIFUNDCHANNEL_START_NOT_YET:
+		case MULTIFUNDCHANNEL_CONNECTED:
+		case MULTIFUNDCHANNEL_DONE:
+		case MULTIFUNDCHANNEL_FAILED:
+			/* Nothing to do ! */
+			continue;
+		}
+		/* We shouldn't make it this far */
+		abort();
 	}
 
 	if (cleanup->pending == 0)
@@ -1601,7 +1684,19 @@ post_cleanup_redo_multifundchannel(struct multifundchannel_redo *redo)
 
 		dest = &old_destinations[i];
 
+		/* We have to fail any v2 that has commitments already */
+		if (is_v2(dest) && has_commitments_secured(dest)
+		     && !dest_failed(dest)) {
+			fail_destination(dest, take(tal_fmt(NULL, "%s",
+					 "\"Attempting retry,"
+					 " yet this peer already has"
+					 " exchanged commitments and is"
+					 " using the v2 open protocol."
+					 " Must spend input to reset.\"")));
+		}
+
 		if (dest_failed(dest)) {
+		    /* We can't re-try committed v2's */
 			struct multifundchannel_removed removed;
 
 			plugin_log(mfc->cmd->plugin, LOG_DBG,
diff --git a/tests/test_opening.py b/tests/test_opening.py
index d1341494f6e5..c0a038d1d183 100644
--- a/tests/test_opening.py
+++ b/tests/test_opening.py
@@ -2,7 +2,7 @@
 from fixtures import TEST_NETWORK
 from pyln.client import RpcError
 from utils import (
-    only_one, wait_for, sync_blockheight, EXPERIMENTAL_FEATURES
+    only_one, wait_for, sync_blockheight, EXPERIMENTAL_FEATURES, DEVELOPER
 )
 
 import pytest
@@ -14,6 +14,90 @@ def find_next_feerate(node, peer):
     return chan['next_feerate']
 
 
+@unittest.skipIf(not DEVELOPER, "disconnect=... needs DEVELOPER=1")
+@unittest.skipIf(not EXPERIMENTAL_FEATURES, "dual-funding is experimental only")
+def test_multifunding_v2_best_effort(node_factory, bitcoind):
+    '''
+    Check that best_effort flag works.
+    '''
+    disconnects = ["-WIRE_INIT",
+                   "-WIRE_ACCEPT_CHANNEL",
+                   "-WIRE_FUNDING_SIGNED"]
+    l1 = node_factory.get_node(options={'dev-force-features': '+223'},
+                               allow_warning=True,
+                               may_reconnect=True)
+    l2 = node_factory.get_node(options={'dev-force-features': '+223'},
+                               allow_warning=True,
+                               may_reconnect=True)
+    l3 = node_factory.get_node(disconnect=disconnects)
+    l4 = node_factory.get_node()
+
+    l1.fundwallet(2000000)
+
+    destinations = [{"id": '{}@localhost:{}'.format(l2.info['id'], l2.port),
+                     "amount": 50000},
+                    {"id": '{}@localhost:{}'.format(l3.info['id'], l3.port),
+                     "amount": 50000},
+                    {"id": '{}@localhost:{}'.format(l4.info['id'], l4.port),
+                     "amount": 50000}]
+
+    for i, d in enumerate(disconnects):
+        failed_sign = d == "-WIRE_FUNDING_SIGNED"
+        # Should succeed due to best-effort flag.
+        min_channels = 1 if failed_sign else 2
+        l1.rpc.multifundchannel(destinations, minchannels=min_channels)
+
+        bitcoind.generate_block(6, wait_for_mempool=1)
+
+        # l3 should fail to have channels; l2 also fails on last attempt
+        node_list = [l1, l4] if failed_sign else [l1, l2, l4]
+        for node in node_list:
+            node.daemon.wait_for_log(r'to CHANNELD_NORMAL')
+
+        # There should be working channels to l2 and l4 for every run
+        # but the last
+        working_chans = [l4] if failed_sign else [l2, l4]
+        for ldest in working_chans:
+            inv = ldest.rpc.invoice(5000, 'i{}'.format(i), 'i{}'.format(i))['bolt11']
+            l1.rpc.pay(inv)
+
+        # Function to find the SCID of the channel that is
+        # currently open.
+        # Cannot use LightningNode.get_channel_scid since
+        # it assumes the *first* channel found is the one
+        # wanted, but in our case we close channels and
+        # open again, so multiple channels may remain
+        # listed.
+        def get_funded_channel_scid(n1, n2):
+            peers = n1.rpc.listpeers(n2.info['id'])['peers']
+            assert len(peers) == 1
+            peer = peers[0]
+            channels = peer['channels']
+            assert channels
+            for c in channels:
+                state = c['state']
+                if state in ('DUALOPEND_AWAITING_LOCKIN', 'CHANNELD_AWAITING_LOCKIN', 'CHANNELD_NORMAL'):
+                    return c['short_channel_id']
+            assert False
+
+        # Now close channels to l2 and l4, for the next run.
+        if not failed_sign:
+            l1.rpc.close(get_funded_channel_scid(l1, l2))
+        l1.rpc.close(get_funded_channel_scid(l1, l4))
+
+        for node in node_list:
+            node.daemon.wait_for_log(r'to CLOSINGD_COMPLETE')
+
+    # With 2 down, it will fail to fund channel
+    l2.stop()
+    l3.stop()
+    with pytest.raises(RpcError, match=r'Connection refused'):
+        l1.rpc.multifundchannel(destinations, minchannels=2)
+
+    # This works though.
+    l1.rpc.multifundchannel(destinations, minchannels=1)
+
+
 @unittest.skipIf(TEST_NETWORK != 'regtest', 'elementsd doesnt yet support PSBT features we need')
 @unittest.skipIf(not EXPERIMENTAL_FEATURES, "dual-funding is experimental only")
 def test_v2_rbf(node_factory, bitcoind, chainparams):