From 14b7a1b25ec1dd48b7ebda215fafeaf5b352b5a8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:43:11 +1030 Subject: [PATCH 01/39] pytest: don't run tests marked slow_test at all if VALGRIND and SLOW_MACHINE. We used to just run these without valgrind, but we already run them in CI (which sets SLOW_MACHINE) without valgrind, so this just doubles up. Signed-off-by: Rusty Russell --- contrib/pyln-testing/pyln/testing/utils.py | 14 +++++--------- tests/conftest.py | 4 +++- tests/test_closing.py | 4 ++-- tests/test_connection.py | 16 ++++++++-------- tests/test_plugin.py | 2 +- tests/test_reckless.py | 4 ++-- 6 files changed, 21 insertions(+), 23 deletions(-) diff --git a/contrib/pyln-testing/pyln/testing/utils.py b/contrib/pyln-testing/pyln/testing/utils.py index 8607cd9bdf1d..732bd7b49012 100644 --- a/contrib/pyln-testing/pyln/testing/utils.py +++ b/contrib/pyln-testing/pyln/testing/utils.py @@ -841,7 +841,7 @@ def call(self, method, payload=None, cmdprefix=None, filter=None): class LightningNode(object): - def __init__(self, node_id, lightning_dir, bitcoind, executor, valgrind, may_fail=False, + def __init__(self, node_id, lightning_dir, bitcoind, executor, may_fail=False, may_reconnect=False, broken_log=None, allow_warning=False, @@ -900,7 +900,7 @@ def __init__(self, node_id, lightning_dir, bitcoind, executor, valgrind, may_fai self.daemon.opts["dev-debugger"] = dbgvar if os.getenv("DEBUG_LIGHTNINGD"): self.daemon.opts["dev-debug-self"] = None - if valgrind: + if VALGRIND: self.daemon.env["LIGHTNINGD_DEV_NO_BACKTRACE"] = "1" self.daemon.opts["dev-no-plugin-checksum"] = None else: @@ -926,7 +926,7 @@ def __init__(self, node_id, lightning_dir, bitcoind, executor, valgrind, may_fai dsn = db.get_dsn() if dsn is not None: self.daemon.opts['wallet'] = dsn - if valgrind: + if VALGRIND: trace_skip_pattern = '*python*,*bitcoin-cli*,*elements-cli*,*cln-grpc*,*clnrest*,*wss-proxy*,*cln-bip353*,*reckless' if not valgrind_plugins: trace_skip_pattern += ',*plugins*' @@ -1653,10 +1653,6 @@ class NodeFactory(object): """ def __init__(self, request, testname, bitcoind, executor, directory, db_provider, node_cls, jsonschemas): - if request.node.get_closest_marker("slow_test") and SLOW_MACHINE: - self.valgrind = False - else: - self.valgrind = VALGRIND self.testname = testname # Set test name in environment for coverage file organization @@ -1755,7 +1751,7 @@ def get_node(self, node_id=None, options=None, dbfile=None, db = self.db_provider.get_db(os.path.join(lightning_dir, TEST_NETWORK), self.testname, node_id) db.provider = self.db_provider node = self.node_cls( - node_id, lightning_dir, self.bitcoind, self.executor, self.valgrind, db=db, + node_id, lightning_dir, self.bitcoind, self.executor, db=db, port=port, grpc_port=grpc_port, options=options, may_fail=may_fail or expect_fail, jsonschemas=self.jsonschemas, **kwargs @@ -1872,7 +1868,7 @@ def killall(self, expected_successes): # leak detection upsets VALGRIND by reading uninitialized mem, # and valgrind adds extra fds. # If it's dead, we'll catch it below. - if not self.valgrind: + if not VALGRIND: try: # This also puts leaks in log. 
leaks = self.nodes[i].rpc.dev_memleak()['leaks'] diff --git a/tests/conftest.py b/tests/conftest.py index 029050742e06..dcbd43cabb4a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ import pytest -from pyln.testing.utils import EXPERIMENTAL_DUAL_FUND +from pyln.testing.utils import EXPERIMENTAL_DUAL_FUND, VALGRIND, SLOW_MACHINE # This function is based upon the example of how to @@ -37,3 +37,5 @@ def pytest_runtest_setup(item): else: # If there's no openchannel marker, skip if EXP_DF if EXPERIMENTAL_DUAL_FUND: pytest.skip('v1-only test, EXPERIMENTAL_DUAL_FUND=1') + if "slow_test" in item.keywords and VALGRIND and SLOW_MACHINE: + pytest.skip("Skipping slow tests under VALGRIND") diff --git a/tests/test_closing.py b/tests/test_closing.py index 3a7c31642473..f3c7dbee06cc 100644 --- a/tests/test_closing.py +++ b/tests/test_closing.py @@ -1,7 +1,7 @@ from fixtures import * # noqa: F401,F403 from pyln.client import RpcError, Millisatoshi from shutil import copyfile -from pyln.testing.utils import SLOW_MACHINE +from pyln.testing.utils import SLOW_MACHINE, VALGRIND from utils import ( only_one, sync_blockheight, wait_for, TIMEOUT, account_balance, first_channel_id, closing_fee, TEST_NETWORK, @@ -3232,7 +3232,7 @@ def check_billboard(): def test_shutdown(node_factory): # Fail, in that it will exit before cleanup. l1 = node_factory.get_node(may_fail=True) - if not node_factory.valgrind: + if not VALGRIND: leaks = l1.rpc.dev_memleak()['leaks'] if len(leaks): raise Exception("Node {} has memory leaks: {}" diff --git a/tests/test_connection.py b/tests/test_connection.py index 05724c517ec8..78261f7119a7 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -13,7 +13,7 @@ mine_funding_to_announce, first_scid, CHANNEL_SIZE ) -from pyln.testing.utils import SLOW_MACHINE, VALGRIND, EXPERIMENTAL_DUAL_FUND, FUNDAMOUNT, RUST +from pyln.testing.utils import VALGRIND, EXPERIMENTAL_DUAL_FUND, FUNDAMOUNT, RUST import os import pytest @@ -1463,7 +1463,7 @@ def test_funding_v2_corners(node_factory, bitcoind): assert l1.rpc.openchannel_update(start['channel_id'], start['psbt'])['commitments_secured'] -@unittest.skipIf(SLOW_MACHINE and not VALGRIND, "Way too taxing on CI machines") +@pytest.mark.slow_test @pytest.mark.openchannel('v1') def test_funding_cancel_race(node_factory, bitcoind, executor): l1 = node_factory.get_node() @@ -1474,7 +1474,7 @@ def test_funding_cancel_race(node_factory, bitcoind, executor): bitcoind.generate_block(1) wait_for(lambda: len(l1.rpc.listfunds()["outputs"]) != 0) - if node_factory.valgrind: + if VALGRIND: num = 5 else: num = 100 @@ -1536,7 +1536,7 @@ def test_funding_cancel_race(node_factory, bitcoind, executor): assert num_cancel == len(nodes) # We should have raced at least once! 
- if not node_factory.valgrind: + if not VALGRIND: assert num_cancel > 0 assert num_complete > 0 @@ -1544,7 +1544,7 @@ def test_funding_cancel_race(node_factory, bitcoind, executor): executor.map(lambda n: n.stop(), node_factory.nodes) -@unittest.skipIf(SLOW_MACHINE and not VALGRIND, "Way too taxing on CI machines") +@pytest.mark.slow_test @pytest.mark.openchannel('v2') def test_funding_v2_cancel_race(node_factory, bitcoind, executor): l1 = node_factory.get_node() @@ -1555,7 +1555,7 @@ def test_funding_v2_cancel_race(node_factory, bitcoind, executor): bitcoind.generate_block(1) wait_for(lambda: len(l1.rpc.listfunds()["outputs"]) != 0) - if node_factory.valgrind: + if VALGRIND: num = 5 else: num = 100 @@ -1610,7 +1610,7 @@ def test_funding_v2_cancel_race(node_factory, bitcoind, executor): print("Cancelled {} complete {}".format(num_cancel, num_complete)) # We should have raced at least once! - if not node_factory.valgrind: + if not VALGRIND: assert num_cancel > 0 assert num_complete > 0 @@ -3452,7 +3452,7 @@ def test_feerate_stress(node_factory, executor): @pytest.mark.slow_test def test_pay_disconnect_stress(node_factory, executor): """Expose race in htlc restoration in channeld: 50% chance of failure""" - if node_factory.valgrind: + if VALGRIND: NUM_RUNS = 2 else: NUM_RUNS = 5 diff --git a/tests/test_plugin.py b/tests/test_plugin.py index f02bd8070f10..1b5499269008 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -2983,7 +2983,7 @@ def test_autoclean(node_factory): # Under valgrind in CI, it can 50 seconds between creating invoice # and restarting. - if node_factory.valgrind: + if VALGRIND: short_timeout = 10 longer_timeout = 60 else: diff --git a/tests/test_reckless.py b/tests/test_reckless.py index 7fb04d742726..275960d576fb 100644 --- a/tests/test_reckless.py +++ b/tests/test_reckless.py @@ -2,7 +2,7 @@ import subprocess from pathlib import PosixPath, Path import socket -from pyln.testing.utils import VALGRIND, SLOW_MACHINE +from pyln.testing.utils import VALGRIND import pytest import os import re @@ -352,7 +352,7 @@ def test_tag_install(node_factory): @pytest.mark.flaky(reruns=5) -@unittest.skipIf(VALGRIND and SLOW_MACHINE, "node too slow for starting plugin under valgrind") +@pytest.mark.slow_test def test_reckless_uv_install(node_factory): node = get_reckless_node(node_factory) node.start() From 79349ff07d515cf243e322bc62ea1239b0181792 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:44:11 +1030 Subject: [PATCH 02/39] CI: remove reruns on all failures. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is covering up real bugs; we need to fix CI instead. 
From https://pypi.org/project/pytest-rerunfailures/#re-run-all-failures: ``` To re-run all test failures, use the --reruns command line option with the maximum number of times you’d like the tests to run: $ pytest --reruns 5 ``` Signed-off-by: Rusty Russell --- .github/workflows/ci.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ea79a16d9639..2c7a9b9c36c1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,7 +16,7 @@ env: RUST_PROFILE: release SLOW_MACHINE: 1 CI_SERVER_URL: "http://35.239.136.52:3170" - PYTEST_OPTS_BASE: "--reruns=10 -vvv --junit-xml=report.xml --timeout=1800 --durations=10" + PYTEST_OPTS_BASE: "-vvv --junit-xml=report.xml --timeout=1800 --durations=10" jobs: prebuild: @@ -337,7 +337,7 @@ jobs: timeout-minutes: 120 env: RUST_PROFILE: release # Has to match the one in the compile step - PYTEST_OPTS: --reruns=10 -vvv --junit-xml=report.xml --timeout=1800 --durations=10 + PYTEST_OPTS: -vvv --junit-xml=report.xml --timeout=1800 --durations=10 needs: - compile strategy: @@ -453,7 +453,7 @@ jobs: env: RUST_PROFILE: release # Has to match the one in the compile step CFG: compile-gcc - PYTEST_OPTS: --reruns=10 -vvv --junit-xml=report.xml --timeout=1800 --durations=10 --test-group-random-seed=42 + PYTEST_OPTS: -vvv --junit-xml=report.xml --timeout=1800 --durations=10 --test-group-random-seed=42 needs: - compile strategy: @@ -541,7 +541,7 @@ jobs: RUST_PROFILE: release SLOW_MACHINE: 1 TEST_DEBUG: 1 - PYTEST_OPTS: --reruns=10 -vvv --junit-xml=report.xml --timeout=1800 --durations=10 --test-group-random-seed=42 + PYTEST_OPTS: -vvv --junit-xml=report.xml --timeout=1800 --durations=10 --test-group-random-seed=42 needs: - compile strategy: @@ -632,7 +632,7 @@ jobs: env: VALGRIND: 0 GENERATE_EXAMPLES: 1 - PYTEST_OPTS: --reruns=10 -vvv --junit-xml=report.xml --timeout=1800 --durations=10 + PYTEST_OPTS: -vvv --junit-xml=report.xml --timeout=1800 --durations=10 TEST_NETWORK: regtest needs: - compile @@ -678,7 +678,7 @@ jobs: timeout-minutes: 120 env: RUST_PROFILE: release # Has to match the one in the compile step - PYTEST_OPTS: --reruns=10 -vvv --junit-xml=report.xml --timeout=1800 --durations=10 + PYTEST_OPTS: -vvv --junit-xml=report.xml --timeout=1800 --durations=10 needs: - compile strategy: From e2fe05cf61a07321cb75cda2859c094b109dcc10 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:45:11 +1030 Subject: [PATCH 03/39] connectd: fix race when we supply a new address. This shows up as a flake in test_route_by_old_scid: ``` # Now restart l2, make sure it remembers the original! l2.restart() > l2.rpc.connect(l1.info['id'], 'localhost', l1.port) tests/test_splicing.py:554: ... > raise RpcError(method, payload, resp['error']) E pyln.client.lightning.RpcError: RPC call failed: method: connect, payload: {'id': '0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518', 'host': 'localhost', 'port': 33837}, error: {'code': 400, 'message': 'Unable to connect, no address known for peer'} ``` This is because it's already (auto)connecting, and fails. 
This failure is reported, before we've added the new address (once we add the new address, we connect fine, but it's too late!): ``` lightningd-2 2025-12-08T02:39:18.241Z DEBUG gossipd: REPLY WIRE_GOSSIPD_NEW_BLOCKHEIGHT_REPLY with 0 fds lightningd-2 2025-12-08T02:39:18.320Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-connectd: Initializing important peer with 0 addresses lightningd-2 2025-12-08T02:39:18.320Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-connectd: Failed connected out: Unable to connect, no address known for peer lightningd-2 2025-12-08T02:39:18.344Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-connectd: Will try reconnect in 1 seconds lightningd-2 2025-12-08T02:39:18.344Z DEBUG 035d2b1192dfba134e10e540875d366ebc8bc353d5aa766b80c090b39c3a5d885d-connectd: Initializing important peer with 1 addresses lightningd-2 2025-12-08T02:39:18.344Z DEBUG 035d2b1192dfba134e10e540875d366ebc8bc353d5aa766b80c090b39c3a5d885d-connectd: Connected out, starting crypto lightningd-2 2025-12-08T02:39:18.344Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-connectd: Adding 1 addresses to important peer lightningd-2 2025-12-08T02:39:18.345Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-connectd: Connected out, starting crypto {'run_id': 256236335046680576, 'github_repository': 'ElementsProject/lightning', 'github_sha': '555f1ac461d151064aad6fc83b94a0290e2e9d5d', 'github_ref': 'refs/pull/8767/merge', 'github_ref_name': 'HEAD', 'github_run_id': 20013689862, 'github_head_ref': 'fixup-backfill-bug', 'github_run_number': 14774, 'github_base_ref': 'master', 'github_run_attempt': '1', 'testname': 'test_route_by_old_scid', 'start_time': 1765161493, 'end_time': 1765161558, 'outcome': 'fail'} lightningd-2 2025-12-08T02:39:18.421Z DEBUG 022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59-hsmd: Got WIRE_HSMD_ECDH_REQ lightningd-2 2025-12-08T02:39:18.421Z DEBUG hsmd: Client: Received message 1 from client lightningd-2 2025-12-08T02:39:18.453Z DEBUG 022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59-hsmd: Got WIRE_HSMD_ECDH_REQ lightningd-2 2025-12-08T02:39:18.453Z DEBUG hsmd: Client: Received message 1 from client --------------------------- Captured stdout teardown --------------------------- ``` --- lightningd/connect_control.c | 19 +++++++++++++++---- tests/test_splicing.py | 1 - 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/lightningd/connect_control.c b/lightningd/connect_control.c index e374ae961606..f2710db52b7a 100644 --- a/lightningd/connect_control.c +++ b/lightningd/connect_control.c @@ -277,10 +277,21 @@ static void connect_failed(struct lightningd *ld, connect_nsec, connect_attempted); - /* We can have multiple connect commands: fail them all */ - while ((c = find_connect(ld, id)) != NULL) { - /* They delete themselves from list */ - was_pending(command_fail(c->cmd, errcode, "%s", errmsg)); + /* There's a race between autoreconnect and connect commands. This + * matters because the autoreconnect might have failed, but that was before + * the connect_to_peer command gave connectd a new address. Thus we wait for + * one we explicitly asked for before failing. + * + * A similar pattern could occur with multiple connect commands, however connectd + * does simply combine those, so we don't get a response per request, and it's a + * very rare corner case (which, unlike the above, doesn't happen in CI!).
+ */ + if (strstarts(connect_reason, "connect command")) { + /* We can have multiple connect commands: fail them all */ + while ((c = find_connect(ld, id)) != NULL) { + /* They delete themselves from list */ + was_pending(command_fail(c->cmd, errcode, "%s", errmsg)); + } } } diff --git a/tests/test_splicing.py b/tests/test_splicing.py index 0c5b71ac614e..966c75f64e7b 100644 --- a/tests/test_splicing.py +++ b/tests/test_splicing.py @@ -499,7 +499,6 @@ def test_splice_stuck_htlc(node_factory, bitcoind, executor): assert l1.db_query("SELECT count(*) as c FROM channeltxs;")[0]['c'] == 0 -@pytest.mark.flaky(reruns=5) @unittest.skipIf(TEST_NETWORK != 'regtest', 'elementsd doesnt yet support PSBT features we need') def test_route_by_old_scid(node_factory, bitcoind): l1, l2, l3 = node_factory.line_graph(3, wait_for_announce=True, opts={'experimental-splicing': None, 'may_reconnect': True}) From 2c23c02f190aafff47752e6d1c8bfd3248d11671 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:46:11 +1030 Subject: [PATCH 04/39] pytest: fix real reason for warning issue in test_route_by_old_scid. We can still get a warning: lightningd-1 2025-12-10T01:11:07.232Z DEBUG 022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59-connectd: Received WIRE_WARNING: WARNING: channel_announcement: no unspent txout 109x1x1 This has nothing to do with l1 talking about the original channel (which would be 103x1x): it's because l2's gossipd (being the node which does the splice) immediately forgets the pre-splice id. If l1 sends some gossip, it will get a warning message. Signed-off-by: Rusty Russell --- tests/test_splicing.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test_splicing.py b/tests/test_splicing.py index 966c75f64e7b..e5a3565b1394 100644 --- a/tests/test_splicing.py +++ b/tests/test_splicing.py @@ -501,7 +501,13 @@ def test_splice_stuck_htlc(node_factory, bitcoind, executor): @unittest.skipIf(TEST_NETWORK != 'regtest', 'elementsd doesnt yet support PSBT features we need') def test_route_by_old_scid(node_factory, bitcoind): - l1, l2, l3 = node_factory.line_graph(3, wait_for_announce=True, opts={'experimental-splicing': None, 'may_reconnect': True}) + opts = {'experimental-splicing': None, 'may_reconnect': True} + # l1 sometimes talks about pre-splice channels. l2 (being part of the splice) immediately forgets + # the old scid and uses the new one, then complains when l1 talks about it. Which is fine, but + # breaks CI. + l1opts = opts.copy() + l1opts['allow_warning'] = True + l1, l2, l3 = node_factory.line_graph(3, wait_for_announce=True, opts=[l1opts, opts, opts]) # Get pre-splice route. inv = l3.rpc.invoice(10000000, 'test_route_by_old_scid', 'test_route_by_old_scid') @@ -527,11 +533,6 @@ def test_route_by_old_scid(node_factory, bitcoind): l1.rpc.sendpay(route, inv['payment_hash'], payment_secret=inv['payment_secret']) l1.rpc.waitsendpay(inv['payment_hash']) - # Make sure l1 has seen and processed announcement for new splice - # scid, otherwise we can get gossip warning here (which breaks CI) if we splice again. - scid = only_one(l3.rpc.listchannels(source=l3.info['id'])['channels'])['short_channel_id'] - wait_for(lambda: l1.rpc.listchannels(short_channel_id=scid)['channels'] != []) - # Let's splice again, so the original scid is two behind the times. 
l3.fundwallet(200000) funds_result = l3.rpc.fundpsbt("109000sat", "slow", 166, excess_as_change=True) From bd0f06ee2608b12fa456ba16881d1d5c2fd576e0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:47:11 +1030 Subject: [PATCH 05/39] pytest: remove test_lockup_drain. Since this was written, we now test if remote side would get into this situation and stop it from happening, so the test doesn't work any more. Signed-off-by: Rusty Russell --- tests/test_pay.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/tests/test_pay.py b/tests/test_pay.py index 48abcb3142ab..94ee2c0a4abe 100644 --- a/tests/test_pay.py +++ b/tests/test_pay.py @@ -2821,31 +2821,6 @@ def test_channel_spendable_receivable_capped(node_factory, bitcoind): assert l2.rpc.listpeerchannels()['channels'][0]['receivable_msat'] == Millisatoshi(0xFFFFFFFF) -@unittest.skipIf(True, "Test is extremely flaky") -def test_lockup_drain(node_factory, bitcoind): - """Try to get channel into a state where opener can't afford fees on additional HTLC, so peer can't add HTLC""" - l1, l2 = node_factory.line_graph(2, opts={'may_reconnect': True}) - - # l1 sends all the money to l2 until even 1 msat can't get through. - total = l1.drain(l2) - - # Even if feerate now increases 2x (30000), l2 should be able to send - # non-dust HTLC to l1. - l1.force_feerates(30000) - l2.pay(l1, total // 2) - - # reset fees and send all back again - l1.force_feerates(15000) - l1.drain(l2) - - # But if feerate increase just a little more, l2 should not be able to send - # non-fust HTLC to l1 - l1.force_feerates(30002) # TODO: Why does 30001 fail? off by one in C code? - wait_for(lambda: l1.rpc.listpeers()['peers'][0]['connected']) - with pytest.raises(RpcError, match=r".*Capacity exceeded.*"): - l2.pay(l1, total // 2) - - @unittest.skipIf(TEST_NETWORK != 'regtest', 'Assumes anchors') def test_htlc_too_dusty_outgoing(node_factory, bitcoind, chainparams): """ Try to hit the 'too much dust' limit, should fail the HTLC """ From 0e0099b18607ce624bca29a01fc6e62bdf5e820e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:48:11 +1030 Subject: [PATCH 06/39] pytest: restore and fix disabled test test_excluded_adjacent_routehint. 1. It was flaky, probably because it didn't wait for the remote update_channel. 2. Rusty applied a fix in 5f664dac77d, not clear if it worked. 3. Christian disabled it altogether in 23ce9a947df. Signed-off-by: Rusty Russell --- lightningd/connect_control.c | 3 ++- tests/test_pay.py | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lightningd/connect_control.c b/lightningd/connect_control.c index f2710db52b7a..611b5ba265fe 100644 --- a/lightningd/connect_control.c +++ b/lightningd/connect_control.c @@ -286,7 +286,8 @@ static void connect_failed(struct lightningd *ld, * does simply combine those, so we don't get a response per request, and it's a * very rare corner case (which, unlike the above, doesn't happen in CI!). 
*/ - if (strstarts(connect_reason, "connect command")) { + if (strstarts(connect_reason, "connect command") + || errcode == CONNECT_DISCONNECTED_DURING) { /* We can have multiple connect commands: fail them all */ while ((c = find_connect(ld, id)) != NULL) { /* They delete themselves from list */ diff --git a/tests/test_pay.py b/tests/test_pay.py index 94ee2c0a4abe..1b35d5094806 100644 --- a/tests/test_pay.py +++ b/tests/test_pay.py @@ -3480,7 +3480,6 @@ def test_reject_invalid_payload(node_factory): l2.daemon.wait_for_log(r'Failing HTLC because of an invalid payload') -@unittest.skip("Test is flaky causing CI to be unusable.") def test_excluded_adjacent_routehint(node_factory, bitcoind): """Test case where we try have a routehint which leads to an adjacent node, but the result exceeds our maxfee; we crashed trying to find @@ -3489,10 +3488,15 @@ def test_excluded_adjacent_routehint(node_factory, bitcoind): """ l1, l2, l3 = node_factory.line_graph(3) + # Make sure l2->l3 is usable. + wait_for(lambda: 'remote' in only_one(l3.rpc.listpeerchannels()['channels'])['updates']) + # We'll be forced to use routehint, since we don't know about l3. inv = l3.rpc.invoice(10**3, "lbl", "desc", exposeprivatechannels=l2.get_channel_scid(l3)) - l1.wait_channel_active(l1.get_channel_scid(l2)) + # Make sure l1->l2 is usable. + wait_for(lambda: 'remote' in only_one(l1.rpc.listpeerchannels()['channels'])['updates']) + # This will make it reject the routehint. err = r'Fee exceeds our fee budget: 1msat > 0msat, discarding route' with pytest.raises(RpcError, match=err): From 08efb015c8c7304a7c045fe28d0e7d7b085e9137 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:49:11 +1030 Subject: [PATCH 07/39] pytest: expect slow commands with giant commando test ``` 2025-12-10T02:51:06.2435409Z [gw1] [ 77%] ERROR tests/test_plugin.py::test_commando ...lightningd-1: had BROKEN messages ... 2025-12-10T03:00:26.0440311Z lightningd-1 2025-12-10T02:51:01.548Z UNUSUAL jsonrpc#69: That's weird: Request checkrune took 5961 milliseconds ``` Signed-off-by: Rusty Russell --- tests/test_plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 1b5499269008..f51e45c33d01 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -2845,7 +2845,8 @@ def test_plugin_shutdown(node_factory): def test_commando(node_factory, executor): l1, l2 = node_factory.line_graph(2, fundchannel=False, - opts={'log-level': 'io'}) + # Under valgrind, checkrune of 400k command can be slow! + opts={'log-level': 'io', 'broken_log': "That's weird: Request .* took"}) rune = l1.rpc.createrune()['rune'] From 05716225a1c24906304b2df1b8b44c8b28fa8cde Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:50:11 +1030 Subject: [PATCH 08/39] pytest: fix test_bitcoin_backend_gianttx flake. signpsbt could be the one which takes a long time, so allow any psbt event. Signed-off-by: Rusty Russell --- tests/test_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index f51e45c33d01..d62ccf75037d 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -1904,7 +1904,7 @@ def test_bitcoin_backend(node_factory, bitcoind): def test_bitcoin_backend_gianttx(node_factory, bitcoind): """Test that a giant tx doesn't crash bcli""" # This complains about how long fundpsbt took. 
- l1 = node_factory.get_node(start=False, broken_log='Request fundpsbt took') + l1 = node_factory.get_node(start=False, broken_log="That's weird: Request .*psbt took") # With memleak we spend far too much time gathering backtraces. if "LIGHTNINGD_DEV_MEMLEAK" in l1.daemon.env: del l1.daemon.env["LIGHTNINGD_DEV_MEMLEAK"] From 0f3cc9af4abefdbdfd331d25301c8e1f13cb8d61 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:51:11 +1030 Subject: [PATCH 09/39] pytest: note that we also trigger CI failure on these "That's weird" messages. Signed-off-by: Rusty Russell --- contrib/pyln-testing/pyln/testing/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/pyln-testing/pyln/testing/fixtures.py b/contrib/pyln-testing/pyln/testing/fixtures.py index db4206a1279c..ef55ecd0aa65 100644 --- a/contrib/pyln-testing/pyln/testing/fixtures.py +++ b/contrib/pyln-testing/pyln/testing/fixtures.py @@ -498,7 +498,7 @@ def map_node_error(nodes, f, msg): map_node_error(nf.nodes, printValgrindErrors, "reported valgrind errors") map_node_error(nf.nodes, printCrashLog, "had crash.log files") - map_node_error(nf.nodes, checkBroken, "had BROKEN messages") + map_node_error(nf.nodes, checkBroken, "had BROKEN or That's weird messages") map_node_error(nf.nodes, lambda n: not n.allow_warning and n.daemon.is_in_log(r' WARNING:'), "had warning messages") map_node_error(nf.nodes, checkReconnect, "had unexpected reconnections") map_node_error(nf.nodes, checkPluginJSON, "had malformed hooks/notifications") From 1191689a12cecac925040ac58489e41cc0ff2902 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:52:11 +1030 Subject: [PATCH 10/39] pytest: fix flake in test_coin_movement_notices We restart the node: if the coin_movements.py plugin hasn't processed the notification yet, it will be incorrect: ``` > assert account_balance(l2, chanid_1) == 100001001 E AssertionError: assert 150_001_001msat == 100_001_001 E + where 150001001msat = account_balance(, '39ac52c818c5304cf0664940ff236c4e3f8f4ceb8993cb1491347142d61b62bc') ``` Signed-off-by: Rusty Russell --- tests/test_plugin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index d62ccf75037d..d45b07b3bd78 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -2191,7 +2191,6 @@ def test_plugin_fail(node_factory): l1.daemon.wait_for_log(r': exited during normal operation') -@pytest.mark.flaky(reruns=5) @pytest.mark.openchannel('v1') @pytest.mark.openchannel('v2') def test_coin_movement_notices(node_factory, bitcoind, chainparams): @@ -2269,6 +2268,9 @@ def test_coin_movement_notices(node_factory, bitcoind, chainparams): l2.rpc.sendpay(route, payment_hash21, payment_secret=inv['payment_secret']) l2.rpc.waitsendpay(payment_hash21) + # Make sure coin_movements.py sees event before we restart! + l2.daemon.wait_for_log(f"plugin-coin_movements.py: coin movement: .*'payment_hash': '{payment_hash21}'") + # restart to test index l2.restart() wait_for(lambda: all(c['state'] == 'CHANNELD_NORMAL' for c in l2.rpc.listpeerchannels()["channels"])) From aa02190ece0c2e06f5784c794a081a0f564e865e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:53:11 +1030 Subject: [PATCH 11/39] pytest: enable test_offline. Not clear why it was disabled, but it never got re-enabled.
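The `@pytest.mark.xfail(strict=True)` marker removed below also meant the test could not simply start passing again: under strict xfail, an unexpected pass is itself reported as a failure. A minimal sketch of that pytest behaviour (an illustration, independent of this change):

```python
import pytest


# With strict=True, a test that unexpectedly passes is reported as
# XPASS(strict) and *fails* the run, so a fixed test keeps the suite
# red until the marker is removed.
@pytest.mark.xfail(strict=True)
def test_now_fixed():
    assert True
```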
Signed-off-by: Rusty Russell --- tests/test_connection.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/test_connection.py b/tests/test_connection.py index 78261f7119a7..92a2a12f9487 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -4523,15 +4523,24 @@ def test_reconnect_no_additional_transient_failure(node_factory, bitcoind): assert not l1.daemon.is_in_log(f"{l2id}-chan#1: Peer transient failure in CHANNELD_NORMAL: Disconnected", start=offset1) -@pytest.mark.xfail(strict=True) def test_offline(node_factory): # if get_node starts it, it'll expect an address, so do it manually. l1 = node_factory.get_node(options={"offline": None}, start=False) l1.daemon.start() - # we expect it to log offline mode an not to create any listener + # we expect it to log offline mode not to listen. assert l1.daemon.is_in_log("Started in offline mode!") - assert not l1.daemon.is_in_log("connectd: Created listener on") + line = l1.daemon.is_in_log("connectd: Created listener on") + port = re.search(r'connectd: Created listener on 127.0.0.1:(.*)', line).groups()[0] + + l2 = node_factory.get_node() + + # We cannot connect in! + with pytest.raises(RpcError, match=f"All addresses failed: 127.0.0.1:{port}: Connection establishment: Connection refused."): + l2.rpc.connect(l1.rpc.getinfo()['id'], 'localhost', int(port)) + + # We *can* connect out + l1.rpc.connect(l2.info['id'], 'localhost', l2.port) def test_last_stable_connection(node_factory): From fc3a6d0f9024c196303da0e027aa1cae348f1a8b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:54:11 +1030 Subject: [PATCH 12/39] pytest: fix feerate check in test_peer_anchor_push We didn't log when anchor transactions had short signatures, which causes this test to not assert (did_short_sig): ``` total_feerate_perkw = total_fees / total_weight * 1000 > check_feerate([l3, l2], total_feerate_perkw, feerate) tests/test_closing.py:4063: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ nodes = [, ] actual_feerate = 14006.105538595726, expected_feerate = 14000 def check_feerate(nodes, actual_feerate, expected_feerate): # Feerate can't be lower. assert actual_feerate > expected_feerate - 2 if actual_feerate >= expected_feerate + 2: if any([did_short_sig(n) for n in nodes]): return # Use assert as it shows the actual values on failure > assert actual_feerate < expected_feerate + 2 E AssertionError ``` Signed-off-by: Rusty Russell --- hsmd/libhsmd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hsmd/libhsmd.c b/hsmd/libhsmd.c index e4a14e44cf14..dd2329236abb 100644 --- a/hsmd/libhsmd.c +++ b/hsmd/libhsmd.c @@ -1820,6 +1820,13 @@ static u8 *handle_sign_anchorspend(struct hsmd_client *c, const u8 *msg_in) fmt_pubkey(tmpctx, &local_funding_pubkey), fmt_wally_psbt(tmpctx, psbt)); } + if (dev_warn_on_overgrind + && psbt->inputs[0].signatures.num_items == 1 + && psbt->inputs[0].signatures.items[0].value_len < 71) { + hsmd_status_fmt(LOG_BROKEN, NULL, + "overgrind: short signature length %zu", + psbt->inputs[0].signatures.items[0].value_len); + } return towire_hsmd_sign_anchorspend_reply(NULL, psbt); } From 122483c6f60414708ba16a394466dd8e835b5a5e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:55:11 +1030 Subject: [PATCH 13/39] pytest: test the askrene doesn't use local dying channels. We don't want it to think that it can use both pre-splice and post-splice channels! 
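For reference, the directed-scid notation this test asserts on is just the short_channel_id with the direction appended, which the new first_scidd() helper in tests/utils.py builds. A toy illustration (values hypothetical):

```python
# A short_channel_id reads "<block>x<txindex>x<outnum>"; the
# short_channel_id_dir form appends "/<direction>" (0 or 1).
def scidd(scid: str, direction: int) -> str:
    return f"{scid}/{direction}"


assert scidd("103x1x0", 1) == "103x1x0/1"
```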
Signed-off-by: Rusty Russell --- tests/test_askrene.py | 46 ++++++++++++++++++++++++++++++++++++++++++- tests/utils.py | 5 +++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/tests/test_askrene.py b/tests/test_askrene.py index bf87287e9b05..613cfa1b990f 100644 --- a/tests/test_askrene.py +++ b/tests/test_askrene.py @@ -3,7 +3,7 @@ from pyln.client import RpcError from pyln.testing.utils import SLOW_MACHINE from utils import ( - only_one, first_scid, GenChannel, generate_gossip_store, + only_one, first_scid, first_scidd, GenChannel, generate_gossip_store, sync_blockheight, wait_for, TEST_NETWORK, TIMEOUT, mine_funding_to_announce ) import os @@ -11,6 +11,7 @@ import subprocess import time import tempfile +import unittest def direction(src, dst): @@ -1915,3 +1916,46 @@ def test_askrene_reserve_clash(node_factory, bitcoind): layers=['layer2'], maxfee_msat=1000, final_cltv=5) + + +@unittest.skipIf(TEST_NETWORK != 'regtest', 'elementsd doesnt yet support PSBT features we need') +@pytest.mark.openchannel('v1') +@pytest.mark.openchannel('v2') +def test_splice_dying_channel(node_factory, bitcoind): + """We should NOT try to use the pre-splice channel here""" + l1, l2, l3 = node_factory.line_graph(3, + wait_for_announce=True, + fundamount=200000, + opts={'experimental-splicing': None}) + + chan_id = l1.get_channel_id(l2) + funds_result = l1.rpc.addpsbtoutput(100000) + pre_splice_scidd = first_scidd(l1, l2) + + # Pay with fee by subtracting 5000 from channel balance + result = l1.rpc.splice_init(chan_id, -105000, funds_result['psbt']) + result = l1.rpc.splice_update(chan_id, result['psbt']) + assert(result['commitments_secured'] is False) + result = l1.rpc.splice_update(chan_id, result['psbt']) + assert(result['commitments_secured'] is True) + result = l1.rpc.splice_signed(chan_id, result['psbt']) + + mine_funding_to_announce(bitcoind, + [l1, l2, l3], + num_blocks=6, wait_for_mempool=1) + + wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state'] == 'CHANNELD_NORMAL') + post_splice_scidd = first_scidd(l1, l2) + + # You will use the new scid + route = only_one(l1.rpc.getroutes(l1.info['id'], l2.info['id'], '50000sat', ['auto.localchans'], 100000, 6)['routes']) + assert only_one(route['path'])['short_channel_id_dir'] == post_splice_scidd + + # And you will not be able to route 100001 sats: + with pytest.raises(RpcError, match="We could not find a usable set of paths"): + l1.rpc.getroutes(l1.info['id'], l2.info['id'], '100001sat', ['auto.localchans'], 100000, 6) + + # But l3 would think it can use both, since it doesn't eliminate dying channel!
+ wait_for(lambda: [c['active'] for c in l3.rpc.listchannels()['channels']] == [True] * 6) + routes = l3.rpc.getroutes(l1.info['id'], l2.info['id'], '200001sat', [], 100000, 6)['routes'] + assert set([only_one(r['path'])['short_channel_id_dir'] for r in routes]) == set([pre_splice_scidd, post_splice_scidd]) diff --git a/tests/utils.py b/tests/utils.py index fc3f18b18da1..d879bf3db092 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -458,6 +458,11 @@ def first_scid(n1, n2): return only_one(n1.rpc.listpeerchannels(n2.info['id'])['channels'])['short_channel_id'] +def first_scidd(n1, n2): + c = only_one(n1.rpc.listpeerchannels(n2.info['id'])['channels']) + return c['short_channel_id'] + '/' + str(c['direction']) + + def basic_fee(feerate, anchor_expected): if anchor_expected: # option_anchor_outputs / option_anchors_zero_fee_htlc_tx From 6492d42e56227e6e94dbb7040a59101f14b6f386 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:56:11 +1030 Subject: [PATCH 14/39] gossipd: don't shortcut dying phase for local channels. This means that we won't complain to peers which gossip about our channels, but it does mean that our channel graph (like other nodes on the network) will show two channels, not one, for the duration. For this reason, we need askrene to omit local dying channels. Signed-off-by: Rusty Russell --- gossipd/gossmap_manage.c | 9 --------- plugins/askrene/askrene.c | 25 +++++++++++++++++++++++++ tests/test_closing.py | 4 ++-- tests/test_misc.py | 4 ++-- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index 7a371d0f6edb..c0589148ce47 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -1332,7 +1332,6 @@ void gossmap_manage_channel_spent(struct gossmap_manage *gm, struct short_channel_id scid) { struct gossmap_chan *chan; - const struct gossmap_node *me; const u8 *msg; struct chan_dying cd; struct gossmap *gossmap = gossmap_manage_get_gossmap(gm); @@ -1341,14 +1340,6 @@ void gossmap_manage_channel_spent(struct gossmap_manage *gm, if (!chan) return; - me = gossmap_find_node(gossmap, &gm->daemon->id); - /* We delete our own channels immediately, since we have local knowledge */ - if (gossmap_nth_node(gossmap, chan, 0) == me - || gossmap_nth_node(gossmap, chan, 1) == me) { - kill_spent_channel(gm, gossmap, scid); - return; - } - /* Is it already dying? It's lightningd re-telling us */ if (channel_already_dying(gm->dying_channels, scid)) return; diff --git a/plugins/askrene/askrene.c b/plugins/askrene/askrene.c index 23e541d75911..fe8f477fbd85 100644 --- a/plugins/askrene/askrene.c +++ b/plugins/askrene/askrene.c @@ -573,6 +573,7 @@ static struct command_result *do_getroutes(struct command *cmd, struct route **routes; struct flow **flows; struct json_stream *response; + const struct gossmap_node *me; /* update the gossmap */ if (gossmap_refresh(askrene->gossmap)) { @@ -593,6 +594,30 @@ static struct command_result *do_getroutes(struct command *cmd, rq->additional_costs = info->additional_costs; rq->maxparts = info->maxparts; + /* We also eliminate any local channels we *know* are dying. + * Most channels get 12 blocks grace in case it's a splice, + * but if it's us, we know about the splice already. 
*/ + me = gossmap_find_node(rq->gossmap, &askrene->my_id); + if (me) { + for (size_t i = 0; i < me->num_chans; i++) { + struct short_channel_id_dir scidd; + const struct gossmap_chan *c = gossmap_nth_chan(rq->gossmap, + me, i, NULL); + if (!gossmap_chan_is_dying(rq->gossmap, c)) + continue; + + scidd.scid = gossmap_chan_scid(rq->gossmap, c); + /* Disable both directions */ + for (scidd.dir = 0; scidd.dir < 2; scidd.dir++) { + bool enabled = false; + gossmap_local_updatechan(localmods, + &scidd, + &enabled, + NULL, NULL, NULL, NULL, NULL); + } + } + } + /* apply selected layers to the localmods */ apply_layers(askrene, rq, &info->source, info->amount, localmods, info->layers, info->local_layer); diff --git a/tests/test_closing.py b/tests/test_closing.py index f3c7dbee06cc..33abde9c8b6e 100644 --- a/tests/test_closing.py +++ b/tests/test_closing.py @@ -1851,8 +1851,8 @@ def test_onchaind_replay(node_factory, bitcoind): # Wait for nodes to notice the failure, this seach needle is after the # DB commit so we're sure the tx entries in onchaindtxs have been added - l1.daemon.wait_for_log("Deleting channel .* due to the funding outpoint being spent") - l2.daemon.wait_for_log("Deleting channel .* due to the funding outpoint being spent") + l1.daemon.wait_for_log("closing soon due to the funding outpoint being spent") + l2.daemon.wait_for_log("closing soon due to the funding outpoint being spent") # We should at least have the init tx now assert len(l1.db_query("SELECT * FROM channeltxs;")) > 0 diff --git a/tests/test_misc.py b/tests/test_misc.py index e159b1c48bb4..c7a697394d33 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -1488,8 +1488,8 @@ def no_more_blocks(req): l1.rpc.close(l2.info['id']) bitcoind.generate_block(1, True) - l1.daemon.wait_for_log(r'Deleting channel') - l2.daemon.wait_for_log(r'Deleting channel') + l1.daemon.wait_for_log(r'closing soon due to the funding outpoint being spent') + l2.daemon.wait_for_log(r'closing soon due to the funding outpoint being spent') @pytest.mark.openchannel('v1') From a654a92f1f65e9adaa38179255d6e384f7150cb4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:57:11 +1030 Subject: [PATCH 15/39] pytest: remove channel upgrade tests. We removed the functionality, but only disabled the tests. Signed-off-by: Rusty Russell --- tests/test_connection.py | 272 --------------------------------------- 1 file changed, 272 deletions(-) diff --git a/tests/test_connection.py b/tests/test_connection.py index 92a2a12f9487..bd887416c24f 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -3729,278 +3729,6 @@ def test_openchannel_init_alternate(node_factory, executor): print("nothing to do") -@unittest.skip("experimental-upgrade-protocol TLV fields conflict with splicing TLV fields") -def test_upgrade_statickey(node_factory, executor): - """l1 doesn't have option_static_remotekey, l2 offers it.""" - l1, l2 = node_factory.get_nodes(2, opts=[{'may_reconnect': True, - 'experimental-upgrade-protocol': None, - # This forces us to allow sending non-static-remotekey! - 'dev-any-channel-type': None}, - {'may_reconnect': True, - # This forces us to accept non-static-remotekey! - 'dev-any-channel-type': None, - 'experimental-upgrade-protocol': None}]) - - l1.fundwallet(2000000) - l1.rpc.connect(l2.info['id'], 'localhost', port=l2.port) - l1.rpc.fundchannel(l2.info['id'], 'all', channel_type=[]) - - # Now reconnect. 
- l1.rpc.disconnect(l2.info['id'], force=True) - l1.rpc.connect(l2.info['id'], 'localhost', l2.port) - - l1.daemon.wait_for_logs([r"They sent current_channel_type \[\]", - r"They offered upgrade to \[12\]"]) - l2.daemon.wait_for_log(r"They sent desired_channel_type \[12\]") - - l1.daemon.wait_for_log('option_static_remotekey enabled at 1/1') - l2.daemon.wait_for_log('option_static_remotekey enabled at 1/1') - - # Make sure it's committed to db! - wait_for(lambda: l1.db_query('SELECT local_static_remotekey_start, remote_static_remotekey_start FROM channels;') == [{'local_static_remotekey_start': 1, 'remote_static_remotekey_start': 1}]) - - # They will consider themselves upgraded. - l1.rpc.disconnect(l2.info['id'], force=True) - # They won't offer upgrade! - assert not l1.daemon.is_in_log("They offered upgrade", - start=l1.daemon.logsearch_start) - l1.daemon.wait_for_log(r"They sent current_channel_type \[12\]") - l2.daemon.wait_for_log(r"They sent desired_channel_type \[12\]") - - -@unittest.skip("experimental-upgrade-protocol TLV fields conflict with splicing TLV fields") -def test_upgrade_statickey_onchaind(node_factory, executor, bitcoind): - """We test penalty before/after, and unilateral before/after""" - l1, l2 = node_factory.get_nodes(2, opts=[{'may_reconnect': True, - 'experimental-upgrade-protocol': None, - # This forces us to allow sending non-static-remotekey! - 'dev-any-channel-type': None, - # We try to cheat! - 'broken_log': r"onchaind-chan#[0-9]*: Could not find resolution for output .*: did \*we\* cheat\?"}, - {'may_reconnect': True, - # This forces us to allow non-static-remotekey! - 'dev-any-channel-type': None, - 'experimental-upgrade-protocol': None}]) - - l1.fundwallet(FUNDAMOUNT + 1000) - l1.rpc.connect(l2.info['id'], 'localhost', port=l2.port) - l1.rpc.fundchannel(l2.info['id'], 'all', channel_type=[]) - bitcoind.generate_block(1, wait_for_mempool=1) - wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state'] == 'CHANNELD_NORMAL') - - # TEST 1: Cheat from pre-upgrade. - tx = l1.rpc.dev_sign_last_tx(l2.info['id'])['tx'] - - l1.rpc.disconnect(l2.info['id'], force=True) - l1.rpc.connect(l2.info['id'], 'localhost', l2.port) - l1.daemon.wait_for_log('option_static_remotekey enabled at 1/1') - - # Make sure another commitment happens, sending failed payment. - routestep = { - 'amount_msat': 1, - 'id': l2.info['id'], - 'delay': 5, - 'channel': first_scid(l1, l2) - } - l1.rpc.sendpay([routestep], '00' * 32, payment_secret='00' * 32) - with pytest.raises(RpcError, match=r'WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS'): - l1.rpc.waitsendpay('00' * 32) - - # Make sure l2 gets REVOKE_AND_ACK from previous. - l2.daemon.wait_for_log('peer_in WIRE_UPDATE_ADD_HTLC') - l2.daemon.wait_for_log('peer_out WIRE_REVOKE_AND_ACK') - l2.daemon.wait_for_log('peer_in WIRE_REVOKE_AND_ACK') - - # Pre-statickey penalty works. - bitcoind.rpc.sendrawtransaction(tx) - bitcoind.generate_block(1) - - _, txid, blocks = l2.wait_for_onchaind_tx('OUR_PENALTY_TX', - 'THEIR_REVOKED_UNILATERAL/DELAYED_CHEAT_OUTPUT_TO_THEM') - assert blocks == 0 - - bitcoind.generate_block(100, wait_for_mempool=txid) - # This works even if they disconnect and listpeerchannels() is empty: - wait_for(lambda: l1.rpc.listpeerchannels()['channels'] == []) - wait_for(lambda: l2.rpc.listpeerchannels()['channels'] == []) - - # TEST 2: Cheat from post-upgrade. 
- l1.fundwallet(FUNDAMOUNT + 1000) - l1.rpc.connect(l2.info['id'], 'localhost', port=l2.port) - l1.rpc.fundchannel(l2.info['id'], 'all', channel_type=[]) - - l1.rpc.disconnect(l2.info['id'], force=True) - l1.rpc.connect(l2.info['id'], 'localhost', l2.port) - - l1.daemon.wait_for_log('option_static_remotekey enabled at 1/1') - l2.daemon.wait_for_log('option_static_remotekey enabled at 1/1') - bitcoind.generate_block(1, wait_for_mempool=1) - wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state'] == 'CHANNELD_NORMAL') - - l1.pay(l2, 1000000) - - # We will try to cheat later. - tx = l1.rpc.dev_sign_last_tx(l2.info['id'])['tx'] - - l1.pay(l2, 1000000) - - # Pre-statickey penalty works. - bitcoind.rpc.sendrawtransaction(tx) - bitcoind.generate_block(1) - - _, txid, blocks = l2.wait_for_onchaind_tx('OUR_PENALTY_TX', - 'THEIR_REVOKED_UNILATERAL/DELAYED_CHEAT_OUTPUT_TO_THEM') - assert blocks == 0 - - bitcoind.generate_block(100, wait_for_mempool=txid) - # This works even if they disconnect and listpeers() is empty: - wait_for(lambda: len(l1.rpc.listpeerchannels()['channels']) == 0) - wait_for(lambda: len(l2.rpc.listpeerchannels()['channels']) == 0) - - # TEST 3: Unilateral close from pre-upgrade - l1.rpc.connect(l2.info['id'], 'localhost', port=l2.port) - l1.fundwallet(FUNDAMOUNT + 1000) - l1.rpc.fundchannel(l2.info['id'], 'all', channel_type=[]) - bitcoind.generate_block(1, wait_for_mempool=1) - wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state'] == 'CHANNELD_NORMAL') - - # Give them both something for onchain close. - l1.pay(l2, 1000000) - - # Make sure it's completely quiescent. - l1.daemon.wait_for_log("chan#3: Removing out HTLC 0 state RCVD_REMOVE_ACK_REVOCATION FULFILLED") - - l1.rpc.disconnect(l2.info['id'], force=True) - l1.rpc.connect(l2.info['id'], 'localhost', l2.port) - l1.daemon.wait_for_log('option_static_remotekey enabled at 3/3') - - # But this is the *pre*-update commit tx! - l2.stop() - l1.rpc.close(l2.info['id'], unilateraltimeout=1) - bitcoind.generate_block(1, wait_for_mempool=1) - l2.start() - - # They should both handle it fine. - _, txid, blocks = l1.wait_for_onchaind_tx('OUR_DELAYED_RETURN_TO_WALLET', - 'OUR_UNILATERAL/DELAYED_OUTPUT_TO_US') - assert blocks == 4 - l2.daemon.wait_for_logs(['Ignoring output .*: THEIR_UNILATERAL/OUTPUT_TO_US', - 'Ignoring output .*: THEIR_UNILATERAL/DELAYED_OUTPUT_TO_THEM']) - bitcoind.generate_block(4) - bitcoind.generate_block(100, wait_for_mempool=txid) - - # This works even if they disconnect and listpeerchannels() is empty: - wait_for(lambda: len(l1.rpc.listpeerchannels()['channels']) == 0) - wait_for(lambda: len(l2.rpc.listpeerchannels()['channels']) == 0) - - # TEST 4: Unilateral close from post-upgrade - l1.rpc.connect(l2.info['id'], 'localhost', port=l2.port) - l1.rpc.fundchannel(l2.info['id'], 'all', channel_type=[]) - - l1.rpc.disconnect(l2.info['id'], force=True) - l1.rpc.connect(l2.info['id'], 'localhost', l2.port) - l1.daemon.wait_for_log('option_static_remotekey enabled at 1/1') - - bitcoind.generate_block(1, wait_for_mempool=1) - wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state'] == 'CHANNELD_NORMAL') - - # Move to static_remotekey. - l1.pay(l2, 1000000) - - l2.stop() - l1.rpc.close(l2.info['id'], unilateraltimeout=1) - bitcoind.generate_block(1, wait_for_mempool=1) - l2.start() - - # They should both handle it fine. 
- _, txid, blocks = l1.wait_for_onchaind_tx('OUR_DELAYED_RETURN_TO_WALLET', - 'OUR_UNILATERAL/DELAYED_OUTPUT_TO_US') - assert blocks == 4 - l2.daemon.wait_for_logs(['Ignoring output .*: THEIR_UNILATERAL/OUTPUT_TO_US', - 'Ignoring output .*: THEIR_UNILATERAL/DELAYED_OUTPUT_TO_THEM']) - - bitcoind.generate_block(4) - bitcoind.generate_block(100, wait_for_mempool=txid) - - # This works even if they disconnect and listpeerchannels() is empty: - wait_for(lambda: len(l2.rpc.listpeerchannels()['channels']) == 0) - - -@unittest.skip("experimental-upgrade-protocol TLV fields conflict with splicing TLV fields") -def test_upgrade_statickey_fail(node_factory, executor, bitcoind): - """We reconnect at all points during retransmit, and we won't upgrade.""" - l1_disconnects = ['-WIRE_COMMITMENT_SIGNED', - '-WIRE_REVOKE_AND_ACK'] - l2_disconnects = ['-WIRE_REVOKE_AND_ACK', - '-WIRE_COMMITMENT_SIGNED'] - - l1, l2 = node_factory.get_nodes(2, opts=[{'may_reconnect': True, - 'dev-no-reconnect': None, - 'disconnect': l1_disconnects, - # This allows us to send non-static-remotekey! - 'dev-any-channel-type': None, - 'experimental-upgrade-protocol': None, - # Don't have feerate changes! - 'feerates': (7500, 7500, 7500, 7500)}, - {'may_reconnect': True, - 'dev-no-reconnect': None, - 'experimental-upgrade-protocol': None, - # This forces us to accept non-static-remotekey! - 'dev-any-channel-type': None, - 'disconnect': l2_disconnects, - 'plugin': os.path.join(os.getcwd(), 'tests/plugins/hold_htlcs.py'), - 'hold-time': 10000, - 'hold-result': 'fail'}]) - l1.fundwallet(FUNDAMOUNT + 1000) - l1.rpc.connect(l2.info['id'], 'localhost', port=l2.port) - l1.rpc.fundchannel(l2.info['id'], 'all', channel_type=[]) - bitcoind.generate_block(1, wait_for_mempool=1) - wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['state'] == 'CHANNELD_NORMAL') - - # This HTLC will fail - l1.rpc.sendpay([{'amount_msat': 1000, 'id': l2.info['id'], 'delay': 5, 'channel': first_scid(l1, l2)}], '00' * 32, payment_secret='00' * 32) - - # Each one should cause one disconnection, no upgrade. - for d in l1_disconnects + l2_disconnects: - l1.daemon.wait_for_log('Peer connection lost') - l2.daemon.wait_for_log('Peer connection lost') - assert not l1.daemon.is_in_log('option_static_remotekey enabled') - assert not l2.daemon.is_in_log('option_static_remotekey enabled') - l1.rpc.connect(l2.info['id'], 'localhost', l2.port) - line1 = l1.daemon.wait_for_log('No upgrade') - line2 = l2.daemon.wait_for_log('No upgrade') - - # On the last reconnect, it retransmitted revoke_and_ack. - assert re.search('No upgrade: we retransmitted', line1) - assert re.search('No upgrade: pending changes', line2) - - # Make sure we already skip the first of these. - l1.daemon.wait_for_log('billboard perm: Reconnected, and reestablished.') - assert 'option_static_remotekey' not in only_one(l1.rpc.listpeerchannels()['channels'])['features'] - assert 'option_static_remotekey' not in only_one(l2.rpc.listpeerchannels()['channels'])['features'] - - sleeptime = 1 - while True: - # Now when we reconnect, despite having an HTLC, we're quiescent. - l1.rpc.disconnect(l2.info['id'], force=True) - l1.rpc.connect(l2.info['id'], 'localhost', l2.port) - - oldstart = l1.daemon.logsearch_start - l1.daemon.wait_for_log('billboard perm: Reconnected, and reestablished.') - if not l1.daemon.is_in_log('No upgrade:', start=oldstart): - break - - # Give it some processing time before reconnect... 
- time.sleep(sleeptime) - sleeptime += 1 - - l1.daemon.logsearch_start = oldstart - assert l1.daemon.wait_for_log('option_static_remotekey enabled at 2/2') - assert l2.daemon.wait_for_log('option_static_remotekey enabled at 2/2') - assert 'option_static_remotekey' in only_one(l1.rpc.listpeerchannels()['channels'])['features'] - assert 'option_static_remotekey' in only_one(l2.rpc.listpeerchannels()['channels'])['features'] - - def test_quiescence(node_factory, executor): l1, l2 = node_factory.line_graph(2) From aa6984c41e3aabe8c10ecb01e7eb877b713cf2b4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:58:11 +1030 Subject: [PATCH 16/39] pytest: move benchmark in test_connection.py to tests/benchmark.py Signed-off-by: Rusty Russell --- tests/benchmark.py | 36 +++++++++++++++++++++++++++++++++- tests/test_connection.py | 35 ----------------------------------- 2 files changed, 35 insertions(+), 36 deletions(-) diff --git a/tests/benchmark.py b/tests/benchmark.py index ec0062e82f7a..97d26f6071c1 100644 --- a/tests/benchmark.py +++ b/tests/benchmark.py @@ -1,7 +1,8 @@ from concurrent import futures from fixtures import * # noqa: F401,F403 +from pyln.client import RpcError from tqdm import tqdm -from utils import (wait_for, TIMEOUT) +from utils import (wait_for, TIMEOUT, only_one) import os @@ -228,3 +229,36 @@ def test_spam_listcommands(node_factory, bitcoind, benchmark): # This calls "listinvoice" 100,000 times (which doesn't need a transaction commit) benchmark(l1.rpc.spamlistcommand, 100_000) + + +def test_payment_speed(node_factory, benchmark): + """This makes sure we don't screw up Nagle handling. + + Normally: + Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations + test_payment_speed 16.3587 40.4925 27.4874 5.5512 27.7885 8.9291 9;0 36.3803 33 1 + + Without TCP_NODELAY: + Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations + test_payment_speed 153.7132 163.2027 158.6747 3.4059 158.5219 6.3745 3;0 6.3022 9 1 + """ + l1 = get_bench_node(node_factory, extra_options={'commit-time': 0}) + l2 = get_bench_node(node_factory, extra_options={'commit-time': 0}) + + node_factory.join_nodes([l1, l2]) + + scid = only_one(l1.rpc.listpeerchannels()['channels'])['short_channel_id'] + routestep = { + 'amount_msat': 100, + 'id': l2.info['id'], + 'delay': 5, + 'channel': scid + } + + def onepay(l1, routestep): + phash = random.randbytes(32).hex() + l1.rpc.sendpay([routestep], phash) + with pytest.raises(RpcError, match="WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS"): + l1.rpc.waitsendpay(phash) + + benchmark(onepay, l1, routestep) diff --git a/tests/test_connection.py b/tests/test_connection.py index bd887416c24f..37530022b966 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -4579,41 +4579,6 @@ def test_no_delay(node_factory): assert end < start + 100 * 0.5 -@unittest.skipIf(os.getenv('TEST_BENCH', '0') == '0', "For profiling") -def test_bench(node_factory): - """Is our Nagle disabling for critical messages working?""" - l1, l2 = node_factory.get_nodes(2, opts={'start': False, - 'commit-time': 0}) - - # memleak detection plays havoc with profiles.
- del l1.daemon.env["LIGHTNINGD_DEV_MEMLEAK"] - del l2.daemon.env["LIGHTNINGD_DEV_MEMLEAK"] - - l1.start() - l2.start() - node_factory.join_nodes([l1, l2]) - - scid = only_one(l1.rpc.listpeerchannels()['channels'])['short_channel_id'] - routestep = { - 'amount_msat': 100, - 'id': l2.info['id'], - 'delay': 5, - 'channel': scid - } - - start = time.time() - # If we were stupid enough to leave Nagle enabled, this would add 200ms - # seconds delays each way! - for _ in range(1000): - phash = random.randbytes(32).hex() - l1.rpc.sendpay([routestep], phash) - with pytest.raises(RpcError, match="WIRE_INCORRECT_OR_UNKNOWN_PAYMENT_DETAILS"): - l1.rpc.waitsendpay(phash) - end = time.time() - duration = end - start - assert duration == 0 - - def test_listpeerchannels_by_scid(node_factory): l1, l2, l3 = node_factory.line_graph(3, announce_channels=False) From 50fcade03892d7bf983206b35924077771a87f6b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 09:59:11 +1030 Subject: [PATCH 17/39] pytest: give test_xpay_maxfee longer, as it can time out under CI. ``` > ret = l1.rpc.xpay(invstring=inv, maxfee=maxfee) tests/test_xpay.py:585: ... > raise RpcError(method, payload, resp['error']) E pyln.client.lightning.RpcError: RPC call failed: method: xpay, payload: {'invstring': 'lnbcrt1m1p5n5wdzsp5qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqpp52mu6842a26hs40xxgzscflm4smk5yjctqgf7hhrwhx7dh2492vzsdp22pshj6twvusxummyv5sr2wfqwa5hg6pqd4shsen9v5cqpj9qyyqqqj9kvjvrzy0ygdsfsskjtss0xrkrt7lq8jyrgzvtvdw5y9xqy0v25ddxd787c9ym36udm876lyhevznj8j9qzk0r7x03xm0akvq6ltwcq7vm7tk', 'maxfee': 57966}, error: {'code': 209, 'message': "Failed after 4 attempts. We got temporary_channel_failure for 59x81x28707/1, assuming it can't carry 49498813msat. Then routing for remaining 49498813msat failed: linear_routes: timed out after deadline"} ... lightningd-1 2025-12-11T03:25:41.972Z DEBUG plugin-cln-askrene: notify msg debug: get_routes failed after 15572 ms ``` Signed-off-by: Rusty Russell --- tests/test_xpay.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_xpay.py b/tests/test_xpay.py index 7423a951c51a..08219dedca61 100644 --- a/tests/test_xpay.py +++ b/tests/test_xpay.py @@ -557,7 +557,9 @@ def test_xpay_maxfee(node_factory, bitcoind, chainparams): opts=[{'gossip_store_file': outfile.name, 'subdaemon': 'channeld:../tests/plugins/channeld_fakenet', 'allow_warning': True, - 'dev-throttle-gossip': None}, + 'dev-throttle-gossip': None, + # This can be more than 10 seconds under CI! + 'askrene-timeout': 60}, {'allow_bad_gossip': True}]) # l1 needs to know l2's shaseed for the channel so it can make revocations From 7ae430cf2e1b21189edaca0f27fef9e7d4dd7cd0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 10:00:11 +1030 Subject: [PATCH 18/39] pytest: fix timing flake in test_invoice_expiry. Under Postgres, this actually takes more than 2 seconds, so w2 really has timed out already: ``` time.sleep(2) # total 2 assert not w1.done() > assert not w2.done() E assert not True E + where True = done() E + where done = .done tests/test_invoices.py:420: AssertionError ``` So space the timeouts out more, and sleep one second too short; the .result() (which sleeps) will catch up if we were extremely slow. 
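The rearranged test leans on concurrent.futures semantics: .done() is a non-blocking poll, while .result() blocks until the RPC actually returns, so a slow runner only stretches the blocking wait. A standalone sketch of the pattern (not using the test fixtures; names hypothetical):

```python
import time
from concurrent.futures import ThreadPoolExecutor


def expire(after):
    time.sleep(after)
    raise RuntimeError("expired")


with ThreadPoolExecutor() as ex:
    w = ex.submit(expire, 2)
    assert not w.done()  # non-blocking: still pending
    try:
        w.result()       # blocks until completion, however slow the box
    except RuntimeError:
        pass
```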
Signed-off-by: Rusty Russell
---
 tests/test_invoices.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/test_invoices.py b/tests/test_invoices.py
index 6f4bc3ca612f..a3db715adf02 100644
--- a/tests/test_invoices.py
+++ b/tests/test_invoices.py
@@ -407,9 +407,10 @@ def test_invoice_expiry(node_factory, executor):
 
     # Test expiration waiting.
     # The second invoice created expires first.
-    l2.rpc.invoice('any', 'inv1', 'description', 10)
-    l2.rpc.invoice('any', 'inv2', 'description', 4)
-    l2.rpc.invoice('any', 'inv3', 'description', 16)
+    # Times should be long enough even for our terrible CI runners!
+    l2.rpc.invoice('any', 'inv1', 'description', 16)
+    l2.rpc.invoice('any', 'inv2', 'description', 10)
+    l2.rpc.invoice('any', 'inv3', 'description', 22)
 
     # Check waitinvoice correctly waits
     w1 = executor.submit(l2.rpc.waitinvoice, 'inv1')
@@ -419,19 +420,18 @@ def test_invoice_expiry(node_factory, executor):
     assert not w1.done()
     assert not w2.done()
     assert not w3.done()
-    time.sleep(4)  # total 6
+    time.sleep(7)  # total 9
 
     assert not w1.done()
-    with pytest.raises(RpcError):
+    with pytest.raises(RpcError):  # total 10
         w2.result()
     assert not w3.done()
 
-    time.sleep(6)  # total 12
-    with pytest.raises(RpcError):
+    time.sleep(5)  # total 15
+    with pytest.raises(RpcError):  # total 16
         w1.result()
     assert not w3.done()
 
-    time.sleep(8)  # total 20
     with pytest.raises(RpcError):
         w3.result()
 

From 80b0ca5c920a9387878b45aad7f38a393df27bb5 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Wed, 7 Jan 2026 10:01:11 +1030
Subject: [PATCH 19/39] ci: don't run shard 2/12 ubsan without parallel.

3974806e5af added this:

    CI: Try not running group 2/10 UBSAN in parallel.

    It's being killed with signal 143, which means docker isn't happy;
    too much memory consumption?

But since we're now at 12 groups, that probably no longer applies (it
might not even have applied before: so many things have changed in the
two years since that commit).  And it caused this shard to take over 2
hours and time out.

Signed-off-by: Rusty Russell
---
 .github/workflows/ci.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 2c7a9b9c36c1..f0b531c8eb1a 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -553,7 +553,7 @@ jobs:
           PYTEST_OPTS: --test-group=1 --test-group-count=12
         - NAME: ASan/UBSan (02/12)
           GROUP: 2
-          PYTEST_OPTS: --test-group=2 --test-group-count=12 -n 1
+          PYTEST_OPTS: --test-group=2 --test-group-count=12
         - NAME: ASan/UBSan (03/12)
           GROUP: 3
           PYTEST_OPTS: --test-group=3 --test-group-count=12

From a66c9d1c6006e2f5a04a3f5d215ce69c77ae0ac1 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Wed, 7 Jan 2026 10:02:11 +1030
Subject: [PATCH 20/39] pytest: disable remaining flaky and skip markers to see what else fails.
Signed-off-by: Rusty Russell --- tests/test_closing.py | 2 +- tests/test_coinmoves.py | 1 - tests/test_invoices.py | 1 - tests/test_plugin.py | 1 - tests/test_reckless.py | 1 - 5 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/test_closing.py b/tests/test_closing.py index 33abde9c8b6e..47cc28d8077c 100644 --- a/tests/test_closing.py +++ b/tests/test_closing.py @@ -3437,7 +3437,7 @@ def test_closing_higherfee(node_factory, bitcoind, executor, anchors): wait_for(lambda: l2.rpc.listpeerchannels()['channels'][0]['state'] == 'CLOSINGD_COMPLETE') -@unittest.skipIf(True, "Test is extremely flaky") +@pytest.mark.flaky(reruns=3) def test_htlc_rexmit_while_closing(node_factory, executor): """Retranmitting an HTLC revocation while shutting down should work""" # FIXME: This should be in lnprototest! UNRELIABLE. diff --git a/tests/test_coinmoves.py b/tests/test_coinmoves.py index dc5abb044ce5..291f857ba6b3 100644 --- a/tests/test_coinmoves.py +++ b/tests/test_coinmoves.py @@ -681,7 +681,6 @@ def test_coinmoves_unilateral_htlc_before_included(node_factory, bitcoind): check_balances(l1, l2, fundchannel['channel_id'], 0) -@pytest.mark.flaky(reruns=5) @pytest.mark.openchannel('v1') @pytest.mark.openchannel('v2') @unittest.skipIf(TEST_NETWORK != 'regtest', "Amounts are for regtest.") diff --git a/tests/test_invoices.py b/tests/test_invoices.py index a3db715adf02..37e6695be7ab 100644 --- a/tests/test_invoices.py +++ b/tests/test_invoices.py @@ -927,7 +927,6 @@ def test_invoices_wait_db_migration(node_factory, bitcoind): @unittest.skipIf(os.getenv('TEST_DB_PROVIDER', 'sqlite3') != 'sqlite3', "This test is based on a sqlite3 snapshot") @unittest.skipIf(TEST_NETWORK != 'regtest', "The DB migration is network specific due to the chain var.") -@pytest.mark.flaky(reruns=5) def test_invoice_botched_migration(node_factory, chainparams): """Test for grubles' case, where they ran successfully with the wrong var: they have *both* last_invoice_created_index *and *last_invoices_created_index* (this can happen if invoice id 1 was deleted, so they didn't die on invoice creation): Error executing statement: wallet/db.c:1684: UPDATE vars SET name = 'last_invoices_created_index' WHERE name = 'last_invoice_created_index': UNIQUE constraint failed: vars.name diff --git a/tests/test_plugin.py b/tests/test_plugin.py index d45b07b3bd78..d63d1867ace4 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -4311,7 +4311,6 @@ def test_plugin_nostart(node_factory): assert [p['name'] for p in l1.rpc.plugin_list()['plugins'] if 'badinterp' in p['name']] == [] -@unittest.skip("A bit flaky, but when breaks, it is costing us 2h of CI time") def test_plugin_startdir_lol(node_factory): """Though we fail to start many of them, we don't crash!""" l1 = node_factory.get_node(broken_log='.*') diff --git a/tests/test_reckless.py b/tests/test_reckless.py index 275960d576fb..311ed1ae4c1a 100644 --- a/tests/test_reckless.py +++ b/tests/test_reckless.py @@ -351,7 +351,6 @@ def test_tag_install(node_factory): header = line -@pytest.mark.flaky(reruns=5) @pytest.mark.slow_test def test_reckless_uv_install(node_factory): node = get_reckless_node(node_factory) From 20e65811ade8f09e6f1ae59c509ad7a50943f54f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 10:03:11 +1030 Subject: [PATCH 21/39] pytest: don't run test_hook_in_use under VALGRIND on CI. It's not reliable: ``` # We should have deferred hook update at least once! 
> l2.daemon.wait_for_log("UNUSUAL plugin-dep_b.py: Deferring registration of hook htlc_accepted until it's not in use.") tests/test_plugin.py:2646: ... if self.is_in_log(r): print("({} was previously in logs!)".format(r)) > raise TimeoutError('Unable to find "{}" in logs.'.format(exs)) E TimeoutError: Unable to find "[re.compile("UNUSUAL plugin-dep_b.py: Deferring registration of hook htlc_accepted until it's not in use.")]" in logs. ``` Signed-off-by: Rusty Russell --- tests/test_plugin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index d63d1867ace4..713b3fbb399a 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -2614,6 +2614,7 @@ def test_htlc_accepted_hook_failonion(node_factory): l1.rpc.pay(inv) +@pytest.mark.slow_test # VALGRIND running generally too slow to trigger race we need. def test_hook_in_use(node_factory): """If a hook is in use when we add a plugin to it, we have to defer""" dep_a = os.path.join(os.path.dirname(__file__), 'plugins/dep_a.py') From 38f67c996d3c2e846dd8209757d85a2ca9a79208 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 10:04:11 +1030 Subject: [PATCH 22/39] lightningd: fix occasional memleak when we detach subd from channel. Do this by setting notleak when we do the detach! ``` **BROKEN** lightningd: MEMLEAK: 0x60f0000bbb38 **BROKEN** lightningd: label=ccan/ccan/io/io.c:92:struct io_conn **BROKEN** lightningd: alloc: **BROKEN** lightningd: /home/runner/work/lightning/lightning/ccan/ccan/tal/tal.c:488 (tal_alloc_) **BROKEN** lightningd: /home/runner/work/lightning/lightning/ccan/ccan/io/io.c:92 (io_new_conn_) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/subd.c:781 (new_subd) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/subd.c:835 (new_channel_subd_) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/channel_control.c:1715 (peer_start_channeld) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/peer_control.c:1390 (connect_activate_subd) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/peer_control.c:1516 (peer_connected_hook_final) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/plugin_hook.c:243 (hook_done) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/plugin_hook.c:343 (plugin_hook_call_next) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/plugin_hook.c:299 (plugin_hook_callback) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/plugin.c:701 (plugin_response_handle) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/plugin.c:790 (plugin_read_json) **BROKEN** lightningd: /home/runner/work/lightning/lightning/ccan/ccan/io/io.c:60 (next_plan) **BROKEN** lightningd: /home/runner/work/lightning/lightning/ccan/ccan/io/io.c:422 (do_plan) **BROKEN** lightningd: /home/runner/work/lightning/lightning/ccan/ccan/io/io.c:439 (io_ready) **BROKEN** lightningd: /home/runner/work/lightning/lightning/ccan/ccan/io/poll.c:470 (io_loop) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/io_loop_with_timers.c:22 (io_loop_with_timers) **BROKEN** lightningd: /home/runner/work/lightning/lightning/lightningd/lightningd.c:1492 (main) **BROKEN** lightningd: ../sysdeps/nptl/libc_start_call_main.h:58 (__libc_start_call_main) **BROKEN** lightningd: ../csu/libc-start.c:392 (__libc_start_main_impl) **BROKEN** lightningd: parents: **BROKEN** lightningd: 
lightningd/lightningd.c:108:struct lightningd ``` Signed-off-by: Rusty Russell --- lightningd/subd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lightningd/subd.c b/lightningd/subd.c index 744d615296c5..05e08c08205a 100644 --- a/lightningd/subd.c +++ b/lightningd/subd.c @@ -438,6 +438,8 @@ static bool handle_peer_error(struct subd *sd, const u8 *msg, int fds[1]) /* Don't free sd; we may be about to free channel. */ sd->channel = NULL; + /* While it's cleaning up, this is not a leak! */ + notleak(sd); sd->errcb(channel, peer_fd, desc, err_for_them, disconnect, warning); return true; } @@ -641,6 +643,8 @@ static void destroy_subd(struct subd *sd) /* Clear any transient messages in billboard */ sd->billboardcb(channel, false, NULL); + /* While it's cleaning up, this is not a leak! */ + notleak(sd); sd->channel = NULL; /* We can be freed both inside msg handling, or spontaneously. */ @@ -928,11 +932,6 @@ void subd_release_channel(struct subd *owner, const void *channel) assert(owner->channel == channel); owner->channel = NULL; tal_free(owner); - } else { - /* Caller has reassigned channel->owner, so there's no pointer - * to this subd owner while it's freeing itself. If we - * ask memleak right now, it will complain! */ - notleak(owner); } } From 065e78e39932105df634e4485394843623f68203 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 10:05:11 +1030 Subject: [PATCH 23/39] pytest: fix flake if rune tests are slow. If one second has passed during testing, checkrune might pass: ``` # default (sec) rune_per_default = l1.rpc.createrune(restrictions=[["per=1"]])['rune'] assert rune_per_default == 'NrM7go6C4qzfRQDkUSv1DtRroJWSKqdjIOuvGS4TLFE9NCZwZXI9MQ==' > do_test_rune_per_restriction(l1, rune_per_default, 1) tests/test_runes.py:269: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ l1 = rune_to_test = 'NrM7go6C4qzfRQDkUSv1DtRroJWSKqdjIOuvGS4TLFE9NCZwZXI9MQ==' per_sec = 1 def do_test_rune_per_restriction(l1, rune_to_test, per_sec): ... 
        # cannot use same rune till 'per_sec' seconds
>       with pytest.raises(RpcError, match='Not permitted:') as exc_info:
E       Failed: DID NOT RAISE <class 'pyln.client.lightning.RpcError'>

tests/test_runes.py:217: Failed
```

Signed-off-by: Rusty Russell
---
 tests/test_runes.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/test_runes.py b/tests/test_runes.py
index 4c080a8bae39..ad21719c7acd 100644
--- a/tests/test_runes.py
+++ b/tests/test_runes.py
@@ -244,9 +244,9 @@ def test_createrune_per_restriction(node_factory):
     l1 = node_factory.get_node()
 
     # 1 sec = 1,000,000,000 nanoseconds (nsec)
-    rune_per_nano_sec = l1.rpc.createrune(restrictions=[["per=1000000000nsec"]])['rune']
-    assert rune_per_nano_sec == 'Bl0V_vkVkGr4h356JbCMCcoDyyKE8djkoQ2156iPB509MCZwZXI9MTAwMDAwMDAwMG5zZWM='
-    do_test_rune_per_restriction(l1, rune_per_nano_sec, 1)
+    rune_per_nano_sec = l1.rpc.createrune(restrictions=[["per=2000000000nsec"]])['rune']
+    assert rune_per_nano_sec == 'FU709V1zX-JJR2hlpBfN2hpPEqahtzi6q65fZxnRRhM9MCZwZXI9MjAwMDAwMDAwMG5zZWM='
+    do_test_rune_per_restriction(l1, rune_per_nano_sec, 2)
 
     # 1 sec = 1,000,000 microseconds (usec)
     rune_per_micro_sec = l1.rpc.createrune(restrictions=[["per=2000000usec"]])['rune']
@@ -254,9 +254,9 @@ def test_createrune_per_restriction(node_factory):
     do_test_rune_per_restriction(l1, rune_per_micro_sec, 2)
 
     # 1 sec = 1,000 milliseconds (msec)
-    rune_per_milli_sec = l1.rpc.createrune(restrictions=[["per=1000msec"]])['rune']
-    assert rune_per_milli_sec == 'EzVpQwjYe2aoNQiRa4_s7FJtomD3kWzx7lusMpzA59w9MiZwZXI9MTAwMG1zZWM='
-    do_test_rune_per_restriction(l1, rune_per_milli_sec, 1)
+    rune_per_milli_sec = l1.rpc.createrune(restrictions=[["per=2000msec"]])['rune']
+    assert rune_per_milli_sec == 'eoEyi0Na_GeXBpmQ_cXQHrvmAuGWwq4bJrYo0jKk6V09MiZwZXI9MjAwMG1zZWM='
+    do_test_rune_per_restriction(l1, rune_per_milli_sec, 2)
 
     # 1 sec
     rune_per_sec = l1.rpc.createrune(restrictions=[["per=2sec"]])['rune']
@@ -264,9 +264,9 @@ def test_createrune_per_restriction(node_factory):
     do_test_rune_per_restriction(l1, rune_per_sec, 2)
 
     # default (sec)
-    rune_per_default = l1.rpc.createrune(restrictions=[["per=1"]])['rune']
-    assert rune_per_default == 'NrM7go6C4qzfRQDkUSv1DtRroJWSKqdjIOuvGS4TLFE9NCZwZXI9MQ=='
-    do_test_rune_per_restriction(l1, rune_per_default, 1)
+    rune_per_default = l1.rpc.createrune(restrictions=[["per=2"]])['rune']
+    assert rune_per_default == 'pd0Xr2U3uv-mJQfsp801doqTN5zpRRuc2Clp5Yb8zmU9NCZwZXI9Mg=='
+    do_test_rune_per_restriction(l1, rune_per_default, 2)
 
     # 1 minute
     rune_per_min = l1.rpc.createrune(restrictions=[["per=1min"]])['rune']

From 607f828227b8212102637edcf8cac9e70173c1da Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Wed, 7 Jan 2026 10:06:11 +1030
Subject: [PATCH 24/39] pytest: get more information when test_funding_v2_cancel_race fails.

```
2026-01-05T00:11:22.0447771Z             # Only up to one should succeed.
2026-01-05T00:11:22.0448201Z             success = False
2026-01-05T00:11:22.0448571Z             for c in completes:
2026-01-05T00:11:22.0448957Z                 try:
2026-01-05T00:11:22.0449322Z                     c.result(TIMEOUT)
2026-01-05T00:11:22.0449934Z                     num_complete += 1
2026-01-05T00:11:22.0450378Z >                   assert not success
2026-01-05T00:11:22.0451005Z E                   assert not True
2026-01-05T00:11:22.0451201Z
2026-01-05T00:11:22.0451331Z tests/test_connection.py:1596: AssertionError
```

We don't know *which* ones succeeded.  Fix that.
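The shape of the fix, as a sketch (not the test code itself; the
(name, fn) pairs here are hypothetical stand-ins for the
openchannel_update/openchannel_abort submissions):

```
# Keep (label, future) pairs instead of bare futures, then resolve each
# into (label, succeeded?) so a failing assertion names the winners.
from concurrent.futures import ThreadPoolExecutor

calls = [("update", lambda: None), ("abort", lambda: 1 / 0)]  # stand-ins

with ThreadPoolExecutor() as ex:
    pending = [(name, ex.submit(fn)) for name, fn in calls]

results = []
for name, fut in pending:
    try:
        fut.result()
        results.append((name, True))
    except Exception:
        results.append((name, False))

winners = [name for name, ok in results if ok]
assert len(winners) <= 1, f"Multiple successes: {winners}"
```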
Signed-off-by: Rusty Russell --- tests/test_connection.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/test_connection.py b/tests/test_connection.py index 37530022b966..f0623c78e134 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -1580,27 +1580,30 @@ def test_funding_v2_cancel_race(node_factory, bitcoind, executor): # Switch order around. for i in range(4): if (i + count) % 2 == 0: - completes.append(executor.submit(l1.rpc.openchannel_update, - start['channel_id'], - start['psbt'])) + completes.append(("openchannel_update", + executor.submit(l1.rpc.openchannel_update, + start['channel_id'], + start['psbt']))) else: - cancels.append(executor.submit(l1.rpc.openchannel_abort, - start['channel_id'])) + cancels.append(("openchannel_abort", + executor.submit(l1.rpc.openchannel_abort, + start['channel_id']))) - # Only up to one should succeed. - success = False - for c in completes: + for i, c in enumerate(completes): try: - c.result(TIMEOUT) - num_complete += 1 - assert not success - success = True + c[1].result(TIMEOUT) + completes[i] = (completes[i][0], True) except RpcError: - pass + completes[i] = (completes[i][0], False) + + # Only up to one should succeed. + num_successes = sum(c[1] is True for c in completes) + assert num_successes <= 1, f"Multiple successes in {completes}, cancels = {cancels}" + num_complete += num_successes for c in cancels: try: - c.result(TIMEOUT) + c[1].result(TIMEOUT) num_cancel += 1 except RpcError: pass From fbb84c69b463f5da80ba8d898b7582ea85f47531 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 16:12:29 +1030 Subject: [PATCH 25/39] pytest: make sure node order is stable before querying in test_gossmap_lost_node The channel vanishes from listchannels when it's dying, *but* only when it gets deleted do we consider moving the actual node_announcement. We have to wait until gossipd has seen the 12 blocks, and move it if necessary. 
``` E Full diff: E { E 'nodes': [ E - { E - 'addresses': [], E - 'alias': 'SILENTARTIST-e986cd2-modded', E - 'color': '022d22', E - 'features': '808898880a8a59a1', E - 'last_timestamp': 1767572731, E - 'nodeid': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', E - }, E { E 'addresses': [], E 'alias': 'HOPPINGFIRE-e986cd2-modded', E 'color': '035d2b', E 'features': '808898880a8a59a1', E 'last_timestamp': 1767572731, E 'nodeid': '035d2b1192dfba134e10e540875d366ebc8bc353d5aa766b80c090b39c3a5d885d', E }, E { E 'addresses': [], E 'alias': 'JUNIORFELONY-e986cd2-modded', E 'color': '0382ce', E 'features': '808898880a8a59a1', E 'last_timestamp': 1767572731, E 'nodeid': '0382ce59ebf18be7d84677c2e35f23294b9992ceca95491fcf8a56c6cb2d9de199', E }, E { E 'addresses': [], E + 'alias': 'SILENTARTIST-e986cd2-modded', E + 'color': '022d22', E + 'features': '808898880a8a59a1', E + 'last_timestamp': 1767572731, E + 'nodeid': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', E + }, E + { E + 'addresses': [], E 'alias': 'JUNIORBEAM-e986cd2-modded', E 'color': '0266e4', E 'features': '808898880a8a59a1', E 'last_timestamp': 1767572732, E 'nodeid': '0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518', E }, E ], E } tests/test_gossip.py:2390: AssertionError ``` Signed-off-by: Rusty Russell --- tests/test_gossip.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_gossip.py b/tests/test_gossip.py index df1717d756e6..45bbd177d98d 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -2378,13 +2378,16 @@ def test_gossmap_lost_node(node_factory, bitcoind): scid23 = only_one(l2.rpc.listpeerchannels(l3.info['id'])['channels'])['short_channel_id'] l2.rpc.close(l3.info['id']) bitcoind.generate_block(13, wait_for_mempool=1) - wait_for(lambda: l1.rpc.listchannels(scid23) == {'channels': []}) + + # Order of nodes is not stable. + sync_blockheight(bitcoind, [l1]) + assert l1.rpc.listchannels(scid23) == {'channels': []} pre_channels = l1.rpc.listchannels() - pre_nodes = l1.rpc.listnodes() + pre_nodes = sorted(l1.rpc.listnodes()['nodes'], key=lambda n: n['nodeid']) l1.restart() post_channels = l1.rpc.listchannels() - post_nodes = l1.rpc.listnodes() + post_nodes = sorted(l1.rpc.listnodes()['nodes'], key=lambda n: n['nodeid']) assert post_channels == pre_channels assert post_nodes == pre_nodes From e6c9dab42d532521c4175fc2f74c605ddec45ef0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 16:12:41 +1030 Subject: [PATCH 26/39] pytest: reduce test_funding_v2_cancel_race nodes under CI. ``` [gw0] [ 24%] PASSED tests/test_misc.py::test_hsm_capabilities tests/test_connection.py::test_funding_cancel_race Error: Process completed with exit code 143. ``` Seems like 100 nodes is too many! 
Signed-off-by: Rusty Russell --- tests/test_connection.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_connection.py b/tests/test_connection.py index f0623c78e134..ffbf3c9e3d63 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -13,7 +13,7 @@ mine_funding_to_announce, first_scid, CHANNEL_SIZE ) -from pyln.testing.utils import VALGRIND, EXPERIMENTAL_DUAL_FUND, FUNDAMOUNT, RUST +from pyln.testing.utils import VALGRIND, EXPERIMENTAL_DUAL_FUND, FUNDAMOUNT, RUST, SLOW_MACHINE import os import pytest @@ -1476,6 +1476,8 @@ def test_funding_cancel_race(node_factory, bitcoind, executor): if VALGRIND: num = 5 + elif SLOW_MACHINE: + num = 20 else: num = 100 @@ -1557,6 +1559,8 @@ def test_funding_v2_cancel_race(node_factory, bitcoind, executor): if VALGRIND: num = 5 + elif SLOW_MACHINE: + num = 20 else: num = 100 From 1dc54a344bf3909da941ddcd2a25911fd80c527f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 16:12:41 +1030 Subject: [PATCH 27/39] pay: don't notify using uninitialized hint field. Rather than break the API, use total capacity here. ``` Valgrind error file: valgrind-errors.5880 ==5880== Use of uninitialised value of size 8 ==5880== at 0x4A390BB: _itoa_word (_itoa.c:183) ==5880== by 0x4A43C9B: __printf_buffer (vfprintf-process-arg.c:155) ==5880== by 0x4A69D90: vsnprintf (vsnprintf.c:96) ==5880== by 0x1875E6: json_out_addv (json_out.c:239) ==5880== by 0x14471E: json_add_primitive_fmt (json_stream.c:170) ==5880== by 0x144BA2: json_add_u64 (json_stream.c:282) ==5880== by 0x145E33: json_add_amount_msat (json_stream.c:619) ==5880== by 0x11DDE2: channel_hint_to_json (channel_hint.c:33) ==5880== by 0x11FE9F: channel_hint_notify_core (libplugin-pay.c:394) ==5880== by 0x11FF7A: channel_hint_notify (libplugin-pay.c:412) ==5880== by 0x1201EA: channel_hints_update (libplugin-pay.c:455) ==5880== by 0x122DAF: handle_intermediate_failure (libplugin-pay.c:1437) ==5880== ``` Signed-off-by: Rusty Russell --- plugins/channel_hint.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/plugins/channel_hint.c b/plugins/channel_hint.c index 4862832d8cbd..299c605d6305 100644 --- a/plugins/channel_hint.c +++ b/plugins/channel_hint.c @@ -30,8 +30,14 @@ void channel_hint_to_json(const char *name, const struct channel_hint *hint, json_object_start(dest, name); json_add_u32(dest, "timestamp", hint->timestamp); json_add_short_channel_id_dir(dest, "scid", hint->scid); - json_add_amount_msat(dest, "estimated_capacity_msat", - hint->estimated_capacity); + /* The estimated_capacity is unset if it's not enabled; use total_capacity */ + if (hint->enabled) { + json_add_amount_msat(dest, "estimated_capacity_msat", + hint->estimated_capacity); + } else { + json_add_amount_msat(dest, "estimated_capacity_msat", + hint->capacity); + } json_add_amount_msat(dest, "total_capacity_msat", hint->capacity); json_add_bool(dest, "enabled", hint->enabled); json_object_end(dest); From a55a037a90e3c0e4e6d072e03436c78038c5f5da Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 16:12:41 +1030 Subject: [PATCH 28/39] pytest: don't get upset at slow multi-input signing under valgrind. 
``` 2026-01-06T07:46:35.5710043Z lightningd-1 2026-01-06T07:45:11.040Z UNUSUAL jsonrpc#68: That's weird: Request signpsbt took 5099 milliseconds ``` Signed-off-by: Rusty Russell --- tests/test_wallet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_wallet.py b/tests/test_wallet.py index 967a7f9a2e87..a9a86f0d0db5 100644 --- a/tests/test_wallet.py +++ b/tests/test_wallet.py @@ -1225,7 +1225,8 @@ def test_sign_and_send_psbt(node_factory, bitcoind, chainparams): @unittest.skipIf(TEST_NETWORK == 'liquid-regtest', "BIP86 random_hsm not compatible with liquid-regtest bech32") def test_txsend(node_factory, bitcoind, chainparams): amount = 1000000 - l1 = node_factory.get_node(random_hsm=True) + # Under valgrind, we can actually take 5 seconds to sign multiple inputs! + l1 = node_factory.get_node(random_hsm=True, broken_log="That's weird: Request signpsbt took") addr = chainparams['example_addr'] # Add some funds to withdraw later From fa7ec2c00ffe9d8c8612a0c81423e7bd48bb1a1e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 16:12:42 +1030 Subject: [PATCH 29/39] pytest: explicitly test failed case exposed by race. This showed up as a flake, where we "got lucky" and the sendpay resolved before waitsendpay was called. Instead, make this race explicit, so we can test it. ``` # FIXME: #define PAY_UNPARSEABLE_ONION 202 PAY_UNPARSEABLE_ONION = 202 > assert err.value.error['code'] == PAY_UNPARSEABLE_ONION E assert 204 == 202 tests/test_misc.py:2152: AssertionError ``` Signed-off-by: Rusty Russell --- tests/test_misc.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_misc.py b/tests/test_misc.py index c7a697394d33..7d85942f82af 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -2135,6 +2135,7 @@ def test_bad_onion(node_factory, bitcoind): assert err.value.error['data']['erring_channel'] == route[1]['channel'] +@pytest.mark.xfail(strict=True) def test_bad_onion_immediate_peer(node_factory, bitcoind): """Test that we handle the malformed msg when we're the origin""" l1, l2 = node_factory.line_graph(2, opts=[{}, {'dev-fail-process-onionpacket': None}]) @@ -2154,6 +2155,13 @@ def test_bad_onion_immediate_peer(node_factory, bitcoind): WIRE_INVALID_ONION_HMAC = 0x8000 | 0x4000 | 5 assert err.value.error['data']['failcode'] == WIRE_INVALID_ONION_HMAC + # Asking again about the same payment should give same result. + with pytest.raises(RpcError) as err: + l1.rpc.waitsendpay(inv['payment_hash']) + + assert err.value.error['code'] == PAY_UNPARSEABLE_ONION + assert err.value.error['data']['failcode'] == WIRE_INVALID_ONION_HMAC + # Same, but using injectpaymentonion with corrupt onion. blockheight = l1.rpc.getinfo()['blockheight'] hops = [{'pubkey': l1.info['id'], From 713a8431f3969cd757b4e38720fdee50b3698648 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 16:12:42 +1030 Subject: [PATCH 30/39] lightningd: fix error code on waitsendpay on old errors. We don't explicitly save the return code in db, so we need to reconstruct it. We didn't cover the "peer told us our onion was bad" corner case. But it's hardly worth a changelog message, since users will never see this. 
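The mapping, sketched in Python (BOLT 4 puts BADONION in the top bit of
the failure code; PAY_UNPARSEABLE_ONION=202 and the erroneous 204 are
taken from the test above, while 203 for PAY_DESTINATION_PERM_FAIL is my
assumption):

```
# A sketch of the reconstruction, not the C code itself: if the *peer*
# could not parse the onion we sent, report "unparseable onion" rather
# than "try another route".
BADONION = 0x8000
PERM = 0x4000
PAY_UNPARSEABLE_ONION = 202      # from the test above
PAY_DESTINATION_PERM_FAIL = 203  # assumed value
PAY_TRY_OTHER_ROUTE = 204        # the wrong code the flake exposed

def waitsendpay_error_code(failcode, faildestperm):
    if failcode & BADONION:
        return PAY_UNPARSEABLE_ONION
    if faildestperm:
        return PAY_DESTINATION_PERM_FAIL
    return PAY_TRY_OTHER_ROUTE

# WIRE_INVALID_ONION_HMAC = BADONION | PERM | 5, as in the test:
assert waitsendpay_error_code(BADONION | PERM | 5, False) == 202
```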
Signed-off-by: Rusty Russell
---
 lightningd/pay.c   | 9 +++++++--
 tests/test_misc.py | 1 -
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lightningd/pay.c b/lightningd/pay.c
index 17a41a2ed081..156ecf303258 100644
--- a/lightningd/pay.c
+++ b/lightningd/pay.c
@@ -745,8 +745,13 @@ static struct command_result *wait_payment(struct lightningd *ld,
 		/* FIXME: We don't store this! */
 		fail->msg = NULL;
 
-		rpcerrorcode = faildestperm ? PAY_DESTINATION_PERM_FAIL
-			: PAY_TRY_OTHER_ROUTE;
+		/* Peers which fail directly can hit this! */
+		if (failcode & BADONION)
+			rpcerrorcode = PAY_UNPARSEABLE_ONION;
+		else if (faildestperm)
+			rpcerrorcode = PAY_DESTINATION_PERM_FAIL;
+		else
+			rpcerrorcode = PAY_TRY_OTHER_ROUTE;
 
 		return sendpay_fail(
 		    cmd, payment, rpcerrorcode, NULL, fail,
diff --git a/tests/test_misc.py b/tests/test_misc.py
index 7d85942f82af..0cfdbe21fbb2 100644
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@@ -2135,7 +2135,6 @@ def test_bad_onion(node_factory, bitcoind):
     assert err.value.error['data']['erring_channel'] == route[1]['channel']
 
 
-@pytest.mark.xfail(strict=True)
 def test_bad_onion_immediate_peer(node_factory, bitcoind):
     """Test that we handle the malformed msg when we're the origin"""
     l1, l2 = node_factory.line_graph(2, opts=[{}, {'dev-fail-process-onionpacket': None}])

From 82f10cfd6888505a8bf7f0eaa1cec1c2227654e6 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Wed, 7 Jan 2026 16:12:42 +1030
Subject: [PATCH 31/39] pytest: fix flake race in test_even_sendcustommsg.

It failed because it got the message before connectd had processed the
updated allow list:

```
lightningd-2 2026-01-06T07:53:02.817Z INFO plugin-allow_even_msgs.py: Killing plugin: stopped by lightningd via RPC
...
lightningd-1 2026-01-06T07:53:02.820Z DEBUG 022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59-lightningd: sendcustommsg id="-c:sendcustommsg#16" sending a custom even message (43690)
...
lightningd-1 2026-01-06T07:53:02.820Z 022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59-connectd: [OUT] aaaaffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbb
lightningd-2 2026-01-06T07:53:02.823Z 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-connectd: [IN] aaaaffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbb
...
lightningd-2 2026-01-06T07:53:02.823Z DEBUG connectd: Now allowing 0 custom message types
```

Resulting in:

```
    l2.daemon.wait_for_log(r'\[IN\] {}'.format(msg))
>   l1.daemon.wait_for_log('Invalid unknown even msg')

tests/test_misc.py:4673:
...
>           raise TimeoutError('Unable to find "{}" in logs.'.format(exs))
E           TimeoutError: Unable to find "[re.compile('Invalid unknown even msg')]" in logs.
```

Signed-off-by: Rusty Russell
---
 tests/test_misc.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_misc.py b/tests/test_misc.py
index 0cfdbe21fbb2..c95995d89406 100644
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@@ -4675,6 +4675,9 @@ def test_even_sendcustommsg(node_factory):
 
     # It does if we remove the plugin though!
     l2.rpc.plugin_stop("allow_even_msgs.py")
+
+    # Make sure connectd has processed the update!
+ l2.daemon.wait_for_log("connectd: Now allowing 0 custom message types") + l1.rpc.sendcustommsg(l2.info['id'], msg) l2.daemon.wait_for_log(r'\[IN\] {}'.format(msg)) l1.daemon.wait_for_log('Invalid unknown even msg') From 26e7e6ff64b02f701d0f339a5d6385325845f342 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 16:12:42 +1030 Subject: [PATCH 32/39] pytest: mark test_connection.py::test_disconnect_opener flaky. It's a real bug, which I've reported in https://github.com/ElementsProject/lightning/issues/8822 Signed-off-by: Rusty Russell --- tests/test_connection.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_connection.py b/tests/test_connection.py index ffbf3c9e3d63..7a3e85f9baaa 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -472,6 +472,8 @@ def test_disconnect(node_factory): @pytest.mark.openchannel('v1') @pytest.mark.openchannel('v2') +# FIXME: https://github.com/ElementsProject/lightning/issues/8822 +@pytest.mark.flaky(reruns=1) def test_disconnect_opener(node_factory): # Now error on opener side during channel open. disconnects = ['-WIRE_OPEN_CHANNEL', From afa8356e10dd327105a5fcfce9a9d88715122c4f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 19:09:02 +1030 Subject: [PATCH 33/39] pytest: mark reckless install test flaky. Sometimes it times out under CI, but running 100 times here reveals nothing. Assume network issues and mark it flaky. ``` node_factory = @pytest.mark.slow_test def test_reckless_uv_install(node_factory): node = get_reckless_node(node_factory) node.start() > r = reckless([f"--network={NETWORK}", "-v", "install", "testpluguv"], dir=node.lightning_dir) tests/test_reckless.py:358: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ tests/test_reckless.py:141: in reckless r = subprocess.run(cmds, capture_output=True, encoding='utf-8', env=my_env, /opt/hostedtoolcache/Python/3.10.19/x64/lib/python3.10/subprocess.py:505: in run stdout, stderr = process.communicate(input, timeout=timeout) /opt/hostedtoolcache/Python/3.10.19/x64/lib/python3.10/subprocess.py:1154: in communicate stdout, stderr = self._communicate(input, endtime, timeout) /opt/hostedtoolcache/Python/3.10.19/x64/lib/python3.10/subprocess.py:2022: in _communicate self._check_timeout(endtime, orig_timeout, stdout, stderr) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = endtime = 4623.246515403, orig_timeout = 60 stdout_seq = [b'[2026-01-07 05:55:15,159] DEBUG: Warning: Reckless requires write access\n[2026-01-07 05:55:15,159] DEBUG: Searching for testpluguv\n', b'[2026-01-07 05:55:15,179] DEBUG: InstInfo(testpluguv, https://github.com/lightningd/plugins, None, None, None, testpluguv), Source.GITHUB_REPO\nfound testpluguv in source: https://github.com/lightningd/plugins\n[2026-01-07 05:55:15,179] DEBUG: entry: None\n[2026-01-07 05:55:15,179] DEBUG: sub-directory: testpluguv\n[2026-01-07 05:55:15,179] DEBUG: Retrieving testpluguv from https://github.com/lightningd/plugins\n[2026-01-07 05:55:15,179] DEBUG: Install requested from InstInfo(testpluguv, https://github.com/lightningd/plugins, None, None, None, testpluguv).\n', b'cloning Source.GITHUB_REPO InstInfo(testpluguv, https://github.com/lightningd/plugins, None, None, None, testpluguv)\n[2026-01-07 05:55:15,405] DEBUG: cloned_src: InstInfo(testpluguv, /tmp/reckless-433081020a3dff932/clone, None, testpluguv.py, uv.lock, testpluguv/testpluguv)\n', b'[2026-01-07 05:55:15,409] DEBUG: using latest commit of default branch\n', 
b'[2026-01-07 05:55:15,417] DEBUG: checked out HEAD: 095457638f8080cd614a81cb4ad1cba7883549e3\n[2026-01-07 05:55:15,417] DEBUG: using installer pythonuv\n[2026-01-07 05:55:15,417] DEBUG: creating /tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1/reckless/testpluguv\n[2026-01-07 05:55:15,418] DEBUG: creating /tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1/reckless/testpluguv/source\n[2026-01-07 05:55:15,418] DEBUG: copying /tmp/reckless-433081020a3dff932/clone/testpluguv/testpluguv tree to /tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1/reckless/testpluguv/source/testpluguv\n[2026-01-07 05:55:15,419] DEBUG: linking source /tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1/reckless/testpluguv/source/testpluguv/testpluguv.py to /tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1/reckless/testpluguv/testpluguv.py\n[2026-01-07 05:55:15,419] DEBUG: InstInfo(testpluguv, /tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1/reckless/testpluguv, None, testpluguv.py, uv.lock, source/testpluguv)\n'] stderr_seq = [b'config file not found: /tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1/liquid-regtest/config\npress [Y] to create one now.\nconfig file not found: /tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1/reckless/liquid-regtest-reckless.conf\nconfig file not found: /tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1/reckless/.sources\n'] skip_check_and_raise = False def _check_timeout(self, endtime, orig_timeout, stdout_seq, stderr_seq, skip_check_and_raise=False): """Convenience for checking if a timeout has expired.""" if endtime is None: return if skip_check_and_raise or _time() > endtime: > raise TimeoutExpired( self.args, orig_timeout, output=b''.join(stdout_seq) if stdout_seq else None, stderr=b''.join(stderr_seq) if stderr_seq else None) E subprocess.TimeoutExpired: Command '['tools/reckless', '-l', '/tmp/ltests-kr4cjtd8/test_reckless_uv_install_1/lightning-1', '--network=liquid-regtest', '-v', 'install', 'testpluguv']' timed out after 60 seconds ``` Signed-off-by: Rusty Russell --- tests/test_reckless.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_reckless.py b/tests/test_reckless.py index 311ed1ae4c1a..94632a2422cb 100644 --- a/tests/test_reckless.py +++ b/tests/test_reckless.py @@ -351,7 +351,9 @@ def test_tag_install(node_factory): header = line +# Note: uv timeouts from the GH network seem to happen? @pytest.mark.slow_test +@pytest.mark.flaky(reruns=3) def test_reckless_uv_install(node_factory): node = get_reckless_node(node_factory) node.start() From 869ddb8863406351dbf9e08189d6b353c59f20a0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Jan 2026 19:14:26 +1030 Subject: [PATCH 34/39] pytest: work around pay flakiness. pay sometimes ignores exclusions. WONTFIX. ``` with pytest.raises(RpcError, match=r'is not reachable directly and all routehints were unusable.'): > l1.rpc.pay(inv, exclude=[scid12]) tests/test_pay.py:5279: ... 
elif "error" in resp: > raise RpcError(method, payload, resp['error']) E pyln.client.lightning.RpcError: RPC call failed: method: pay, payload: {'bolt11': 'lnbcrt1230n1p54mma3sp5x7uerjgyg7ws6fnzdwxc7pgpj6j25uhpqp5uvx3fk8dkcqm37m2spp5k02racjc9knux958u5rgtva24jfvxtr5w3t53pfeavn3thmyny0qdq8v3jhxccxqyjw5qcqp9rzjqgkjyd3q5dv6gllh77kygly9c3kfy0d9xwyjyxsq2nq3c83u5vw4jqqqvuqqqqgqqqqqqqqpqqqqqzsqqc9qxpqysgqcmv875mmzcjl8mwxxndy9an6p870ffpdxdtypmgf5gzsydnt2d68n4kjph0rcprye6tfz0ex0c5clgj3zwm8jgd5vs0fdv7hf7dqr8cqdrg3gf', 'exclude': ['103x2x0/1']}, error: {'code': 210, 'message': 'Ran out of routes to try after 6 attempts: see `paystatus`', 'attempts': [{'status': 'failed', 'failreason': 'No path found', 'partid': 0, 'amount_msat': 123000}, {'status': 'pending', 'failreason': 'No path found', 'partid': 1, 'amount_msat': 123000, 'parent_partid': 0}, {'status': 'failed', 'failreason': 'No path found', 'partid': 2, 'amount_msat': 57006, 'parent_partid': 1}, {'status': 'failed', 'failreason': 'No path found', 'partid': 4, 'amount_msat': 57006, 'parent_partid': 2}, {'status': 'failed', 'failreason': 'No path found', 'partid': 3, 'amount_msat': 65994, 'parent_partid': 1}, {'status': 'failed', 'failreason': 'No path found', 'partid': 5, 'amount_msat': 65994, 'parent_partid': 3}]} ``` The logs show that it doesn't exclude the routehint early: in successful runs we get "After filtering routehints we're left with 0 usable hints". Perhaps this is something to do with the timing of our own notifications? ``` 2026-01-07T05:51:10.7902502Z lightningd-1 2026-01-07T05:31:29.706Z DEBUG plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: Received getchaininfo blockcount=108, headercount=108 2026-01-07T05:51:10.7903334Z lightningd-1 2026-01-07T05:31:29.715Z DEBUG plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: waitblockheight reports syncheight=108 2026-01-07T05:51:10.7904256Z lightningd-1 2026-01-07T05:31:29.734Z DEBUG plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: Updated a channel hint for 103x2x0/1: enabled true, estimated capacity 978718000msat 2026-01-07T05:51:10.7905355Z lightningd-1 2026-01-07T05:31:29.734Z DEBUG plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: Updated a channel hint for 7269357x11669990x33910/1: enabled false, estimated capacity UNKNOWN 2026-01-07T05:51:10.7906580Z lightningd-1 2026-01-07T05:31:29.735Z DEBUG plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: Updated a channel hint for 103x2x0/1: enabled false, estimated capacity UNKNOWN 2026-01-07T05:51:10.7907665Z lightningd-1 2026-01-07T05:31:29.735Z INFO plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: Payment fee constraint 615msat is below exemption threshold, allowing a maximum fee of 5000msat 2026-01-07T05:51:10.7908845Z lightningd-1 2026-01-07T05:31:29.752Z DEBUG plugin-pay: Received a channel_hint {.scid = 103x2x0/1, .enabled = 1, .estimate = 978718000msat, .capacity = 1000000000msat } 2026-01-07T05:51:10.7909710Z lightningd-1 2026-01-07T05:31:29.754Z INFO plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: Filtering out 1 routehints 2026-01-07T05:51:10.7910544Z lightningd-1 2026-01-07T05:31:29.779Z DEBUG plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: Checking hint {.scid=103x2x0/1, .enabled=1, .estimate=978718000msat} 2026-01-07T05:51:10.7911470Z lightningd-1 2026-01-07T05:31:29.780Z DEBUG plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: After filtering routehints we're left with 1 usable hints 2026-01-07T05:51:10.7912385Z lightningd-1 2026-01-07T05:31:29.780Z DEBUG plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: Checking hint {.scid=103x2x0/1, 
.enabled=1, .estimate=978718000msat}
2026-01-07T05:51:10.7913471Z lightningd-1 2026-01-07T05:31:29.780Z DEBUG plugin-pay: cmd -c:pay#64/cln:pay#121 partid 0: Using routehint 022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59 (103x1x0) cltv_delta=6
```

Signed-off-by: Rusty Russell
---
 tests/test_pay.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_pay.py b/tests/test_pay.py
index 1b35d5094806..c921b907bf7f 100644
--- a/tests/test_pay.py
+++ b/tests/test_pay.py
@@ -5255,6 +5255,7 @@ def test_sendpay_grouping(node_factory, bitcoind):
     assert([p['status'] for p in pays] == ['failed', 'failed', 'complete'])
 
 
+@pytest.mark.flaky(reruns=2)
 def test_pay_manual_exclude(node_factory, bitcoind):
     l1, l2, l3 = node_factory.line_graph(3, wait_for_announce=True)
     l1_id = l1.rpc.getinfo()['id']

From 1012705a453ebd1b1153ed06c8c0eeb032c8b2bd Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 8 Jan 2026 07:41:18 +1030
Subject: [PATCH 35/39] lightningd: don't complain if gossipd tells us about dead channel on startup.

This can happen if gossipd hasn't processed the blocks yet:

```
lightningd-2 2026-01-07T06:05:19.430Z **BROKEN** 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-chan#3: gossipd gave channel_update in CGOSSIP_CHANNEL_ANNOUNCED_DEAD? update=010240d5d1b653118c047218802d8c5d6bda49124fc9e1cb30ceff72e24c44e6a20d0b6b6fbe5465def31a01c8ff49dc171542a64a1a69d5149698f31e1ba4e721c106226e46111a0b59caaf126043eb5bbf28c34f3a5e332a1fc7b2b73cf188910f00006f0000010000695df63a010200060000000000000000000000010000000a000000003b023380
```

It does catch up later, so ignore this.

Signed-off-by: Rusty Russell
---
 lightningd/channel_gossip.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lightningd/channel_gossip.c b/lightningd/channel_gossip.c
index d00627a15de1..01207818a5d5 100644
--- a/lightningd/channel_gossip.c
+++ b/lightningd/channel_gossip.c
@@ -1096,13 +1096,15 @@ void channel_gossip_update_from_gossipd(struct channel *channel,
 	case CGOSSIP_WAITING_FOR_USABLE:
 	case CGOSSIP_CHANNEL_DEAD:
 	case CGOSSIP_CHANNEL_UNANNOUNCED_DYING:
-	case CGOSSIP_CHANNEL_ANNOUNCED_DEAD:
 		/* Shouldn't happen. */
 		log_broken(channel->log,
 			   "gossipd gave channel_update in %s? update=%s",
 			   channel_gossip_state_str(channel->channel_gossip->state),
 			   tal_hex(tmpctx, channel_update));
 		/* fall thru */
+	/* ANNOUNCED_DEAD can happen if gossipd hadn't processed the block
+	 * when we restarted; ignore, as it will catch up soon. */
+	case CGOSSIP_CHANNEL_ANNOUNCED_DEAD:
 	case CGOSSIP_CHANNEL_ANNOUNCED_DYING:
 		if (taken(channel_update))
 			tal_free(channel_update);

From f06ef779dd6311def2432eddfaf8059b61ae3f91 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 8 Jan 2026 07:48:22 +1030
Subject: [PATCH 36/39] hsmd: check *all* anchor inputs for short sigs.

Anchors will have one input from the commitment tx, and at least one
more (in this case, 3 more); we were only checking the first one for
short signatures.

```
        total_feerate_perkw = total_fees / total_weight * 1000
>       check_feerate([l3, l2], total_feerate_perkw, feerate)

tests/test_closing.py:4064:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

nodes = [, ]
actual_feerate = 14006.105538595726, expected_feerate = 14000

    def check_feerate(nodes, actual_feerate, expected_feerate):
        # Feerate can't be lower.
        assert actual_feerate > expected_feerate - 2
 
        if actual_feerate >= expected_feerate + 2:
            if any([did_short_sig(n) for n in nodes]):
                return
 
            # Use assert as it shows the actual values on failure
>           assert actual_feerate < expected_feerate + 2
E           AssertionError
```

Signed-off-by: Rusty Russell
---
 hsmd/libhsmd.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/hsmd/libhsmd.c b/hsmd/libhsmd.c
index dd2329236abb..53988bf0358f 100644
--- a/hsmd/libhsmd.c
+++ b/hsmd/libhsmd.c
@@ -1820,12 +1820,16 @@ static u8 *handle_sign_anchorspend(struct hsmd_client *c, const u8 *msg_in)
 				fmt_pubkey(tmpctx, &local_funding_pubkey),
 				fmt_wally_psbt(tmpctx, psbt));
 	}
-	if (dev_warn_on_overgrind
-	    && psbt->inputs[0].signatures.num_items == 1
-	    && psbt->inputs[0].signatures.items[0].value_len < 71) {
-		hsmd_status_fmt(LOG_BROKEN, NULL,
-				"overgrind: short signature length %zu",
-				psbt->inputs[0].signatures.items[0].value_len);
+
+	if (dev_warn_on_overgrind) {
+		for (size_t i = 0; i < psbt->num_inputs; i++) {
+			if (psbt->inputs[i].signatures.num_items == 1
+			    && psbt->inputs[i].signatures.items[0].value_len < 71) {
+				hsmd_status_fmt(LOG_BROKEN, NULL,
+						"overgrind: short signature length %zu",
+						psbt->inputs[i].signatures.items[0].value_len);
+			}
+		}
 	}
 
 	return towire_hsmd_sign_anchorspend_reply(NULL, psbt);

From b0c84225bdf59f12ec265ce68867b31e4f4bb982 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 8 Jan 2026 11:48:03 +1030
Subject: [PATCH 37/39] pytest: fix flake in test_coinmoves_unilateral_htlc_timeout

We need to make sure the anchor spend reaches bitcoind, otherwise it
might mine the commitment tx without it.  This can happen in
test_coinmoves_unilateral_htlc_fulfill as well.

```
>       check_chain_moves(l1, expected_chain1)

tests/test_coinmoves.py:844:
...
E Full diff: E [ E { E 'account_id': 'wallet', E 'blockheight': 102, E 'created_index': 1, E 'credit_msat': 100000000000, E 'debit_msat': 0, E 'extra_tags': [], E 'output_msat': 100000000000, E 'primary_tag': 'deposit', E 'utxo': 'fca99b85e58f8ae23e5c6872e0500784997deb98bfc92e43449206553a108db2:1', E }, E { E 'account_id': 'wallet', E 'blockheight': 103, E 'created_index': 2, E 'credit_msat': 0, E 'debit_msat': 100000000000, E 'extra_tags': [], E 'output_msat': 100000000000, E 'primary_tag': 'withdrawal', E 'spending_txid': 'c097ad8bde478396c961369b69c50a144fae3423f36af4554f3fb1dacfdff83f', E 'utxo': 'fca99b85e58f8ae23e5c6872e0500784997deb98bfc92e43449206553a108db2:1', E }, E { E 'account_id': 'wallet', E 'blockheight': 103, E 'created_index': 3, E 'credit_msat': 25000000, E 'debit_msat': 0, E 'extra_tags': [], E 'output_msat': 25000000, E 'primary_tag': 'deposit', E 'utxo': 'c097ad8bde478396c961369b69c50a144fae3423f36af4554f3fb1dacfdff83f:1', E }, E { E 'account_id': '3ff8dfcfdab13f4f55f46af32334ae4f140ac5699b3661c9968347de8bad97c0', E 'blockheight': 103, E 'created_index': 4, E 'credit_msat': 99970073000, E 'debit_msat': 0, E 'extra_tags': [ E 'opener', E ], E 'output_msat': 99970073000, E 'peer_id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', E 'primary_tag': 'channel_open', E 'utxo': 'c097ad8bde478396c961369b69c50a144fae3423f36af4554f3fb1dacfdff83f:0', E }, E { E - 'account_id': 'wallet', E + 'account_id': '3ff8dfcfdab13f4f55f46af32334ae4f140ac5699b3661c9968347de8bad97c0', E 'blockheight': 104, E 'created_index': 5, E - 'credit_msat': 0, E - 'debit_msat': 25000000, E - 'extra_tags': [], E - 'output_msat': 25000000, E - 'primary_tag': 'withdrawal', E - 'spending_txid': '1b6fbf9887d6f9cce727fc8bf9f582a3353be682998d4bafc9691c9ed26897e7', E - 'utxo': 'c097ad8bde478396c961369b69c50a144fae3423f36af4554f3fb1dacfdff83f:1', E - }, E - { E - 'account_id': 'wallet', E - 'blockheight': 104, E - 'created_index': 6, E - 'credit_msat': Decimal('15579000.00000000'), E - 'debit_msat': 0, E - 'extra_tags': [], E - 'output_msat': Decimal('15579000.00000000'), E - 'primary_tag': 'deposit', E - 'utxo': '1b6fbf9887d6f9cce727fc8bf9f582a3353be682998d4bafc9691c9ed26897e7:0', E - }, E - { E - 'account_id': '3ff8dfcfdab13f4f55f46af32334ae4f140ac5699b3661c9968347de8bad97c0', E - 'blockheight': 104, E - 'created_index': 7, E 'credit_msat': 0, E 'debit_msat': 49970073000, E 'extra_tags': [], E 'output_count': 5, E 'output_msat': 99970073000, E 'primary_tag': 'channel_close', E 'spending_txid': 'a499419bfdce179727cffca45429151db47839b247d83f71837429f021ae6322', E 'utxo': 'c097ad8bde478396c961369b69c50a144fae3423f36af4554f3fb1dacfdff83f:0', E }, E { E 'account_id': 'external', E 'blockheight': 104, E - 'created_index': 8, E ? ^ E + 'created_index': 6, E ? ^ E 'credit_msat': 330000, E 'debit_msat': 0, E 'extra_tags': [], E 'originating_account': '3ff8dfcfdab13f4f55f46af32334ae4f140ac5699b3661c9968347de8bad97c0', E 'output_msat': 330000, E 'primary_tag': 'anchor', E 'utxo': 'a499419bfdce179727cffca45429151db47839b247d83f71837429f021ae6322:0', E }, E { E 'account_id': 'external', E 'blockheight': 104, E - 'created_index': 9, E ? ^ E + 'created_index': 7, E ? 
^ E 'credit_msat': 330000, E 'debit_msat': 0, E 'extra_tags': [], E 'originating_account': '3ff8dfcfdab13f4f55f46af32334ae4f140ac5699b3661c9968347de8bad97c0', E 'output_msat': 330000, E 'primary_tag': 'anchor', E 'utxo': 'a499419bfdce179727cffca45429151db47839b247d83f71837429f021ae6322:1', E }, E { E 'account_id': 'external', E 'blockheight': 104, E - 'created_index': 10, E ? ^^ E + 'created_index': 8, E ? ^ E 'credit_msat': 50000000000, E 'debit_msat': 0, E 'extra_tags': [], E 'originating_account': '3ff8dfcfdab13f4f55f46af32334ae4f140ac5699b3661c9968347de8bad97c0', E 'output_msat': 50000000000, E 'primary_tag': 'to_them', E 'utxo': 'a499419bfdce179727cffca45429151db47839b247d83f71837429f021ae6322:4', E }, E ] ``` Signed-off-by: Rusty Russell --- tests/test_coinmoves.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_coinmoves.py b/tests/test_coinmoves.py index 291f857ba6b3..927347d0ffa6 100644 --- a/tests/test_coinmoves.py +++ b/tests/test_coinmoves.py @@ -521,6 +521,7 @@ def test_coinmoves_unilateral_htlc_before_included(node_factory, bitcoind): check_chain_moves(l2, expected_chain2) close_info = l1.rpc.close(l2.info['id'], unilateraltimeout=1) + # Close, no anchor. bitcoind.generate_block(1, wait_for_mempool=1) # Make sure onchaind has digested it. @@ -714,7 +715,8 @@ def test_coinmoves_unilateral_htlc_timeout(node_factory, bitcoind): line = l1.daemon.wait_for_log("Creating anchor spend for local commit tx ") anchor_spend_txid = re.search(r'Creating anchor spend for local commit tx ([0-9a-f]{64})', line).group(1) - bitcoind.generate_block(1, wait_for_mempool=1) + # Close, and anchor. + bitcoind.generate_block(1, wait_for_mempool=2) sync_blockheight(bitcoind, [l1, l2]) # Make sure onchaind has digested it. @@ -1024,6 +1026,7 @@ def test_coinmoves_unilateral_htlc_dust(node_factory, bitcoind): check_chain_moves(l2, expected_chain2) close_info = l1.rpc.close(l2.info['id'], unilateraltimeout=1) + # Close, no anchor. bitcoind.generate_block(1, wait_for_mempool=1) sync_blockheight(bitcoind, [l1, l2]) @@ -1217,7 +1220,8 @@ def test_coinmoves_unilateral_htlc_fulfill(node_factory, bitcoind): line = l1.daemon.wait_for_log("Creating anchor spend for local commit tx ") anchor_spend_txid = re.search(r'Creating anchor spend for local commit tx ([0-9a-f]{64})', line).group(1) - bitcoind.generate_block(1, wait_for_mempool=1) + # Close, and anchor. + bitcoind.generate_block(1, wait_for_mempool=2) sync_blockheight(bitcoind, [l1, l2]) # Make sure onchaind has digested it. From 401c627d5939a887919ce5e9b2ae5557972c4c55 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 8 Jan 2026 11:49:56 +1030 Subject: [PATCH 38/39] pytest: fix coinmoves flake, where routing credit/debit can appear in either order. It's done as HTLCs finalize, but we can close the incoming HTLC as soon as we get the preimage, so that entire thing could finish before the outgoing HTLC. ``` > check_channel_moves(l1, expected_channel1) tests/test_coinmoves.py:307: ... 
E Full diff: E [ E { E 'account_id': '58d371ab100e0ea847a11c9550add273ef8531bc12bb51b0e30c8f833506a772', E 'created_index': 1, E 'credit_msat': 0, E 'debit_msat': 1000000, E 'fees_msat': 0, E 'group_id': 1318196858430961660, E 'part_id': 1, E 'payment_hash': '8da829ab29715106a4e767facc0b58776ae5bfc11c4e9dcda3063013e1ef8ef0', E 'primary_tag': 'invoice', E }, E { E 'account_id': '0b872506f67b363803cd85cf9ff6807ebc1dc8a4521aa191386b4c5366d490d7', E 'created_index': 2, E 'credit_msat': 100000, E 'debit_msat': 0, E 'fees_msat': 0, E 'primary_tag': 'pushed', E }, E { E + 'account_id': '0b872506f67b363803cd85cf9ff6807ebc1dc8a4521aa191386b4c5366d490d7', E + 'created_index': 3, E + 'credit_msat': 10000100001, E + 'debit_msat': 0, E + 'fees_msat': 100001, E + 'payment_hash': '0ebfa5387de5fd12c15089833b0193fb6007e9f494ec24d479e327a96ac8e8c0', E + 'primary_tag': 'routed', E + }, E + { E 'account_id': '58d371ab100e0ea847a11c9550add273ef8531bc12bb51b0e30c8f833506a772', E - 'created_index': 3, E ? ^ E + 'created_index': 4, E ? ^ E 'credit_msat': 0, E 'debit_msat': 10000000000, E 'fees_msat': 100001, E 'payment_hash': '0ebfa5387de5fd12c15089833b0193fb6007e9f494ec24d479e327a96ac8e8c0', E 'primary_tag': 'routed', E }, E - { E - 'account_id': '0b872506f67b363803cd85cf9ff6807ebc1dc8a4521aa191386b4c5366d490d7', E - 'created_index': 4, E - 'credit_msat': 10000100001, E - 'debit_msat': 0, E - 'fees_msat': 100001, E - 'payment_hash': '0ebfa5387de5fd12c15089833b0193fb6007e9f494ec24d479e327a96ac8e8c0', E - 'primary_tag': 'routed', E - }, E ] ``` --- tests/test_coinmoves.py | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tests/test_coinmoves.py b/tests/test_coinmoves.py index 927347d0ffa6..b3679fbbdf43 100644 --- a/tests/test_coinmoves.py +++ b/tests/test_coinmoves.py @@ -286,18 +286,35 @@ def test_coinmoves(node_factory, bitcoind): l3.rpc.xpay(inv['bolt11'], '10000000sat') # Make sure it's fully settled. wait_for(lambda: only_one(l3.rpc.listpeerchannels(l1.info['id'])['channels'])['htlcs'] == []) - expected_channel1 += [{'account_id': fundchannel['channel_id'], - 'credit_msat': 0, - 'debit_msat': 10000000000, - 'fees_msat': 100001, - 'payment_hash': inv['payment_hash'], - 'primary_tag': 'routed'}, - {'account_id': l3fundchannel['channel_id'], - 'credit_msat': 10000100001, - 'debit_msat': 0, - 'fees_msat': 100001, - 'payment_hash': inv['payment_hash'], - 'primary_tag': 'routed'}] + # These can actually go in either order, since we record them when HTLC is *fully* + # resolved. 
+ wait_for(lambda: len(l1.rpc.listchannelmoves()['channelmoves']) > len(expected_channel1)) + if l1.rpc.listchannelmoves()['channelmoves'][len(expected_channel1)]['credit_msat'] == 0: + expected_channel1 += [{'account_id': fundchannel['channel_id'], + 'credit_msat': 0, + 'debit_msat': 10000000000, + 'fees_msat': 100001, + 'payment_hash': inv['payment_hash'], + 'primary_tag': 'routed'}, + {'account_id': l3fundchannel['channel_id'], + 'credit_msat': 10000100001, + 'debit_msat': 0, + 'fees_msat': 100001, + 'payment_hash': inv['payment_hash'], + 'primary_tag': 'routed'}] + else: + expected_channel1 += [{'account_id': l3fundchannel['channel_id'], + 'credit_msat': 10000100001, + 'debit_msat': 0, + 'fees_msat': 100001, + 'payment_hash': inv['payment_hash'], + 'primary_tag': 'routed'}, + {'account_id': fundchannel['channel_id'], + 'credit_msat': 0, + 'debit_msat': 10000000000, + 'fees_msat': 100001, + 'payment_hash': inv['payment_hash'], + 'primary_tag': 'routed'}] expected_channel2 += [{'account_id': fundchannel['channel_id'], 'credit_msat': 10000000000, 'debit_msat': 0, From 7172033895ec43655aa96abe71f4e7b4995d4889 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 8 Jan 2026 17:20:32 +1030 Subject: [PATCH 39/39] pytest: fix bad gossip flake in test_buy_liquidity_ad_check_bookkeeping If we don't wait for the channel announcement to be processed, we can get bad gossip: ``` lightningd-2 2026-01-08T04:53:53.795Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-chan#2: Funding tx 2f41b1cc99dea016b7feddbeb1f31ae21b30f56d77ecb2ecb2b2f0faff4808fe depth 12 of 1 lightningd-2 2026-01-08T04:53:53.795Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-chan#2: Funding tx 2f41b1cc99dea016b7feddbeb1f31ae21b30f56d77ecb2ecb2b2f0faff4808fe confirmed, but peer in state ONCHAIN lightningd-2 2026-01-08T04:53:53.802Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-onchaind-chan#2: Got new message WIRE_ONCHAIND_DEPTH lightningd-2 2026-01-08T04:53:53.802Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-onchaind-chan#2: Sending 0 missing htlc messages lightningd-2 2026-01-08T04:53:53.802Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-onchaind-chan#2: FUNDING_TRANSACTION/FUNDING_OUTPUT->MUTUAL_CLOSE depth 6 lightningd-2 2026-01-08T04:53:53.802Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-onchaind-chan#2: billboard: All outputs resolved: waiting 94 more blocks before forgetting channel lightningd-2 2026-01-08T04:53:53.812Z DEBUG gossipd: gossmap_manage: new block, adding 104x1x1 to pending... lightningd-2 2026-01-08T04:53:53.812Z DEBUG gossipd: REPLY WIRE_GOSSIPD_NEW_BLOCKHEIGHT_REPLY with 0 fds lightningd-1 2026-01-08T04:53:53.819Z DEBUG 022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59-connectd: peer_in WIRE_WARNING lightningd-1 2026-01-08T04:53:53.820Z DEBUG 022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59-connectd: Received WIRE_WARNING: WARNING: channel_announcement: no unspent txout 104x1x1 lightningd-2 2026-01-08T04:53:53.820Z TRACE gossipd: channel_announcement: got reply for 104x1x1... 
lightningd-2 2026-01-08T04:53:53.820Z TRACE 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-gossipd: Bad gossip order: channel_announcement: no unspent txout 104x1x1 lightningd-2 2026-01-08T04:53:53.820Z DEBUG 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-connectd: peer_out WIRE_WARNING ``` Signed-off-by: Rusty Russell --- tests/test_opening.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_opening.py b/tests/test_opening.py index 2261bcd14663..b1bf9406b682 100644 --- a/tests/test_opening.py +++ b/tests/test_opening.py @@ -1976,6 +1976,11 @@ def test_buy_liquidity_ad_check_bookkeeping(node_factory, bitcoind): bitcoind.generate_block(2) l1.daemon.wait_for_log('to CHANNELD_NORMAL') + # Avoid bad gossip messages caused by channel announcements being + # processed after closing. + for n in (l1, l2): + wait_for(lambda: all([c['active'] for c in n.rpc.listchannels()['channels']])) + chan_id = first_channel_id(l1, l2) ev_tags = [e['tag'] for e in l1.rpc.bkpr_listaccountevents(chan_id)['events']] assert 'lease_fee' in ev_tags