diff --git a/connectd/connectd.c b/connectd/connectd.c index 07a952732188..48d027e54211 100644 --- a/connectd/connectd.c +++ b/connectd/connectd.c @@ -310,6 +310,7 @@ static struct peer *new_peer(struct daemon *daemon, peer->ready_to_die = false; peer->active = false; peer->peer_outq = msg_queue_new(peer, false); + peer->last_recv_time = time_now(); #if DEVELOPER peer->dev_writes_enabled = NULL; diff --git a/connectd/connectd.h b/connectd/connectd.h index 9af5f815d7ca..db5ec2022120 100644 --- a/connectd/connectd.h +++ b/connectd/connectd.h @@ -87,6 +87,9 @@ struct peer { /* Random ping timer, to detect dead connections. */ struct oneshot *ping_timer; + /* Last time we received traffic */ + struct timeabs last_recv_time; + #if DEVELOPER bool dev_read_enabled; /* If non-NULL, this counts down; 0 means disable */ diff --git a/connectd/multiplex.c b/connectd/multiplex.c index e99c78e6e214..0e52671654fc 100644 --- a/connectd/multiplex.c +++ b/connectd/multiplex.c @@ -423,16 +423,23 @@ static void set_ping_timer(struct peer *peer) static void send_ping(struct peer *peer) { - /* Already have a ping in flight? */ - if (peer->expecting_pong != PONG_UNEXPECTED) { - status_peer_debug(&peer->id, "Last ping unreturned: hanging up"); - if (peer->to_peer) - io_close(peer->to_peer); - return; + /* If it's still sending us traffic, maybe ping reply is backed up? + * That's OK, ping is just to make sure it's still alive, and clearly + * it is. */ + if (time_before(peer->last_recv_time, + timeabs_sub(time_now(), time_from_sec(60)))) { + /* Already have a ping in flight? */ + if (peer->expecting_pong != PONG_UNEXPECTED) { + status_peer_debug(&peer->id, "Last ping unreturned: hanging up"); + if (peer->to_peer) + io_close(peer->to_peer); + return; + } + + inject_peer_msg(peer, take(make_ping(NULL, 1, 0))); + peer->expecting_pong = PONG_EXPECTED_PROBING; } - inject_peer_msg(peer, take(make_ping(NULL, 1, 0))); - peer->expecting_pong = PONG_EXPECTED_PROBING; set_ping_timer(peer); } @@ -991,6 +998,9 @@ static struct io_plan *read_body_from_peer_done(struct io_conn *peer_conn, if (!IFDEV(peer->dev_read_enabled, true)) return read_hdr_from_peer(peer_conn, peer); + /* We got something! */ + peer->last_recv_time = time_now(); + /* Don't process packets while we're closing */ if (peer->ready_to_die) return read_hdr_from_peer(peer_conn, peer); diff --git a/tests/test_connection.py b/tests/test_connection.py index feed3ed939e7..b6cf02b7cd50 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -3848,15 +3848,18 @@ def test_ping_timeout(node_factory): l1, l2 = node_factory.get_nodes(2, opts=[{'dev-no-reconnect': None, 'disconnect': l1_disconnects}, - {}]) + {'dev-no-ping-timer': None}]) l1.rpc.connect(l2.info['id'], 'localhost', l2.port) - # Takes 15-45 seconds, then another to try second ping - # Because of ping timer randomness we don't know which side hangs up first - wait_for(lambda: l1.rpc.listpeers(l2.info['id'])['peers'] == [], - timeout=45 + 45 + 5) - wait_for(lambda: (l1.daemon.is_in_log('Last ping unreturned: hanging up') - or l2.daemon.is_in_log('Last ping unreturned: hanging up'))) + # This can take 10 seconds (dev-fast-gossip means timer fires every 5 seconds) + l1.daemon.wait_for_log('seeker: startup peer finished', timeout=15) + # Ping timers runs at 15-45 seconds, *but* only fires if also 60 seconds + # after previous traffic. + l1.daemon.wait_for_log('dev_disconnect: xWIRE_PING', timeout=60 + 45 + 5) + + # Next pign will cause hangup + l1.daemon.wait_for_log('Last ping unreturned: hanging up', timeout=45 + 5) + wait_for(lambda: l1.rpc.listpeers(l2.info['id'])['peers'] == []) @pytest.mark.openchannel('v1') diff --git a/tests/test_gossip.py b/tests/test_gossip.py index 1c94e185e4fc..aa67ffe76322 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -1281,8 +1281,7 @@ def test_node_reannounce(node_factory, bitcoind, chainparams): filters=['0109', '0107', '0102', '0100', '0012']) # May send its own announcement *twice*, since it always spams us. - msgs2 = list(set(msgs2)) - assert msgs == msgs2 + assert set(msgs) == set(msgs2) # Won't have queued up another one, either. assert not l1.daemon.is_in_log('node_announcement: delaying') @@ -1308,8 +1307,7 @@ def test_node_reannounce(node_factory, bitcoind, chainparams): # channel_announcement and channel_updates. # And pings. filters=['0109', '0107', '0102', '0100', '0012']) - msgs2 = list(set(msgs2)) - assert msgs != msgs2 + assert set(msgs) != set(msgs2) def test_gossipwith(node_factory):