Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lightningd: make shutdown smoother and safer, PART II (again) #4959

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion channeld/channeld.c
Original file line number Diff line number Diff line change
Expand Up @@ -4077,7 +4077,7 @@ int main(int argc, char *argv[])

expired = timers_expire(&peer->timers, now);
if (expired) {
timer_expired(peer, expired);
timer_expired(expired);
continue;
}

Expand Down
2 changes: 1 addition & 1 deletion common/timeout.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ void *reltimer_arg(struct oneshot *t)
return t->arg;
}

void timer_expired(tal_t *ctx, struct timer *timer)
void timer_expired(struct timer *timer)
{
struct oneshot *t = container_of(timer, struct oneshot, timer);

Expand Down
2 changes: 1 addition & 1 deletion common/timeout.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ struct oneshot *new_reltimer_(struct timers *timers,
/* Get timer arg. */
void *reltimer_arg(struct oneshot *t);

void timer_expired(tal_t *ctx, struct timer *timer);
void timer_expired(struct timer *timer);

#endif /* LIGHTNING_COMMON_TIMEOUT_H */
2 changes: 1 addition & 1 deletion connectd/connectd.c
Original file line number Diff line number Diff line change
Expand Up @@ -2075,7 +2075,7 @@ int main(int argc, char *argv[])
for (;;) {
struct timer *expired;
io_loop(&daemon->timers, &expired);
timer_expired(daemon, expired);
timer_expired(expired);
}
}

Expand Down
2 changes: 1 addition & 1 deletion gossipd/gossipd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1548,7 +1548,7 @@ int main(int argc, char *argv[])
struct timer *expired = NULL;
io_loop(&daemon->timers, &expired);

timer_expired(daemon, expired);
timer_expired(expired);
}
}

Expand Down
2 changes: 1 addition & 1 deletion gossipd/test/run-onion_message.c
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ void status_setup_async(struct daemon_conn *master UNNEEDED)
void subdaemon_setup(int argc UNNEEDED, char *argv[])
{ fprintf(stderr, "subdaemon_setup called!\n"); abort(); }
/* Generated stub for timer_expired */
void timer_expired(tal_t *ctx UNNEEDED, struct timer *timer UNNEEDED)
void timer_expired(struct timer *timer UNNEEDED)
{ fprintf(stderr, "timer_expired called!\n"); abort(); }
/* Generated stub for towire_gossipd_addgossip_reply */
u8 *towire_gossipd_addgossip_reply(const tal_t *ctx UNNEEDED, const wirestring *err UNNEEDED)
Expand Down
4 changes: 2 additions & 2 deletions gossipd/test/run-txout_failure.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ int main(int argc, char *argv[])
t = timers_expire(&timers, timemono_add(time_mono(),
time_from_sec(3601)));
assert(t);
timer_expired(NULL, t);
timer_expired(t);

/* Still there, just old. Refresh scid1 */
assert(rstate->num_txout_failures == 0);
Expand All @@ -172,7 +172,7 @@ int main(int argc, char *argv[])
t = timers_expire(&timers, timemono_add(time_mono(),
time_from_sec(3601)));
assert(t);
timer_expired(NULL, t);
timer_expired(t);

assert(rstate->num_txout_failures == 0);
assert(in_txout_failures(rstate, &scid1));
Expand Down
2 changes: 1 addition & 1 deletion lightningd/io_loop_with_timers.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void *io_loop_with_timers(struct lightningd *ld)
/* This routine is legal in early startup, too. */
if (ld->wallet)
db_begin_transaction(ld->wallet->db);
timer_expired(ld, expired);
timer_expired(expired);
if (ld->wallet)
db_commit_transaction(ld->wallet->db);
}
Expand Down
3 changes: 1 addition & 2 deletions lightningd/lightningd.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ static struct lightningd *new_lightningd(const tal_t *ctx)
*/
ld->plugins = plugins_new(ld, ld->log_book, ld);
ld->plugins->startup = true;
ld->plugins->shutdown = false;

/*~ This is set when a JSON RPC command comes in to shut us down. */
ld->stop_conn = NULL;
Expand Down Expand Up @@ -1197,7 +1196,7 @@ int main(int argc, char *argv[])
remove_sigchild_handler(sigchld_conn);
shutdown_subdaemons(ld);

/* Tell plugins we're shutting down, closes the db for write access. */
/* Tell plugins we're shutting down, closes the db. */
shutdown_plugins(ld);

/* Cleanup JSON RPC separately: destructors assume some list_head * in ld */
Expand Down
31 changes: 10 additions & 21 deletions lightningd/plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ struct plugins *plugins_new(const tal_t *ctx, struct log_book *log_book,
p->startup = true;
p->plugin_cmds = tal_arr(p, struct plugin_command *, 0);
p->blacklist = tal_arr(p, const char *, 0);
p->shutdown = false;
p->plugin_idx = 0;
#if DEVELOPER
p->dev_builtin_plugins_unimportant = false;
Expand Down Expand Up @@ -213,7 +212,7 @@ static void destroy_plugin(struct plugin *p)
check_plugins_manifests(p->plugins);

/* Daemon shutdown overrules plugin's importance; aborts init checks */
if (p->plugins->shutdown) {
if (p->plugins->ld->state == LD_STATE_SHUTDOWN) {
/* But return if this was the last plugin! */
if (list_empty(&p->plugins->plugins))
io_break(destroy_plugin);
Expand Down Expand Up @@ -786,7 +785,7 @@ static void plugin_conn_finish(struct io_conn *conn, struct plugin *plugin)
{
/* This is expected at shutdown of course. */
plugin_kill(plugin,
plugin->plugins->shutdown
plugin->plugins->ld->state == LD_STATE_SHUTDOWN
? LOG_DBG : LOG_INFORM,
"exited %s", state_desc(plugin));
}
Expand Down Expand Up @@ -2088,18 +2087,12 @@ void plugins_set_builtin_plugins_dir(struct plugins *plugins,
NULL, NULL);
}

static void plugin_shutdown_timeout(struct lightningd *ld)
{
io_break(plugin_shutdown_timeout);
}

void shutdown_plugins(struct lightningd *ld)
{
struct plugin *p, *next;

/* Don't complain about important plugins vanishing and
* crash any attempt to write to db. */
ld->plugins->shutdown = true;
/* The next io_loop does not need db access, close it. */
ld->wallet->db = tal_free(ld->wallet->db);

/* Tell them all to shutdown; if they care. */
list_for_each_safe(&ld->plugins->plugins, p, next, list) {
Expand All @@ -2110,20 +2103,16 @@ void shutdown_plugins(struct lightningd *ld)

/* If anyone was interested in shutdown, give them time. */
if (!list_empty(&ld->plugins->plugins)) {
struct timers *orig_timers, *timer;
struct timers *timer;
struct timer *expired;

/* 30 seconds should do it, use a clean timers struct */
orig_timers = ld->timers;
timer = tal(NULL, struct timers);
timers_init(timer, time_mono());
new_reltimer(timer, timer, time_from_sec(30),
plugin_shutdown_timeout, ld);

ld->timers = timer;
void *ret = io_loop_with_timers(ld);
assert(ret == plugin_shutdown_timeout || ret == destroy_plugin);
ld->timers = orig_timers;
tal_free(timer);
new_reltimer(timer, timer, time_from_sec(30), NULL, NULL);

void *ret = io_loop(timer, &expired);
assert(ret == NULL || ret == destroy_plugin);

/* Report and free remaining plugins. */
while (!list_empty(&ld->plugins->plugins)) {
Expand Down
3 changes: 0 additions & 3 deletions lightningd/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,6 @@ struct plugins {
/* Blacklist of plugins from --disable-plugin */
const char **blacklist;

/* Whether we are shutting down, blocks db write's */
bool shutdown;

/* Index to show what order they were added in */
u64 plugin_idx;

Expand Down
1 change: 0 additions & 1 deletion lightningd/plugin_hook.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,6 @@ void plugin_hook_db_sync(struct db *db)
size_t i;
size_t num_hooks;

db_check_plugins_not_shutdown(db);
const char **changes = db_changes(db);
num_hooks = tal_count(hook->hooks);
if (num_hooks == 0)
Expand Down
2 changes: 1 addition & 1 deletion lightningd/test/run-find_my_abspath.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ void shutdown_plugins(struct lightningd *ld UNNEEDED)
void stop_topology(struct chain_topology *topo UNNEEDED)
{ fprintf(stderr, "stop_topology called!\n"); abort(); }
/* Generated stub for timer_expired */
void timer_expired(tal_t *ctx UNNEEDED, struct timer *timer UNNEEDED)
void timer_expired(struct timer *timer UNNEEDED)
{ fprintf(stderr, "timer_expired called!\n"); abort(); }
/* Generated stub for towire_bigsize */
void towire_bigsize(u8 **pptr UNNEEDED, const bigsize_t val UNNEEDED)
Expand Down
8 changes: 2 additions & 6 deletions tests/plugins/misc_notifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""

from pyln.client import Plugin, RpcError
from time import sleep
import sys
import pytest

Expand All @@ -30,25 +29,22 @@ def channel_state_changed(plugin, channel_state_changed, **kwargs):

@plugin.subscribe("shutdown")
def shutdown(plugin, **kwargs):
plugin.log("received shutdown notification")

# 'shutdown' notification can be called in two ways, from `plugin stop` or from
# lightningd's shutdown loop, we test which one by making `getinfo` call
try:
plugin.rpc.getinfo()
plugin.rpc.datastore(key='test', string='Allowed', mode="create-or-append")
plugin.log("datastore success")
plugin.log("via plugin stop, datastore success")
except RpcError as e:
if e.error == {'code': -5, 'message': 'lightningd is shutting down'}:
# JSON RPC is disabled by now, but can do logging
with pytest.raises(RpcError, match=r'-5.*lightningd is shutting down'):
plugin.rpc.datastore(key='test', string='Not allowed', mode="create-or-append")
plugin.log("datastore failed")
plugin.log("via lightningd shutdown, datastore failed")
else:
raise

plugin.log("delaying shutdown with 5s")
sleep(5)
sys.exit(0)


Expand Down
4 changes: 4 additions & 0 deletions tests/test_pay.py
Original file line number Diff line number Diff line change
Expand Up @@ -4352,6 +4352,10 @@ def test_fetchinvoice_3hop(node_factory, bitcoind):

l1.rpc.call('fetchinvoice', {'offer': offer1['bolt12']})

# Make sure l4 handled both onions before shutting down
l1.daemon.wait_for_log(r'plugin-fetchinvoice: Received modern onion .*obs2\\":false')
l1.daemon.wait_for_log(r'plugin-fetchinvoice: No match for modern onion.*obs2\\":true')

# Test with obsolete onion.
l4.stop()
l4.daemon.opts['dev-no-modern-onion'] = None
Expand Down
53 changes: 16 additions & 37 deletions tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -1084,38 +1084,6 @@ def test_htlc_accepted_hook_direct_restart(node_factory, executor):
f1.result()


def test_htlc_accepted_hook_shutdown(node_factory, executor):
"""Hooks of important-plugins are never removed and these plugins are kept
alive until after subdaemons are shutdown. Also tests shutdown notification.
"""
l1, l2 = node_factory.line_graph(2, opts=[
{'may_reconnect': True, 'log-level': 'info'},
{'may_reconnect': True, 'log-level': 'debug',
'plugin': [os.path.join(os.getcwd(), 'tests/plugins/misc_notifications.py')],
'important-plugin': [os.path.join(os.getcwd(), 'tests/plugins/fail_htlcs.py')]}
])

l2.rpc.plugin_stop(os.path.join(os.getcwd(), 'tests/plugins/misc_notifications.py'))
l2.daemon.wait_for_log(r'datastore success')
l2.rpc.plugin_start(os.path.join(os.getcwd(), 'tests/plugins/misc_notifications.py'))

i1 = l2.rpc.invoice(msatoshi=1000, label="inv1", description="desc")['bolt11']

# fail_htlcs.py makes payment fail
with pytest.raises(RpcError):
l1.rpc.pay(i1)

f_stop = executor.submit(l2.rpc.stop)
l2.daemon.wait_for_log(r'plugin-misc_notifications.py: delaying shutdown with 5s')

# Should still fail htlc while shutting down
with pytest.raises(RpcError):
l1.rpc.pay(i1)

l2.daemon.wait_for_log(r'datastore failed')
f_stop.result()


@pytest.mark.developer("without DEVELOPER=1, gossip v slow")
def test_htlc_accepted_hook_forward_restart(node_factory, executor):
"""l2 restarts while it is pondering what to do with an HTLC.
Expand Down Expand Up @@ -2593,22 +2561,33 @@ def init(options, configuration, plugin):


def test_plugin_shutdown(node_factory):
"""test 'shutdown' notification"""
"""test 'shutdown' notifications, via `plugin stop` or via `stop`"""

p = os.path.join(os.getcwd(), "tests/plugins/test_libplugin")
l1 = node_factory.get_node(options={'plugin': p})
p2 = os.path.join(os.getcwd(), 'tests/plugins/misc_notifications.py')
l1 = node_factory.get_node(options={'plugin': [p, p2]})

l1.rpc.plugin_stop(p)
l1.daemon.wait_for_log(r"test_libplugin: shutdown called")
# FIXME: clean this up!
l1.daemon.wait_for_log(r"test_libplugin: Killing plugin: exited during normal operation")

# Now try timeout.
# Via `plugin stop` it can make RPC calls before it (self-)terminates
l1.rpc.plugin_stop(p2)
l1.daemon.wait_for_log(r'misc_notifications.py: via plugin stop, datastore success')
l1.rpc.plugin_start(p2)

# Now try timeout via `plugin stop`
l1.rpc.plugin_start(p, dont_shutdown=True)
l1.rpc.plugin_stop(p)
l1.daemon.wait_for_log(r"test_libplugin: shutdown called")
l1.daemon.wait_for_log(r"test_libplugin: Timeout on shutdown: killing anyway")

# Now, should also shutdown on finish.
l1.rpc.plugin_start(p)
# Now, should also shutdown or timeout on finish, RPC calls then fail with error code -5
l1.rpc.plugin_start(p, dont_shutdown=True)
needle = l1.daemon.logsearch_start
l1.rpc.stop()
l1.daemon.wait_for_log(r"test_libplugin: shutdown called")
l1.daemon.logsearch_start = needle # we don't know what comes first
l1.daemon.is_in_log(r'misc_notifications.py: via lightningd shutdown, datastore failed')
l1.daemon.is_in_log(r'test_libplugin: failed to self-terminate in time, killing.')
SimonVrouwe marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 0 additions & 6 deletions wallet/db.c
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,6 @@ struct db *db_setup(const tal_t *ctx, struct lightningd *ld,
struct db *db = db_open(ctx, ld->wallet_dsn);
bool migrated;
db->log = new_log(db, ld->log_book, NULL, "database");
db->plugins_shutdown = &ld->plugins->shutdown;

db_begin_transaction(db);

Expand Down Expand Up @@ -2351,11 +2350,6 @@ void db_changes_add(struct db_stmt *stmt, const char * expanded)
tal_arr_expand(&db->changes, tal_strdup(db->changes, expanded));
}

void db_check_plugins_not_shutdown(struct db *db)
{
assert(!*db->plugins_shutdown);
}

const char **db_changes(struct db *db)
{
return db->changes;
Expand Down
3 changes: 0 additions & 3 deletions wallet/db.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,6 @@ struct db_stmt *db_prepare_v2_(const char *location, struct db *db,
#define db_prepare_v2(db,query) \
db_prepare_v2_(__FILE__ ":" stringify(__LINE__), db, query)

/* Check that plugins are not shutting down when calling db_write hook */
void db_check_plugins_not_shutdown(struct db *db);

/**
* Access pending changes that have been added to the current transaction.
*/
Expand Down
3 changes: 0 additions & 3 deletions wallet/db_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ struct db {
* Used to bump the data_version in the DB.*/
bool dirty;

/* Only needed to check shutdown state of plugins */
const bool *plugins_shutdown;

/* The current DB version we expect to update if changes are
* committed. */
u32 data_version;
Expand Down