From 5ddf2aaa52e9f8977e902e437dd2036604ec7097 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Sat, 3 Feb 2024 21:34:02 -0500 Subject: [PATCH 1/2] lib: Actually create the rcu and save it before using it In a non-controlled startup, the rcu data structures were not being created until after logging could happen. This is bad. Move it so that the rcu data structures are created first, before logging( HA! ) can happen. Signed-off-by: Donald Sharp --- lib/frr_pthread.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/frr_pthread.c b/lib/frr_pthread.c index f7e57136d607..1ffa5934aa5b 100644 --- a/lib/frr_pthread.c +++ b/lib/frr_pthread.c @@ -234,6 +234,10 @@ static void *frr_pthread_attr_non_controlled_start(void *arg) int frr_pthread_non_controlled_startup(pthread_t thread, const char *name, const char *os_name) { + struct rcu_thread *rcu_thread = rcu_thread_new(NULL); + + rcu_thread_start(rcu_thread); + struct frr_pthread_attr attr = { .start = frr_pthread_attr_non_controlled_start, .stop = frr_pthread_attr_default.stop, @@ -245,7 +249,7 @@ int frr_pthread_non_controlled_startup(pthread_t thread, const char *name, return -1; fpt->thread = thread; - fpt->rcu_thread = rcu_thread_new(NULL); + fpt->rcu_thread = rcu_thread; frr_pthread_inner(fpt); return 0; From 5bc104120b772e9cb236ff84de877e0c8435011c Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Sat, 3 Feb 2024 22:35:31 -0500 Subject: [PATCH 2/2] bgpd: Prevent rpki from hooking multiple times into rcu code As far as I can tell, the rpki code creates a pthread that is used to handle the i/o associated with talking to the remote rpki server. The problem that we are having is that the rpki code in FRR wants to behave like FRR code and use the zlog_XXX functions. These functions all depend on the RCU code. Which is a bit picky( and rightly so!!! ) about being started up properly and shut down properly. This commit is fixing the problem of shutdown. From playing with the rpki code, I was able to experimentally determine that the rpki_create_socket callback function can be called multiple times per pthread. Additionally I was able to clearly see multiple *different* pthreads actually be created. This leaves the possiblity that each time it is called it might be hooking into the RCU code. Which makes the rcu code unhappy on shutdown. Let's address the issue by checking to see if this pthread has already hooked into the RCU code or not. If so then don't do this again. Signed-off-by: Donald Sharp --- bgpd/bgp_rpki.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/bgpd/bgp_rpki.c b/bgpd/bgp_rpki.c index 923fc0b50609..22f78fb80a93 100644 --- a/bgpd/bgp_rpki.c +++ b/bgpd/bgp_rpki.c @@ -110,6 +110,8 @@ struct rpki_vrf { QOBJ_FIELDS; }; +static pthread_key_t rpki_pthread; + static struct rpki_vrf *find_rpki_vrf(const char *vrfname); static int bgp_rpki_vrf_update(struct vrf *vrf, bool enabled); static int bgp_rpki_write_vrf(struct vty *vty, struct vrf *vrf); @@ -887,6 +889,8 @@ static int bgp_rpki_fini(void) static int bgp_rpki_module_init(void) { + pthread_key_create(&rpki_pthread, NULL); + lrtr_set_alloc_functions(malloc_wrapper, realloc_wrapper, free_wrapper); hook_register(bgp_rpki_prefix_status, rpki_validate_prefix); @@ -1297,11 +1301,35 @@ static int rpki_create_socket(void *_cache) rpki_vrf = cache->rpki_vrf; - if (frr_pthread_non_controlled_startup(cache->rtr_socket->thread_id, + /* + * the rpki infrastructure can call this function + * multiple times per pthread. Why? I have absolutely + * no idea, and I am not sure I care a whole bunch. + * Why does this matter? Well when we attempt to + * hook this pthread into the rcu structure multiple + * times the rcu code asserts on shutdown. Clearly + * upset that you have rcu data associated with a pthread + * that has not been cleaned up. And frankly this is rightly so. + * + * At this point we know that this function is not + * called a million bajillion times so let's just + * add a bit of insurance by looking to see if + * some thread specific code has been set for this + * pthread. If not, hook into the rcu code and + * make things happy. + * + * IF YOU PUT A ZLOG_XXXX prior to the call into + * frr_pthread_non_controlled_startup in this function + * BGP WILL CRASH. You have been warned. + */ + if (!pthread_getspecific(rpki_pthread) && + frr_pthread_non_controlled_startup(cache->rtr_socket->thread_id, "RPKI RTRLIB socket", "rpki_create_socket") < 0) return -1; + pthread_setspecific(rpki_pthread, &rpki_pthread); + if (rpki_vrf->vrfname == NULL) vrf = vrf_lookup_by_id(VRF_DEFAULT); else