From 454f2e7a8bab8d94f181dbe353f3dac03301793e Mon Sep 17 00:00:00 2001 From: Narayanan Iyer Date: Tue, 9 Jul 2024 14:24:03 -0400 Subject: [PATCH] Fix rare double free in MUPIP BACKUP in case of MUPIP STOP Background ---------- * In internal testing, we noticed a rare failure in the `v51000/mu_bkup_stop` subtest where a `mupip backup` process that was sent a `SIGTERM` (by the test) ended up creating a core file due to ASAN assert failing on a double free. * Below are relevant details from the core file. ```c Core was generated by `mupip backup -online -dbg * ./49181_online1'. Program terminated with signal SIGSEGV, Segmentation fault. (gdb) where #0 ydb_os_signal_handler (sig=11, info=0x7fd09968c3f0, context=0x7fd09968c2c0) at sr_unix/ydb_os_signal_handler.c:57 #1 #2 ydb_os_signal_handler (sig=6, info=0x7fd09968caf0, context=0x7fd09968c9c0) at sr_unix/ydb_os_signal_handler.c:57 #3 #4 __pthread_kill_implementation (threadid=, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44 #5 __pthread_kill_internal (signo=6, threadid=) at ./nptl/pthread_kill.c:78 #6 __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26 #7 __GI_abort () at ./stdlib/abort.c:79 #8 __sanitizer::Abort () at ../../../../src/libsanitizer/sanitizer_common/sanitizer_posix_libcdep.cpp:143 #9 __sanitizer::Die () at ../../../../src/libsanitizer/sanitizer_common/sanitizer_termination.cpp:58 #10 __asan::ScopedInErrorReport::~ScopedInErrorReport (this=0x7ffda6de6ebe, __in_chrg=) at ../../../../src/libsanitizer/asan/asan_report.cpp:190 #11 __asan::ReportDoubleFree (addr=140533757257728, free_stack=) at ../../../../src/libsanitizer/asan/asan_report.cpp:224 #12 __asan::Allocator::ReportInvalidFree (this=, stack=0x7ffda6de79f0, chunk_state=, ptr=0x7fd090ae2800) at ../../../../src/libsanitizer/asan/asan_allocator.cpp:757 #13 __interceptor_free (ptr=0x7fd090ae2800) at ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:53 #14 system_free (addr=0x7fd090ae2800) at sr_port/gtm_malloc_src.h:1485 #15 gtm_free_main (addr=0x7fd090ae2800, stack_level=1) at sr_port/gtm_malloc_src.h:854 #16 gtm_free (addr=0x7fd090ae2800) at sr_port/gtm_malloc_src.h:1501 #17 mubclnup (curr_ptr=0x0, stage=need_to_del_tempfile) at sr_port/mubclnup.c:103 #18 mupip_backup_call_on_signal () at sr_port/mupip_backup.c:208 #19 signal_exit_handler (exit_handler_name=0x7fd097f1dda0 "deferred_exit_handler", sig=15, info=0x7fd098480fd8 , context=0x7fd098481058 , is_deferred_exit=1) at sr_unix/signal_exit_handler.c:67 #20 deferred_exit_handler () at sr_unix/deferred_exit_handler.c:120 #21 deferred_signal_handler () at sr_port/deferred_signal_handler.c:95 #22 system_free (addr=0x7fd090ae2800) at sr_port/gtm_malloc_src.h:1486 #23 gtm_free_main (addr=0x7fd090ae2800, stack_level=1) at sr_port/gtm_malloc_src.h:854 #24 gtm_free (addr=0x7fd090ae2800) at sr_port/gtm_malloc_src.h:1501 #25 mubclnup (curr_ptr=0x0, stage=need_to_del_tempfile) at sr_port/mubclnup.c:103 #26 mupip_backup () at sr_port/mupip_backup.c:1585 #27 mupip_main (argc=6, argv=0x7ffda6deef18, envp=0x7ffda6deef50) at sr_unix/mupip_main.c:130 #28 dlopen_libyottadb (argc=6, argv=0x7ffda6deef18, envp=0x7ffda6deef50, main_func=0x55af49fd9020 "mupip_main") at sr_unix/dlopen_libyottadb.c:151 #29 main (argc=6, argv=0x7ffda6deef18, envp=0x7ffda6deef50) at sr_unix/mupip.c:21 (gdb) f 25 #25 mubclnup (curr_ptr=0x0, stage=need_to_del_tempfile) at sr_port/mubclnup.c:103 103 free(ptr->backup_hdr); (gdb) f 17 #17 mubclnup (curr_ptr=0x0, stage=need_to_del_tempfile) at sr_port/mubclnup.c:103 103 free(ptr->backup_hdr); (gdb) down #24 gtm_free (addr=0x7fd090ae2800) at sr_port/gtm_malloc_src.h:1501 1501 gtm_free_main(addr, TAIL_CALL_LEVEL); (gdb) down #23 gtm_free_main (addr=0x7fd090ae2800, stack_level=1) at sr_port/gtm_malloc_src.h:854 854 system_free(addr); (gdb) down #22 system_free (addr=0x7fd090ae2800) at sr_port/gtm_malloc_src.h:1486 1486 ENABLE_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); (gdb) list 1481 { 1482 intrpt_state_t prev_intrpt_state; 1483 1484 DEFER_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); 1485 free(addr); 1486 ENABLE_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); 1487 return; 1488 } ``` Issue ----- * We did a `free(ptr->backup_hdr)` at line 103. And that in turn ended up using the system `free()` function because the test framework had randomly set the `gtmdbglvl` env var to a value of `0x80000000`. * So at line 1485 above, the system free finished but at line 1486 we noticed the SIGTERM that was deferred and so decided to handle it. But the `ptr->backup_hdr` variable was still set to a non-NULL value so as part of the deferred exit handler, we tried to free this again resulting in the double free. Fix --- * The fix is to note `ptr->backup_hdr` in a local variable and clear the former and then attempting the `free()` on the local variable. This way if we decide to do deferred exit handling after the `free()` occurred, we will notice a NULL value of `ptr->backup_hdr` and so avoid the double free. Notes ----- * This is considered a too rare a race condition to be encountered in practice and so it is expected to not be noticed by a user. Therefore no YDB issue is created for this. --- sr_port/mubclnup.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sr_port/mubclnup.c b/sr_port/mubclnup.c index 280ff6207..c61b1a6d4 100644 --- a/sr_port/mubclnup.c +++ b/sr_port/mubclnup.c @@ -3,7 +3,7 @@ * Copyright (c) 2001-2021 Fidelity National Information * * Services, Inc. and/or its subsidiaries. All rights reserved. * * * - * Copyright (c) 2023 YottaDB LLC and/or its subsidiaries. * + * Copyright (c) 2023-2024 YottaDB LLC and/or its subsidiaries. * * All rights reserved. * * * * This source code contains the intellectual property * @@ -100,7 +100,16 @@ void mubclnup(backup_reg_list *curr_ptr, clnup_stage stage) || (give_up_after_create_tempfile == ptr->not_this_time)); if (give_up_before_create_tempfile != ptr->not_this_time) { - free(ptr->backup_hdr); + /* Need to free "ptr->backup_hdr". Use temporary variable to avoid double-free in + * case we get a signal (e.g. SIGTERM during the "free") as the deferred signal handler + * would then notice a non-NULL value of "ptr->backup_hdr" and try to free it again + * even though it was already freed before the signal/interrupt. + */ + sgmnt_data_ptr_t tmp_hdr; + + tmp_hdr = ptr->backup_hdr; + ptr->backup_hdr = NULL; + free(tmp_hdr); if (online) { /* Stop temporary file from growing if we made it active */ if (keep_going == ptr->not_this_time)