diff --git a/base/locks.jl b/base/locks.jl index bc7aeb61881bd..b9a33706f7ec8 100644 --- a/base/locks.jl +++ b/base/locks.jl @@ -23,6 +23,11 @@ function lock!(l::TatasLock) end end ccall(:jl_cpu_pause, Void, ()) + # Temporary solution before we have gc transition support in codegen. + # This could mess up gc state when we add codegen support. + # Use these as a safe point + gc_state = ccall(:jl_gc_safe_enter, Int8, ()) + ccall(:jl_gc_safe_leave, Void, (Int8,), gc_state) end end @@ -61,6 +66,11 @@ function lock!(l::RecursiveTatasLock) end end ccall(:jl_cpu_pause, Void, ()) + # Temporary solution before we have gc transition support in codegen. + # This could mess up gc state when we add codegen support. + # Use these as a safe point + gc_state = ccall(:jl_gc_safe_enter, Int8, ()) + ccall(:jl_gc_safe_leave, Void, (Int8,), gc_state) end end @@ -116,7 +126,11 @@ function lock!(m::Mutex) if m.ownertid == threadid() return 0 end + # Temporary solution before we have gc transition support in codegen. + # This could mess up gc state when we add codegen support. + gc_state = ccall(:jl_gc_safe_enter, Int8, ()) ccall(:uv_mutex_lock, Void, (Ptr{Void},), m.handle) + ccall(:jl_gc_safe_leave, Void, (Int8,), gc_state) m.ownertid = threadid() return 0 end diff --git a/doc/devdocs/debuggingtips.rst b/doc/devdocs/debuggingtips.rst index 70242120e7e4e..9cacd33090faa 100644 --- a/doc/devdocs/debuggingtips.rst +++ b/doc/devdocs/debuggingtips.rst @@ -24,7 +24,7 @@ Similarly, if you're debugging some of julia's internals (e.g., This is a good way to circumvent problems that arise from the order in which julia's output streams are initialized. -Julia's flisp interpreter uses ``value_t*`` objects; these can be displayed +Julia's flisp interpreter uses ``value_t`` objects; these can be displayed with ``call fl_print(ios_stdout, obj)``. Useful Julia variables for Inspecting @@ -74,7 +74,7 @@ Another useful frame is ``to_function(jl_lambda_info_t *li, bool cstyle)``. 
The #2 0x00007ffff7928bf7 in to_function (li=0x2812060, cstyle=false) at codegen.cpp:584 584 abort(); - (gdb) p jl_(jl_uncompress_ast(li, li.ast)) + (gdb) p jl_(jl_uncompress_ast(li, li->ast)) Inserting breakpoints upon certain conditions --------------------------------------------- @@ -91,10 +91,31 @@ Calling a particular method :: - (gdb) break jl_apply_generic if strcmp(F->name->name, "method_to_break")==0 + (gdb) break jl_apply_generic if strcmp((char*)(jl_symbol_name)(jl_gf_mtable(F)->name), "method_to_break")==0 Since this function is used for every call, you will make everything 1000x slower if you do this. +Dealing with signals +-------------------- + +Julia requires a few signals to function properly. The profiler uses ``SIGUSR2`` +for sampling and the garbage collector uses ``SIGSEGV`` for thread +synchronization. If you are debugging some code that uses the profiler or +multiple julia threads, you may want to let the debugger ignore these signals +since they can be triggered very often during normal operations. The command to +do this in GDB is (replace ``SIGSEGV`` with ``SIGUSR2`` or other signals you +want to ignore):: + + (gdb) handle SIGSEGV noprint nostop pass + +The corresponding LLDB command is (after the process is started):: + + (lldb) pro hand -p true -s false -n false SIGSEGV + +If you are debugging a segfault with threaded code, you can set a breakpoint on +``jl_critical_error`` (``sigdie_handler`` should also work on Linux and BSD) in +order to only catch the actual segfault rather than the GC synchronization points.
+ Debugging during julia's build process (bootstrap) -------------------------------------------------- diff --git a/src/ast.c b/src/ast.c index 02f548d93f8dd..cb02fd4cf59a6 100644 --- a/src/ast.c +++ b/src/ast.c @@ -201,7 +201,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_lambda_info_t *lam); static jl_value_t *scm_to_julia(value_t e, int expronly) { - int en = jl_gc_enable(0); + int en = jl_gc_enable(0); // Might GC jl_value_t *v; JL_TRY { v = scm_to_julia_(e, expronly); diff --git a/src/builtins.c b/src/builtins.c index d50f7ce29bbb3..0f4b733019fc8 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -194,6 +194,9 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh) JL_SIGATOMIC_BEGIN(); eh->prev = jl_current_task->eh; eh->gcstack = jl_pgcstack; +#ifdef JULIA_ENABLE_THREADING + eh->gc_state = jl_get_ptls_states()->gc_state; +#endif jl_current_task->eh = eh; // TODO: this should really go after setjmp(). see comment in // ctx_switch in task.c. diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index d639705ea90c1..0da60cab73bb3 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -194,7 +194,9 @@ class JuliaJITEventListener: public JITEventListener virtual void NotifyFunctionEmitted(const Function &F, void *Code, size_t Size, const EmittedFunctionDetails &Details) { + int8_t gc_state = jl_gc_safe_enter(); uv_rwlock_wrlock(&threadsafe); + jl_gc_safe_leave(gc_state); #if defined(_OS_WINDOWS_) create_PRUNTIME_FUNCTION((uint8_t*)Code, Size, F.getName(), (uint8_t*)Code, Size, NULL); #endif @@ -205,7 +207,9 @@ class JuliaJITEventListener: public JITEventListener std::map& getMap() { + int8_t gc_state = jl_gc_safe_enter(); uv_rwlock_rdlock(&threadsafe); + jl_gc_safe_leave(gc_state); return info; } #endif // ifndef USE_MCJIT @@ -225,7 +229,9 @@ class JuliaJITEventListener: public JITEventListener virtual void NotifyObjectEmitted(const ObjectImage &obj) #endif { + int8_t gc_state = jl_gc_safe_enter(); uv_rwlock_wrlock(&threadsafe); + 
jl_gc_safe_leave(gc_state); #ifdef LLVM36 object::section_iterator Section = obj.section_begin(); object::section_iterator EndSection = obj.section_end(); @@ -458,7 +464,9 @@ class JuliaJITEventListener: public JITEventListener std::map& getObjectMap() { + int8_t gc_state = jl_gc_safe_enter(); uv_rwlock_rdlock(&threadsafe); + jl_gc_safe_leave(gc_state); return objectmap; } #endif // USE_MCJIT @@ -477,6 +485,8 @@ JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener, extern "C" char *jl_demangle(const char *name) { + // This function is not allowed to reference any TLS variables since + // it can be called from an unmanaged thread on OSX. const char *start = name + 6; const char *end = name + strlen(name); char *ret; @@ -508,6 +518,8 @@ void lookup_pointer(DIContext *context, char **name, size_t *line, char **inlinedat_file, size_t pointer, int demangle, int *fromC) { + // This function is not allowed to reference any TLS variables since + // it can be called from an unmanaged thread on OSX. DILineInfo info, topinfo; DIInliningInfo inlineinfo; if (demangle && *name != NULL) { @@ -629,6 +641,8 @@ void jl_getDylibFunctionInfo(char **name, char **filename, size_t *line, char** inlinedat_file, size_t *inlinedat_line, size_t pointer, int *fromC, int skipC, int skipInline) { + // This function is not allowed to reference any TLS variables since + // it can be called from an unmanaged thread on OSX. #ifdef _OS_WINDOWS_ IMAGEHLP_MODULE64 ModuleInfo; BOOL isvalid; @@ -838,6 +852,8 @@ void jl_getFunctionInfo(char **name, char **filename, size_t *line, char **inlinedat_file, size_t *inlinedat_line, size_t pointer, int *fromC, int skipC, int skipInline) { + // This function is not allowed to reference any TLS variables since + // it can be called from an unmanaged thread on OSX. 
*name = NULL; *line = -1; *filename = NULL; diff --git a/src/dump.c b/src/dump.c index e11bff55ac73a..6995c3e0ff055 100644 --- a/src/dump.c +++ b/src/dump.c @@ -1974,7 +1974,7 @@ JL_DLLEXPORT jl_value_t *jl_ast_rettype(jl_lambda_info_t *li, jl_value_t *ast) ios_mem(&src, 0); ios_setbuf(&src, (char*)bytes->data, jl_array_len(bytes), 0); src.size = jl_array_len(bytes); - int en = jl_gc_enable(0); + int en = jl_gc_enable(0); // Might GC jl_value_t *rt = jl_deserialize_value(&src, NULL); jl_gc_enable(en); tree_literal_values = NULL; @@ -1994,7 +1994,7 @@ JL_DLLEXPORT jl_value_t *jl_compress_ast(jl_lambda_info_t *li, jl_value_t *ast) ios_mem(&dest, 0); jl_array_t *last_tlv = tree_literal_values; jl_module_t *last_tem = tree_enclosing_module; - int en = jl_gc_enable(0); + int en = jl_gc_enable(0); // Might GC if (li->module->constant_table == NULL) { li->module->constant_table = jl_alloc_cell_1d(0); @@ -2038,7 +2038,7 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_ast(jl_lambda_info_t *li, jl_value_t *dat ios_mem(&src, 0); ios_setbuf(&src, (char*)bytes->data, jl_array_len(bytes), 0); src.size = jl_array_len(bytes); - int en = jl_gc_enable(0); + int en = jl_gc_enable(0); // Might GC (void)jl_deserialize_value(&src, NULL); // skip ret type jl_value_t *v = jl_deserialize_value(&src, NULL); jl_gc_enable(en); diff --git a/src/gc.c b/src/gc.c index 1c3d35f82b94a..ee1a34c797d7b 100644 --- a/src/gc.c +++ b/src/gc.c @@ -34,8 +34,41 @@ extern "C" { #endif JL_DEFINE_MUTEX(pagealloc) +// Protect all access to `finalizer_list`, `finalizer_list_marked` and +// `to_finalize`. JL_DEFINE_MUTEX(finalizers) +/** + * Note about GC synchronization: + * + * When entering `jl_gc_collect()`, `jl_gc_running` is atomically changed from + * `0` to `1` to make sure that only one thread can be running the GC. Other + * threads that enters `jl_gc_collect()` at the same time (or later calling + * from unmanaged code) will wait in `jl_gc_collect()` until the GC is finished. 
+ * + * Before starting the mark phase the GC thread calls `jl_gc_signal_begin()` + * to make sure all the threads are in a safe state for the GC. The function + * activates the safepoint and waits for all the threads to get ready for the + * GC (`gc_state != 0`). It also acquires the `finalizers` lock so that no + * other thread will access them when the GC is running. + * + * During the mark and sweep phase of the GC, the threads that are not running + * the GC should either be running unmanaged code (or code section that does + * not have a GC critical region mainly including storing to the stack or + * another object) or paused at a safepoint and wait for the GC to finish. + * If a thread wants to switch from running unmanaged code to running managed + * code, it has to perform a GC safepoint check after setting the `gc_state` + * flag (see `jl_gc_state_save_and_set()`). It is possible that the thread might + * have `gc_state == 0` in the middle of the GC transition back before entering + * the safepoint. This is fine since the thread won't be executing any GC + * critical region during that time. + * + * The finalizers are run after the GC finishes in normal mode (the `gc_state` + * when `jl_gc_collect` is called) with `jl_in_finalizer = 1`. (TODO:) When we + * have proper support of GC transition in codegen, we should execute the + * finalizers in unmanaged (GC safe) mode. + */ + // manipulating mark bits #define GC_CLEAN 0 // freshly allocated @@ -313,7 +346,96 @@ NOINLINE static uintptr_t gc_get_stack_ptr(void) #include "gc-debug.c" -int jl_in_gc; // referenced from switchto task.c +// Only one thread can be doing the collection right now. That thread sets +// `jl_gc_running` to one on entering the GC and sets it back afterward. +static volatile uint64_t jl_gc_running = 0; + +#ifdef JULIA_ENABLE_THREADING +JL_DLLEXPORT volatile size_t *jl_gc_signal_page = NULL; + +static void jl_wait_for_gc(void) +{ + while (jl_gc_running) { + jl_cpu_pause(); // yield?
+ } +} + +void jl_gc_signal_wait(void) +{ + int8_t state = jl_get_ptls_states()->gc_state; + jl_get_ptls_states()->gc_state = JL_GC_STATE_WAITING; + jl_wait_for_gc(); + jl_get_ptls_states()->gc_state = state; +} + +static void jl_gc_wait_for_the_world(void) +{ + for (int i = 0;i < jl_n_threads;i++) { + jl_tls_states_t *ptls = jl_all_task_states[i].ptls; + while (!ptls->gc_state) { + jl_cpu_pause(); // yield? + } + } +} + +void jl_gc_signal_init(void) +{ + // jl_page_size isn't available yet. +#ifdef _OS_WINDOWS_ + jl_gc_signal_page = (size_t*)VirtualAlloc(NULL, jl_getpagesize(), + MEM_COMMIT, PAGE_READONLY); +#else + jl_gc_signal_page = (size_t*)mmap(0, jl_getpagesize(), PROT_READ, + MAP_NORESERVE | MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (jl_gc_signal_page == MAP_FAILED) + jl_gc_signal_page = NULL; +#endif + if (jl_gc_signal_page == NULL) { + jl_printf(JL_STDERR, "could not allocate GC synchronization page\n"); + abort(); + } +} + +static void jl_gc_signal_begin(void) +{ +#ifdef __APPLE__ + // This needs to be after setting `jl_gc_running` so that only one thread + // can talk to the signal handler + jl_mach_gc_begin(); +#endif +#ifdef _OS_WINDOWS_ + DWORD old_prot; + VirtualProtect((void*)jl_gc_signal_page, jl_page_size, + PAGE_NOACCESS, &old_prot); +#else + mprotect((void*)jl_gc_signal_page, jl_page_size, PROT_NONE); +#endif + jl_gc_wait_for_the_world(); + JL_LOCK_NOGC(finalizers); +} + +static void jl_gc_signal_end(void) +{ + JL_UNLOCK_NOGC(finalizers); +#ifdef _OS_WINDOWS_ + DWORD old_prot; + VirtualProtect((void*)jl_gc_signal_page, jl_page_size, + PAGE_READONLY, &old_prot); +#else + mprotect((void*)jl_gc_signal_page, jl_page_size, PROT_READ); +#endif +#ifdef __APPLE__ + jl_mach_gc_end(); +#endif +} +#else + +#define jl_gc_signal_begin() +#define jl_gc_signal_end() + +#endif + static int jl_gc_finalizers_inhibited; // don't run finalizers during codegen #11956 // malloc wrappers, aligned allocation @@ -375,12 +497,15 @@ static void 
jl_gc_push_arraylist(arraylist_t *list) jl_pgcstack = (jl_gcframe_t*)list->items; } -// Same assumption as `jl_gc_push_arraylist` +// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock +// to be held by the current thread and releases the lock before running +// the finalizers. static void jl_gc_run_finalizers_in_list(arraylist_t *list) { size_t len = list->len; jl_value_t **items = (jl_value_t**)list->items; jl_gc_push_arraylist(list); + JL_UNLOCK_NOGC(finalizers); for (size_t i = 2;i < len;i += 2) { run_finalizer(items[i], items[i + 1]); } @@ -389,8 +514,11 @@ static void jl_gc_run_finalizers_in_list(arraylist_t *list) static void run_finalizers(void) { - if (to_finalize.len == 0) + JL_LOCK_NOGC(finalizers); + if (to_finalize.len == 0) { + JL_UNLOCK_NOGC(finalizers); return; + } arraylist_t copied_list; memcpy(&copied_list, &to_finalize, sizeof(copied_list)); if (to_finalize.items == to_finalize._space) { @@ -400,6 +528,7 @@ static void run_finalizers(void) // empty out the first two entries for the GC frame arraylist_push(&copied_list, copied_list.items[0]); arraylist_push(&copied_list, copied_list.items[1]); + // This releases the finalizers lock.
jl_gc_run_finalizers_in_list(&copied_list); arraylist_free(&copied_list); } @@ -408,10 +537,10 @@ void jl_gc_inhibit_finalizers(int state) { // NOTE: currently only called with the codegen lock held, but might need // more synchronization in the future - if (jl_gc_finalizers_inhibited && !state && !jl_in_gc) { - jl_in_gc = 1; + if (jl_gc_finalizers_inhibited && !state && !jl_in_finalizer) { + jl_in_finalizer = 1; run_finalizers(); - jl_in_gc = 0; + jl_in_finalizer = 0; } jl_gc_finalizers_inhibited = state; } @@ -430,22 +559,24 @@ static void schedule_all_finalizers(arraylist_t* flist) void jl_gc_run_all_finalizers(void) { + JL_LOCK_NOGC(finalizers); schedule_all_finalizers(&finalizer_list); schedule_all_finalizers(&finalizer_list_marked); + JL_UNLOCK_NOGC(finalizers); run_finalizers(); } JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) { - JL_LOCK(finalizers); + JL_LOCK_NOGC(finalizers); arraylist_push(&finalizer_list, (void*)v); arraylist_push(&finalizer_list, (void*)f); - JL_UNLOCK(finalizers); + JL_UNLOCK_NOGC(finalizers); } JL_DLLEXPORT void jl_finalize(jl_value_t *o) { - JL_LOCK(finalizers); + JL_LOCK_NOGC(finalizers); // Copy the finalizers into a temporary list so that code in the finalizer // won't change the list as we loop through them. // This list is also used as the GC frame when we are running the finalizers @@ -457,10 +588,13 @@ JL_DLLEXPORT void jl_finalize(jl_value_t *o) // still holding a reference to the object finalize_object(&finalizer_list, o, &copied_list); finalize_object(&finalizer_list_marked, o, &copied_list); - JL_UNLOCK(finalizers); if (copied_list.len > 2) { + // This releases the finalizers lock. 
jl_gc_run_finalizers_in_list(&copied_list); } + else { + JL_UNLOCK_NOGC(finalizers); + } arraylist_free(&copied_list); } @@ -712,7 +846,7 @@ static NOINLINE void *malloc_page(void) int i; region_t* region; int region_i = 0; - JL_LOCK(pagealloc); + JL_LOCK_NOGC(pagealloc); while(region_i < REGION_COUNT) { region = regions[region_i]; if (region == NULL) { @@ -780,7 +914,7 @@ static NOINLINE void *malloc_page(void) #endif current_pg_count++; max_pg_count = max_pg_count < current_pg_count ? current_pg_count : max_pg_count; - JL_UNLOCK(pagealloc); + JL_UNLOCK_NOGC(pagealloc); return ptr; } @@ -830,6 +964,7 @@ static inline int maybe_collect(void) jl_gc_collect(0); return 1; } + jl_gc_safepoint(); return 0; } @@ -1111,6 +1246,9 @@ static inline void *__pool_alloc(pool_t* p, int osize, int end_offset) jl_gc_collect(0); //allocd_bytes += osize; } + else { + jl_gc_safepoint(); + } gc_num.poolalloc++; // first try to use the freelist v = p->freelist; @@ -1993,15 +2131,29 @@ static void post_mark(arraylist_t *list, int dryrun) } // collector entry point and control +static volatile uint64_t jl_gc_disable_counter = 0; -static int is_gc_enabled = 1; JL_DLLEXPORT int jl_gc_enable(int on) { - int prev = is_gc_enabled; - is_gc_enabled = (on!=0); + jl_tls_states_t *ptls = jl_get_ptls_states(); + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + JL_ATOMIC_FETCH_AND_ADD(jl_gc_disable_counter, -1); + } + else if (prev && !on) { + // enable -> disable + JL_ATOMIC_FETCH_AND_ADD(jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint(); + } return prev; } -JL_DLLEXPORT int jl_gc_is_enabled(void) { return is_gc_enabled; } +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + return !jl_get_ptls_states()->disable_gc; +} JL_DLLEXPORT int64_t jl_gc_total_bytes(void) { return total_allocd_bytes + allocd_bytes + collect_interval; } JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) { return 
total_gc_time; } @@ -2066,24 +2218,9 @@ static int saved_mark_sp = 0; static int sweep_mask = GC_MARKED; #define MIN_SCAN_BYTES 1024*1024 -JL_DLLEXPORT void jl_gc_collect(int full) +// Only one thread should be running in this function +static void _jl_gc_collect(int full, char *stack_hi) { - if (!is_gc_enabled) return; - if (jl_in_gc) return; - char *stack_hi = (char*)gc_get_stack_ptr(); - gc_debug_print(); - JL_SIGATOMIC_BEGIN(); - -#ifdef JULIA_ENABLE_THREADING - ti_threadgroup_barrier(tgworld, ti_tid); - if (ti_tid != 0) { - JL_SIGATOMIC_END(); - ti_threadgroup_barrier(tgworld, ti_tid); - return; - } -#endif - - jl_in_gc = 1; uint64_t t0 = jl_hrtime(); int recollect = 0; #if defined(GC_TIME) @@ -2162,7 +2299,7 @@ JL_DLLEXPORT void jl_gc_collect(int full) int64_t estimate_freed = -1; #if defined(GC_TIME) || defined(GC_FINAL_STATS) - uint64_t post_time = 0, finalize_time = 0; + uint64_t post_time = 0; #endif if (mark_sp == 0 || sweeping) { #if defined(GC_TIME) || defined(GC_FINAL_STATS) @@ -2274,26 +2411,16 @@ JL_DLLEXPORT void jl_gc_collect(int full) allocd_bytes_since_sweep = 0; jl_gc_total_freed_bytes += freed_bytes; freed_bytes = 0; - -#if defined(GC_FINAL_STATS) || defined(GC_TIME) - finalize_time = jl_hrtime(); -#endif - if (!jl_gc_finalizers_inhibited) { - run_finalizers(); - } -#if defined(GC_FINAL_STATS) || defined(GC_TIME) - finalize_time = jl_hrtime() - finalize_time; -#endif } #if defined(GC_FINAL_STATS) || defined(GC_TIME) uint64_t sweep_pause = jl_hrtime() - sweep_t0; #endif #ifdef GC_FINAL_STATS - total_sweep_time += sweep_pause - finalize_time - post_time; - total_fin_time += finalize_time + post_time; + total_sweep_time += sweep_pause - post_time; + total_fin_time += + post_time; #endif #ifdef GC_TIME - jl_printf(JL_STDOUT, "GC sweep pause %.2f ms live %ld kB (freed %d kB EST %d kB [error %d] = %d%% of allocd %d kB b/r %ld/%ld) (%.2f ms in post_mark, %.2f ms in %d fin) (marked in %d inc) mask %d | next in %d kB\n", NS2MS(sweep_pause), 
live_bytes/1024, SAVE2/1024, estimate_freed/1024, (SAVE2 - estimate_freed), pct, SAVE3/1024, bonus/1024, SAVE/1024, NS2MS(post_time), NS2MS(finalize_time), n_finalized, inc_count, sweep_mask, -allocd_bytes/1024); + jl_printf(JL_STDOUT, "GC sweep pause %.2f ms live %ld kB (freed %d kB EST %d kB [error %d] = %d%% of allocd %d kB b/r %ld/%ld) (%.2f ms in post_mark) (marked in %d inc) mask %d | next in %d kB\n", NS2MS(sweep_pause), live_bytes/1024, SAVE2/1024, estimate_freed/1024, (SAVE2 - estimate_freed), pct, SAVE3/1024, bonus/1024, SAVE/1024, NS2MS(post_time), inc_count, sweep_mask, -allocd_bytes/1024); #endif } n_pause++; @@ -2302,13 +2429,7 @@ JL_DLLEXPORT void jl_gc_collect(int full) #ifdef GC_FINAL_STATS max_pause = max_pause < pause ? pause : max_pause; #endif - jl_in_gc = 0; -#ifdef JULIA_ENABLE_THREADING - ti_threadgroup_barrier(tgworld, ti_tid); -#endif - - JL_SIGATOMIC_END(); #ifdef GC_TIME if (estimate_freed != SAVE2) { // this should not happen but it does @@ -2317,7 +2438,54 @@ JL_DLLEXPORT void jl_gc_collect(int full) #endif if (recollect) { n_pause--; - jl_gc_collect(0); + _jl_gc_collect(0, stack_hi); + } +} + +JL_DLLEXPORT void jl_gc_collect(int full) +{ + if (jl_gc_disable_counter) + return; + char *stack_hi = (char*)gc_get_stack_ptr(); + gc_debug_print(); + JL_SIGATOMIC_BEGIN(); + + int8_t old_state = jl_get_ptls_states()->gc_state; + jl_get_ptls_states()->gc_state = JL_GC_STATE_WAITING; + // In case multiple threads enter the GC at the same time, only allow + // one of them to actually run the collection. We can't just let the + // master thread do the GC since it might be running unmanaged code + // and can take arbitrarily long time before hitting a safe point. 
+ if (JL_ATOMIC_COMPARE_AND_SWAP(jl_gc_running, 0, 1) != 0) { +#ifdef JULIA_ENABLE_THREADING + JL_SIGATOMIC_END(); + jl_wait_for_gc(); + jl_gc_state_set(old_state, JL_GC_STATE_WAITING); +#else + // For single thread, GC should not call itself (in finalizers) before + // setting jl_gc_running to false so this should never happen. + assert(0 && "GC synchronization failure"); +#endif + return; + } + jl_gc_signal_begin(); + + if (!jl_gc_disable_counter) + _jl_gc_collect(full, stack_hi); + + // Need to reset the page protection before resetting the flag since + // the thread will trigger a segfault immediately after returning from + // the signal handler. + jl_gc_signal_end(); + jl_gc_running = 0; + JL_SIGATOMIC_END(); + jl_gc_state_set(old_state, JL_GC_STATE_WAITING); + + if (!jl_gc_finalizers_inhibited) { + int8_t was_in_finalizer = jl_in_finalizer; + jl_in_finalizer = 1; + run_finalizers(); + jl_in_finalizer = was_in_finalizer; } } diff --git a/src/gf.c b/src/gf.c index 4e165226f379e..58cb31cf2a9b3 100644 --- a/src/gf.c +++ b/src/gf.c @@ -424,7 +424,7 @@ jl_function_t *jl_method_cache_insert(jl_methtable_t *mt, jl_tupletype_t *type, int jl_in_inference = 0; void jl_type_infer(jl_lambda_info_t *li, jl_tupletype_t *argtypes, jl_lambda_info_t *def) { - JL_LOCK(codegen); + JL_LOCK(codegen); // Might GC int last_ii = jl_in_inference; jl_in_inference = 1; if (jl_typeinf_func != NULL) { @@ -490,7 +490,7 @@ static jl_function_t *cache_method(jl_methtable_t *mt, jl_tupletype_t *type, jl_function_t *method, jl_tupletype_t *decl, jl_svec_t *sparams, int8_t isstaged) { - JL_LOCK(codegen); + JL_LOCK(codegen); // Might GC size_t i; int need_guard_entries = 0; jl_value_t *temp=NULL; diff --git a/src/init.c b/src/init.c index a1a3601969a37..d7d7f37c1b6f3 100644 --- a/src/init.c +++ b/src/init.c @@ -528,6 +528,7 @@ void _julia_init(JL_IMAGE_SEARCH rel) #ifdef JULIA_ENABLE_THREADING // Make sure we finalize the tls callback before starting any threads. 
jl_get_ptls_states_getter(); + jl_gc_signal_init(); #endif libsupport_init(); jl_io_loop = uv_default_loop(); // this loop will internal events (spawning process etc.), @@ -676,6 +677,7 @@ void _julia_init(JL_IMAGE_SEARCH rel) jl_current_module; } + // This needs to be after jl_start_threads if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON) jl_install_default_signal_handlers(); diff --git a/src/jlapi.c b/src/jlapi.c index 4dcfb25b7f0f4..009e64651557c 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -318,6 +318,26 @@ JL_DLLEXPORT jl_value_t *(jl_typeof)(jl_value_t *v) return jl_typeof(v); } +JL_DLLEXPORT int8_t (jl_gc_unsafe_enter)(void) +{ + return jl_gc_unsafe_enter(); +} + +JL_DLLEXPORT void (jl_gc_unsafe_leave)(int8_t state) +{ + jl_gc_unsafe_leave(state); +} + +JL_DLLEXPORT int8_t (jl_gc_safe_enter)(void) +{ + return jl_gc_safe_enter(); +} + +JL_DLLEXPORT void (jl_gc_safe_leave)(int8_t state) +{ + jl_gc_safe_leave(state); +} + #ifdef __cplusplus } #endif diff --git a/src/jltypes.c b/src/jltypes.c index db316bbd50dff..7b784f118bbdd 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -1920,7 +1920,7 @@ static ssize_t lookup_type_idx(jl_typename_t *tn, jl_value_t **key, size_t n, in static jl_value_t *lookup_type(jl_typename_t *tn, jl_value_t **key, size_t n) { int ord = is_typekey_ordered(key, n); - JL_LOCK(typecache); + JL_LOCK(typecache); // Might GC ssize_t idx = lookup_type_idx(tn, key, n, ord); jl_value_t *t = (idx < 0) ? NULL : jl_svecref(ord ? 
tn->cache : tn->linearcache, idx); JL_UNLOCK(typecache); @@ -2003,7 +2003,7 @@ jl_value_t *jl_cache_type_(jl_datatype_t *type) { if (is_cacheable(type)) { int ord = is_typekey_ordered(jl_svec_data(type->parameters), jl_svec_len(type->parameters)); - JL_LOCK(typecache); + JL_LOCK(typecache); // Might GC ssize_t idx = lookup_type_idx(type->name, jl_svec_data(type->parameters), jl_svec_len(type->parameters), ord); if (idx >= 0) diff --git a/src/julia.h b/src/julia.h index b93c268f284b9..6b3c95bdda7b9 100644 --- a/src/julia.h +++ b/src/julia.h @@ -67,6 +67,20 @@ extern "C" { // JULIA_ENABLE_THREADING is switched on in Make.inc if JULIA_THREADS is // set (in Make.user) +#ifdef JULIA_ENABLE_THREADING +JL_DLLEXPORT extern volatile size_t *jl_gc_signal_page; +STATIC_INLINE void jl_gc_safepoint(void) +{ + // This triggers a SegFault when we are in GC + // Assign it to a variable to make sure the compiler emit the load + // and to avoid Clang warning for -Wunused-volatile-lvalue + size_t v = *jl_gc_signal_page; + (void)v; +} +#else // JULIA_ENABLE_THREADING +#define jl_gc_safepoint() +#endif // JULIA_ENABLE_THREADING + JL_DLLEXPORT int16_t jl_threadid(void); JL_DLLEXPORT void *jl_threadgroup(void); JL_DLLEXPORT void jl_cpu_pause(void); @@ -75,8 +89,9 @@ JL_DLLEXPORT void jl_threading_profile(void); #if defined(__GNUC__) # define JL_ATOMIC_FETCH_AND_ADD(a,b) \ __sync_fetch_and_add(&(a), (b)) +// Returns the original value of `a` # define JL_ATOMIC_COMPARE_AND_SWAP(a,b,c) \ - __sync_bool_compare_and_swap(&(a), (b), (c)) + __sync_val_compare_and_swap(&(a), (b), (c)) # define JL_ATOMIC_TEST_AND_SET(a) \ __sync_lock_test_and_set(&(a), 1) # define JL_ATOMIC_RELEASE(a) \ @@ -84,12 +99,13 @@ JL_DLLEXPORT void jl_threading_profile(void); #elif defined(_OS_WINDOWS_) # define JL_ATOMIC_FETCH_AND_ADD(a,b) \ _InterlockedExchangeAdd((volatile LONG *)&(a), (b)) +// Returns the original value of `a` # define JL_ATOMIC_COMPARE_AND_SWAP(a,b,c) \ _InterlockedCompareExchange64((volatile LONG64 
*)&(a), (c), (b)) # define JL_ATOMIC_TEST_AND_SET(a) \ _InterlockedExchange64(&(a), 1) # define JL_ATOMIC_RELEASE(a) \ - _InterlockedExchange64(&(a), 0) + (void)_InterlockedExchange64(&(a), 0) #else # error "No atomic operations supported." #endif @@ -104,7 +120,7 @@ JL_DLLEXPORT void jl_threading_profile(void); extern uint64_t volatile m ## _mutex; \ extern int32_t m ## _lock_count; -#define JL_LOCK(m) do { \ +#define JL_LOCK_WAIT(m, wait_ex) do { \ if (m ## _mutex == uv_thread_self()) { \ ++m ## _lock_count; \ } \ @@ -112,10 +128,11 @@ JL_DLLEXPORT void jl_threading_profile(void); for (;;) { \ if (m ## _mutex == 0 && \ JL_ATOMIC_COMPARE_AND_SWAP(m ## _mutex, 0, \ - uv_thread_self())) { \ + uv_thread_self()) == 0) { \ m ## _lock_count = 1; \ break; \ } \ + wait_ex; \ jl_cpu_pause(); \ } \ } \ @@ -132,10 +149,16 @@ JL_DLLEXPORT void jl_threading_profile(void); #else #define JL_DEFINE_MUTEX(m) #define JL_DEFINE_MUTEX_EXT(m) -#define JL_LOCK(m) do {} while (0) +#define JL_LOCK_WAIT(m, wait_ex) do {} while (0) #define JL_UNLOCK(m) do {} while (0) #endif +// JL_LOCK is a GC safe point while JL_LOCK_NOGC is not +// Always use JL_LOCK unless no one holding the lock can trigger a GC or GC +// safepoint. JL_LOCK_NOGC should only be needed for GC internal locks. +#define JL_LOCK(m) JL_LOCK_WAIT(m, jl_gc_safepoint()) +#define JL_LOCK_NOGC(m) JL_LOCK_WAIT(m, ) +#define JL_UNLOCK_NOGC(m) JL_UNLOCK(m) // core data types ------------------------------------------------------------ @@ -1391,6 +1414,7 @@ typedef struct _jl_handler_t { jl_jmp_buf eh_ctx; jl_gcframe_t *gcstack; struct _jl_handler_t *prev; + int8_t gc_state; } jl_handler_t; typedef struct _jl_task_t { @@ -1426,6 +1450,16 @@ typedef struct _jl_task_t { typedef struct _jl_tls_states_t { jl_gcframe_t *pgcstack; jl_value_t *exception_in_transit; + // Whether it is safe to execute GC at the same time. +#define JL_GC_STATE_WAITING 1 + // gc_state = 1 means the thread is doing GC or is waiting for the GC to + // finish. 
+#define JL_GC_STATE_SAFE 2 + // gc_state = 2 means the thread is running unmanaged code that can be + // execute at the same time with the GC. + volatile int8_t gc_state; + volatile int8_t in_finalizer; + int8_t disable_gc; struct _jl_thread_heap_t *heap; jl_task_t *volatile current_task; jl_task_t *root_task; @@ -1462,16 +1496,48 @@ JL_DLLEXPORT JL_CONST_FUNC jl_tls_states_t *(jl_get_ptls_states)(void); #ifndef JULIA_ENABLE_THREADING extern JL_DLLEXPORT jl_tls_states_t jl_tls_states; #define jl_get_ptls_states() (&jl_tls_states) -#else +STATIC_INLINE int8_t jl_gc_state_set(int8_t state, int8_t old_state) +{ + (void)state; + return old_state; +} +STATIC_INLINE int8_t jl_gc_state_save_and_set(int8_t state) +{ + (void)state; + return 0; +} +#define jl_gc_unsafe_enter() jl_gc_state_save_and_set(0) +#define jl_gc_unsafe_leave(state) ((void)state) +#define jl_gc_safe_enter() jl_gc_state_save_and_set(JL_GC_STATE_SAFE) +#define jl_gc_safe_leave(state) ((void)state) +#else // ifndef JULIA_ENABLE_THREADING typedef jl_tls_states_t *(*jl_get_ptls_states_func)(void); JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f); -#endif +STATIC_INLINE int8_t jl_gc_state_set(int8_t state, int8_t old_state) +{ + jl_get_ptls_states()->gc_state = state; + // A safe point is required if we transition from GC-safe region to + // non GC-safe region. 
+ if (old_state && !state) + jl_gc_safepoint(); + return old_state; +} +STATIC_INLINE int8_t jl_gc_state_save_and_set(int8_t state) +{ + return jl_gc_state_set(state, jl_get_ptls_states()->gc_state); +} +#define jl_gc_unsafe_enter() jl_gc_state_save_and_set(0) +#define jl_gc_unsafe_leave(state) jl_gc_state_set((state), 0) +#define jl_gc_safe_enter() jl_gc_state_save_and_set(JL_GC_STATE_SAFE) +#define jl_gc_safe_leave(state) jl_gc_state_set((state), JL_GC_STATE_SAFE) +#endif // ifndef JULIA_ENABLE_THREADING STATIC_INLINE void jl_eh_restore_state(jl_handler_t *eh) { JL_SIGATOMIC_BEGIN(); jl_current_task->eh = eh->prev; jl_pgcstack = eh->gcstack; + jl_gc_state_save_and_set(eh->gc_state); JL_SIGATOMIC_END(); } diff --git a/src/julia_internal.h b/src/julia_internal.h index 233b153955113..6ce57c36760bb 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -21,6 +21,7 @@ extern unsigned sig_stack_size; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; +#define jl_in_finalizer (jl_get_ptls_states()->in_finalizer) STATIC_INLINE jl_value_t *newobj(jl_value_t *type, size_t nfields) { @@ -246,6 +247,8 @@ void jl_start_threads(void); void jl_shutdown_threading(void); #ifdef JULIA_ENABLE_THREADING jl_get_ptls_states_func jl_get_ptls_states_getter(void); +void jl_gc_signal_init(void); +void jl_gc_signal_wait(void); #endif void jl_dump_bitcode(char *fname, const char *sysimg_data, size_t sysimg_len); @@ -282,9 +285,6 @@ JL_DLLEXPORT size_t rec_backtrace_ctx(ptrint_t *data, size_t maxsize, bt_context size_t rec_backtrace_ctx_dwarf(ptrint_t *data, size_t maxsize, bt_context_t ctx); #endif JL_DLLEXPORT void jl_raise_debugger(void); -#ifdef _OS_DARWIN_ -JL_DLLEXPORT void attach_exception_port(void); -#endif // Set *name and *filename to either NULL or malloc'd string void jl_getFunctionInfo(char **name, char **filename, size_t *line, char **inlinedat_file, size_t *inlinedat_line, @@ -436,6 +436,11 @@ int jl_array_isdefined(jl_value_t **args, int 
nargs); JL_DEFINE_MUTEX_EXT(codegen) +#if defined(__APPLE__) && defined(JULIA_ENABLE_THREADING) +void jl_mach_gc_begin(void); +void jl_mach_gc_end(void); +#endif + #if defined(_OS_WINDOWS_) STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) { diff --git a/src/signal-handling.c b/src/signal-handling.c index 7dbabf669730b..88732a354d705 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -49,6 +49,9 @@ static void jl_critical_error(int sig, bt_context_t context, ptrint_t *bt_data, // what to do on a critical error static void jl_critical_error(int sig, bt_context_t context, ptrint_t *bt_data, size_t *bt_size) { + // This function is not allowed to reference any TLS variables. + // We need to explicitly pass in the TLS buffer pointer when + // we make `jl_filename` and `jl_lineno` thread local. size_t n = *bt_size; if (sig) jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); diff --git a/src/signals-mach.c b/src/signals-mach.c index 8f32afd15356d..f2e7387f0af20 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -15,6 +15,37 @@ #include #endif +static void attach_exception_port(thread_port_t thread); + +#ifdef JULIA_ENABLE_THREADING +JL_DEFINE_MUTEX(gc_suspend) +// This is a copy of `jl_gc_safepoint_activated` to make it easier +// to synchronize the GC and the signal handler +static int jl_gc_safepoint_activated = 0; +// low 16 bits are the thread id, the next 8 bits are the original gc_state +static arraylist_t suspended_threads; +void jl_mach_gc_begin(void) +{ + JL_LOCK_NOGC(gc_suspend); + jl_gc_safepoint_activated = 1; + JL_UNLOCK_NOGC(gc_suspend); +} +void jl_mach_gc_end(void) +{ + JL_LOCK_NOGC(gc_suspend); + jl_gc_safepoint_activated = 0; + for (size_t i = 0;i < suspended_threads.len;i++) { + uintptr_t item = (uintptr_t)suspended_threads.items[i]; + int16_t tid = (int16_t)item; + // gc_state is packed above the 16-bit thread id (`<< 16` in catch_exception_raise) + int8_t gc_state = (int8_t)(item >> 16); + jl_all_task_states[tid].ptls->gc_state = gc_state; +
thread_resume(pthread_mach_thread_np(jl_all_task_states[tid].system_id)); + } + suspended_threads.len = 0; + JL_UNLOCK_NOGC(gc_suspend); +} +#endif + static mach_port_t segv_port = 0; extern boolean_t exc_server(mach_msg_header_t *, mach_msg_header_t *); @@ -36,6 +67,9 @@ void *mach_segv_listener(void *arg) static void allocate_segv_handler() { +#ifdef JULIA_ENABLE_THREADING + arraylist_new(&suspended_threads, jl_n_threads); +#endif pthread_t thread; pthread_attr_t attr; kern_return_t ret; @@ -53,7 +87,9 @@ static void allocate_segv_handler() jl_error("pthread_create failed"); } pthread_attr_destroy(&attr); - attach_exception_port(); + for (int16_t tid = 0;tid < jl_n_threads;tid++) { + attach_exception_port(pthread_mach_thread_np(jl_all_task_states[tid].system_id)); + } } #ifdef LIBOSXUNWIND @@ -117,20 +153,47 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, return profiler_segv_handler(exception_port, thread, task, exception, code, code_count); } #endif + int16_t tid; #ifdef JULIA_ENABLE_THREADING jl_tls_states_t *ptls = NULL; - for (int16_t tid = 0;tid < jl_n_threads;tid++) { + for (tid = 0;tid < jl_n_threads;tid++) { if (pthread_mach_thread_np(jl_all_task_states[tid].system_id) == thread) { ptls = jl_all_task_states[tid].ptls; break; } } + if (!ptls) { + // We don't know about this thread, let the kernel try another handler + // instead. This shouldn't actually happen since we only register the + // handler for the threads we know about.
+ jl_safe_printf("ERROR: Exception handler triggered on unmanaged thread.\n"); + return KERN_INVALID_ARGUMENT; + } #else jl_tls_states_t *ptls = &jl_tls_states; + tid = 0; #endif kern_return_t ret = thread_get_state(thread, x86_EXCEPTION_STATE64, (thread_state_t)&exc_state, &exc_count); HANDLE_MACH_ERROR("thread_get_state", ret); uint64_t fault_addr = exc_state.__faultvaddr; +#ifdef JULIA_ENABLE_THREADING + if (fault_addr == (uintptr_t)jl_gc_signal_page) { + JL_LOCK_NOGC(gc_suspend); + if (!jl_gc_safepoint_activated) { + // GC is done before we get the message, do nothing and return + JL_UNLOCK_NOGC(gc_suspend); + return KERN_SUCCESS; + } + // Otherwise, set the gc state of the thread, suspend and record it + int8_t gc_state = ptls->gc_state; + ptls->gc_state = JL_GC_STATE_WAITING; + uintptr_t item = tid | (((uintptr_t)gc_state) << 16); + arraylist_push(&suspended_threads, (void*)item); + thread_suspend(thread); + JL_UNLOCK_NOGC(gc_suspend); + return KERN_SUCCESS; + } +#endif #ifdef SEGV_EXCEPTION if (1) { #else @@ -151,7 +214,7 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, return KERN_INVALID_ARGUMENT; // rethrow the SEGV since it wasn't an error with writing to read-only memory excpt = jl_readonlymemory_exception; } - jl_throw_in_thread(0, thread, excpt); + jl_throw_in_thread(tid, thread, excpt); return KERN_SUCCESS; } @@ -159,16 +222,16 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, kern_return_t ret = thread_get_state(thread, x86_THREAD_STATE64, (thread_state_t)&state, &count); HANDLE_MACH_ERROR("thread_get_state", ret); jl_critical_error(SIGSEGV, (unw_context_t*)&state, - jl_bt_data, &jl_bt_size); + ptls->bt_data, &ptls->bt_size); return KERN_INVALID_ARGUMENT; } } -JL_DLLEXPORT void attach_exception_port(void) +static void attach_exception_port(thread_port_t thread) { kern_return_t ret; // http://www.opensource.apple.com/source/xnu/xnu-2782.1.97/osfmk/man/thread_set_exception_ports.html - ret = 
thread_set_exception_ports(mach_thread_self(), EXC_MASK_BAD_ACCESS, segv_port, EXCEPTION_DEFAULT, MACHINE_THREAD_STATE); + ret = thread_set_exception_ports(thread, EXC_MASK_BAD_ACCESS, segv_port, EXCEPTION_DEFAULT, MACHINE_THREAD_STATE); HANDLE_MACH_ERROR("thread_set_exception_ports", ret); } @@ -269,7 +332,7 @@ void *mach_profile_listener(void *arg) (void)arg; int i; const int max_size = 512; - attach_exception_port(); + attach_exception_port(mach_thread_self()); #ifdef LIBOSXUNWIND mach_profiler_thread = mach_thread_self(); #endif diff --git a/src/signals-unix.c b/src/signals-unix.c index 94cf958648c29..1ebb8a863d853 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -75,6 +75,15 @@ static void segv_handler(int sig, siginfo_t *info, void *context) sigset_t sset; assert(sig == SIGSEGV); +#ifdef JULIA_ENABLE_THREADING + if (info->si_addr == jl_gc_signal_page) { + sigemptyset(&sset); + sigaddset(&sset, SIGSEGV); + sigprocmask(SIG_UNBLOCK, &sset, NULL); + jl_gc_signal_wait(); + return; + } +#endif if (jl_in_jl_ || is_addr_on_stack(jl_get_ptls_states(), info->si_addr)) { // stack overflow, or restarting jl_ sigemptyset(&sset); sigaddset(&sset, SIGSEGV); diff --git a/src/signals-win.c b/src/signals-win.c index 991a7cd188210..0d651b48c1da7 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -169,6 +169,13 @@ static LONG WINAPI _exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo, jl_throw_in_ctx(jl_stackovf_exception, ExceptionInfo->ContextRecord,in_ctx&&pSetThreadStackGuarantee); return EXCEPTION_CONTINUE_EXECUTION; case EXCEPTION_ACCESS_VIOLATION: +#ifdef JULIA_ENABLE_THREADING + if (ExceptionInfo->ExceptionRecord->ExceptionInformation[1] == + (intptr_t)jl_gc_signal_page) { + jl_gc_signal_wait(); + return EXCEPTION_CONTINUE_EXECUTION; + } +#endif if (ExceptionInfo->ExceptionRecord->ExceptionInformation[0] == 1) { // writing to read-only memory (e.g. 
mmap) jl_throw_in_ctx(jl_readonlymemory_exception, ExceptionInfo->ContextRecord,in_ctx); return EXCEPTION_CONTINUE_EXECUTION; diff --git a/src/task.c b/src/task.c index b0cf00ee7cb8c..a675279a59ddb 100644 --- a/src/task.c +++ b/src/task.c @@ -355,7 +355,6 @@ static void ctx_switch(jl_task_t *t, jl_jmp_buf *where) //JL_SIGATOMIC_END(); } -extern int jl_in_gc; JL_DLLEXPORT jl_value_t *jl_switchto(jl_task_t *t, jl_value_t *arg) { if (t == jl_current_task) { @@ -368,13 +367,15 @@ JL_DLLEXPORT jl_value_t *jl_switchto(jl_task_t *t, jl_value_t *arg) jl_throw(t->exception); return t->result; } - if (jl_in_gc) + if (jl_in_finalizer) jl_error("task switch not allowed from inside gc finalizer"); + int8_t gc_state = jl_gc_unsafe_enter(); jl_task_arg_in_transit = arg; ctx_switch(t, &t->ctx); jl_value_t *val = jl_task_arg_in_transit; jl_task_arg_in_transit = jl_nothing; throw_if_exception_set(jl_current_task); + jl_gc_unsafe_leave(gc_state); return val; } @@ -481,6 +482,8 @@ static int frame_info_from_ip(char **func_name, char **inlinedat_file, size_t *inlinedat_line, size_t ip, int skipC, int skipInline) { + // This function is not allowed to reference any TLS variables since + // it can be called from an unmanaged thread on OSX. static const char *name_unknown = "???"; int fromC = 0; @@ -758,6 +761,8 @@ JL_DLLEXPORT jl_value_t *jl_get_backtrace(void) //for looking up functions from gdb: JL_DLLEXPORT void gdblookup(ptrint_t ip) { + // This function is not allowed to reference any TLS variables since + // it can be called from an unmanaged thread on OSX. 
char *func_name; size_t line_num; char *file_name; @@ -808,6 +813,7 @@ JL_DLLEXPORT void gdbbacktrace(void) // yield to exception handler void JL_NORETURN throw_internal(jl_value_t *e) { + jl_gc_unsafe_enter(); assert(e != NULL); jl_exception_in_transit = e; if (jl_current_task->eh != NULL) { @@ -884,8 +890,8 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, size_t ssize) stk += pagesz; init_task(t, stk); - JL_GC_POP(); jl_gc_add_finalizer((jl_value_t*)t, jl_unprotect_stack_func); + JL_GC_POP(); #endif return t; diff --git a/src/threadgroup.h b/src/threadgroup.h index b54b5f05adea7..ef15cda0bcbec 100644 --- a/src/threadgroup.h +++ b/src/threadgroup.h @@ -49,4 +49,3 @@ int ti_threadgroup_destroy(ti_threadgroup_t *tg); extern ti_threadgroup_t *tgworld; #endif /* THREADGROUP_H */ - diff --git a/src/threading.c b/src/threading.c index 84e22c8e1adb6..2655987101961 100644 --- a/src/threading.c +++ b/src/threading.c @@ -134,6 +134,8 @@ JL_DLLEXPORT int jl_n_threads; // # threads we're actually using jl_thread_task_state_t *jl_all_task_states; // return calling thread's ID +// Also update the suspended_threads list in signals-mach when changing the +// type of the thread id. JL_DLLEXPORT int16_t jl_threadid(void) { return ti_tid; } struct _jl_thread_heap_t *jl_mk_thread_heap(void); @@ -228,6 +230,12 @@ void ti_threadfun(void *arg) while (ta->state == TI_THREAD_INIT) cpu_pause(); cpu_lfence(); + + // Assuming the functions called below doesn't contain unprotected GC + // critical region. In general, the following part of this function + // shouldn't call any managed code without calling `jl_gc_unsafe_enter` + // first. 
+ jl_gc_state_set(JL_GC_STATE_SAFE, 0); uv_barrier_wait(&thread_init_done); // initialize this thread in the thread group tg = ta->tg; @@ -250,11 +258,19 @@ void ti_threadfun(void *arg) #endif if (work) { - if (work->command == TI_THREADWORK_DONE) + if (work->command == TI_THREADWORK_DONE) { break; - else if (work->command == TI_THREADWORK_RUN) + } + else if (work->command == TI_THREADWORK_RUN) { // TODO: return value? reduction? + // TODO: before we support getting return value from + // the work, and after we have proper GC transition + // support in the codegen and runtime we don't need to + // enter GC unsafe region when starting the work. + int8_t gc_state = jl_gc_unsafe_enter(); ti_run_fun(work->fun, work->args); + jl_gc_unsafe_leave(gc_state); + } } #if PROFILE_JL_THREADING @@ -406,6 +422,7 @@ JL_DLLEXPORT void *jl_threadgroup(void) { return (void *)tgworld; } // and run it in all threads JL_DLLEXPORT jl_value_t *jl_threading_run(jl_function_t *f, jl_svec_t *args) { + // GC safe #if PROFILE_JL_THREADING uint64_t tstart = rdtsc(); #endif @@ -417,6 +434,7 @@ JL_DLLEXPORT jl_value_t *jl_threading_run(jl_function_t *f, jl_svec_t *args) JL_TYPECHK(jl_threading_run, function, (jl_value_t*)f); JL_TYPECHK(jl_threading_run, simplevector, (jl_value_t*)args); + int8_t gc_state = jl_gc_unsafe_enter(); JL_GC_PUSH2(&argtypes, &fun); if (jl_svec_len(args) == 0) argtypes = (jl_tupletype_t*)jl_typeof(jl_emptytuple); @@ -454,8 +472,10 @@ JL_DLLEXPORT jl_value_t *jl_threading_run(jl_function_t *f, jl_svec_t *args) user_ticks[ti_tid] += (trun - tfork); #endif + jl_gc_state_set(JL_GC_STATE_SAFE, 0); // wait for completion (TODO: nowait?) 
ti_threadgroup_join(tgworld, ti_tid); + jl_gc_state_set(0, JL_GC_STATE_SAFE); #if PROFILE_JL_THREADING uint64_t tjoin = rdtsc(); @@ -463,6 +483,7 @@ JL_DLLEXPORT jl_value_t *jl_threading_run(jl_function_t *f, jl_svec_t *args) #endif JL_GC_POP(); + jl_gc_unsafe_leave(gc_state); return tw->ret; } diff --git a/src/toplevel.c b/src/toplevel.c index 73692abd3d979..4eb9f3020d04c 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -25,9 +25,9 @@ extern "C" { #endif // current line number in a file -JL_DLLEXPORT int jl_lineno = 0; +JL_DLLEXPORT int jl_lineno = 0; // need to update jl_critical_error if this is TLS // current file name -JL_DLLEXPORT const char *jl_filename = "no file"; +JL_DLLEXPORT const char *jl_filename = "no file"; // need to update jl_critical_error if this is TLS jl_module_t *jl_old_base_module = NULL; // the Main we started with, in case it is switched diff --git a/test/threads.jl b/test/threads.jl index 10b7ec13b5178..9fc414d0548fb 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -83,3 +83,18 @@ let lock = Threads.RecursiveSpinLock() @test unlock!(lock) == 0 @test unlock!(lock) == 1 end + +# Make sure doing a GC while holding a lock doesn't cause a deadlock +# PR 14190. (This is only meaningful for threading) +function threaded_gc_locked{LockT}(::Type{LockT}) + lock = LockT() + @threads for i = 1:20 + lock!(lock) + gc(false) + unlock!(lock) + end +end + +threaded_gc_locked(SpinLock) +threaded_gc_locked(Threads.RecursiveSpinLock) +threaded_gc_locked(Mutex)