diff --git a/Make.inc b/Make.inc index 04650e68b643e..4eacd49e5155a 100644 --- a/Make.inc +++ b/Make.inc @@ -1503,7 +1503,7 @@ endef WINE ?= wine ifeq ($(BINARY),32) -HEAPLIM := --heap-size-hint=500M +HEAPLIM := --heap-size-hint=1000M else HEAPLIM := endif diff --git a/src/gc-debug.c b/src/gc-debug.c index 6e1587b17a6d9..23d1ca8f5f884 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1223,18 +1223,18 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect if (!gc_logging_enabled) { return; } - jl_safe_printf("GC: pause %.2fms. collected %fMB. %s %s\n", + jl_safe_printf("\nGC: pause %.2fms. collected %fMB. %s %s\n", pause/1e6, freed/(double)(1<<20), full ? "full" : "incr", recollect ? "recollect" : "" ); - jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB, heap_size %.2f MB, heap_target %.2f MB, live_bytes %.2f MB\n, Fragmentation %.3f", + jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB,\nheap_size %.2f MB, heap_target %.2f MB, Fragmentation %.3f\n", jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/(double)(1<<20), jl_atomic_load_relaxed(&gc_heap_stats.bytes_resident)/(double)(1<<20), + // live_bytes/(double)(1<<20), live bytes tracking is not accurate. jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/(double)(1<<20), jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/(double)(1<<20), - live_bytes/(double)(1<<20), (double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size) ); // Should fragmentation use bytes_resident instead of heap_size? diff --git a/src/gc.c b/src/gc.c index 41c0baf492507..ad0c545c13e8d 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1,6 +1,7 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license #include "gc.h" +#include "julia.h" #include "julia_gcext.h" #include "julia_assert.h" #ifdef __GLIBC__ @@ -696,8 +697,8 @@ static uint64_t old_heap_size = 0; static uint64_t old_alloc_diff = 0; static uint64_t old_freed_diff = 0; static uint64_t gc_end_time = 0; - - +static int thrash_counter = 0; +static int thrashing = 0; // global variables for GC stats // Resetting the object to a young object, this is used when marking the @@ -1163,7 +1164,10 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc); jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc - free_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); } } } @@ -3251,9 +3255,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // If the live data outgrows the suggested max_total_memory // we keep going with minimum intervals and full gcs until // we either free some space or get an OOM error. - if (live_bytes > max_total_memory) { - sweep_full = 1; - } if (gc_sweep_always_full) { sweep_full = 1; } @@ -3302,7 +3303,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) gc_num.last_full_sweep = gc_end_time; } - int thrashing = 0; // maybe we should report this to the user or error out? 
size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); double target_allocs = 0.0; double min_interval = default_collect_interval; @@ -3313,24 +3313,32 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) double collect_smooth_factor = 0.5; double tuning_factor = 0.03; double alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor); - double alloc_time = jl_gc_smooth(old_mut_time, mutator_time, alloc_smooth_factor); + double alloc_time = jl_gc_smooth(old_mut_time, mutator_time + sweep_time, alloc_smooth_factor); /* Charge sweeping to the mutator */ double gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor); - double gc_time = jl_gc_smooth(old_pause_time, pause, collect_smooth_factor); + double gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor); old_alloc_diff = alloc_diff; old_mut_time = mutator_time; old_freed_diff = freed_diff; old_pause_time = pause; - old_heap_size = heap_size; - thrashing = gc_time > mutator_time * 98 ? 1 : 0; + old_heap_size = heap_size; /* TODO: Update these values dynamically instead of just during the GC */ + if (gc_time > alloc_time * 95) /* smoothed GC time dwarfs smoothed mutator time: count this cycle toward thrashing */ + thrash_counter += (thrash_counter < 4); /* saturate at 4; the previous !(thrash_counter < 4) guard could never fire starting from 0 */ + else if (thrash_counter > 0) + thrash_counter -= 1; if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0 ) { double alloc_rate = alloc_mem/alloc_time; double gc_rate = gc_mem/gc_time; target_allocs = sqrt(((double)heap_size/min_interval * alloc_rate)/(gc_rate * tuning_factor)); // work on multiples of min interval } } - if (target_allocs == 0.0 || thrashing) // If we are thrashing go back to default - target_allocs = 2*sqrt((double)heap_size/min_interval); + if (thrashing == 0 && thrash_counter >= 3) /* hysteresis: enter thrashing at 3 ... */ + thrashing = 1; + else if (thrashing == 1 && thrash_counter <= 2) /* ... and leave it once the counter decays to 2 */ + thrashing = 0; // maybe we should report this to the user or error out? 
+ int bad_result = (target_allocs*min_interval + heap_size) > 2 * jl_atomic_load_relaxed(&gc_heap_stats.heap_target); // Don't follow through on a bad decision + if (target_allocs == 0.0 || thrashing || bad_result) // If we are thrashing go back to default + target_allocs = 2*sqrt((double)heap_size/min_interval); uint64_t target_heap = (uint64_t)target_allocs*min_interval + heap_size; if (target_heap > max_total_memory && !thrashing) // Allow it to go over if we are thrashing if we die we die target_heap = max_total_memory; @@ -3594,10 +3602,10 @@ void jl_gc_init(void) total_mem = uv_get_total_memory(); uint64_t constrained_mem = uv_get_constrained_memory(); if (constrained_mem > 0 && constrained_mem < total_mem) - total_mem = constrained_mem; + jl_gc_set_max_memory(constrained_mem > 251*1024*1024 ? constrained_mem - 250*1024*1024 : 1*1024*1024); /* LLVM + other libraries need some amount of memory; clamp so limits below the 250 MB reserve cannot wrap the unsigned subtraction */ #endif if (jl_options.heap_size_hint) - jl_gc_set_max_memory(jl_options.heap_size_hint); + jl_gc_set_max_memory(jl_options.heap_size_hint > 251*1024*1024 ? jl_options.heap_size_hint - 250*1024*1024 : 1*1024*1024); /* reserve 250 MB for non-GC memory; hints at or below the reserve clamp to 1 MB instead of underflowing */ t_start = jl_hrtime(); } @@ -3700,7 +3708,26 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz-old); + + int64_t diff = sz - old; /* signed size change: negative when the buffer shrinks */ + if (diff < 0) { + uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + if (free_acc + (-diff) < 16*1024) /* diff is negative here: test the accumulated magnitude, not the wrapped unsigned sum free_acc + diff */ + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + (-diff)); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + (-diff))); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); + } + } + else { + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); + if (alloc_acc + diff < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, 
alloc_acc + diff); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + } + } } return realloc(p, sz); } @@ -3817,7 +3844,27 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, allocsz-oldsz); + + int64_t diff = allocsz - oldsz; /* signed size change: negative when the buffer shrinks */ + if (diff < 0) { + uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + if (free_acc + (-diff) < 16*1024) /* diff is negative here: test the accumulated magnitude, not the wrapped unsigned sum free_acc + diff */ + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + (-diff)); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + (-diff))); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); + } + } + else { + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); + if (alloc_acc + diff < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + } + } + int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 917031b57fe5f..417ac82973558 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -971,6 +971,6 @@ end @test lines[3] == "foo" @test lines[4] == "bar" end -#heap-size-hint -@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "524288000" +#heap-size-hint, we reserve 250 MB for non GC memory (llvm, etc.) 
+@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "$((500-250)*1024*1024)" end diff --git a/test/testenv.jl b/test/testenv.jl index 21f3ee3b31dd1..a4bec15a34130 100644 --- a/test/testenv.jl +++ b/test/testenv.jl @@ -38,12 +38,7 @@ if !@isdefined(testenv_defined) function addprocs_with_testenv(X; rr_allowed=true, kwargs...) exename = rr_allowed ? `$rr_exename $test_exename` : test_exename if X isa Integer - if Sys.iswindows() - heap_size=round(Int,(Sys.free_memory()/(1024^2)/(X+1))) - heap_size -= 300 # I don't know anymore - else - heap_size=round(Int,(Sys.total_memory()/(1024^2)/(X+1))) - end + heap_size=round(Int,(Sys.free_memory()/(1024^2)/(X+1))) push!(test_exeflags.exec, "--heap-size-hint=$(heap_size)M") end addprocs(X; exename=exename, exeflags=test_exeflags, kwargs...)