diff --git a/NEWS.md b/NEWS.md index f4c642774d0d2..b30bb29e537f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -47,6 +47,7 @@ Library changes tasks mutating the dictionary or set ([#44534]). * Predicate function negation `!f` now returns a composed function `(!) ∘ f` instead of an anonymous function ([#44752]). * `RoundFromZero` now works for non-`BigFloat` types ([#41246]). +* `@time` now separates out % time spent recompiling invalidated methods ([#45015]). Standard library changes diff --git a/base/timing.jl b/base/timing.jl index c7870ac491169..1579cd5673bc9 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -55,9 +55,21 @@ function gc_alloc_count(diff::GC_Diff) diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc end -# cumulative total time spent on compilation, in nanoseconds -cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ()) -cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ()) +# cumulative total time spent on compilation and recompilation, in nanoseconds +function cumulative_compile_time_ns() + comp = ccall(:jl_cumulative_compile_time_ns, UInt64, ()) + recomp = ccall(:jl_cumulative_recompile_time_ns, UInt64, ()) + return comp, recomp +end + +function cumulative_compile_timing(b::Bool) + if b + ccall(:jl_cumulative_compile_timing_enable, Cvoid, ()) + else + ccall(:jl_cumulative_compile_timing_disable, Cvoid, ()) + end + return +end # total time spend in garbage collection, in nanoseconds gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ()) @@ -114,7 +126,7 @@ function format_bytes(bytes) # also used by InteractiveUtils end end -function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, newline=false, _lpad=true) +function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true) timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6) str = sprint() do io _lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "") @@ -142,15 +154,20 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, ne end print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time") end + if recompile_time > 0 + print(io, ": ", Ryu.writefixed(Float64(100*recompile_time/compile_time), 0), "% of which was recompilation") + end parens && print(io, ")") end newline ? println(str) : print(str) nothing end -function timev_print(elapsedtime, diff::GC_Diff, compile_time, _lpad) +function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad) allocs = gc_alloc_count(diff) - time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, true, _lpad) + compile_time = first(compile_times) + recompile_time = last(compile_times) + time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad) padded_nonzero_print(elapsedtime, "elapsed time (ns)") padded_nonzero_print(diff.total_time, "gc time (ns)") padded_nonzero_print(diff.allocd, "bytes allocated") @@ -181,8 +198,8 @@ end A macro to execute an expression, printing the time it took to execute, the number of allocations, and the total number of bytes its execution caused to be allocated, before -returning the value of the expression. Any time spent garbage collecting (gc) or -compiling is shown as a percentage. +returning the value of the expression. Any time spent garbage collecting (gc), compiling +new code, or recompiling invalidated code is shown as a percentage. Optionally provide a description string to print before the time report. @@ -201,6 +218,9 @@ See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ !!! compat "Julia 1.8" The option to add a description was introduced in Julia 1.8. +!!! compat "Julia 1.9" + Recompilation time being shown separately from compilation time was introduced in Julia 1.9 + ```julia-repl julia> x = rand(10,10); @@ -238,16 +258,18 @@ macro time(msg, ex) Experimental.@force_compile local stats = gc_num() local elapsedtime = time_ns() - local compile_elapsedtime = cumulative_compile_time_ns_before() + cumulative_compile_timing(true) + local compile_elapsedtimes = cumulative_compile_time_ns() local val = @__tryfinally($(esc(ex)), (elapsedtime = time_ns() - elapsedtime; - compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime) + cumulative_compile_timing(false); + compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes) ) local diff = GC_Diff(gc_num(), stats) local _msg = $(esc(msg)) local has_msg = !isnothing(_msg) has_msg && print(_msg, ": ") - time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true, !has_msg) + time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg) val end end @@ -320,16 +342,16 @@ macro timev(msg, ex) Experimental.@force_compile local stats = gc_num() local elapsedtime = time_ns() - local compile_elapsedtime = cumulative_compile_time_ns_before() + local compile_elapsedtimes = cumulative_compile_time_ns() local val = @__tryfinally($(esc(ex)), (elapsedtime = time_ns() - elapsedtime; - compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime) + compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes) ) local diff = GC_Diff(gc_num(), stats) local _msg = $(esc(msg)) local has_msg = !isnothing(_msg) has_msg && print(_msg, ": ") - timev_print(elapsedtime, diff, compile_elapsedtime, !has_msg) + timev_print(elapsedtime, diff, compile_elapsedtimes, !has_msg) val end end diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 80c8b928bd022..4d1ad0ddd1e74 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -316,6 +316,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES auto &context = *ctx; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); + bool is_recompile = false; if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); // if we don't have any decls already, try to generate it now @@ -330,6 +331,10 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES else if (jl_is_method(mi->def.method)) src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src); } + else { + // identify whether this is an invalidated method that is being recompiled + is_recompile = jl_atomic_load_relaxed(&mi->cache) != NULL; + } if (src == NULL && jl_is_method(mi->def.method) && jl_symbol_name(mi->def.method->name)[0] != '@') { if (mi->def.method->source != jl_nothing) { @@ -356,8 +361,12 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES else { codeinst = NULL; } - if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); + if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) { + uint64_t t_comp = jl_hrtime() - compiler_start_time; + if (is_recompile) + jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp); + } JL_UNLOCK(&jl_codegen_lock); JL_GC_POP(); return codeinst; diff --git a/src/jlapi.c b/src/jlapi.c index 634fce24e2a46..89c6e90684b69 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -471,20 +471,28 @@ JL_DLLEXPORT void (jl_cpu_wake)(void) jl_cpu_wake(); } -JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_before(void) +JL_DLLEXPORT void jl_cumulative_compile_timing_enable(void) { // Increment the flag to allow reentrant callers to `@time`. jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1); - return jl_atomic_load_relaxed(&jl_cumulative_compile_time); } -JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_after(void) +JL_DLLEXPORT void jl_cumulative_compile_timing_disable(void) { // Decrement the flag when done measuring, allowing other callers to continue measuring. jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1); +} + +JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns(void) +{ return jl_atomic_load_relaxed(&jl_cumulative_compile_time); } +JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void) +{ + return jl_atomic_load_relaxed(&jl_cumulative_recompile_time); +} + JL_DLLEXPORT void jl_get_fenv_consts(int *ret) { ret[0] = FE_INEXACT; diff --git a/src/julia_internal.h b/src/julia_internal.h index 451e07eb9e3df..02130ef963198 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -171,6 +171,7 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT // Global *atomic* integers controlling *process-wide* measurement of compilation time. extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled; extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time; +extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time; #define jl_return_address() ((uintptr_t)__builtin_return_address(0)) diff --git a/src/threading.c b/src/threading.c index 0314112aca425..b0757ad106dec 100644 --- a/src/threading.c +++ b/src/threading.c @@ -289,6 +289,7 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0; JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0; +JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0; // return calling thread's ID // Also update the suspended_threads list in signals-mach when changing the diff --git a/test/misc.jl b/test/misc.jl index 278e048155ae0..7291688de8f46 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -334,24 +334,23 @@ function timev_macro_scope() end @test timev_macro_scope() == 1 -before = Base.cumulative_compile_time_ns_before(); +before_comp, before_recomp = Base.cumulative_compile_time_ns() # no need to turn timing on, @time will do that # exercise concurrent calls to `@time` for reentrant compilation time measurement. -t1 = @async @time begin - sleep(2) - @eval module M ; f(x,y) = x+y ; end - @eval M.f(2,3) -end -t2 = @async begin - sleep(1) - @time 2 + 2 +@sync begin + t1 = @async @time begin + sleep(2) + @eval module M ; f(x,y) = x+y ; end + @eval M.f(2,3) + end + t2 = @async begin + sleep(1) + @time 2 + 2 + end end -after = Base.cumulative_compile_time_ns_after(); -@test after >= before; - -# wait for completion of these tasks before restoring stdout, to suppress their @time prints. -wait(t1); wait(t2) +after_comp, after_recomp = Base.cumulative_compile_time_ns() # no need to turn timing off, @time will do that +@test after_comp >= before_comp; end # redirect_stdout