From 6ed7412f0e9e8f5a6f41abd888df4ab14834d43a Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 3 Aug 2015 10:03:00 -0400 Subject: [PATCH] automatic recompilation of stale cache files (fixes #12259) --- base/docs/helpdb.jl | 15 +++++++ base/exports.jl | 1 + base/loading.jl | 107 +++++++++++++++++++++++++++++++++++++++++--- src/dump.c | 77 +++++++++++++++++++++++++------ src/julia.h | 1 + test/compile.jl | 28 +++++++----- 6 files changed, 200 insertions(+), 29 deletions(-) diff --git a/base/docs/helpdb.jl b/base/docs/helpdb.jl index c2f52d0fa9faf..1952da6fea3da 100644 --- a/base/docs/helpdb.jl +++ b/base/docs/helpdb.jl @@ -14564,6 +14564,21 @@ Evaluate the contents of a source file in the current context. During including, """ include +doc""" +```rst +:: + include_dependency(path::AbstractString) + +In a module, declare that the file specified by `path` (relative or +absolute) is a dependency for precompilation; that is, the +module will need to be recompiled if this file changes. + +This is only needed if your module depends on a file that is not +used via `include`. It has no effect outside of compilation. 
+``` +""" +include_dependency + doc""" ```rst :: diff --git a/base/exports.jl b/base/exports.jl index 81815305b651e..55d20fd323891 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -1104,6 +1104,7 @@ export evalfile, include, include_string, + include_dependency, # RTS internals finalizer, diff --git a/base/loading.jl b/base/loading.jl index 2e73d2e582898..1eaff03d785b8 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -58,9 +58,10 @@ function _include_from_serialized(content::Vector{UInt8}) end # returns an array of modules loaded, or nothing if failed -function _require_from_serialized(node::Int, path_to_try::ByteString, toplevel_load::Bool) +function _require_from_serialized(node::Int, mod::Symbol, path_to_try::ByteString, toplevel_load::Bool) restored = nothing if toplevel_load && myid() == 1 && nprocs() > 1 + recompile_stale(mod, path_to_try) # broadcast top-level import/using from node 1 (only) if node == myid() content = open(readbytes, path_to_try) @@ -78,6 +79,7 @@ function _require_from_serialized(node::Int, path_to_try::ByteString, toplevel_l end end elseif node == myid() + myid() == 1 && recompile_stale(mod, path_to_try) restored = ccall(:jl_restore_incremental, Any, (Ptr{Uint8},), path_to_try) else content = remotecall_fetch(node, open, readbytes, path_to_try) @@ -97,8 +99,9 @@ end function _require_from_serialized(node::Int, mod::Symbol, toplevel_load::Bool) paths = @fetchfrom node find_all_in_cache_path(mod) + sort!(paths, by=mtime, rev=true) # try newest cachefiles first for path_to_try in paths - restored = _require_from_serialized(node, path_to_try, toplevel_load) + restored = _require_from_serialized(node, mod, path_to_try, toplevel_load) if restored === nothing warn("deserialization checks failed while attempting to load cache from $path_to_try") else @@ -112,9 +115,30 @@ end const package_locks = Dict{Symbol,Condition}() const package_loaded = Set{Symbol}() +# used to optionally track dependencies when requiring a module: +const 
_require_dependencies = ByteString[] +const _track_dependencies = [false] +function _include_dependency(_path::AbstractString) + prev = source_path(nothing) + path = (prev === nothing) ? abspath(_path) : joinpath(dirname(prev),_path) + if _track_dependencies[1] + push!(_require_dependencies, abspath(path)) + end + return path, prev +end +function include_dependency(path::AbstractString) + _include_dependency(path) + return nothing +end + # require always works in Main scope and loads files from node 1 toplevel_load = true function require(mod::Symbol) + # dependency-tracking is only used for one top-level include(path), + # and is not applied recursively to imported modules: + old_track_dependencies = _track_dependencies[1] + _track_dependencies[1] = false + global toplevel_load loading = get(package_locks, mod, false) if loading !== false @@ -133,7 +157,7 @@ function require(mod::Symbol) if JLOptions().incremental != 0 # spawn off a new incremental compile task from node 1 for recursive `require` calls cachefile = compile(mod) - if nothing === _require_from_serialized(1, cachefile, last) + if nothing === _require_from_serialized(1, mod, cachefile, last) warn("require failed to create a precompiled cache file") end return @@ -154,6 +178,7 @@ function require(mod::Symbol) toplevel_load = last loading = pop!(package_locks, mod) notify(loading, all=true) + _track_dependencies[1] = old_track_dependencies end nothing end @@ -189,9 +214,8 @@ end macro __FILE__() source_path() end -function include_from_node1(path::AbstractString) - prev = source_path(nothing) - path = (prev === nothing) ? 
abspath(path) : joinpath(dirname(prev),path) +function include_from_node1(_path::AbstractString) + path, prev = _include_dependency(_path) tls = task_local_storage() tls[:SOURCE_PATH] = path local result @@ -248,6 +272,7 @@ function create_expr_cache(input::AbstractString, output::AbstractString) task_local_storage()[:SOURCE_PATH] = $(source) end) end + serialize(io, :(Base._track_dependencies[1] = true)) serialize(io, :(Base.include($(abspath(input))))) if source !== nothing serialize(io, quote @@ -272,3 +297,73 @@ function compile(name::ByteString) create_expr_cache(path, cachefile) return cachefile end + +module_uuid(m::Module) = ccall(:jl_module_uuid, UInt64, (Any,), m) + +isvalid_cache_header(f::IOStream) = 0 != ccall(:jl_deserialize_verify_header, Cint, (Ptr{Void},), f.ios) + +function cache_dependencies(f::IO) + modules = Tuple{Symbol,UInt64}[] + files = ByteString[] + while true + n = ntoh(read(f, Int32)) + n == 0 && break + push!(modules, + (symbol(readbytes(f, n)), # module symbol + ntoh(read(f, UInt64)))) # module UUID (timestamp) + end + read(f, Int64) # total bytes for file dependencies + while true + n = ntoh(read(f, Int32)) + n == 0 && break + push!(files, bytestring(readbytes(f, n))) + end + return modules, files +end + +function cache_dependencies(cachefile::AbstractString) + io = open(cachefile, "r") + try + !isvalid_cache_header(io) && throw(ArgumentError("invalid cache file $cachefile")) + return cache_dependencies(io) + finally + close(io) + end +end + +function stale_cachefile(cachefile::AbstractString, cachefile_mtime::Real=mtime(cachefile)) + io = open(cachefile, "r") + try + if !isvalid_cache_header(io) + return true # invalid cache file + end + modules, files = cache_dependencies(io) + for f in files + if mtime(f) > cachefile_mtime + return true + end + end + # files are not stale, so module list is valid and needs checking + for (M,uuid) in modules + if !isdefined(Main, M) + require(M) # should recursively recompile module M if stale + 
end + if module_uuid(Main.(M)) != uuid + return true + end + end + return false # fresh cachefile + finally + close(io) + end +end + +function recompile_stale(mod, cachefile) + cachestat = stat(cachefile) + if iswritable(cachestat) && stale_cachefile(cachefile, cachestat.mtime) + if isinteractive() || 0 != ccall(:jl_generating_output, Cint, ()) + info("Recompiling stale cache file $cachefile for module $mod.") + end + create_expr_cache(find_in_path(string(mod)), cachefile) + end +end diff --git a/src/dump.c b/src/dump.c index f1b8c160c80b2..cce19ba3f1dc4 100644 --- a/src/dump.c +++ b/src/dump.c @@ -132,46 +132,48 @@ static jl_array_t *datatype_list=NULL; // (only used in MODE_SYSTEM_IMAGE) #define write_int8(s, n) write_uint8(s, n) #define read_int8(s) read_uint8(s) +/* read and write in network (bigendian) order: */ + static void write_int32(ios_t *s, int32_t i) { - write_uint8(s, i & 0xff); - write_uint8(s, (i>> 8) & 0xff); - write_uint8(s, (i>>16) & 0xff); write_uint8(s, (i>>24) & 0xff); + write_uint8(s, (i>>16) & 0xff); + write_uint8(s, (i>> 8) & 0xff); + write_uint8(s, i & 0xff); } static int32_t read_int32(ios_t *s) { - int b0 = read_uint8(s); - int b1 = read_uint8(s); - int b2 = read_uint8(s); int b3 = read_uint8(s); + int b2 = read_uint8(s); + int b1 = read_uint8(s); + int b0 = read_uint8(s); return b0 | (b1<<8) | (b2<<16) | (b3<<24); } static void write_uint64(ios_t *s, uint64_t i) { - write_int32(s, i & 0xffffffff); write_int32(s, (i>>32) & 0xffffffff); + write_int32(s, i & 0xffffffff); } static uint64_t read_uint64(ios_t *s) { - uint64_t b0 = (uint32_t)read_int32(s); uint64_t b1 = (uint32_t)read_int32(s); + uint64_t b0 = (uint32_t)read_int32(s); return b0 | (b1<<32); } static void write_uint16(ios_t *s, uint16_t i) { - write_uint8(s, i & 0xff); write_uint8(s, (i>> 8) & 0xff); + write_uint8(s, i & 0xff); } static uint16_t read_uint16(ios_t *s) { - int b0 = read_uint8(s); int b1 = read_uint8(s); + int b0 = read_uint8(s); return b0 | (b1<<8); } @@ -950,7 
+952,7 @@ void jl_serialize_mod_list(ios_t *s) } // "magic" string and version header of .ji file -static const int JI_FORMAT_VERSION = 0; +static const int JI_FORMAT_VERSION = 1; static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature static const uint16_t BOM = 0xFEFF; // byte-order marker static void jl_serialize_header(ios_t *s) @@ -968,6 +970,52 @@ static void jl_serialize_header(ios_t *s) ios_write(s, commit, strlen(commit)+1); } +// serialize the global _require_dependencies array of pathnames that +// are include dependencies +void jl_serialize_dependency_list(ios_t *s) +{ + size_t total_size = 0; + static jl_array_t *deps = NULL; + if (!deps) + deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies")); + if (deps) { + // sort!(deps) so that we can easily eliminate duplicates + static jl_value_t *sort_func = NULL; + if (!sort_func) + sort_func = jl_get_global(jl_base_module, jl_symbol("sort!")); + jl_apply((jl_function_t*)sort_func, (jl_value_t**)&deps, 1); + + size_t l = jl_array_len(deps); + jl_value_t *prev = NULL; + for (size_t i=0; i < l; i++) { + jl_value_t *dep = jl_cellref(deps, i); + size_t slen = jl_string_len(dep); + if (!prev || memcmp(jl_string_data(dep), jl_string_data(prev), slen)) { + total_size += 4 + slen; + } + prev = dep; + } + total_size += 4; + } + // write the total size so that we can quickly seek past all of the + // dependencies if we don't need them + write_uint64(s, total_size); + if (deps) { + size_t l = jl_array_len(deps); + jl_value_t *prev = NULL; + for (size_t i=0; i < l; i++) { + jl_value_t *dep = jl_cellref(deps, i); + size_t slen = jl_string_len(dep); + if (!prev || memcmp(jl_string_data(dep), jl_string_data(prev), slen)) { + write_int32(s, slen); + ios_write(s, jl_string_data(dep), slen); + } + prev = dep; + } + write_int32(s, 0); // terminator, for ease of reading + } +} + // --- deserialize --- static jl_fptr_t jl_deserialize_fptr(ios_t *s) @@ -1553,7 +1601,7 @@ static 
int readstr_verify(ios_t *s, const char *str) return 1; } -static int jl_deserialize_header(ios_t *s) +DLLEXPORT int jl_deserialize_verify_header(ios_t *s) { uint16_t bom; return (readstr_verify(s, JI_MAGIC) && @@ -1936,6 +1984,7 @@ DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) serializer_worklist = worklist; jl_serialize_header(&f); jl_serialize_mod_list(&f); // this can throw, keep it early (before any actual initialization) + jl_serialize_dependency_list(&f); JL_SIGATOMIC_BEGIN(); arraylist_new(&reinit_list, 0); @@ -1976,11 +2025,13 @@ static jl_array_t *_jl_restore_incremental(ios_t *f) ios_close(f); return NULL; } - if (!jl_deserialize_header(f) || + if (!jl_deserialize_verify_header(f) || !jl_deserialize_verify_mod_list(f)) { ios_close(f); return NULL; } + size_t deplen = read_uint64(f); + ios_skip(f, deplen); // skip past the dependency list JL_SIGATOMIC_BEGIN(); arraylist_new(&backref_list, 4000); arraylist_push(&backref_list, jl_main_module); diff --git a/src/julia.h b/src/julia.h index f3788b20ec741..90867328d1562 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1130,6 +1130,7 @@ DLLEXPORT int julia_trampoline(int argc, const char *argv[], int (*pmain)(int ac DLLEXPORT void jl_atexit_hook(int status); DLLEXPORT void NORETURN jl_exit(int status); +DLLEXPORT int jl_deserialize_verify_header(ios_t *s); DLLEXPORT void jl_preload_sysimg_so(const char *fname); DLLEXPORT ios_t *jl_create_system_image(void); DLLEXPORT void jl_save_system_image(const char *fname); diff --git a/test/compile.jl b/test/compile.jl index 6d405d608b273..4f70e1877d1af 100644 --- a/test/compile.jl +++ b/test/compile.jl @@ -13,29 +13,37 @@ try print(f, """ module $Foo_module @doc "foo function" foo(x) = x + 1 + include_dependency("foo.jl") module Bar @doc "bar function" bar(x) = x + 2 + include_dependency("bar.jl") end end """) end - Base.compile(Foo_module) + cachefile = Base.compile(Foo_module) # use _require_from_serialized to ensure that the test fails if # 
the module doesn't load from the image: @test nothing !== Base._require_from_serialized(myid(), Foo_module, true) + + let Foo = eval(Main, Foo_module) + @test Foo.foo(17) == 18 + @test Foo.Bar.bar(17) == 19 + + # issue #12284: + @test stringmime("text/plain", Base.Docs.doc(Foo.foo)) == "foo function\n" + @test stringmime("text/plain", Base.Docs.doc(Foo.Bar.bar)) == "bar function\n" + + deps = Base.cache_dependencies(cachefile) + @test sort(deps[1]) == map(s -> (s, Base.module_uuid(eval(s))), + [:Base,:Core,:Main]) + @test sort(deps[2]) == [file,joinpath(dir,"bar.jl"),joinpath(dir,"foo.jl")] + end + finally splice!(Base.LOAD_CACHE_PATH, 1) splice!(LOAD_PATH, 1) rm(dir, recursive=true) end - -let Foo = eval(Main, Foo_module) - @test Foo.foo(17) == 18 - @test Foo.Bar.bar(17) == 19 - - # issue #12284: - @test stringmime("text/plain", Base.Docs.doc(Foo.foo)) == "foo function\n" - @test stringmime("text/plain", Base.Docs.doc(Foo.Bar.bar)) == "bar function\n" -end