From 34e6520f6a668297948658e28cc05f70ca6d4016 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 16 Oct 2025 06:30:35 +0000 Subject: [PATCH] loading: Try to clean up extension loading a bit The loading code is overall a bit messy. I'd like to add some new features to it, but I think before I can sensibly do that, some cleanup is required. This attempts to de-deduplicate the several places where extensions deal with weakdeps. The logic for extension loading weakdeps is present multiple times because we support loading extension: 1. By looking at the [extensions] section of the currently active Project.toml (for loading extensions of the currently active top level project) 2. By looking at the [extensions] sub-section of the top-level Manifest.toml (for any extension of a non-toplevel package); and 3. Like 1, except that rather than the env specifying a toplevel Project.toml, it specifies a directory and we search the second level for a Project.toml. This separation is a sensible resolution to the tradeoff of not wanting to scan all package's `Project.toml`s at load time while also not forcing a `resolve` for changes to the top-level Project.toml. Unfortunately, the loading behavior in case 1 currently differs from the loading behavior of case 2 and 3. In case 1, an extension is allowed to load any weakdep, in cases 2/3, it's only allowed to load weakdeps that are also extension triggers. I believe that the missing extra check for this restriction in case 1 is an oversight. So, this refactors all that by adding the check to case 1 and mostly deleting the code for case 3, by just putting the project-file-lookup code inline at the start of case 1. Additionally, I've factored out common queries into utility functions, even when the code itself is simple. The idea is to signal that these are the same data structure even if they appear in different places (Project vs Manifest). Lastly, this makes the following additional behavior changes that I don't expect to be observable: In case 2, when loading a non-extension-trigger, don't restart the search from scratch. The only case in which I could see this causing an observable behavior difference is if there are multiple versions of the package in the load path and for some reason the highest priority one doesn't have the extension (but then how did you load it in the first place??). However, even in that case, I think the new behavior is better, because it would get the dep of the version that *did* declare the extension. --- base/loading.jl | 251 ++++++++++++++++++++++++------------------------ test/loading.jl | 37 ++++--- 2 files changed, 149 insertions(+), 139 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 4ec5df16092d7..a3284c5dcc80e 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -348,11 +348,14 @@ function identify_package_env(where::PkgId, name::String) else for env in load_path() pkgid = manifest_deps_get(env, where, name) - pkgid === nothing && continue # not found--keep looking + # If we didn't find `where` at all, keep looking through the environment stack + pkgid === nothing && continue if pkgid.uuid !== nothing - pkg_env = pkgid, env # found in explicit environment--use it + pkg_env = pkgid, env end - break # found in implicit environment--return "not found" + # If we don't have pkgid.uuid, still break here - this is a sentinel that indicates + # that we've found `where` but it did not have the required dependency. We terminate the search. + break end if pkg_env === nothing && is_stdlib(where) # if not found it could be that manifests are from a different julia version/commit @@ -698,30 +701,31 @@ function project_deps_get(env::String, name::String)::Union{Nothing,PkgId} return nothing end +function package_get_here(project_file, name::String) + # if `where` matches the project, use [deps] section as manifest, and stop searching + pkg_uuid = explicit_project_deps_get(project_file, name) + pkg_uuid === nothing && return PkgId(name) + return PkgId(pkg_uuid, name) +end + function package_get(project_file, where::PkgId, name::String) proj = project_file_name_uuid(project_file, where.name) - if proj == where - # if `where` matches the project, use [deps] section as manifest, and stop searching - pkg_uuid = explicit_project_deps_get(project_file, name) - return PkgId(pkg_uuid, name) - end - return nothing + proj != where && return nothing + return package_get_here(project_file, name) end -function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothing,PkgId} - uuid = where.uuid - @assert uuid !== nothing - project_file = env_project_file(env) - if project_file isa String - pkg = package_get(project_file, where, name) - pkg === nothing || return pkg - d = parsed_toml(project_file) - exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} - if exts !== nothing - proj = project_file_name_uuid(project_file, where.name) - # Check if `where` is an extension of the project - if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::UUID, where.name) - # Extensions can load weak deps... +ext_may_load_weakdep(exts::String, name::String) = exts == name +ext_may_load_weakdep(exts::Vector{String}, name::String) = name in exts + +function package_extension_get(project_file, where::PkgId, name::String) + d = parsed_toml(project_file) + exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} + if exts !== nothing + proj = project_file_name_uuid(project_file, where.name) + # Check if `where` is an extension of the project + if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::UUID, where.name) + # Extensions can load weak deps if they are an extension trigger + if ext_may_load_weakdep(exts[where.name]::Union{String, Vector{String}}, name) weakdeps = get(d, "weakdeps", nothing)::Union{Dict{String, Any}, Nothing} if weakdeps !== nothing wuuid = get(weakdeps, name, nothing)::Union{String, Nothing} @@ -729,20 +733,45 @@ function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothi return PkgId(UUID(wuuid), name) end end - # ... and they can load same deps as the project itself - mby_uuid = explicit_project_deps_get(project_file, name) - mby_uuid === nothing || return PkgId(mby_uuid, name) end + # ... and they can load same deps as the project itself + return package_get_here(project_file, name) end - # look for manifest file and `where` stanza - return explicit_manifest_deps_get(project_file, where, name) - elseif project_file - # if env names a directory, search it - return implicit_manifest_deps_get(env, where, name) end return nothing end +function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothing,PkgId} + @assert where.uuid !== nothing + project_file = env_project_file(env) + implicit_manifest = !(project_file isa String) + if implicit_manifest + project_file || return nothing + project_file = implicit_manifest_project(env, where) + project_file === nothing && return nothing + end + + # 1. Are we loading into the top-level project itself? dependencies come from [deps] + # N.B.: Here "top-level" includes package loaded from an implicit manifest, which + # uses the same code path. + pkg = package_get(project_file, where, name) + pkg === nothing || return pkg + + # 2. Are we an extension of the top-level project? dependencies come from [weakdeps] and [deps] + pkg = package_extension_get(project_file, where, name) + pkg === nothing || return pkg + + if implicit_manifest + # With an implicit manifest, getting here means that our (implicit) environment + # *has* the package `where`. If we don't find it, it just means that `where` doesn't + # have `name` as a dependency - c.f. the analogous case in `explicit_manifest_deps_get`. + return PkgId(name) + end + + # All other cases, dependencies come from the (top-level) manifest + return explicit_manifest_deps_get(project_file, where, name) +end + function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String,Missing} project_file = env_project_file(env) if project_file isa String @@ -913,7 +942,7 @@ end # find project file root or deps `name => uuid` mapping # `ext` is the name of the extension if `name` is loaded from one # return `nothing` if `name` is not found -function explicit_project_deps_get(project_file::String, name::String, ext::Union{String,Nothing}=nothing)::Union{Nothing,UUID} +function explicit_project_deps_get(project_file::String, name::String)::Union{Nothing,UUID} d = parsed_toml(project_file) if get(d, "name", nothing)::Union{String, Nothing} === name root_uuid = dummy_uuid(project_file) @@ -925,19 +954,6 @@ function explicit_project_deps_get(project_file::String, name::String, ext::Unio uuid = get(deps, name, nothing)::Union{String, Nothing} uuid === nothing || return UUID(uuid) end - if ext !== nothing - extensions = get(d, "extensions", nothing) - extensions === nothing && return nothing - ext_data = get(extensions, ext, nothing) - ext_data === nothing && return nothing - if (ext_data isa String && name == ext_data) || (ext_data isa Vector{String} && name in ext_data) - weakdeps = get(d, "weakdeps", nothing)::Union{Dict{String, Any}, Nothing} - weakdeps === nothing && return nothing - wuuid = get(weakdeps, name, nothing)::Union{String, Nothing} - wuuid === nothing && return nothing - return UUID(wuuid) - end - end return nothing end @@ -964,14 +980,27 @@ function get_deps(raw_manifest::Dict) end end -# find `where` stanza and return the PkgId for `name` -# return `nothing` if it did not find `where` (indicating caller should continue searching) +function dep_stanza_get(stanza::Dict{String, Any}, name::String)::Union{Nothing, PkgId} + for (dep, uuid) in stanza + uuid::String + if dep === name + return PkgId(UUID(uuid), name) + end + end + return nothing +end + +function dep_stanza_get(stanza::Vector{String}, name::String)::Union{Nothing, PkgId} + name in stanza && return PkgId(name) + return nothing +end + +dep_stanza_get(stanza::Nothing, name::String) = nothing + function explicit_manifest_deps_get(project_file::String, where::PkgId, name::String)::Union{Nothing,PkgId} manifest_file = project_file_manifest_path(project_file) manifest_file === nothing && return nothing # manifest not found--keep searching LOAD_PATH d = get_deps(parsed_toml(manifest_file)) - found_where = false - found_name = false for (dep_name, entries) in d entries::Vector{Any} for entry in entries @@ -981,67 +1010,62 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St # deps is either a list of names (deps = ["DepA", "DepB"]) or # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."} deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} + local dep::Union{Nothing, PkgId} if UUID(uuid) === where.uuid - found_where = true - if deps isa Vector{String} - found_name = name in deps - found_name && @goto done - elseif deps isa Dict{String, Any} - deps = deps::Dict{String, Any} - for (dep, uuid) in deps - uuid::String - if dep === name - return PkgId(UUID(uuid), name) - end - end - end - else # Check for extensions + dep = dep_stanza_get(deps, name) + + # We found `where` in this environment, but it did not have a deps entry for + # `name`. This is likely because the dependency was modified without a corresponding + # change to dependency's Project or our Manifest. Return a sentinel here indicating + # that we know the package, but do not know its UUID. The caller will terminate the + # search and provide an appropriate error to the user. + dep === nothing && return PkgId(name) + else + # Check if we're trying to load into an extension of this package extensions = get(entry, "extensions", nothing) if extensions !== nothing if haskey(extensions, where.name) && where.uuid == uuid5(UUID(uuid), where.name) - found_where = true if name == dep_name + # Extension loads its base package return PkgId(UUID(uuid), name) end exts = extensions[where.name]::Union{String, Vector{String}} - weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} - if (exts isa String && name == exts) || (exts isa Vector{String} && name in exts) - for deps′ in [weakdeps, deps] - if deps′ !== nothing - if deps′ isa Vector{String} - found_name = name in deps′ - found_name && @goto done - elseif deps′ isa Dict{String, Any} - deps′ = deps′::Dict{String, Any} - for (dep, uuid) in deps′ - uuid::String - if dep === name - return PkgId(UUID(uuid), name) - end - end - end - end - end - end - # `name` is not an ext, do standard lookup as if this was the parent - return identify_package(PkgId(UUID(uuid), dep_name), name) + # Extensions are allowed to load: + # 1. Any ordinary dep of the parent package + # 2. Any weakdep of the parent package declared as an extension trigger + for deps′ in (ext_may_load_weakdep(exts, name) ? + (get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing}, deps) : + (deps,)) + dep = dep_stanza_get(deps′, name) + dep === nothing && continue + @goto have_dep + end + return PkgId(name) end end + continue end + + @label have_dep + dep.uuid !== nothing && return dep + + # We have the dep, but it did not specify a UUID. In this case, + # it must be that the name is unique in the manifest - so lookup + # the UUID at the lop level by name + name_deps = get(d, name, nothing)::Union{Nothing, Vector{Any}} + if name_deps === nothing || length(name_deps) != 1 + error("expected a single entry for $(repr(name)) in $(repr(project_file))") + end + entry = first(name_deps::Vector{Any})::Dict{String, Any} + uuid = get(entry, "uuid", nothing)::Union{String, Nothing} + uuid === nothing && return PkgId(name) + return PkgId(UUID(uuid), name) end end - @label done - found_where || return nothing - found_name || return PkgId(name) - # Only reach here if deps was not a dict which mean we have a unique name for the dep - name_deps = get(d, name, nothing)::Union{Nothing, Vector{Any}} - if name_deps === nothing || length(name_deps) != 1 - error("expected a single entry for $(repr(name)) in $(repr(project_file))") - end - entry = first(name_deps::Vector{Any})::Dict{String, Any} - uuid = get(entry, "uuid", nothing)::Union{String, Nothing} - uuid === nothing && return nothing - return PkgId(UUID(uuid), name) + + # We did not find `where` in this environment, either as a package or as an extension. + # The caller should continue searching the environment stack. + return nothing end # find `uuid` stanza, return the corresponding path @@ -1124,35 +1148,16 @@ function implicit_project_deps_get(dir::String, name::String)::Union{Nothing,Pkg return proj end -# look for an entry-point for `name`, check that UUID matches -# if there's a project file, look up `name` in its deps and return that -# otherwise return `nothing` to indicate the caller should keep searching -function implicit_manifest_deps_get(dir::String, where::PkgId, name::String)::Union{Nothing,PkgId} - @assert where.uuid !== nothing - project_file = entry_point_and_project_file(dir, where.name)[2] +function implicit_manifest_project(dir, pkg::PkgId)::Union{Nothing, String} + @assert pkg.uuid !== nothing + project_file = entry_point_and_project_file(dir, pkg.name)[2] if project_file === nothing # `where` could be an extension - project_file = implicit_env_project_file_extension(dir, where)[2] - project_file === nothing && return nothing - end - proj = project_file_name_uuid(project_file, where.name) - ext = nothing - if proj !== where - # `where` could be an extension in `proj` - d = parsed_toml(project_file) - exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} - if exts !== nothing && where.name in keys(exts) - if where.uuid !== uuid5(proj.uuid, where.name) - return nothing - end - ext = where.name - else - return nothing - end + return implicit_env_project_file_extension(dir, pkg)[2] end - # this is the correct project, so stop searching here - pkg_uuid = explicit_project_deps_get(project_file, name, ext) - return PkgId(pkg_uuid, name) + proj = project_file_name_uuid(project_file, pkg.name) + proj == pkg || return nothing + return project_file end # look for an entry-point for `pkg` and return its path if UUID matches diff --git a/test/loading.jl b/test/loading.jl index 56302f1c0444b..7828d664a3186 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -463,7 +463,7 @@ function make_env(flat, root, roots, graph, paths, dummies) end const depots = [mkdepottempdir() for _ = 1:3] -const envs = Dict{String,Any}() +const envs = Pair{String, Any}[] append!(empty!(DEPOT_PATH), depots) @@ -557,7 +557,7 @@ for (flat, root, roots, graph) in graphs end end - envs[dir] = make_env(flat, root, roots, graph, paths, dummies) + push!(envs, dir => make_env(flat, root, roots, graph, paths, dummies)) end # materialize dependency graphs as implicit environments (if possible) @@ -590,7 +590,7 @@ for (flat, root, roots, graph) in graphs end end - envs[dir] = make_env(flat, root, roots, graph, paths, dummies) + push!(envs, dir => make_env(flat, root, roots, graph, paths, dummies)) end ## use generated environments to test package loading ## @@ -612,10 +612,12 @@ function test_find( where.uuid === nothing && continue deps = get(graph, where, Dict(where.name => where)) for name in NAMES - id = identify_package(where, name) - @test id == get(deps, name, nothing) - path = id === nothing ? nothing : locate_package(id) - @test path == get(paths, id, nothing) + @testset let where=where, name=name + id = identify_package(where, name) + @test id == get(deps, name, nothing) + path = id === nothing ? nothing : locate_package(id) + @test path == get(paths, id, nothing) + end end end end @@ -628,14 +630,17 @@ end end @testset "find_package with two envs in load path" begin - for x = false:true, - (env1, (_, _, roots1, graph1, paths1)) in (x ? envs : rand(envs, 10)), - (env2, (_, _, roots2, graph2, paths2)) in (x ? rand(envs, 10) : envs) - push!(empty!(LOAD_PATH), env1, env2) - roots = merge(roots2, roots1) - graph = merge(graph2, graph1) - paths = merge(paths2, paths1) - test_find(roots, graph, paths) + for x = false:true, env1idx in (x ? (1:length(envs)) : rand(1:length(envs), 10)), + env2idx in (x ? rand(1:length(envs), 10) : (1:length(envs))) + @testset let env1idx=env1idx, env2idx=env2idx + (env1, (_, _, roots1, graph1, paths1)) = envs[env1idx] + (env2, (_, _, roots2, graph2, paths2)) = envs[env2idx] + push!(empty!(LOAD_PATH), env1, env2) + roots = merge(roots2, roots1) + graph = merge(graph2, graph1) + paths = merge(paths2, paths1) + test_find(roots, graph, paths) + end end end @@ -753,7 +758,7 @@ end ## cleanup after tests ## -for env in keys(envs) +for (env, _) in envs rm(env, force=true, recursive=true) end