From 70db9ab17c8ffa0d341f47a4bb0a32276f3e9c49 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Tue, 21 Oct 2025 10:07:22 -0700 Subject: [PATCH 1/5] Add jl_invoke_api_t enum and use it in staticdata.c Renumber jl_invoke_api_t --- src/aotcompile.cpp | 8 ++-- src/gf.c | 11 +----- src/julia_internal.h | 51 ++++++++++++++++++++++--- src/staticdata.c | 89 ++++++++++++++------------------------------ 4 files changed, 78 insertions(+), 81 deletions(-) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 1f1ed18e880a2..8b646ec11a9d6 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -910,16 +910,16 @@ void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvm uint32_t func_id = 0; uint32_t cfunc_id = 0; if (func == "jl_fptr_args") { - func_id = -1; + func_id = -JL_INVOKE_ARGS; } else if (func == "jl_fptr_sparam") { - func_id = -2; + func_id = -JL_INVOKE_SPARAM; } else if (func == "jl_f_opaque_closure_call") { - func_id = -4; + assert(false); // TODO: remove } else if (func == "jl_fptr_const_return") { - func_id = -5; + func_id = -JL_INVOKE_CONST; } else { //Safe b/c context is locked by params diff --git a/src/gf.c b/src/gf.c index 1d3a9636ddfa9..104b093c5d3af 100644 --- a/src/gf.c +++ b/src/gf.c @@ -3768,15 +3768,8 @@ JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst) jl_callptr_t f = jl_atomic_load_relaxed(&codeinst->invoke); if (f == NULL) return 0; - if (f == &jl_fptr_args) - return 1; - if (f == &jl_fptr_const_return) - return 2; - if (f == &jl_fptr_sparam) - return 3; - if (f == &jl_fptr_interpret_call) - return 4; - return -1; + jl_invoke_api_t t = jl_callptr_invoke_api(f); + return t == JL_INVOKE_SPECSIG ? 
-1 : (int32_t)t; } JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_tupletype_t *ti, jl_svec_t *env, jl_method_t *m, diff --git a/src/julia_internal.h b/src/julia_internal.h index d67a5dce810a2..55b1eae1b0107 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -187,6 +187,12 @@ extern uintptr_t __stack_chk_guard; extern JL_DLLEXPORT uintptr_t __stack_chk_guard; #endif +#if jl_has_builtin(__builtin_unreachable) || defined(_COMPILER_GCC_) || defined(_COMPILER_INTEL_) +# define jl_unreachable() __builtin_unreachable() +#else +# define jl_unreachable() ((void)jl_assume(0)) +#endif + // If this is detected in a backtrace of segfault, it means the functions // that use this value must be reworked into their async form with cb arg // provided and with JL_UV_LOCK used around the calls @@ -425,6 +431,45 @@ typedef struct _jl_abi_t { int is_opaque_closure; } jl_abi_t; +// The compiler uses the specific integer values returned by jl_invoke_api +typedef enum { + JL_INVOKE_ARGS = 1, // jl_fptr_args + JL_INVOKE_CONST = 2, // jl_fptr_const + JL_INVOKE_SPARAM = 3, // jl_fptr_sparam + JL_INVOKE_INTERPRETED = 4, // jl_fptr_interpret_call + JL_INVOKE_SPECSIG = 5, // jfptr_* wrapper +} jl_invoke_api_t; + +static inline int jl_jlcall_specptr_is_native(jl_invoke_api_t type) +{ + return type == JL_INVOKE_ARGS || type == JL_INVOKE_SPARAM || type == JL_INVOKE_SPECSIG; +} + +static inline jl_invoke_api_t jl_callptr_invoke_api(jl_callptr_t ptr) +{ + if (ptr == jl_fptr_args_addr) + return JL_INVOKE_ARGS; + else if (ptr == jl_fptr_const_return_addr) + return JL_INVOKE_CONST; + else if (ptr == jl_fptr_sparam_addr) + return JL_INVOKE_SPARAM; + else if (ptr == jl_fptr_interpret_call_addr) + return JL_INVOKE_INTERPRETED; + return JL_INVOKE_SPECSIG; +} + +static inline jl_callptr_t jl_invoke_api_callptr(jl_invoke_api_t type) +{ + switch (type) { + case JL_INVOKE_ARGS: return jl_fptr_args_addr; + case JL_INVOKE_CONST: return jl_fptr_const_return_addr; + case JL_INVOKE_SPARAM: 
return jl_fptr_sparam_addr; + case JL_INVOKE_INTERPRETED: return jl_fptr_interpret_call_addr; + case JL_INVOKE_SPECSIG: return NULL; + default: jl_unreachable(); + } +} + // useful constants extern JL_DLLEXPORT _Atomic(size_t) jl_world_counter; @@ -2021,12 +2066,6 @@ struct _jl_image_fptrs_t; JL_DLLEXPORT void jl_write_coverage_data(const char*); void jl_write_malloc_log(void); -#if jl_has_builtin(__builtin_unreachable) || defined(_COMPILER_GCC_) || defined(_COMPILER_INTEL_) -# define jl_unreachable() __builtin_unreachable() -#else -# define jl_unreachable() ((void)jl_assume(0)) -#endif - extern uv_mutex_t symtab_lock; jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT; diff --git a/src/staticdata.c b/src/staticdata.c index ec7f5f4267a55..89e2a8aa9cbd2 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -384,21 +384,11 @@ enum RefTags { #define SYS_EXTERNAL_LINK_UNIT sizeof(void*) -// calling conventions for internal entry points. -// this is used to set the method-instance->invoke field -typedef enum { - JL_API_NULL, - JL_API_BOXED, - JL_API_CONST, - JL_API_WITH_PARAMETERS, - JL_API_OC_CALL, - JL_API_INTERPRETED, - JL_API_BUILTIN, - JL_API_MAX -} jl_callingconv_t; - // Sub-divisions of some RefTags const uintptr_t BuiltinFunctionTag = ((uintptr_t)1 << (RELOC_TAG_OFFSET - 1)); +// Bit set on FunctionRef when invoke should be set to jl_fptr_args, and should +// not be zeroed when we want to disable native code. 
+const uintptr_t BuiltinInvokeTag = ((uintptr_t)1 << (RELOC_TAG_OFFSET - 2)); #if RELOC_TAG_OFFSET <= 32 @@ -1751,40 +1741,27 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED // preserve only JL_CI_FLAGS_NATIVE_CACHE_VALID bits jl_atomic_store_relaxed(&newci->flags, jl_atomic_load_relaxed(&newci->flags) & JL_CI_FLAGS_NATIVE_CACHE_VALID); jl_atomic_store_relaxed(&newci->specptr.fptr, NULL); - int8_t fptr_id = JL_API_NULL; + uintptr_t fptr_type = JL_INVOKE_SPECSIG; int8_t builtin_id = 0; if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) { - fptr_id = JL_API_CONST; + fptr_type = JL_INVOKE_CONST; } else { if (jl_is_method(jl_get_ci_mi(ci)->def.method)) { builtin_id = jl_fptr_id(jl_atomic_load_relaxed(&ci->specptr.fptr)); if (builtin_id) { // found in the table of builtins assert(builtin_id >= 2); - fptr_id = JL_API_BUILTIN; + fptr_type = (uintptr_t)JL_INVOKE_ARGS | BuiltinInvokeTag; } else { int32_t invokeptr_id = 0; int32_t specfptr_id = 0; jl_get_function_id(native_functions, ci, &invokeptr_id, &specfptr_id); // see if we generated code for it if (invokeptr_id) { - if (invokeptr_id == -1) { - fptr_id = JL_API_BOXED; - } - else if (invokeptr_id == -2) { - fptr_id = JL_API_WITH_PARAMETERS; - } - else if (invokeptr_id == -3) { - abort(); - } - else if (invokeptr_id == -4) { - fptr_id = JL_API_OC_CALL; - } - else if (invokeptr_id == -5) { - abort(); - } - else { - assert(invokeptr_id > 0); + if (invokeptr_id < 0) { + fptr_type = (jl_invoke_api_t)-invokeptr_id; + assert(fptr_type != JL_INVOKE_SPECSIG); + } else { ios_ensureroom(s->fptr_record, invokeptr_id * sizeof(void*)); ios_seek(s->fptr_record, (invokeptr_id - 1) * sizeof(void*)); write_reloc_t(s->fptr_record, (reloc_t)~reloc_offset); @@ -1808,10 +1785,10 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED } } jl_atomic_store_relaxed(&newci->invoke, NULL); // relocation offset - if (fptr_id != JL_API_NULL) { - assert(fptr_id < BuiltinFunctionTag && "too many 
functions to serialize"); + if (fptr_type != JL_INVOKE_SPECSIG) { + assert(fptr_type < BuiltinFunctionTag && "too many functions to serialize"); arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_code_instance_t, invoke))); // relocation location - arraylist_push(&s->relocs_list, (void*)(((uintptr_t)FunctionRef << RELOC_TAG_OFFSET) + fptr_id)); // relocation target + arraylist_push(&s->relocs_list, (void*)(((uintptr_t)FunctionRef << RELOC_TAG_OFFSET) + fptr_type)); // relocation target } if (builtin_id >= 2) { arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_code_instance_t, specptr.fptr))); // relocation location @@ -1960,7 +1937,8 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) assert(offset < jl_n_builtins && "unknown function pointer id"); } else { - assert(offset < JL_API_MAX && "unknown function pointer id"); + assert((offset & ~BuiltinInvokeTag) < JL_INVOKE_SPECSIG && + "unknown function pointer id"); } break; case SysimageLinkage: @@ -2009,36 +1987,23 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas // offset -= 256; assert(0 && "corrupt relocation item id"); jl_unreachable(); // terminate control flow if assertion is disabled. 
- case FunctionRef: + case FunctionRef: { if (offset & BuiltinFunctionTag) { offset &= ~BuiltinFunctionTag; assert(offset < jl_n_builtins && "unknown function pointer ID"); return (uintptr_t)jl_builtin_f_addrs[offset]; } - switch ((jl_callingconv_t)offset) { - case JL_API_BOXED: - if (s->image->fptrs.nptrs) - return (uintptr_t)jl_fptr_args; - return (uintptr_t)NULL; - case JL_API_WITH_PARAMETERS: - if (s->image->fptrs.nptrs) - return (uintptr_t)jl_fptr_sparam; - return (uintptr_t)NULL; - case JL_API_OC_CALL: - if (s->image->fptrs.nptrs) - return (uintptr_t)jl_f_opaque_closure_call; - return (uintptr_t)NULL; - case JL_API_CONST: - return (uintptr_t)jl_fptr_const_return; - case JL_API_INTERPRETED: - return (uintptr_t)jl_fptr_interpret_call; - case JL_API_BUILTIN: - return (uintptr_t)jl_fptr_args; - case JL_API_NULL: - case JL_API_MAX: - //default: - assert("corrupt relocation item id"); - } + jl_invoke_api_t type = (jl_invoke_api_t)(offset & ~BuiltinInvokeTag); + uintptr_t fptr = (uintptr_t)jl_invoke_api_callptr(type); + assert(fptr && "corrupt relocation item id"); + // If use_sysimage_native_code != yes, zero out the invoke pointer for + // CodeInstances with native code, but not if invoke is jl_fptr_args and + // the specptr is a builtin. 
+ if (s->image->fptrs.nptrs == 0 && jl_jlcall_specptr_is_native(type) && + !(offset & BuiltinInvokeTag)) + return 0; + return fptr; + } case SysimageLinkage: { #ifdef _P64 size_t depsidx = offset >> DEPS_IDX_OFFSET; From 341bf400f99ad22c982e3ce157b5ffb1267d1739 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Tue, 21 Oct 2025 14:57:02 -0700 Subject: [PATCH 2/5] Set JL_CI_FLAGS_SPECPTR_SPECIALIZED only on specsig in jl_update_all_fptrs --- src/staticdata.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index 89e2a8aa9cbd2..85e5b16355eb5 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2281,9 +2281,13 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image) break; } if (specfunc) { + uint8_t flags = jl_atomic_load_relaxed(&codeinst->flags); + flags |= JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR | JL_CI_FLAGS_FROM_IMAGE; + if (jl_callptr_invoke_api(jl_atomic_load_relaxed(&codeinst->invoke)) == + JL_INVOKE_SPECSIG) + flags |= JL_CI_FLAGS_SPECPTR_SPECIALIZED; jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr); - // TODO: set JL_CI_FLAGS_SPECPTR_SPECIALIZED only if confirmed to be true - jl_atomic_store_relaxed(&codeinst->flags, jl_atomic_load_relaxed(&codeinst->flags) | JL_CI_FLAGS_SPECPTR_SPECIALIZED | JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR | JL_CI_FLAGS_FROM_IMAGE); + jl_atomic_store_relaxed(&codeinst->flags, flags); } else { jl_atomic_store_relaxed(&codeinst->invoke, (jl_callptr_t)fptr); From 580d5e543217f6fdd27f274d279157a49714930d Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Mon, 3 Nov 2025 16:08:44 -0800 Subject: [PATCH 3/5] [JIT] Switch to post-compile linking Use JITLink everywhere Rename jlcall_type, add jl_funcs_invoke_ptr Move JLLinkingLayer into JuliaOJIT Use jl_invoke_api_t elsewhere Rename JL_INVOKE_JFPTR -> JL_INVOKE_SPECSIG Put all special symbol names in one place Add helper for specsig -> tojlinvoke (fptr1) and use it Fix invariants for code_outputs Document JIT 
invariants better; remove invalid assertions Replace workqueue, partially support OpaqueClosure Add JIT tests Stop using strings so much Don't create an LLVM::Linker unless necessary Generate trampolines in aot_link_output GCChecker annotations, misc changes Re-add emit_always_inline Get JLDebuginfoPlugin and eh_frame working again Re-add OpaqueClosure MethodInstance global root Fix GCChecker annotations Clean up TODOs Read dump compile Use multiple threads in the JIT Add PLT/GOT for external fns Name Julia PLT GOT entries Do emit_llvmcall_modules at the end Suppress clang-tidy, static analyzer warnings Keep temporary_roots alive during emit_always_inline Mark pkg PLT thunks noinline Don't attempt to emit inline codeinsts when IR is too large or missing Improve thunk generation on x86 Fix infinite loop in emit_always_inline if inlining not possible Use local names for global targets Fix jl_get_llvmf_defn_impl cfunction hacks --- src/aotcompile.cpp | 719 ++++++----------- src/ccall.cpp | 16 +- src/cgutils.cpp | 28 +- src/codegen.cpp | 693 +++++++--------- src/debug-registry.h | 11 +- src/debuginfo.cpp | 65 +- src/jitlayers.cpp | 1436 +++++++++++++++------------------ src/jitlayers.h | 402 ++++++--- src/julia_internal.h | 44 +- src/llvm-late-gc-lowering.cpp | 3 + test/choosetests.jl | 2 +- test/jit.jl | 53 ++ 12 files changed, 1628 insertions(+), 1844 deletions(-) create mode 100644 test/jit.jl diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 8b646ec11a9d6..875064af6370b 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -15,7 +15,9 @@ // analysis passes #include +#include #include +#include #include #include #include @@ -67,9 +69,8 @@ static void addComdat(GlobalValue *G, Triple &T) } } - typedef struct { - orc::ThreadSafeModule M; + std::unique_ptr out; SmallVector jl_sysimg_fvars; SmallVector jl_sysimg_gvars; std::map> jl_fvar_map; @@ -164,7 +165,7 @@ LLVMOrcThreadSafeModuleRef jl_get_llvm_module_impl(void *native_code) { jl_native_code_desc_t 
*data = (jl_native_code_desc_t*)native_code; if (data) - return wrap(&data->M); + return wrap(&data->out->get_tsm()); else return NULL; } @@ -348,11 +349,7 @@ class egal_set { }; } using ::egal_set; -struct jl_compiled_function_t { - orc::ThreadSafeModule TSM; - jl_llvm_functions_t decls; -}; -typedef DenseMap jl_compiled_functions_t; +typedef DenseMap jl_compiled_functions_t; static void record_method_roots(egal_set &method_roots, jl_method_instance_t *mi) { @@ -373,12 +370,12 @@ static void record_method_roots(egal_set &method_roots, jl_method_instance_t *mi JL_UNLOCK(&m->writelock); } -static void aot_optimize_roots(jl_codegen_params_t ¶ms, egal_set &method_roots, jl_compiled_functions_t &compiled_functions) +static void aot_optimize_roots(jl_codegen_output_t &out, egal_set &method_roots) { - for (size_t i = 0; i < jl_array_dim0(params.temporary_roots); i++) { - jl_value_t *val = jl_array_ptr_ref(params.temporary_roots, i); - auto ref = params.global_targets.find((void*)val); - if (ref == params.global_targets.end()) + for (size_t i = 0; i < jl_array_dim0(out.temporary_roots); i++) { + jl_value_t *val = jl_array_ptr_ref(out.temporary_roots, i); + auto ref = out.global_targets.find((void*)val); + if (ref == out.global_targets.end()) continue; auto get_global_root = [val, &method_roots]() { if (jl_is_globally_rooted(val)) @@ -391,143 +388,18 @@ static void aot_optimize_roots(jl_codegen_params_t ¶ms, egal_set &method_roo jl_value_t *mval = get_global_root(); if (mval != val) { GlobalVariable *GV = ref->second; - params.global_targets.erase(ref); - auto mref = params.global_targets.find((void*)mval); - if (mref != params.global_targets.end()) { - // replace ref with mref in all Modules - std::string OldName(GV->getName()); - StringRef NewName(mref->second->getName()); - for (auto &def : compiled_functions) { - orc::ThreadSafeModule &TSM = def.second.TSM; - Module &M = *TSM.getModuleUnlocked(); - if (GlobalValue *GV2 = M.getNamedValue(OldName)) { - if (GV2 == GV) - 
GV = nullptr; - // either replace or rename the old value to use the other equivalent name - if (GlobalValue *GV3 = M.getNamedValue(NewName)) { - GV2->replaceAllUsesWith(GV3); - GV2->eraseFromParent(); - } - else { - GV2->setName(NewName); - } - } - } - assert(GV == nullptr); - } - else { - params.global_targets[(void*)mval] = GV; - } - } - } -} - -static void resolve_workqueue(jl_codegen_params_t ¶ms, egal_set &method_roots, jl_compiled_functions_t &compiled_functions) -{ - jl_workqueue_t workqueue; - std::swap(params.workqueue, workqueue); - jl_code_instance_t *codeinst = NULL; - JL_GC_PUSH1(&codeinst); - assert(!params.cache); - while (!workqueue.empty()) { - auto it = workqueue.pop_back_val(); - codeinst = it.first; - auto &proto = it.second; - // try to emit code for this item from the workqueue - StringRef invokeName = ""; - StringRef preal_decl = ""; - bool preal_specsig = false; - { - auto it = compiled_functions.find(codeinst); - if (it != compiled_functions.end()) { - auto &decls = it->second.decls; - invokeName = decls.functionObject; - if (decls.functionObject == "jl_fptr_args") { - preal_decl = decls.specFunctionObject; - } - else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call" && decls.functionObject != "jl_fptr_const_return") { - preal_decl = decls.specFunctionObject; - preal_specsig = true; - } - } - } - // patch up the prototype we emitted earlier - Module *mod = proto.decl->getParent(); - assert(proto.decl->isDeclaration()); - Function *pinvoke = nullptr; - if (preal_decl.empty() && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) { - std::string gf_thunk_name = emit_abi_constreturn(mod, params, proto.specsig, codeinst); - preal_specsig = proto.specsig; - if (invokeName.empty()) - invokeName = "jl_fptr_const_return"; - preal_decl = mod->getNamedValue(gf_thunk_name)->getName(); - } - if (preal_decl.empty()) { - pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); - 
if (!proto.specsig) { - proto.decl->replaceAllUsesWith(pinvoke); - proto.decl->eraseFromParent(); - proto.decl = pinvoke; - } - } - if (proto.specsig && !preal_specsig) { - // get or build an fptr1 that can invoke codeinst - if (pinvoke == nullptr) - pinvoke = get_or_emit_fptr1(preal_decl, mod); - // emit specsig-to-(jl)invoke conversion - proto.decl->setLinkage(GlobalVariable::InternalLinkage); - //protodecl->setAlwaysInline(); - jl_init_function(proto.decl, params.TargetTriple); - jl_method_instance_t *mi = jl_get_ci_mi(codeinst); - size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed - bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; - // TODO: maybe this can be cached in codeinst->specfptr? - emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke); - preal_decl = ""; // no need to fixup the name - } - if (!preal_decl.empty()) { - // merge and/or rename this prototype to the real function - if (Function *specfun = cast_or_null(mod->getNamedValue(preal_decl))) { - if (proto.decl != specfun) { - proto.decl->replaceAllUsesWith(specfun); - proto.decl->eraseFromParent(); - proto.decl = specfun; - } - } - else { - proto.decl->setName(preal_decl); - } - } - if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too - assert(proto.specsig); - StringRef ocinvokeDecl = invokeName; - // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too - // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure - if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") - ocinvokeDecl = pinvoke->getName(); - assert(!ocinvokeDecl.empty()); - assert(ocinvokeDecl != "jl_fptr_args"); - 
assert(ocinvokeDecl != "jl_fptr_const_return"); - assert(ocinvokeDecl != "jl_fptr_sparam"); - // merge and/or rename this prototype to the real function - if (Function *specfun = cast_or_null(mod->getNamedValue(ocinvokeDecl))) { - if (proto.oc != specfun) { - proto.oc->replaceAllUsesWith(specfun); - proto.oc->eraseFromParent(); - proto.oc = specfun; - } - } - else { - proto.oc->setName(ocinvokeDecl); + out.global_targets.erase(ref); + auto mref = out.global_targets.find((void*)mval); + if (mref == out.global_targets.end()) { + out.global_targets[(void *)mval] = GV; + } else { + GV->replaceAllUsesWith(mref->second); + GV->eraseFromParent(); } } - workqueue.append(params.workqueue); - params.workqueue.clear(); } - JL_GC_POP(); } - /// Link the function in the source module into the destination module if /// needed, setting up mapping information. /// Similar to orc::cloneFunctionDecl, but more complete for greater correctness @@ -550,41 +422,35 @@ Function *IRLinker_copyFunctionProto(Module *DstM, Function *SF) { return F; } -static Function *aot_abi_converter(jl_codegen_params_t ¶ms, Module *M, jl_abi_t from_abi, jl_code_instance_t *codeinst, Module *defM, StringRef func, StringRef specfunc, bool target_specsig) +static Function *aot_abi_converter(jl_codegen_output_t &out, jl_abi_t from_abi, jl_code_instance_t *codeinst, Function *func, Function *specfunc, bool target_specsig) { std::string gf_thunk_name; - if (!specfunc.empty()) { - Value *llvmtarget = IRLinker_copyFunctionProto(M, defM->getFunction(specfunc)); - gf_thunk_name = emit_abi_converter(M, params, from_abi, codeinst, llvmtarget, target_specsig); - } - else { - Value *llvmtarget = func.empty() ? 
nullptr : IRLinker_copyFunctionProto(M, defM->getFunction(func)); - gf_thunk_name = emit_abi_dispatcher(M, params, from_abi, codeinst, llvmtarget); - } - auto F = M->getFunction(gf_thunk_name); + if (specfunc) + gf_thunk_name = emit_abi_converter(out, from_abi, codeinst, specfunc, target_specsig); + else + gf_thunk_name = emit_abi_dispatcher(out, from_abi, codeinst, func); + auto F = out.get_module().getFunction(gf_thunk_name); assert(F); return F; } -static void generate_cfunc_thunks(jl_codegen_params_t ¶ms, jl_compiled_functions_t &compiled_functions) +static void generate_cfunc_thunks(jl_codegen_output_t &out) { DenseMap compiled_mi; - for (auto &def : compiled_functions) { - jl_code_instance_t *this_code = def.first; - jl_method_instance_t *mi = jl_get_ci_mi(this_code); - if (this_code->owner == jl_nothing && jl_atomic_load_relaxed(&this_code->max_world) == ~(size_t)0 && this_code->def == (jl_value_t*)mi) - compiled_mi[mi] = this_code; + for (auto &[ci, _] : out.ci_funcs) { + jl_method_instance_t *mi = jl_get_ci_mi(ci); + if (ci->owner == jl_nothing && jl_atomic_load_relaxed(&ci->max_world) == ~(size_t)0 && ci->def == (jl_value_t*)mi) + compiled_mi[mi] = ci; } size_t latestworld = jl_atomic_load_acquire(&jl_world_counter); - for (cfunc_decl_t &cfunc : params.cfuncs) { - Module *M = cfunc.cfuncdata->getParent(); + for (cfunc_decl_t &cfunc : out.cfuncs) { jl_value_t *sigt = cfunc.abi.sigt; JL_GC_PROMISE_ROOTED(sigt); jl_value_t *declrt = cfunc.abi.rt; JL_GC_PROMISE_ROOTED(declrt); - Function *unspec = aot_abi_converter(params, M, cfunc.abi, nullptr, nullptr, "", "", false); + Function *unspec = aot_abi_converter(out, cfunc.abi, nullptr, nullptr, nullptr, false); jl_code_instance_t *codeinst = nullptr; - auto assign_fptr = [¶ms, &cfunc, &codeinst, &unspec](Function *f) { + auto assign_fptr = [&out, &cfunc, &codeinst, &unspec](Function *f) { ConstantArray *init = cast(cfunc.cfuncdata->getInitializer()); SmallVector initvals; for (unsigned i = 0; i < 
init->getNumOperands(); ++i) @@ -593,7 +459,7 @@ static void generate_cfunc_thunks(jl_codegen_params_t ¶ms, jl_compiled_funct assert(initvals[0]->isNullValue()); assert(initvals[2]->isNullValue()); if (codeinst) { - Constant *llvmcodeinst = literal_pointer_val_slot(params, f->getParent(), (jl_value_t*)codeinst); + Constant *llvmcodeinst = literal_pointer_val_slot(out, (jl_value_t*)codeinst); initvals[2] = llvmcodeinst; // plast_codeinst } assert(initvals[4]->isNullValue()); @@ -601,19 +467,14 @@ static void generate_cfunc_thunks(jl_codegen_params_t ¶ms, jl_compiled_funct initvals[0] = f; cfunc.cfuncdata->setInitializer(ConstantArray::get(init->getType(), initvals)); }; - Module *defM = nullptr; - StringRef func; jl_method_instance_t *mi = (jl_method_instance_t*)jl_get_specialization1((jl_tupletype_t*)sigt, latestworld, 0); + Function *func = nullptr; if ((jl_value_t*)mi != jl_nothing) { auto it = compiled_mi.find(mi); if (it != compiled_mi.end()) { codeinst = it->second; JL_GC_PROMISE_ROOTED(codeinst); - auto defs = compiled_functions.find(codeinst); - defM = defs->second.TSM.getModuleUnlocked(); - const jl_llvm_functions_t &decls = defs->second.decls; - func = decls.functionObject; - StringRef specfunc = decls.specFunctionObject; + const auto &decls = out.ci_funcs.find(codeinst)->second; jl_value_t *astrt = codeinst->rettype; if (astrt != (jl_value_t*)jl_bottom_type && jl_type_intersection(astrt, declrt) == jl_bottom_type) { @@ -622,55 +483,41 @@ static void generate_cfunc_thunks(jl_codegen_params_t ¶ms, jl_compiled_funct // even though we're likely to encounter memory errors in that case jl_printf(JL_STDERR, "WARNING: cfunction: return type of %s does not match\n", name_from_method_instance(mi)); } - if (func == "jl_fptr_const_return") { - std::string gf_thunk_name = emit_abi_constreturn(M, params, cfunc.abi, codeinst->rettype_const); - auto F = M->getFunction(gf_thunk_name); + if (decls.invoke_api == JL_INVOKE_CONST) { + std::string gf_thunk_name = 
emit_abi_constreturn(out, cfunc.abi, codeinst->rettype_const); + auto F = out.get_module().getFunction(gf_thunk_name); assert(F); assign_fptr(F); continue; } - else if (func == "jl_fptr_args") { - assert(!specfunc.empty()); + else if (decls.invoke_api == JL_INVOKE_ARGS) { + assert(decls.specptr); if (!cfunc.abi.specsig && jl_subtype(astrt, declrt)) { - assign_fptr(IRLinker_copyFunctionProto(M, defM->getFunction(specfunc))); + assign_fptr(decls.specptr); continue; } - assign_fptr(aot_abi_converter(params, M, cfunc.abi, codeinst, defM, func, specfunc, false)); + assign_fptr(aot_abi_converter(out, cfunc.abi, codeinst, nullptr, decls.specptr, false)); continue; } - else if (func == "jl_fptr_sparam" || func == "jl_f_opaque_closure_call") { - func = ""; // use jl_invoke instead for these, since we don't declare these prototypes + else if (decls.invoke_api == JL_INVOKE_SPARAM) { + func = nullptr; // use jl_invoke instead for these, since we don't declare these prototypes } else { - assert(!specfunc.empty()); + assert(decls.specptr); if (jl_egal(mi->specTypes, sigt) && jl_egal(declrt, astrt)) { - assign_fptr(IRLinker_copyFunctionProto(M, defM->getFunction(specfunc))); + assign_fptr(decls.specptr); continue; } - assign_fptr(aot_abi_converter(params, M, cfunc.abi, codeinst, defM, func, specfunc, true)); + assign_fptr(aot_abi_converter(out, cfunc.abi, codeinst, func, decls.specptr, true)); continue; } } } - Function *f = codeinst ? aot_abi_converter(params, M, cfunc.abi, codeinst, defM, func, "", false) : unspec; + Function *f = codeinst ? 
aot_abi_converter(out, cfunc.abi, codeinst, func, nullptr, false) : unspec; assign_fptr(f); } } -// destructively move the contents of src into dest -// this assumes that the targets of the two modules are the same -// including the DataLayout and ModuleFlags (for example) -// and that there is no module-level assembly -// Comdat is also removed, since this needs to be re-added later -static void jl_merge_module(Linker &L, orc::ThreadSafeModule srcTSM) JL_NOTSAFEPOINT -{ - srcTSM.consumingModuleDo([&L](std::unique_ptr src) JL_NOTSAFEPOINT { - bool error = L.linkInModule(std::move(src)); - assert(!error && "linking llvmcall modules failed"); - (void)error; - }); -} - static bool canPartition(const Function &F) { return !F.hasFnAttribute(Attribute::AlwaysInline) && @@ -725,12 +572,12 @@ void *jl_create_native_impl(LLVMOrcThreadSafeModuleRef llvmmod, int trim, int ex ct->world_age = last_age; jl_value_t *codeinfos = fargs[0]; JL_TYPECHK(jl_create_native, array_any, codeinfos); - void *data = jl_emit_native((jl_array_t*)codeinfos, llvmmod, NULL, external_linkage ? 1 : 0); + auto data = (jl_native_code_desc_t *)jl_emit_native((jl_array_t*)codeinfos, llvmmod, NULL, external_linkage ? 1 : 0); JL_GC_POP(); // move everything inside, now that we've merged everything // (before adding the exported headers) - ((jl_native_code_desc_t*)data)->M.withModuleDo([&](Module &M) { + data->out->get_tsm().withModuleDo([&](Module &M) { auto TT = Triple(M.getTargetTriple()); Function *juliapersonality_func = nullptr; if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { @@ -766,6 +613,105 @@ void *jl_create_native_impl(LLVMOrcThreadSafeModuleRef llvmmod, int trim, int ex return data; } +// Emit a thunk that call a compiled CodeInstance from an external image. 
We +// want code that is similar to a PLT thunk (no frame pointer setup, destination +// function pointer loaded in a scratch register that is not used for +// arguments), so we call the target with `musttail` and use the "thunk" +// attribute: +// +// > If the musttail call appears in a function with the "thunk" attribute +// > and the caller and callee both have varargs, then any unprototyped +// > arguments in register or memory are forwarded to the callee. Similarly, +// > the return value of the callee is returned to the caller’s caller, even +// > if a void return type is in use +static Function *emit_pkg_plt_thunk(jl_codegen_output_t &out, jl_code_instance_t *ci, + Function *CallSite) +{ + auto &M = out.get_module(); + auto &Ctx = out.get_context(); + Type *PtrTy = PointerType::getUnqual(Ctx); + StringRef Name = name_from_method_instance(jl_get_ci_mi(ci)); + + // aarch64 generates poor code when we use the guaranteed varargs thunk + // trick, so just copy all the arguments like normal. + bool UseParams = out.TargetTriple.getArch() == Triple::aarch64; + + auto GV = new GlobalVariable(M, PtrTy, false, GlobalVariable::ExternalLinkage, nullptr, + out.make_name(JL_SYM_JLPLT_GOT, Name)); + auto FTy = UseParams ? 
CallSite->getFunctionType() : + FunctionType::get(Type::getVoidTy(Ctx), true); + auto F = Function::Create(FTy, Function::PrivateLinkage, 0, + out.make_name(JL_SYM_JLPLT, Name), &M); + F->setCallingConv(CallSite->getCallingConv()); + AttrBuilder Attrs{Ctx}; + if (UseParams) + F->setAttributes(CallSite->getAttributes()); + Attrs.addAttribute(Attribute::NoInline); + Attrs.addAttribute("frame-pointer", "none"); + Attrs.addAttribute("nounwind"); + Attrs.addAttribute("thunk"); + F->addFnAttrs(Attrs); + + SmallVector Args; + if (UseParams) + for (auto &A : F->args()) + Args.push_back(&A); + + IRBuilder<> B(Ctx); + auto BB = BasicBlock::Create(Ctx, "top", F); + B.SetInsertPoint(BB); + auto FPtr = B.CreateAlignedLoad(PtrTy, GV, out.DL.getPointerABIAlignment(0)); + + auto Call = B.CreateCall(FTy, FPtr, Args); + Call->setTailCallKind(CallInst::TailCallKind::TCK_MustTail); + Call->setCallingConv(F->getCallingConv()); + if (UseParams) + Call->setAttributes(CallSite->getAttributes()); + if (UseParams && !FTy->getReturnType()->isVoidTy()) + B.CreateRet(Call); + else + B.CreateRetVoid(); + + out.external_fns.emplace_back(ci, GV); + return F; +} + +// Static version of JuliaOJIT::linkOutput +static void aot_link_output(jl_codegen_output_t &out) +{ + for (auto &[call, target] : out.call_targets) { + auto [ci, api] = call; + JL_GC_PROMISE_ROOTED(ci); + if (!target.decl->isDeclaration()) + continue; + + auto it = out.ci_funcs.find(ci); + jl_llvm_functions_t funcs; + if (it != out.ci_funcs.end()) { + funcs = it->second; + } + else if (out.external_linkage && api == JL_INVOKE_SPECSIG && + (jl_atomic_load_relaxed(&ci->flags) & JL_CI_FLAGS_FROM_IMAGE)) { + Function *f = emit_pkg_plt_thunk(out, ci, target.decl); + funcs = {JL_INVOKE_SPECSIG, nullptr, f}; + } + else { + Function *f = emit_tojlinvoke(ci, StringRef(), out); + f->setLinkage(GlobalValue::InternalLinkage); + funcs = {JL_INVOKE_ARGS, nullptr, f}; + } + + if (funcs.invoke_api != api) { + assert(api == JL_INVOKE_SPECSIG); // Only 
possibility right now + Function *f = emit_specsig_to_fptr1(out, ci, funcs.specptr); + funcs.invoke_api = JL_INVOKE_SPECSIG; + funcs.specptr = f; + } + + target.decl->replaceAllUsesWith(funcs.specptr); + target.decl->eraseFromParent(); + } +} // also be used be extern consumers like GPUCompiler.jl to obtain a module containing // all reachable & inferrrable functions. @@ -778,30 +724,26 @@ void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvm if (cgparams == NULL) cgparams = &jl_default_cgparams; jl_native_code_desc_t *data = new jl_native_code_desc_t; - orc::ThreadSafeContext ctx; - orc::ThreadSafeModule backing; - if (!llvmmod) { - ctx = jl_ExecutionEngine->makeContext(); - backing = jl_create_ts_module("text", ctx, jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); + std::optional lock; + if (llvmmod) { + data->out = std::make_unique(*unwrap(llvmmod)); + } + else { + const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); + const Triple &triple = jl_ExecutionEngine->getTargetTriple(); + data->out = std::make_unique("text", DL, triple); + data->out->get_context().setDiscardValueNames(true); } - orc::ThreadSafeModule &clone = llvmmod ? 
*unwrap(llvmmod) : backing; - auto ctxt = clone.getContext(); + auto &out = *data->out; // compile all methods for the current world and type-inference world - auto target_info = clone.withModuleDo([&](Module &M) { - return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); - }); egal_set method_roots; - jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second)); - if (!llvmmod) - params.getContext().setDiscardValueNames(true); - params.params = cgparams; - assert(params.imaging_mode); // `_imaging_mode` controls if broken features like code-coverage are disabled - params.external_linkage = external_linkage; - params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); - bool safepoint_on_entry = params.safepoint_on_entry; - JL_GC_PUSH3(¶ms.temporary_roots, &method_roots.list, &method_roots.keyset); - jl_compiled_functions_t compiled_functions; + out.params = cgparams; + assert(out.imaging_mode); // `_imaging_mode` controls if broken features like code-coverage are disabled + out.external_linkage = external_linkage; + out.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); + bool safepoint_on_entry = out.safepoint_on_entry; + JL_GC_PUSH3(&out.temporary_roots, &method_roots.list, &method_roots.keyset); size_t i, l; for (i = 0, l = jl_array_nrows(codeinfos); i < l; i++) { // each item in this list is either a CodeInstance followed by a CodeInfo indicating something @@ -810,24 +752,27 @@ void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvm if (jl_is_code_instance(item)) { // now add it to our compilation results jl_code_instance_t *codeinst = (jl_code_instance_t*)item; + + // TODO: check + if (external_linkage && + (jl_atomic_load_relaxed(&codeinst->flags) & JL_CI_FLAGS_FROM_IMAGE)) { + ++i; + continue; + } + jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i); assert(jl_is_code_info(src)); - if (compiled_functions.count(codeinst)) - continue; // skip any 
duplicates that accidentally made there way in here (or make this an error?) if (jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) - params.safepoint_on_entry = false; // ensure we don't block ExpandAtomicModifyPass from inlining this code if applicable - orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), - params.tsctx, clone.getModuleUnlocked()->getDataLayout(), - Triple(clone.getModuleUnlocked()->getTargetTriple())); - jl_llvm_functions_t decls; - if (!(params.params->force_emit_all) && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) - decls.functionObject = "jl_fptr_const_return"; + out.safepoint_on_entry = false; // ensure we don't block ExpandAtomicModifyPass from inlining this code if applicable + if (out.ci_funcs.contains(codeinst)) + continue; // TODO: make this an error + if (!(out.params->force_emit_all) && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) + out.ci_funcs[codeinst] = {JL_INVOKE_CONST}; else - decls = jl_emit_codeinst(result_m, codeinst, src, params); - params.safepoint_on_entry = safepoint_on_entry; + jl_emit_codeinst(out, codeinst, src); + out.safepoint_on_entry = safepoint_on_entry; + JL_GC_PROMISE_ROOTED(codeinst); record_method_roots(method_roots, jl_get_ci_mi(codeinst)); - if (result_m) - compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; } else { assert(jl_is_simplevector(item)); @@ -835,191 +780,61 @@ void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvm jl_value_t *sig = jl_svecref(item, 1); jl_value_t *nameval = jl_svec_len(item) == 2 ? 
jl_nothing : jl_svecref(item, 2); assert(jl_is_type(rt) && jl_is_type(sig)); - jl_generate_ccallable(clone.getModuleUnlocked(), nameval, rt, sig, params); + jl_generate_ccallable(out, nameval, rt, sig); } } + + emit_always_inline(out); + emit_llvmcall_modules(out); // finally, make sure all referenced methods get fixed up, particularly if the user declined to compile them - resolve_workqueue(params, method_roots, compiled_functions); + aot_link_output(out); // including generating cfunction thunks - generate_cfunc_thunks(params, compiled_functions); - aot_optimize_roots(params, method_roots, compiled_functions); - params.temporary_roots = nullptr; - params.temporary_roots_set.clear(); + generate_cfunc_thunks(out); + aot_optimize_roots(out, method_roots); + out.temporary_roots = nullptr; + out.temporary_roots_set.clear(); JL_GC_POP(); - // process the globals array, before jl_merge_module destroys them - SmallVector gvars(params.global_targets.size()); - data->jl_value_to_llvm.resize(params.global_targets.size()); - StringSet<> gvars_names; - DenseSet gvars_set; - - size_t idx = 0; - for (auto &global : params.global_targets) { - gvars[idx] = global.second->getName().str(); - assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!"); - assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!"); - data->jl_value_to_llvm[idx] = global.first; - idx++; - } - CreateNativeMethods += compiled_functions.size(); - - size_t offset = gvars.size(); - data->jl_external_to_llvm.resize(params.external_fns.size()); - - for (auto &extern_fn : params.external_fns) { - jl_code_instance_t *this_code = std::get<0>(extern_fn.first); - bool specsig = std::get<1>(extern_fn.first); - assert(specsig && "Error external_fns doesn't handle non-specsig yet"); - (void) specsig; - GlobalVariable *F = extern_fn.second; - size_t idx = gvars.size() - offset; - assert(idx >= 0); - assert(idx < data->jl_external_to_llvm.size()); - 
data->jl_external_to_llvm[idx] = this_code; - assert(gvars_set.insert(F).second && "Duplicate gvar in params!"); - assert(gvars_names.insert(F->getName()).second && "Duplicate gvar name in params!"); - gvars.push_back(std::string(F->getName())); - } - - // clones the contents of the module `m` to the shadow_output collector - // while examining and recording what kind of function pointer we have - { - Linker L(*clone.getModuleUnlocked()); - for (auto &def : compiled_functions) { - jl_code_instance_t *this_code = def.first; - JL_GC_PROMISE_ROOTED(this_code); - jl_llvm_functions_t &decls = def.second.decls; - StringRef func = decls.functionObject; - StringRef cfunc = decls.specFunctionObject; - orc::ThreadSafeModule &M = def.second.TSM; - if (external_linkage) { - uint8_t specsigflags; - jl_callptr_t invoke; - void *fptr; - jl_read_codeinst_invoke(this_code, &specsigflags, &invoke, &fptr, 0); - if (invoke != NULL && (specsigflags & JL_CI_FLAGS_FROM_IMAGE)) { - // this codeinst is already available externally: keep it only if canPartition demands it for local use - // TODO: for performance, avoid generating the src code when we know it would reach here anyways? 
- if (M.withModuleDo([&](Module &M) { return !canPartition(*cast(M.getNamedValue(cfunc))); })) { - jl_merge_module(L, std::move(M)); - } - continue; - } - } - jl_merge_module(L, std::move(M)); - uint32_t func_id = 0; - uint32_t cfunc_id = 0; - if (func == "jl_fptr_args") { - func_id = -JL_INVOKE_ARGS; - } - else if (func == "jl_fptr_sparam") { - func_id = -JL_INVOKE_SPARAM; - } - else if (func == "jl_f_opaque_closure_call") { - assert(false); // TODO: remove - } - else if (func == "jl_fptr_const_return") { - func_id = -JL_INVOKE_CONST; - } - else { - //Safe b/c context is locked by params - data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(func))); - func_id = data->jl_sysimg_fvars.size(); - } - if (!cfunc.empty()) { - //Safe b/c context is locked by params - data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(cfunc))); - cfunc_id = data->jl_sysimg_fvars.size(); - } - data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id); - } - bool Changed = true; - while (Changed) { - Changed = false; - // make sure everything referenced got included though, since some functions aren't - // correctly implemented by staticdata for external use, and so codegen won't emit - // an external reference but expects a private copy here instead - for (auto &def : compiled_functions) { - orc::ThreadSafeModule &M = def.second.TSM; - if (!M) - continue; - jl_llvm_functions_t &decls = def.second.decls; - StringRef func = decls.functionObject; - StringRef cfunc = decls.specFunctionObject; - if (func != "jl_fptr_args" && - func != "jl_fptr_sparam" && - func != "jl_f_opaque_closure_call" && - clone.getModuleUnlocked()->getNamedValue(func)) { - jl_merge_module(L, std::move(M)); - Changed = true; - continue; - } - if (!cfunc.empty() && clone.getModuleUnlocked()->getNamedValue(cfunc)) { - Changed = true; - jl_merge_module(L, std::move(M)); - } - } - } -#ifndef NDEBUG - // make sure we didn't forget anything that we promised to include 
in here - for (auto &def : compiled_functions) { - jl_llvm_functions_t &decls = def.second.decls; - StringRef func = decls.functionObject; - StringRef cfunc = decls.specFunctionObject; - if (func != "jl_fptr_args" && - func != "jl_fptr_sparam" && - func != "jl_f_opaque_closure_call") { - GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(func); - assert(!F || !F->isDeclaration()); - } - if (!cfunc.empty()) { - GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(cfunc); - assert(!F || !F->isDeclaration()); - } - } -#endif - compiled_functions.clear(); - if (params._shared_module) { - bool error = L.linkInModule(std::move(params._shared_module)); - assert(!error && "Error linking in shared module"); - (void)error; - } + CreateNativeMethods += out.ci_funcs.size(); + CreateNativeGlobals += out.global_targets.size(); + + data->jl_value_to_llvm.reserve(out.global_targets.size()); + data->jl_sysimg_gvars.reserve(out.global_targets.size() + out.external_fns.size()); + + for (auto &[val, gv] : out.global_targets) { + data->jl_value_to_llvm.push_back(val); + data->jl_sysimg_gvars.push_back(gv); + } + for (auto &[ci, gv] : out.external_fns) { + data->jl_sysimg_gvars.push_back(gv); + data->jl_external_to_llvm.push_back(ci); } - // now get references to the globals in the merged module - // and set them to be internalized and initialized at startup - // filter out any gvars that got optimized away - idx = 0; - size_t newoffset = 0; - size_t newidx = 0; - for (auto &global : gvars) { - //Safe b/c context is locked by params - GlobalVariable *G = cast_or_null(clone.getModuleUnlocked()->getNamedValue(global)); - if (G != nullptr) { - assert(!G->hasInitializer()); - G->setInitializer(Constant::getNullValue(G->getValueType())); - G->setLinkage(GlobalValue::InternalLinkage); - G->setDSOLocal(true); - assert(newidx == data->jl_sysimg_gvars.size()); - if (idx < offset) { - data->jl_value_to_llvm[newidx] = data->jl_value_to_llvm[idx]; - newoffset = newidx + 1; - } - else { 
- data->jl_external_to_llvm[newidx - newoffset] = data->jl_external_to_llvm[idx - offset]; - } - data->jl_sysimg_gvars.push_back(G); - newidx++; + for (auto v : data->jl_sysimg_gvars) { + auto gv = (GlobalVariable *)v; + gv->setInitializer(Constant::getNullValue(gv->getValueType())); + gv->setLinkage(GlobalValue::InternalLinkage); + gv->setDSOLocal(true); + } + + for (auto &[ci, funcs] : out.ci_funcs) { + uint32_t invoke_id, specptr_id = 0; + if (funcs.invoke_api == JL_INVOKE_SPECSIG) { + assert(funcs.invoke); + data->jl_sysimg_fvars.push_back(funcs.invoke); + invoke_id = data->jl_sysimg_fvars.size(); + } else { + invoke_id = -funcs.invoke_api; + } + if (funcs.specptr) { + data->jl_sysimg_fvars.push_back(funcs.specptr); + specptr_id = data->jl_sysimg_fvars.size(); } - idx++; + data->jl_fvar_map[ci] = {invoke_id, specptr_id}; } - data->jl_value_to_llvm.resize(newoffset); - data->jl_external_to_llvm.resize(newidx - newoffset); - gvars.clear(); - CreateNativeGlobals += idx; - data->M = std::move(clone); - return (void*)data; + out.unlock(); + return (void *)data; } static object::Archive::Kind getDefaultForHost(Triple &triple) @@ -2075,11 +1890,14 @@ void jl_dump_native_impl(void *native_code, params = &default_emission_params; } + data->out->lock(); + Module &dataM = data->out->get_module(); + // We don't want to use MCJIT's target machine because // it uses the large code model and we may potentially // want less optimizations there. 
// make sure to emit the native object format, even if FORCE_ELF was set in codegen - Triple TheTriple(data->M.withModuleDo([](Module &M) { return M.getTargetTriple(); })); + Triple TheTriple = data->out->TargetTriple; if (TheTriple.isOSWindows()) { TheTriple.setObjectFormat(Triple::COFF); } else if (TheTriple.isOSDarwin()) { @@ -2119,12 +1937,8 @@ void jl_dump_native_impl(void *native_code, )); fixupTM(*SourceTM); auto DL = jl_create_datalayout(*SourceTM); - std::string StackProtectorGuard; - unsigned OverrideStackAlignment; - data->M.withModuleDo([&](Module &M) { - StackProtectorGuard = M.getStackProtectorGuard().str(); - OverrideStackAlignment = M.getOverrideStackAlignment(); - }); + std::string StackProtectorGuard = dataM.getStackProtectorGuard().str(); + unsigned OverrideStackAlignment = dataM.getOverrideStackAlignment(); auto compile = [&](Module &M, StringRef name, unsigned threads, auto module_released) { return add_output(M, *SourceTM, name, threads, !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname, module_released); @@ -2204,12 +2018,11 @@ void jl_dump_native_impl(void *native_code, bool has_veccall = false; - data->M.withModuleDo([&](Module &dataM) { + { JL_TIMING(NATIVE_AOT, NATIVE_Setup); - dataM.setTargetTriple(TheTriple.str()); dataM.setDataLayout(DL); dataM.setPICLevel(PICLevel::BigPIC); - auto &Context = dataM.getContext(); + auto &Context = data->out->get_context(); Type *T_psize = PointerType::getUnqual(Context); @@ -2284,21 +2097,18 @@ void jl_dump_native_impl(void *native_code, } has_veccall = !!dataM.getModuleFlag("julia.mv.veccall"); - }); + }; { // Don't use withModuleDo here since we delete the TSM midway through - auto TSCtx = data->M.getContext(); - auto lock = TSCtx.getLock(); - auto dataM = data->M.getModuleUnlocked(); + // auto TSCtx = data->out->get_tsm().consumingModuleDo(); + // auto lock = TSCtx.getLock(); + // auto dataM = data->M.getModuleUnlocked(); - data_outputs = compile(*dataM, "text", threads, [data, &lock, 
&TSCtx](Module &) { + data_outputs = compile(dataM, "text", threads, [data](Module &) { // Delete data when add_output thinks it's done with it // Saves memory for use when multithreading - auto lock2 = std::move(lock); delete data; - // Drop last reference to shared LLVM::Context - auto TSCtx2 = std::move(TSCtx); }); } @@ -2499,23 +2309,24 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_ auto ctx = jl_ExecutionEngine->makeContext(); const auto &DL = jl_ExecutionEngine->getDataLayout(); const auto &TT = jl_ExecutionEngine->getTargetTriple(); - orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx, DL, TT); + jl_codegen_output_t output{name_from_method_instance(mi), DL, TT}; Function *F = nullptr; { uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); - jl_codegen_params_t output(ctx, DL, TT); output.params = ¶ms; output.imaging_mode = jl_options.image_codegen; output.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); JL_GC_PUSH1(&output.temporary_roots); - jl_llvm_functions_t decls = jl_emit_code(m, mi, src, mi->specTypes, src->rettype, output); + std::optional decls = jl_emit_code(output, mi, src, mi->specTypes, src->rettype); + emit_always_inline(output); + emit_llvmcall_modules(output); // while not required, also emit the cfunc thunks, based on the // inferred ABIs of their targets in the current latest world, // since otherwise it is challenging to see all relevant codes - jl_compiled_functions_t compiled_functions; + // jl_compiled_functions_t compiled_functions; size_t latestworld = jl_atomic_load_acquire(&jl_world_counter); for (cfunc_decl_t &cfunc : output.cfuncs) { jl_value_t *sigt = cfunc.abi.sigt; @@ -2524,24 +2335,20 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_ if (mi == jl_nothing) continue; 
jl_code_instance_t *codeinst = jl_type_infer((jl_method_instance_t*)mi, latestworld, SOURCE_MODE_NOT_REQUIRED, jl_options.trim); - if (codeinst == nullptr || compiled_functions.count(codeinst)) + if (codeinst == nullptr || output.ci_funcs.count(codeinst)) continue; - orc::ThreadSafeModule decl_m = jl_create_ts_module("extern", ctx, DL, TT); - jl_llvm_functions_t decls; - if (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) - decls.functionObject = "jl_fptr_const_return"; - else - decls = jl_emit_codedecls(decl_m, codeinst, output); - compiled_functions[codeinst] = {std::move(decl_m), std::move(decls)}; + jl_emit_codedecls(output, codeinst); } - generate_cfunc_thunks(output, compiled_functions); - emit_always_inline(m, output); - output.workqueue.clear(); - compiled_functions.clear(); + generate_cfunc_thunks(output); + output.temporary_roots_set.clear(); output.temporary_roots = nullptr; JL_GC_POP(); // GC the global_targets array contents now since reflection doesn't need it - if (m) { + if (decls) { + jl_codeinst_funcs_t decl_names; + decl_names.invoke_api = decls->invoke_api; + decl_names.invoke = decls->invoke ? decls->invoke->getName() : ""; + decl_names.specptr = decls->specptr ? 
decls->specptr->getName() : ""; // if compilation succeeded, prepare to return the result // Similar to jl_link_global from jitlayers.cpp, // so that code_llvm shows similar codegen to the jit @@ -2561,24 +2368,25 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_ global.second->setVisibility(GlobalValue::DefaultVisibility); } } - if (!jl_options.image_codegen) { - optimizeDLSyms(*m.getModuleUnlocked()); - } - assert(!verifyLLVMIR(*m.getModuleUnlocked())); + // TODO: + // if (!jl_options.image_codegen) { + // optimizeDLSyms(*m.getModuleUnlocked()); + // } + assert(!verifyLLVMIR(output.get_module())); if (optimize) { NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level)}; //Safe b/c context lock is held by output - PM.run(*m.getModuleUnlocked()); - assert(!verifyLLVMIR(*m.getModuleUnlocked())); + PM.run(output.get_module()); + assert(!verifyLLVMIR(output.get_module())); } const std::string *fname; - if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam") + if (decls->invoke_api == JL_INVOKE_ARGS || decls->invoke_api == JL_INVOKE_SPARAM) getwrapper = false; if (!getwrapper) - fname = &decls.specFunctionObject; + fname = &decl_names.specptr; else - fname = &decls.functionObject; - F = cast(m.getModuleUnlocked()->getNamedValue(*fname)); + fname = &decl_names.invoke; + F = output.get_module().getFunction(*fname); } if (measure_compile_time_enabled) { auto end = jl_hrtime(); @@ -2586,7 +2394,8 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_ } } if (F) { - dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m))); + output.unlock(); + dump->TSM = wrap(new orc::ThreadSafeModule(std::move(output.get_tsm()))); dump->F = wrap(F); return; } diff --git a/src/ccall.cpp b/src/ccall.cpp index f67268b1a0007..4e8a4db482754 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -91,10 +91,10 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const 
native_sym_arg_t &symarg, return false; } - auto M = &ctx.emission_context.shared_module(); + auto M = &ctx.emission_context.get_module(); bool runtime_lib = false; GlobalVariable *libptrgv; - jl_codegen_params_t::SymMapGV *symMap; + jl_codegen_output_t::SymMapGV *symMap; if ((intptr_t)f_lib == (intptr_t)JL_EXE_LIBNAME) { libptrgv = prepare_global_in(M, jlexe_var); symMap = &ctx.emission_context.symMapExe; @@ -147,7 +147,7 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const native_sym_arg_t &symarg, } static Value *runtime_sym_lookup( - jl_codegen_params_t &emission_context, + jl_codegen_output_t &emission_context, IRBuilder<> &irbuilder, jl_codectx_t *pctx, const native_sym_arg_t &symarg, Function *f, @@ -265,7 +265,7 @@ static GlobalVariable *emit_plt_thunk( ++PLTThunks; bool shared = libptrgv != nullptr; assert(shared && "not yet supported by runtime_sym_lookup"); - Module *M = shared ? &ctx.emission_context.shared_module() : jl_Module; + auto M = &ctx.emission_context.get_module(); if (shared) { assert(symarg.f_name); libptrgv = prepare_global_in(M, libptrgv); @@ -1177,7 +1177,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar // save the module to be linked later. 
// we cannot do this right now, because linking mutates the destination module, // which might invalidate LLVM values cached in cgval_t's (specifically constant arrays) - ctx.llvmcall_modules.push_back(std::move(Mod)); + ctx.emission_context.llvmcall_modules.push_back(std::move(Mod)); JL_GC_POP(); @@ -1242,9 +1242,9 @@ class function_sig_t { jl_unionall_t *unionall_env; // UnionAll environment for `at` and `rt` size_t nccallargs; // number of actual arguments size_t nreqargs; // number of required arguments in ccall function definition - jl_codegen_params_t *ctx; + jl_codegen_output_t *ctx; - function_sig_t(const char *fname, Type *lrt, jl_value_t *rt, bool retboxed, bool gc_safe, jl_svec_t *at, jl_unionall_t *unionall_env, size_t nreqargs, CallingConv::ID cc, bool llvmcall, jl_codegen_params_t *ctx) + function_sig_t(const char *fname, Type *lrt, jl_value_t *rt, bool retboxed, bool gc_safe, jl_svec_t *at, jl_unionall_t *unionall_env, size_t nreqargs, CallingConv::ID cc, bool llvmcall, jl_codegen_output_t *ctx) : lrt(lrt), retboxed(retboxed), gc_safe(gc_safe), prt(NULL), sret(0), cc(cc), llvmcall(llvmcall), at(at), rt(rt), unionall_env(unionall_env), @@ -1470,7 +1470,7 @@ static bool verify_ref_type(jl_codectx_t &ctx, jl_value_t* ref, jl_unionall_t *u static const std::string verify_ccall_sig(jl_value_t *&rt, jl_value_t *at, jl_unionall_t *unionall_env, jl_svec_t *sparam_vals, - jl_codegen_params_t *ctx, + jl_codegen_output_t *ctx, Type *&lrt, LLVMContext &ctxt, bool &retboxed, bool &static_rt, bool llvmcall=false) { diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 62f31e237f4b6..16a201e63fd9d 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -104,7 +104,7 @@ AtomicOrdering get_llvm_atomic_order(enum jl_memory_order order) // --- string constants --- static Value *stringConstPtr( - jl_codegen_params_t &emission_context, + jl_codegen_output_t &emission_context, IRBuilder<> &irbuilder, const Twine &txt) { @@ -187,7 +187,7 @@ static DICompileUnit 
*getOrCreateJuliaCU(Module &M, return CU; } -static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_debugcache_t &debuginfo, jl_value_t *jt, DIBuilder *dbuilder, bool isboxed) +static DIType *_julia_type_to_di(jl_codegen_output_t *ctx, jl_debugcache_t &debuginfo, jl_value_t *jt, DIBuilder *dbuilder, bool isboxed) { jl_datatype_t *jdt = (jl_datatype_t*)jt; if (isboxed || !jl_is_datatype(jt) || !jdt->isconcretetype) @@ -389,10 +389,10 @@ static llvm::SmallVector get_gc_roots_for(jl_codectx_t &ctx, const jl_ // --- emitting pointers directly into code --- -static void jl_temporary_root(jl_codegen_params_t &ctx, jl_value_t *val); +static void jl_temporary_root(jl_codegen_output_t &ctx, jl_value_t *val); static void jl_temporary_root(jl_codectx_t &ctx, jl_value_t *val); -static Constant *julia_pgv(jl_codegen_params_t ¶ms, Module *M, const char *cname, void *addr) +static Constant *julia_pgv(jl_codegen_output_t ¶ms, Module *M, const char *cname, void *addr) { // emit a GlobalVariable for a jl_value_t named "cname" // store the name given so we can reuse it (facilitating merging later) @@ -401,8 +401,7 @@ static Constant *julia_pgv(jl_codegen_params_t ¶ms, Module *M, const char *c StringRef localname; std::string gvname; if (!gv) { - uint64_t id = jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); // TODO: use params.global_targets.size() - raw_string_ostream(gvname) << cname << id; + gvname = params.make_name(cname); localname = StringRef(gvname); } else { @@ -424,7 +423,7 @@ static Constant *julia_pgv(jl_codegen_params_t ¶ms, Module *M, const char *c return gv; } -static Constant *julia_pgv(jl_codegen_params_t ¶ms, Module *M, const char *prefix, jl_sym_t *name, jl_module_t *mod, void *addr) +static Constant *julia_pgv(jl_codegen_output_t ¶ms, Module *M, const char *prefix, jl_sym_t *name, jl_module_t *mod, void *addr) { // emit a GlobalVariable for a jl_value_t, using the prefix, name, and module to // to create a readable name of the form 
prefixModA.ModB.name# @@ -453,8 +452,9 @@ static Constant *julia_pgv(jl_codegen_params_t ¶ms, Module *M, const char *p } static JuliaVariable *julia_const_gv(jl_value_t *val); -Constant *literal_pointer_val_slot(jl_codegen_params_t ¶ms, Module *M, jl_value_t *p) +Constant *literal_pointer_val_slot(jl_codegen_output_t ¶ms, jl_value_t *p) { + Module *M = ¶ms.get_module(); // emit a pointer to a jl_value_t* which will allow it to be valid across reloading code // also, try to give it a nice name for gdb, for easy identification if (JuliaVariable *gv = julia_const_gv(p)) { @@ -564,7 +564,7 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) { if (p == NULL) return Constant::getNullValue(ctx.types().T_pjlvalue); - Value *pgv = literal_pointer_val_slot(ctx.emission_context, jl_Module, p); + Value *pgv = literal_pointer_val_slot(ctx.emission_context, p); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))), @@ -626,9 +626,9 @@ static unsigned convert_struct_offset(jl_codectx_t &ctx, Type *lty, unsigned byt return convert_struct_offset(ctx.builder.GetInsertBlock()->getModule()->getDataLayout(), lty, byte_offset); } -static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall=false); +static Type *_julia_struct_to_llvm(jl_codegen_output_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall=false); -static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed) +static Type *_julia_type_to_llvm(jl_codegen_output_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed) { // this function converts a Julia Type into the equivalent LLVM type if (isboxed) *isboxed = false; @@ -736,7 +736,7 @@ static StructType *get_memoryref_type(LLVMContext &ctxt, Type *T_size, const jl_ 
return get_jlmemoryref(ctxt, AS); } -static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall) +static Type *_julia_struct_to_llvm(jl_codegen_output_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall) { // this function converts a Julia Type into the equivalent LLVM struct // use this where C-compatible (unboxed) structs are desired @@ -1346,7 +1346,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull ptr = get_pointer_to_constant(ctx.emission_context, ConstantInt::get(expr_type, jt->smalltag << 4), Align(sizeof(jl_value_t*)), StringRef("_j_smalltag_") + jl_symbol_name(jt->name->name), *jl_Module); } else { - ptr = ConstantExpr::getBitCast(literal_pointer_val_slot(ctx.emission_context, jl_Module, (jl_value_t*)jt), datatype_or_p->getType()); + ptr = ConstantExpr::getBitCast(literal_pointer_val_slot(ctx.emission_context, (jl_value_t*)jt), datatype_or_p->getType()); } datatype_or_p = ctx.builder.CreateSelect(cmp, ptr, datatype_or_p); setName(ctx.emission_context, datatype_or_p, "typetag_ptr"); @@ -4503,7 +4503,7 @@ static Value *emit_defer_signal(jl_codectx_t &ctx) return emit_ptrgep(ctx, ptls, offsetof(jl_tls_states_t, defer_signal)); } -#ifndef JL_NDEBUG +#if 0 static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b) { return diff --git a/src/codegen.cpp b/src/codegen.cpp index b8e1061728127..92393b1cca2a2 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -146,7 +146,7 @@ typedef Instruction TerminatorInst; #undef DEBUG_TYPE //LLVM occasionally likes to set DEBUG_TYPE in a header... 
#define DEBUG_TYPE "julia_irgen_codegen" -void setName(jl_codegen_params_t ¶ms, Value *V, const Twine &Name) +void setName(jl_codegen_output_t &out, Value *V, const Twine &Name) { // we do the constant check again later, duplicating it here just makes sure the assertion // fires on debug builds even if debug info is not enabled @@ -159,24 +159,24 @@ void setName(jl_codegen_params_t ¶ms, Value *V, const Twine &Name) } } -void maybeSetName(jl_codegen_params_t ¶ms, Value *V, const Twine &Name) +void maybeSetName(jl_codegen_output_t &out, Value *V, const Twine &Name) { // To be used when we may get an Instruction or something that is not an instruction i.e Constants/Arguments if (isa(V)) V->setName(Name); } -void setName(jl_codegen_params_t ¶ms, Value *V, std::function GetName) +void setName(jl_codegen_output_t &out, Value *V, std::function GetName) { assert((isa(V) || isa(V)) && "Should only set names on instructions!"); - if (!params.getContext().shouldDiscardValueNames() && !isa(V)) + if (!out.get_context().shouldDiscardValueNames() && !isa(V)) V->setName(Twine(GetName())); } -void setNameWithField(jl_codegen_params_t ¶ms, Value *V, std::function GetObjName, jl_datatype_t *jt, unsigned idx, const Twine &suffix) +void setNameWithField(jl_codegen_output_t &out, Value *V, std::function GetObjName, jl_datatype_t *jt, unsigned idx, const Twine &suffix) { assert((isa(V) || isa(V)) && "Should only set names on instructions!"); - if (!params.getContext().shouldDiscardValueNames() && !isa(V)) { + if (!out.get_context().shouldDiscardValueNames() && !isa(V)) { if (jl_is_tuple_type(jt)){ V->setName(Twine(GetObjName()) + "[" + Twine(idx + 1) + "]"+ suffix); return; @@ -1854,8 +1854,7 @@ struct jl_varinfo_t { class jl_codectx_t { public: IRBuilder<> builder; - jl_codegen_params_t &emission_context; - llvm::MapVector call_targets; + jl_codegen_output_t &emission_context; Function *f = NULL; MDNode* LoopID = NULL; // local var info. globals are not in here. 
@@ -1894,25 +1893,20 @@ class jl_codectx_t { Instruction *topalloca = NULL; Value *world_age_at_entry = NULL; - bool use_cache = false; bool external_linkage = false; const jl_cgparams_t *params = NULL; - SmallVector, 0> llvmcall_modules; - - jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t ¶ms, size_t min_world, size_t max_world) - : builder(llvmctx), - emission_context(params), - call_targets(), + jl_codectx_t(jl_codegen_output_t &out, size_t min_world, size_t max_world) + : builder(out.get_context()), + emission_context(out), min_world(min_world), max_world(max_world), - use_cache(params.cache), - external_linkage(params.external_linkage), - params(params.params) { + external_linkage(out.external_linkage), + params(out.params) { } - jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t ¶ms, jl_code_instance_t *ci) : - jl_codectx_t(llvmctx, params, jl_atomic_load_relaxed(&ci->min_world), jl_atomic_load_relaxed(&ci->max_world)) {} + jl_codectx_t(jl_codegen_output_t &out, jl_code_instance_t *ci) : + jl_codectx_t(out, jl_atomic_load_relaxed(&ci->min_world), jl_atomic_load_relaxed(&ci->max_world)) {} jl_typecache_t &types() { type_cache.initialize(builder.getContext(), emission_context.DL); @@ -1928,12 +1922,6 @@ class jl_codectx_t { aliasscope_cache.initialize(builder.getContext()); return aliasscope_cache; } - - ~jl_codectx_t() { - // Transfer local delayed calls to the global queue - for (auto call_target : call_targets) - emission_context.workqueue.push_back(call_target); - } }; GlobalVariable *JuliaVariable::realize(jl_codectx_t &ctx) { @@ -2012,8 +2000,6 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) { } static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL); -static jl_returninfo_t get_specsig_function(jl_codegen_params_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, - ArrayRef ArgNames=None, unsigned nreq=0); static jl_cgval_t 
emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1); static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_value_t *scope, bool isvol, MDNode *tbaa); static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i); @@ -2084,7 +2070,7 @@ static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, Value *byte_offset, co // --- convenience functions for tagging llvm values with julia types --- -static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_context, Constant *val, Align align, const Twine &name, Module &M) +static GlobalVariable *get_pointer_to_constant(jl_codegen_output_t &emission_context, Constant *val, Align align, const Twine &name, Module &M) { GlobalVariable *&gv = emission_context.mergedConstants[val]; auto get_gv = [&](const Twine &name) { @@ -2673,6 +2659,7 @@ std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &conte { ++ModulesCreated; auto m = std::make_unique(name, context); + m->setUwtable(UWTableKind::None); // TODO remove, testing // According to clang darwin above 10.10 supports dwarfv4 if (!m->getModuleFlag("Dwarf Version")) { m->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 4); @@ -2696,7 +2683,7 @@ std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &conte return m; } -static void jl_name_jlfunc_args(jl_codegen_params_t ¶ms, Function *F) JL_NOTSAFEPOINT +static void jl_name_jlfunc_args(jl_codegen_output_t &out, Function *F) JL_NOTSAFEPOINT { assert(F->arg_size() == 3); F->getArg(0)->setName("function::Core.Function"); @@ -2704,7 +2691,7 @@ static void jl_name_jlfunc_args(jl_codegen_params_t ¶ms, Function *F) JL_NOT F->getArg(2)->setName("nargs::UInt32"); } -static void jl_name_jlfuncparams_args(jl_codegen_params_t ¶ms, Function *F) JL_NOTSAFEPOINT +static void jl_name_jlfuncparams_args(jl_codegen_output_t &out, Function *F) JL_NOTSAFEPOINT { assert(F->arg_size() == 4); F->getArg(0)->setName("function::Core.Function"); @@ -3152,7 +3139,7 @@ 
static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr) // ---- Get Element Pointer (GEP) instructions within the GC frame ---- -static void jl_temporary_root(jl_codegen_params_t &ctx, jl_value_t *val) +static void jl_temporary_root(jl_codegen_output_t &ctx, jl_value_t *val) { if (!jl_is_globally_rooted(val)) { jl_array_t *roots = ctx.temporary_roots; @@ -3973,12 +3960,11 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, static jl_llvm_functions_t emit_function( - orc::ThreadSafeModule &TSM, + jl_codegen_output_t &out, jl_method_instance_t *lam, jl_code_info_t *src, jl_value_t *abi, - jl_value_t *jlrettype, - jl_codegen_params_t ¶ms); + jl_value_t *jlrettype); static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_datatype_t *type, jl_cgval_t name); @@ -5125,7 +5111,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos return retval; } -static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal, +static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, ArrayRef argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *nreturn_roots, jl_value_t *inferred_retty) { ++EmittedSpecfunCalls; @@ -5133,70 +5119,34 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos jl_returninfo_t returninfo = get_specsig_function(ctx.emission_context, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure); *cc = returninfo.cc; *nreturn_roots = returninfo.return_roots; - if (fromexternal) { - std::string namep("p"); - Value *TheCallee = returninfo.decl.getCallee(); - namep += cast(TheCallee)->getName(); - GlobalVariable *GV = 
cast_or_null(jl_Module->getNamedValue(namep)); - if (GV == nullptr) { - GV = new GlobalVariable(*jl_Module, TheCallee->getType(), false, GlobalVariable::ExternalLinkage, nullptr, namep); - ctx.emission_context.external_fns[std::make_tuple(fromexternal, true)] = GV; - } - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - TheCallee = ai.decorateInst(ctx.builder.CreateAlignedLoad(TheCallee->getType(), GV, Align(sizeof(void*)))); - setName(ctx.emission_context, TheCallee, namep); - returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), TheCallee); - } jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, specTypes, jlretty, returninfo, argv, nargs); // see if inference has a different / better type for the call than the lambda return update_julia_type(ctx, retval, inferred_retty); } -static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal, +static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject, ArrayRef argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty) { bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; return emit_call_specfun_other(ctx, is_opaque_closure, mi->specTypes, jlretty, NULL, - specFunctionObject, fromexternal, argv, nargs, cc, return_roots, inferred_retty); + specFunctionObject, argv, nargs, cc, return_roots, inferred_retty); } -static jl_value_t *get_ci_abi(jl_code_instance_t *ci) -{ - if (jl_typeof(ci->def) == (jl_value_t*)jl_abioverride_type) - return ((jl_abi_override_t*)ci->def)->abi; - return jl_get_ci_mi(ci)->specTypes; -} - -static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_code_instance_t *ci, StringRef specFunctionObject, jl_code_instance_t *fromexternal, +static jl_cgval_t 
emit_call_specfun_other(jl_codectx_t &ctx, jl_code_instance_t *ci, StringRef specFunctionObject, ArrayRef argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty) { jl_method_instance_t *mi = jl_get_ci_mi(ci); bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; return emit_call_specfun_other(ctx, is_opaque_closure, get_ci_abi(ci), ci->rettype, NULL, - specFunctionObject, fromexternal, argv, nargs, cc, return_roots, inferred_retty); + specFunctionObject, argv, nargs, cc, return_roots, inferred_retty); } -static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal, +static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject, ArrayRef argv, size_t nargs, jl_value_t *inferred_retty) { Value *theFptr; - if (fromexternal) { - std::string namep("p"); - namep += specFunctionObject; - GlobalVariable *GV = cast_or_null(jl_Module->getNamedValue(namep)); - Type *pfunc = PointerType::getUnqual(ctx.builder.getContext()); - if (GV == nullptr) { - GV = new GlobalVariable(*jl_Module, pfunc, false, GlobalVariable::ExternalLinkage, nullptr, namep); - ctx.emission_context.external_fns[std::make_tuple(fromexternal, false)] = GV; - } - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - theFptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(pfunc, GV, Align(sizeof(void*)))); - setName(ctx.emission_context, theFptr, specFunctionObject); - } - else { - theFptr = jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee(); - addRetAttr(cast(theFptr), Attribute::NonNull); - } + theFptr = jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee(); + addRetAttr(cast(theFptr), Attribute::NonNull); Value *ret = emit_jlcall(ctx, theFptr, nullptr, argv, nargs, julia_call); return 
update_julia_type(ctx, mark_julia_type(ctx, ret, true, jlretty), inferred_retty); } @@ -5253,7 +5203,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR unsigned return_roots = 0; jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; StringRef protoname = f->getName(); - result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, nullptr, argv, nargs, &cc, &return_roots, rt); + result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, argv, nargs, &cc, &return_roots, rt); } handled = true; } @@ -5273,65 +5223,14 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR result = mark_julia_type(ctx, r, true, rt); } else { - std::string name; - StringRef protoname; - bool need_to_emit = true; - bool cache_valid = (ctx.use_cache || ctx.external_linkage); - bool external = false; - - // Check if we already queued this up - auto it = ctx.call_targets.find(codeinst); - if (it != ctx.call_targets.end()) { - assert(it->second.specsig == specsig); - protoname = it->second.decl->getName(); - if (always_inline) - it->second.private_linkage = true; - else - it->second.external_linkage = true; - } - // Check if it is already compiled (either JIT or externally), and if so, re-use that name if possible - // This is just an optimization to emit the correct name immediately, if we know it, since the JIT and AOT code will be able to do this later also - if (cache_valid) { - // TODO: use `emitted` map here too to try to consolidate names? - uint8_t specsigflags; - jl_callptr_t invoke; - void *fptr; - jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); - if (specsig ? 
specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED : invoke == jl_fptr_args_addr) { - if (ctx.external_linkage) { - // TODO: Add !specsig support to aotcompile.cpp - // Check that the codeinst is containing native code - if (specsig && (specsigflags & JL_CI_FLAGS_FROM_IMAGE)) { - external = !always_inline; - need_to_emit = false; - } - } - else { // ctx.use_cache - need_to_emit = false; - } - if (!need_to_emit && protoname.empty()) - protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - } - } - if (it != ctx.call_targets.end()) - need_to_emit = false; - else if (always_inline) - need_to_emit = true; - if (protoname.empty()) { - raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); - protoname = StringRef(name); - } - + StringRef protoname = ctx.emission_context.get_call_target( + codeinst, specsig, always_inline); jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; unsigned return_roots = 0; if (specsig) - result = emit_call_specfun_other(ctx, codeinst, protoname, external ? codeinst : nullptr, argv, nargs, &cc, &return_roots, rt); + result = emit_call_specfun_other(ctx, codeinst, protoname, argv, nargs, &cc, &return_roots, rt); else - result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, external ? 
codeinst : nullptr, argv, nargs, rt); - if (need_to_emit) { - Function *trampoline_decl = cast(jl_Module->getNamedValue(protoname)); - ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, nullptr, specsig, !always_inline, always_inline}; - } + result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, argv, nargs, rt); } } handled = true; @@ -5427,7 +5326,7 @@ static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, j Value *specptr = emit_unbox(ctx, ctx.types().T_size, closure_specptr, (jl_value_t*)jl_long_type); specptr = emit_inttoptr(ctx, specptr, ctx.types().T_ptr); JL_GC_PUSH1(&sigtype); - jl_cgval_t r = emit_call_specfun_other(ctx, true, sigtype, oc_rett, specptr, "", NULL, argv, nargs, + jl_cgval_t r = emit_call_specfun_other(ctx, true, sigtype, oc_rett, specptr, "", argv, nargs, &cc, &return_roots, oc_rett); JL_GC_POP(); return r; @@ -6212,31 +6111,9 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met bool specsig = uses_specsig(sigtype, false, rettype, true); std::string name; std::string oc; - StringRef protoname; - StringRef proto_oc; - - // Check if we already queued this up - auto it = ctx.call_targets.find(ci); - bool need_to_emit = it == ctx.call_targets.end(); - if (!need_to_emit) { - assert(specsig == it->second.specsig); - if (specsig) { - protoname = it->second.decl->getName(); - proto_oc = it->second.oc->getName(); - } - else { - proto_oc = it->second.decl->getName(); - } - need_to_emit = false; - } - else { - if (specsig) { - raw_string_ostream(name) << "j_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); - protoname = StringRef(name); - } - raw_string_ostream(oc) << "j1_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); - proto_oc = StringRef(oc); - } + + StringRef protoname = ctx.emission_context.get_call_target(ci, true, false); + StringRef proto_oc = 
ctx.emission_context.get_call_target(ci, false, false); // Get the fptr1 OC Function *F = nullptr; @@ -6254,21 +6131,13 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met // Get the specsig (if applicable) Function *specF = nullptr; - jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; - unsigned return_roots = 0; bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; assert(is_opaque_closure); if (specsig) { jl_returninfo_t returninfo = get_specsig_function(ctx.emission_context, jl_Module, nullptr, protoname, mi->specTypes, rettype, is_opaque_closure); - cc = returninfo.cc; - return_roots = returninfo.return_roots; specF = cast(returninfo.decl.getCallee()); } - if (need_to_emit) { - ctx.call_targets[ci] = {cc, return_roots, specsig ? specF : F, specsig ? F : nullptr, specsig, true, false}; - } - JL_GC_POP(); return std::make_pair(F, specF); } @@ -6745,7 +6614,7 @@ static std::string get_function_name(bool specsig, bool needsparams, const char } static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *abi, jl_value_t *jlretty, jl_value_t *declrt, jl_returninfo_t &f, unsigned nargs, int retarg, bool is_opaque_closure, StringRef funcName, - Module *M, jl_codegen_params_t ¶ms); + Module *M, jl_codegen_output_t &out); Function *get_or_emit_fptr1(StringRef preal_decl, Module *M) { @@ -6755,7 +6624,7 @@ Function *get_or_emit_fptr1(StringRef preal_decl, Module *M) static Function *emit_modifyhelper(jl_codectx_t &ctx2, const jl_cgval_t &op, const jl_cgval_t &modifyop, jl_value_t *jltype, Type *elty, jl_cgval_t rhs, const Twine &fname, bool gcstack_arg) { Module *M = ctx2.f->getParent(); - jl_codectx_t ctx(M->getContext(), ctx2.emission_context, ctx2.min_world, ctx2.max_world); + jl_codectx_t ctx(ctx2.emission_context, ctx2.min_world, ctx2.max_world); SmallVector ArgTy; ArgTy.push_back(elty); if (rhs.V) @@ -6820,20 +6689,20 @@ static Function *emit_modifyhelper(jl_codectx_t &ctx2, const 
jl_cgval_t &op, con } -Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Value *theFunc, Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Value *theFunc, jl_codegen_output_t &out) JL_NOTSAFEPOINT { ++EmittedToJLInvokes; - jl_codectx_t ctx(M->getContext(), params, codeinst); + jl_codectx_t ctx(out, codeinst); std::string name; raw_string_ostream(name) << "tojlinvoke" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); Function *f = Function::Create(ctx.types().T_jlfunc, GlobalVariable::InternalLinkage, - name, M); - jl_init_function(f, params.TargetTriple); - jl_name_jlfunc_args(params, f); + name, out.get_module()); + jl_init_function(f, out.TargetTriple); + jl_name_jlfunc_args(out, f); //f->setAlwaysInline(); ctx.f = f; // for jl_Module - BasicBlock *b0 = BasicBlock::Create(M->getContext(), "top", f); + BasicBlock *b0 = BasicBlock::Create(out.get_context(), "top", f); ctx.builder.SetInsertPoint(b0); Value *theFarg; @@ -6848,18 +6717,20 @@ Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Value *theFunc, Module * } theFarg = track_pjlvalue(ctx, theFarg); auto args = f->arg_begin(); - CallInst *r = ctx.builder.CreateCall(FunctionCallee(jlinvoke_func->_type(M->getContext()), theFunc), { &*args, &*++args, &*++args, theFarg }); - r->setAttributes(jlinvoke_func->_attrs(M->getContext())); + CallInst *r = ctx.builder.CreateCall(FunctionCallee(jlinvoke_func->_type(out.get_context()), theFunc), { &*args, &*++args, &*++args, theFarg }); + r->setAttributes(jlinvoke_func->_attrs(out.get_context())); ctx.builder.CreateRet(r); return f; } -Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, jl_codegen_output_t &out) JL_NOTSAFEPOINT { Value *theFunc = nullptr; + auto &M = out.get_module(); + auto &Ctx = out.get_context(); if 
(!theFptrName.empty()) - theFunc = M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(M->getContext()), jlinvoke_func->_attrs(M->getContext())).getCallee(); - return emit_tojlinvoke(codeinst, theFunc, M, params); + theFunc = M.getOrInsertFunction(theFptrName, jlinvoke_func->_type(Ctx), jlinvoke_func->_attrs(Ctx)).getCallee(); + return emit_tojlinvoke(codeinst, theFunc, out); } static jl_value_t *get_oc_type(jl_value_t *calltype, jl_value_t *rettype) JL_ALWAYS_LEAFTYPE @@ -6876,7 +6747,7 @@ static void emit_specsig_to_specsig( Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, size_t nargs, - jl_codegen_params_t ¶ms, + jl_codegen_output_t &out, Value *target, jl_value_t *targetsig, jl_value_t *targetrt, @@ -6884,7 +6755,7 @@ static void emit_specsig_to_specsig( jl_value_t *rettype_const) { ++EmittedCFuncInvalidates; - jl_codectx_t ctx(gf_thunk->getParent()->getContext(), params, 0, 0); + jl_codectx_t ctx(out, 0, 0); ctx.f = gf_thunk; BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", gf_thunk); @@ -7018,21 +6889,44 @@ void emit_specsig_to_fptr1( Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, size_t nargs, - jl_codegen_params_t ¶ms, + jl_codegen_output_t &out, Function *target) { - emit_specsig_to_specsig(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, params, target, calltype, rettype, nullptr, nullptr); + emit_specsig_to_specsig(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, out, target, calltype, rettype, nullptr, nullptr); +} + +// Helper for JIT linking. 
+Function *emit_specsig_to_fptr1(jl_codegen_output_t &out, jl_code_instance_t *ci, + Function *func) +{ + jl_method_instance_t *mi = jl_get_ci_mi(ci); + size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed + bool is_opaque_closure = + jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + + std::string gf_thunk_name = + get_function_name(true, false, name_from_method_instance(mi), out.TargetTriple); + gf_thunk_name += "_gfthunk"; + + jl_value_t *specTypes = get_ci_abi(ci); + jl_returninfo_t info = + get_specsig_function(out, &out.get_module(), nullptr, gf_thunk_name, specTypes, + ci->rettype, is_opaque_closure); + Function *spec_func = cast(info.decl.getCallee()); + emit_specsig_to_fptr1(spec_func, info.cc, info.return_roots, specTypes, ci->rettype, + is_opaque_closure, nrealargs, out, func); + return spec_func; } -static void emit_fptr1_wrapper(Module *M, StringRef gf_thunk_name, Value *target, jl_value_t *rettype_const, jl_value_t *declrt, jl_value_t *jlrettype, jl_codegen_params_t ¶ms) +static void emit_fptr1_wrapper(Module *M, StringRef gf_thunk_name, Value *target, jl_value_t *rettype_const, jl_value_t *declrt, jl_value_t *jlrettype, jl_codegen_output_t &out) { Function *w = Function::Create(get_func_sig(M->getContext()), GlobalVariable::ExternalLinkage, gf_thunk_name, M); - jl_init_function(w, params.TargetTriple); + jl_init_function(w, out.TargetTriple); w->setAttributes(AttributeList::get(M->getContext(), {get_func_attrs(M->getContext()), w->getAttributes()})); w->addFnAttr(Attribute::OptimizeNone); w->addFnAttr(Attribute::NoInline); - jl_codectx_t ctx(M->getContext(), params, 0, 0); + jl_codectx_t ctx(out, 0, 0); ctx.f = w; ctx.rettype = declrt; @@ -7062,91 +6956,95 @@ static void emit_specsig_to_specsig( Module *M, StringRef gf_thunk_name, jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, size_t nargs, - jl_codegen_params_t ¶ms, + jl_codegen_output_t &out, Value *target, jl_value_t 
*targetsig, jl_value_t *targetrt, jl_returninfo_t *targetspec, jl_value_t *rettype_const) { - jl_returninfo_t returninfo = get_specsig_function(params, M, nullptr, gf_thunk_name, calltype, rettype, is_for_opaque_closure); + jl_returninfo_t returninfo = get_specsig_function(out, M, nullptr, gf_thunk_name, calltype, rettype, is_for_opaque_closure); Function *gf_thunk = cast(returninfo.decl.getCallee()); - jl_init_function(gf_thunk, params.TargetTriple); + jl_init_function(gf_thunk, out.TargetTriple); gf_thunk->setAttributes(AttributeList::get(gf_thunk->getContext(), {returninfo.attrs, gf_thunk->getAttributes()})); - emit_specsig_to_specsig(gf_thunk, returninfo.cc, returninfo.return_roots, calltype, rettype, is_for_opaque_closure, nargs, params, target, targetsig, targetrt, targetspec, rettype_const); + emit_specsig_to_specsig(gf_thunk, returninfo.cc, returninfo.return_roots, calltype, rettype, is_for_opaque_closure, nargs, out, target, targetsig, targetrt, targetspec, rettype_const); } -std::string emit_abi_converter(Module *M, jl_codegen_params_t ¶ms, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *target, bool target_specsig) +std::string emit_abi_converter(jl_codegen_output_t &out, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *target, bool target_specsig) { // this builds a method that calls a method with the same arguments but a different specsig // build a specsig -> specsig converter thunk // build a specsig -> arg1 converter thunk // build a args1 -> specsig converter thunk (gen_invoke_wrapper) // build a args1 -> args1 converter thunk (to add typeassert on result) + Module *M = &out.get_module(); bool needsparams = false; bool target_is_opaque_closure = false; jl_method_instance_t *mi = jl_get_ci_mi(codeinst); - std::string gf_thunk_name = get_function_name(from_abi.specsig, needsparams, name_from_method_instance(mi), params.TargetTriple); + std::string gf_thunk_name = get_function_name(from_abi.specsig, needsparams, 
name_from_method_instance(mi), out.TargetTriple); gf_thunk_name += "_gfthunk"; if (target_specsig) { jl_value_t *abi = get_ci_abi(codeinst); - jl_returninfo_t targetspec = get_specsig_function(params, M, target, "", abi, codeinst->rettype, target_is_opaque_closure); + jl_returninfo_t targetspec = get_specsig_function(out, M, target, "", abi, codeinst->rettype, target_is_opaque_closure); if (from_abi.specsig) - emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, params, + emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, out, target, mi->specTypes, codeinst->rettype, &targetspec, nullptr); else - gen_invoke_wrapper(mi, abi, codeinst->rettype, from_abi.rt, targetspec, from_abi.nargs, -1, from_abi.is_opaque_closure, gf_thunk_name, M, params); + gen_invoke_wrapper(mi, abi, codeinst->rettype, from_abi.rt, targetspec, from_abi.nargs, -1, from_abi.is_opaque_closure, gf_thunk_name, M, out); } else { if (from_abi.specsig) - emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, params, + emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, out, target, mi->specTypes, codeinst->rettype, nullptr, nullptr); else - emit_fptr1_wrapper(M, gf_thunk_name, target, nullptr, from_abi.rt, codeinst->rettype, params); + emit_fptr1_wrapper(M, gf_thunk_name, target, nullptr, from_abi.rt, codeinst->rettype, out); } return gf_thunk_name; } -std::string emit_abi_dispatcher(Module *M, jl_codegen_params_t ¶ms, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *invoke) +// TODO: Return LLVM function directly +std::string emit_abi_dispatcher(jl_codegen_output_t &out, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *invoke) { // this builds a method that calls a method with the same arguments but a different specsig // build a specsig -> args1 
(apply_generic) or invoke (emit_tojlinvoke) call // build a args1 -> args1 call (emit_fptr1_wrapper) // build a args1 -> invoke call (emit_tojlinvoke) + Module *M = &out.get_module(); Value *target; if (!codeinst) target = prepare_call_in(M, jlapplygeneric_func); else - target = emit_tojlinvoke(codeinst, invoke, M, params); // TODO: inline this call? + target = emit_tojlinvoke(codeinst, invoke, out); // TODO: inline this call? std::string gf_thunk_name; if (codeinst) - raw_string_ostream(gf_thunk_name) << "jfptr_" << name_from_method_instance(jl_get_ci_mi(codeinst)) << "_"; + raw_string_ostream(gf_thunk_name) << JL_SYM_INVOKE_SPECSIG << name_from_method_instance(jl_get_ci_mi(codeinst)) << "_"; else - raw_string_ostream(gf_thunk_name) << "j_"; + raw_string_ostream(gf_thunk_name) << JL_SYM_PROTO_SPECSIG; raw_string_ostream(gf_thunk_name) << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1) << "_gfthunk"; if (from_abi.specsig) - emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, params, + emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, out, target, from_abi.sigt, codeinst ? codeinst->rettype : (jl_value_t*)jl_any_type, nullptr, nullptr); else - emit_fptr1_wrapper(M, gf_thunk_name, target, nullptr, from_abi.rt, codeinst ? codeinst->rettype : (jl_value_t*)jl_any_type, params); + emit_fptr1_wrapper(M, gf_thunk_name, target, nullptr, from_abi.rt, codeinst ? 
codeinst->rettype : (jl_value_t*)jl_any_type, out); return gf_thunk_name; } -std::string emit_abi_constreturn(Module *M, jl_codegen_params_t ¶ms, jl_abi_t from_abi, jl_value_t *rettype_const) +std::string emit_abi_constreturn(jl_codegen_output_t &out, jl_abi_t from_abi, jl_value_t *rettype_const) { + Module *M = &out.get_module(); std::string gf_thunk_name; - raw_string_ostream(gf_thunk_name) << "jconst_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + raw_string_ostream(gf_thunk_name) << JL_SYM_SPECPTR_CONST << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); if (from_abi.specsig) { - emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, params, + emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, out, nullptr, from_abi.sigt, jl_typeof(rettype_const), nullptr, rettype_const); } else { - emit_fptr1_wrapper(M, gf_thunk_name, nullptr, rettype_const, from_abi.rt, jl_typeof(rettype_const), params); + emit_fptr1_wrapper(M, gf_thunk_name, nullptr, rettype_const, from_abi.rt, jl_typeof(rettype_const), out); } return gf_thunk_name; } -std::string emit_abi_constreturn(Module *M, jl_codegen_params_t ¶ms, bool specsig, jl_code_instance_t *codeinst) +std::string emit_abi_constreturn(jl_codegen_output_t &out, bool specsig, jl_code_instance_t *codeinst) { jl_value_t *sigt = get_ci_abi(codeinst); jl_value_t *rt = codeinst->rettype; @@ -7157,7 +7055,7 @@ std::string emit_abi_constreturn(Module *M, jl_codegen_params_t ¶ms, bool sp size_t nargs = specsig ? 
jl_nparams(sigt) : 0; jl_abi_t abi = {sigt, rt, nargs, specsig, is_opaque_closure}; - return emit_abi_constreturn(M, params, abi, codeinst->rettype_const); + return emit_abi_constreturn(out, abi, codeinst->rettype_const); } // release jl_world_counter @@ -7194,8 +7092,8 @@ static jl_cgval_t emit_abi_call(jl_codectx_t &ctx, jl_value_t *declrt, jl_value_ Vnull, Vnull, Vnull, - literal_pointer_val_slot(ctx.emission_context, M, declrt), - literal_pointer_val_slot(ctx.emission_context, M, sigt), + literal_pointer_val_slot(ctx.emission_context, declrt), + literal_pointer_val_slot(ctx.emission_context, sigt), literal_static_pointer_val((void*)flags, T_ptr)})); Value *last_world_p = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, cfuncdata, 1); LoadInst *last_world_v = ctx.builder.CreateAlignedLoad(T_size, last_world_p, ctx.types().alignof_ptr); @@ -7243,7 +7141,7 @@ static jl_cgval_t emit_abi_call(jl_codectx_t &ctx, jl_value_t *declrt, jl_value_ } static Function *gen_cfun_wrapper( - Module *into, jl_codegen_params_t ¶ms, + Module *into, jl_codegen_output_t &out, const function_sig_t &sig, jl_value_t *ff, const char *aliasname, jl_value_t *declrt, jl_value_t *sigt, jl_unionall_t *unionall_env, jl_svec_t *sparam_vals, jl_array_t **closure_types) @@ -7256,7 +7154,7 @@ static Function *gen_cfun_wrapper( bool nest = (!ff || unionall_env); std::string funcName; - raw_string_ostream(funcName) << "jlcapi_" << name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + raw_string_ostream(funcName) << JL_SYM_CFUNCTION << name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); Module *M = into; // Safe because ctx lock is held by params AttributeList attributes = sig.attributes; @@ -7317,10 +7215,10 @@ static Function *gen_cfun_wrapper( Function *cw = Function::Create(functype, GlobalVariable::ExternalLinkage, funcName, M); - jl_init_function(cw, params.TargetTriple); + jl_init_function(cw, out.TargetTriple); 
cw->setAttributes(AttributeList::get(M->getContext(), {attributes, cw->getAttributes()})); - jl_codectx_t ctx(M->getContext(), params, 0, 0); + jl_codectx_t ctx(out, 0, 0); ctx.f = cw; ctx.name = name; ctx.funcName = name; @@ -7748,7 +7646,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con // do codegen to create a C-callable alias/wrapper, or if sysimg_handle is set, // restore one from a loaded system image. -const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t ¶ms) +const char *jl_generate_ccallable(jl_codegen_output_t &out, jl_value_t *nameval, jl_value_t *declrt, jl_value_t *sigt) { ++GeneratedCCallables; jl_datatype_t *ft = (jl_datatype_t*)jl_tparam0(sigt); @@ -7762,7 +7660,7 @@ const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value crt = (jl_value_t*)jl_any_type; } bool toboxed; - Type *lcrt = _julia_struct_to_llvm(¶ms, *params.tsctx.getContext(), crt, &toboxed); + Type *lcrt = _julia_struct_to_llvm(&out, out.get_context(), crt, &toboxed); if (toboxed) lcrt = JuliaType::get_prjlvalue_ty(lcrt->getContext()); size_t nargs = jl_nparams(sigt)-1; @@ -7775,13 +7673,15 @@ const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value jl_value_t *err; { // scope block for sig function_sig_t sig("cfunction", lcrt, crt, toboxed, false, - argtypes, NULL, false, CallingConv::C, false, ¶ms); + argtypes, NULL, false, CallingConv::C, false, &out); if (sig.err_msg.empty()) { - //Safe b/c params holds context lock - Function *cw = gen_cfun_wrapper(llvmmod, params, sig, ff, name, declrt, sigt, NULL, NULL, NULL); - auto alias = GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(), - GlobalValue::ExternalLinkage, name, cw, llvmmod); - if (params.TargetTriple.isOSBinFormatCOFF()) { + //Safe b/c out holds context lock + Function *cw = gen_cfun_wrapper(&out.get_module(), out, sig, ff, name, declrt, sigt, 
NULL, NULL, NULL); + auto alias = + GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(), + GlobalValue::ExternalLinkage, name, cw, + &out.get_module()); + if (out.TargetTriple.isOSBinFormatCOFF()) { alias->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass); } JL_GC_POP(); @@ -7795,12 +7695,12 @@ const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value // generate a julia-callable function that calls f (AKA lam) // if is_opaque_closure, then generate the OC invoke, rather than a real invoke static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *abi, jl_value_t *jlretty, jl_value_t *declrt, jl_returninfo_t &f, unsigned nargs, int retarg, bool is_opaque_closure, StringRef funcName, - Module *M, jl_codegen_params_t &params) + Module *M, jl_codegen_output_t &out) { ++GeneratedInvokeWrappers; Function *w = Function::Create(get_func_sig(M->getContext()), GlobalVariable::ExternalLinkage, funcName, M); - jl_init_function(w, params.TargetTriple); - jl_name_jlfunc_args(params, w); + jl_init_function(w, out.TargetTriple); + jl_name_jlfunc_args(out, w); w->setAttributes(AttributeList::get(M->getContext(), {get_func_attrs(M->getContext()), w->getAttributes()})); w->addFnAttr(Attribute::OptimizeNone); w->addFnAttr(Attribute::NoInline); @@ -7811,7 +7711,7 @@ static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *abi, jl_va //Value *mfunc = &*AI++; (void)mfunc; // unused assert(AI == w->arg_end()); - jl_codectx_t ctx(M->getContext(), params, 0, 0); + jl_codectx_t ctx(out, 0, 0); ctx.f = w; ctx.linfo = lam; ctx.rettype = jlretty; @@ -7869,10 +7769,10 @@ static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *abi, jl_va } } -static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, +jl_returninfo_t get_specsig_function(jl_codegen_output_t &out, Module *M, Value 
*fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, ArrayRef ArgNames, unsigned nreq) { - bool gcstack_arg = params.params->gcstack_arg; + bool gcstack_arg = out.params->gcstack_arg; jl_returninfo_t props = {}; SmallVector fsig; SmallVector argnames; @@ -7907,7 +7807,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module } else if (!deserves_retbox(jlrettype)) { bool retboxed; - rt = _julia_type_to_llvm(&params, M->getContext(), jlrettype, &retboxed); + rt = _julia_type_to_llvm(&out, M->getContext(), jlrettype, &retboxed); assert(!retboxed); if (rt != getVoidTy(M->getContext()) && deserves_sret(jlrettype, rt)) { auto tracked = CountTrackedPointers(rt, true); @@ -7966,7 +7866,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module if (gcstack_arg) { AttrBuilder param(M->getContext()); - if (params.use_swiftcc) + if (out.use_swiftcc) param.addAttribute(Attribute::SwiftSelf); param.addAttribute("gcstack"); param.addAttribute(Attribute::NonNull); @@ -7984,7 +7884,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module if (is_uniquerep_Type(jt)) continue; isboxed = deserves_argbox(jt); - et = isboxed ? T_prjlvalue : _julia_type_to_llvm(&params, M->getContext(), jt, nullptr); + et = isboxed ? T_prjlvalue : _julia_type_to_llvm(&out, M->getContext(), jt, nullptr); if (type_is_ghost(et)) continue; } @@ -8040,8 +7940,8 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module Function *f = M ? 
cast_or_null(M->getNamedValue(name)) : NULL; if (f == NULL) { f = Function::Create(ftype, GlobalVariable::ExternalLinkage, name, M); - jl_init_function(f, params.TargetTriple); - if (params.params->debug_info_level >= 2) { + jl_init_function(f, out.TargetTriple); + if (out.params->debug_info_level >= 2) { ios_t sigbuf; ios_mem(&sigbuf, 0); jl_static_show_func_sig((JL_STREAM*) &sigbuf, sig); @@ -8060,7 +7960,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module assert(fval->getType()->isPointerTy()); } if (auto F = dyn_cast(fval)) { - if (gcstack_arg && params.use_swiftcc) + if (gcstack_arg && out.use_swiftcc) F->setCallingConv(CallingConv::Swift); assert(F->arg_size() >= argnames.size()); for (size_t i = 0; i < argnames.size(); i++) { @@ -8110,19 +8010,18 @@ static jl_datatype_t *compute_va_type(jl_value_t *sig, size_t nreq) // Compile to LLVM IR, using a specialized signature if applicable. static jl_llvm_functions_t emit_function( - orc::ThreadSafeModule &TSM, + jl_codegen_output_t &out, jl_method_instance_t *lam, jl_code_info_t *src, jl_value_t *abi, - jl_value_t *jlrettype, - jl_codegen_params_t &params) + jl_value_t *jlrettype) { ++EmittedFunctions; // step 1. unpack AST and allocate codegen context for this function size_t min_world = src->min_world; size_t max_world = src->max_world; - jl_llvm_functions_t declarations; - jl_codectx_t ctx(*params.tsctx.getContext(), params, min_world, max_world); + jl_llvm_functions_t declarations{}; + jl_codectx_t ctx(out, min_world, max_world); jl_datatype_t *vatyp = NULL; JL_GC_PUSH2(&ctx.code, &vatyp); ctx.code = src->code; @@ -8243,7 +8142,7 @@ static jl_llvm_functions_t ctx.ssavalue_usecount.assign(n_ssavalues, 0); bool specsig, needsparams; - std::tie(specsig, needsparams) = uses_specsig(abi, lam, jlrettype, params.params->prefer_specsig); + std::tie(specsig, needsparams) = uses_specsig(abi, lam, jlrettype, out.params->prefer_specsig); // step 3. 
some variable analysis size_t i; @@ -8296,12 +8195,15 @@ static jl_llvm_functions_t if (!specsig) ctx.nReqArgs--; // function not part of argArray in jlcall - std::string _funcName = get_function_name(specsig, needsparams, ctx.name, ctx.emission_context.TargetTriple); - declarations.specFunctionObject = _funcName; + auto specptr_name = out.make_name(JL_SYMBOL_SPECPTR_DEF, + needsparams ? JL_INVOKE_SPARAM : + specsig ? JL_INVOKE_SPECSIG : + JL_INVOKE_ARGS, + ctx.name); // allocate Function declarations and wrapper objects //Safe because params holds ctx lock - Module *M = TSM.getModuleUnlocked(); + Module *M = &out.get_module(); jl_debugcache_t debugcache; debugcache.initialize(M); jl_returninfo_t returninfo = {}; @@ -8322,10 +8224,11 @@ static jl_llvm_functions_t ArgNames[i] = name; } } - returninfo = get_specsig_function(params, M, NULL, declarations.specFunctionObject, abi, - jlrettype, ctx.is_opaque_closure, - ArgNames, nreq); + returninfo = + get_specsig_function(out, M, NULL, specptr_name, abi, + jlrettype, ctx.is_opaque_closure, ArgNames, nreq); f = cast(returninfo.decl.getCallee()); + declarations.specptr = f; has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union); jl_init_function(f, ctx.emission_context.TargetTriple); @@ -8353,19 +8256,21 @@ static jl_llvm_functions_t return retarg; }(); - std::string wrapName; - raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); - declarations.functionObject = wrapName; + declarations.invoke_api = JL_INVOKE_SPECSIG; + auto invoke_name = out.make_name(JL_SYMBOL_INVOKE_DEF, JL_INVOKE_SPECSIG, ctx.name); size_t nparams = jl_nparams(abi); - gen_invoke_wrapper(lam, abi, jlrettype, jlrettype, returninfo, nparams, retarg, ctx.is_opaque_closure, declarations.functionObject, M, ctx.emission_context); + gen_invoke_wrapper(lam, abi, jlrettype, jlrettype, returninfo, nparams, retarg, + ctx.is_opaque_closure, invoke_name, 
M, + ctx.emission_context); + declarations.invoke = M->getFunction(invoke_name); // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType) // TODO: add attributes: dereferenceable // TODO: (if needsparams) add attributes: dereferenceable, readonly, nocapture } else { - f = Function::Create(needsparams ? ctx.types().T_jlfuncparams : ctx.types().T_jlfunc, - GlobalVariable::ExternalLinkage, - declarations.specFunctionObject, M); + f = Function::Create( + needsparams ? ctx.types().T_jlfuncparams : ctx.types().T_jlfunc, + GlobalVariable::ExternalLinkage, specptr_name, M); jl_init_function(f, ctx.emission_context.TargetTriple); if (needsparams) jl_name_jlfuncparams_args(ctx.emission_context, f); @@ -8373,10 +8278,11 @@ static jl_llvm_functions_t jl_name_jlfunc_args(ctx.emission_context, f); f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()})); returninfo.decl = f; - declarations.functionObject = needsparams ? "jl_fptr_sparam" : "jl_fptr_args"; + declarations.specptr = f; + declarations.invoke_api = needsparams ? JL_INVOKE_SPARAM : JL_INVOKE_ARGS; } - if (!params.getContext().shouldDiscardValueNames() && ctx.emission_context.params->debug_info_level >= 2 && lam->def.method && jl_is_method(lam->def.method) && lam->specTypes != (jl_value_t*)jl_emptytuple_type) { + if (!out.get_context().shouldDiscardValueNames() && ctx.emission_context.params->debug_info_level >= 2 && lam->def.method && jl_is_method(lam->def.method) && lam->specTypes != (jl_value_t*)jl_emptytuple_type) { ios_t sigbuf; ios_mem(&sigbuf, 0); jl_static_show_func_sig((JL_STREAM*) &sigbuf, (jl_value_t*)abi); @@ -8572,7 +8478,7 @@ static jl_llvm_functions_t // step 6b. 
Setup the GC frame and entry safepoint before any loads allocate_gc_frame(ctx, b0); - if (params.safepoint_on_entry && JL_FEAT_TEST(ctx, safepoint_on_entry)) + if (out.safepoint_on_entry && JL_FEAT_TEST(ctx, safepoint_on_entry)) emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const); Value *last_age = NULL; @@ -9726,79 +9632,49 @@ static jl_llvm_functions_t ctx.topalloca = nullptr; } - // link the dependent llvmcall modules, but switch their function's linkage to internal - // so that they don't conflict when they show up in the execution engine. - Linker L(*jl_Module); - for (auto &Mod : ctx.llvmcall_modules) { - SmallVector Exports; - for (const auto &F: Mod->functions()) - if (!F.isDeclaration()) - Exports.push_back(F.getName().str()); - bool error = L.linkInModule(std::move(Mod)); - assert(!error && "linking llvmcall modules failed"); - (void)error; - for (auto FN: Exports) - jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage); - } - JL_GC_POP(); return declarations; } // --- entry point --- -jl_llvm_functions_t jl_emit_codedecls( - orc::ThreadSafeModule &M, - jl_code_instance_t *codeinst, - jl_codegen_params_t &params) +jl_llvm_functions_t jl_emit_codedecls(jl_codegen_output_t &out, + jl_code_instance_t *codeinst) { - jl_llvm_functions_t decls = {}; + jl_llvm_functions_t decls{}; jl_method_instance_t *mi = jl_get_ci_mi(codeinst); bool specsig, needsparams; - std::tie(specsig, needsparams) = uses_specsig(get_ci_abi(codeinst), mi, codeinst->rettype, params.params->prefer_specsig); - const char *name = name_from_method_instance(mi); - if (specsig) - raw_string_ostream(decls.functionObject) << "jfptr_" << name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); - else if (needsparams) - decls.functionObject = "jl_fptr_sparam"; - else - decls.functionObject = "jl_fptr_args"; - raw_string_ostream(decls.specFunctionObject) << (specsig ? 
"j_" : "j1_") << name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); - M.withModuleDo([&](Module &M) { - bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; - if (specsig) { - get_specsig_function(params, &M, nullptr, decls.specFunctionObject, get_ci_abi(codeinst), codeinst->rettype, is_opaque_closure); - } - else { - Function *f = Function::Create(needsparams ? JuliaType::get_jlfuncparams_ty(M.getContext()) : JuliaType::get_jlfunc_ty(M.getContext()), - GlobalVariable::ExternalLinkage, - decls.specFunctionObject, M); - jl_init_function(f, params.TargetTriple); - f->setAttributes(AttributeList::get(M.getContext(), {get_func_attrs(M.getContext()), f->getAttributes()})); - } - }); + std::tie(specsig, needsparams) = uses_specsig( + get_ci_abi(codeinst), mi, codeinst->rettype, out.params->prefer_specsig); + if (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) { + decls.invoke_api = JL_INVOKE_CONST; + } + else { + decls.invoke_api = specsig ? JL_INVOKE_SPECSIG : + needsparams ? 
JL_INVOKE_SPARAM : + JL_INVOKE_ARGS; + StringRef specname = out.get_call_target(codeinst, specsig, false); + decls.specptr = out.get_module().getFunction(specname); + } + out.ci_funcs[codeinst] = decls; return decls; } -jl_llvm_functions_t jl_emit_code( - orc::ThreadSafeModule &m, +std::optional jl_emit_code( + jl_codegen_output_t &out, jl_method_instance_t *li, jl_code_info_t *src, jl_value_t *abi_at, - jl_value_t *abi_rt, - jl_codegen_params_t &params) + jl_value_t *abi_rt) { + std::optional ret; JL_TIMING(CODEGEN, CODEGEN_LLVM); jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK); - jl_llvm_functions_t decls = {}; - assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache || - compare_cgparams(params.params, &jl_default_cgparams)) && - "functions compiled with custom codegen params must not be cached"); JL_TRY { - decls = emit_function(m, li, src, abi_at, abi_rt, params); + ret = emit_function(out, li, src, abi_at, abi_rt); auto stream = *jl_ExecutionEngine->get_dump_emitted_mi_name_stream(); if (stream) { - jl_printf(stream, "%s\t", decls.specFunctionObject.c_str()); + jl_printf(stream, "%s\t", ret->specptr->getName().str().c_str()); // NOTE: We print the Type Tuple without surrounding quotes, because the quotes // break CSV parsing if there are any internal quotes in the Type name (e.g. in // Symbol("...")). The \t delineator should be enough to ensure whitespace is @@ -9811,141 +9687,156 @@ jl_llvm_functions_t jl_emit_code( JL_CATCH { // Something failed! This is very, very bad. // Try to pretend that it isn't and attempt to recover. 
- std::string mname = m.getModuleUnlocked()->getModuleIdentifier(); - m = orc::ThreadSafeModule(); - decls.functionObject = ""; - decls.specFunctionObject = ""; + std::string mname = out.get_module().getModuleIdentifier(); jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error during compilation of %s:\n", mname.c_str()); jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(jl_current_task)); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jl_fprint_backtrace(ios_safe_stderr); } - return decls; + return ret; } -static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codegen_params_t &params, jl_method_instance_t *mi, jl_value_t *rettype) +// TODO: handle jl_invoke_type properly +// TODO: this shouldn't return jl_llvm_functions_t +static jl_llvm_functions_t jl_emit_oc_wrapper(jl_codegen_output_t &out, jl_method_instance_t *mi, jl_value_t *rettype) { - jl_llvm_functions_t declarations; - declarations.functionObject = "jl_f_opaque_closure_call"; + jl_llvm_functions_t declarations{JL_INVOKE_ARGS}; if (uses_specsig(mi->specTypes, false, rettype, true)) { - // context lock is held by params - Module *M = m.getModuleUnlocked(); - jl_codectx_t ctx(M->getContext(), params, 0, 0); + Module *M = &out.get_module(); + jl_codectx_t ctx(out, 0, 0); ctx.name = M->getModuleIdentifier().data(); std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); - jl_returninfo_t returninfo = get_specsig_function(params, M, NULL, funcName, mi->specTypes, rettype, true); + jl_returninfo_t returninfo = get_specsig_function(out, M, NULL, funcName, mi->specTypes, rettype, true); Function *gf_thunk = cast(returninfo.decl.getCallee()); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); size_t nrealargs = jl_nparams(mi->specTypes); emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots, mi->specTypes, rettype, true, nrealargs, ctx.emission_context, 
prepare_call_in(gf_thunk->getParent(), jlopaque_closure_call_func)); // TODO: this could call emit_oc_call directly - declarations.specFunctionObject = funcName; + declarations.specptr = gf_thunk; } return declarations; } -jl_llvm_functions_t jl_emit_codeinst( - orc::ThreadSafeModule &m, + +std::optional jl_emit_codeinst( + jl_codegen_output_t &out, jl_code_instance_t *codeinst, - jl_code_info_t *src, - jl_codegen_params_t &params) + jl_code_info_t *src) { JL_TIMING(CODEGEN, CODEGEN_Codeinst); jl_timing_show_method_instance(jl_get_ci_mi(codeinst), JL_TIMING_DEFAULT_BLOCK); jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + std::optional decls; if (!src) { // Assert that this this is the generic method for opaque closure wrappers: // this signals to instead compile specptr such that it holds the specptr -> invoke wrapper // to satisfy the dispatching implementation requirements of jl_f_opaque_closure_call - if (mi->def.method == jl_opaque_closure_method) { - return jl_emit_oc_wrapper(m, params, mi, codeinst->rettype); - } - m = orc::ThreadSafeModule(); - return jl_llvm_functions_t(); // user error + if (mi->def.method != jl_opaque_closure_method) + return {}; // user error + decls = jl_emit_oc_wrapper(out, mi, codeinst->rettype); + } else { + //assert(jl_egal((jl_value_t*)jl_atomic_load_relaxed(&codeinst->debuginfo), (jl_value_t*)src->debuginfo) && "trying to generate code for a codeinst for an incompatible src"); + decls = jl_emit_code(out, mi, src, get_ci_abi(codeinst), codeinst->rettype); } - //assert(jl_egal((jl_value_t*)jl_atomic_load_relaxed(&codeinst->debuginfo), (jl_value_t*)src->debuginfo) && "trying to generate code for a codeinst for an incompatible src"); - jl_llvm_functions_t decls = jl_emit_code(m, mi, src, get_ci_abi(codeinst), codeinst->rettype, params); + if (!decls) + return {}; + out.ci_funcs[codeinst] = *decls; return decls; } -/// Stolen from IRMover.cpp, since it is needlessly private there -void linkFunctionBody(Function &Dst, Function &Src) 
+static jl_code_info_t *jl_get_method_ir(jl_code_instance_t *ci, uint16_t max_cost = 0) { - assert(Dst.isDeclaration() && !Src.isDeclaration()); - - // Link in the operands without remapping. - if (Src.hasPrefixData()) - Dst.setPrefixData(Src.getPrefixData()); - if (Src.hasPrologueData()) - Dst.setPrologueData(Src.getPrologueData()); - if (Src.hasPersonalityFn()) - Dst.setPersonalityFn(Src.getPersonalityFn()); - if (Src.hasPersonalityFn()) - Dst.setPersonalityFn(Src.getPersonalityFn()); - assert(Src.IsNewDbgInfoFormat == Dst.IsNewDbgInfoFormat); - - // Copy over the metadata attachments without remapping. - Dst.copyMetadata(&Src, 0); - - // Steal arguments and splice the body of Src into Dst. - Dst.stealArgumentListFrom(Src); - Dst.splice(Dst.end(), &Src); + jl_value_t *src = jl_atomic_load_relaxed(&ci->inferred); + jl_method_instance_t *mi = jl_get_ci_mi(ci); + if (!src) + return nullptr; + if (jl_is_code_info(src) && (max_cost == 0 || jl_ir_inlining_cost(src) < max_cost)) + return (jl_code_info_t *)src; + if (jl_is_string(src) && jl_is_method(mi->def.method) && + (max_cost == 0 || jl_ir_inlining_cost(src) < max_cost)) + return jl_uncompress_ir(mi->def.method, ci, src); + return nullptr; } -void emit_always_inline(orc::ThreadSafeModule &result_m, jl_codegen_params_t &params) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +void emit_always_inline(jl_codegen_output_t &out) { + SmallVector> + queue; + auto orig_ci_funcs = out.ci_funcs; while (true) { - SmallVector always_inline; - for (auto &it : params.workqueue) { - if (it.second.private_linkage && it.second.decl->isDeclaration()) - always_inline.push_back(it); - it.second.private_linkage = false; - } - if (always_inline.empty()) + for (auto &[call, target] : out.call_targets) { + auto [ci, api] = call; + if (target.private_linkage && target.decl->isDeclaration()) + queue.push_back({ci, api, target}); + } + if (queue.empty()) { + // Don't define functions for CIs that are here for inlining only + out.ci_funcs = 
std::move(orig_ci_funcs); return; - jl_task_t *ct = jl_current_task; - int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); // codegen may contain safepoints (such as jl_subtype calls) + } + jl_code_info_t *src = nullptr; - params.safepoint_on_entry = false; - params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); - JL_GC_PUSH2(&params.temporary_roots, &src); - for (auto &it : always_inline) { - jl_code_instance_t *codeinst = it.first; - auto &proto = it.second; - Function *decl = proto.decl; - if (decl->isDeclaration()) { - src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); - jl_method_instance_t *mi = jl_get_ci_mi(codeinst); - jl_method_t *def = mi->def.method; - if (src && jl_is_string((jl_value_t*)src) && jl_is_method(def) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) - src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src); - if (src && jl_is_code_info(src) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) { - jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints - if (!result_m) - break; - // TODO: jl_optimize_roots(params, mi, *result_m.getModuleUnlocked()); // contains safepoints - Module &M = *result_m.getModuleUnlocked(); - if (decls.functionObject != "jl_fptr_args" && - decls.functionObject != "jl_fptr_sparam" && - decls.functionObject != "jl_f_opaque_closure_call") { - Function *F = M.getFunction(decls.functionObject); - F->eraseFromParent(); - } - if (!decls.specFunctionObject.empty()) { - Function *specF = M.getFunction(decls.specFunctionObject); - linkFunctionBody(*decl, *specF); - decl->addFnAttr(Attribute::InlineHint); - decl->setLinkage(proto.external_linkage ? 
GlobalValue::AvailableExternallyLinkage : GlobalValue::PrivateLinkage); - specF->eraseFromParent(); - } - } + jl_task_t *ct = jl_current_task; + // codegen may contain safepoints (such as jl_subtype calls) + int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); + out.safepoint_on_entry = false; + JL_GC_PUSH1(&src); + for (auto &[ci, api, target] : queue) { + if (!target.decl->isDeclaration()) + continue; + auto it = out.ci_funcs.find(ci); + jl_llvm_functions_t decls; + if (it != out.ci_funcs.end()) { + decls = it->second; + } else { + target.private_linkage = false; + src = jl_get_method_ir(ci, UINT16_MAX); + if (!src) + continue; + decls = *jl_emit_codeinst(out, ci, src); // contains safepoints + auto linkage = target.external_linkage ? + GlobalValue::AvailableExternallyLinkage : + GlobalValue::PrivateLinkage; + if (decls.invoke) + decls.invoke->setLinkage(linkage); + decls.specptr->setLinkage(linkage); } + + // TODO: jl_optimize_roots? + assert(api == decls.invoke_api); + target.decl->replaceAllUsesWith(decls.specptr); + target.decl->addFnAttr(Attribute::InlineHint); + target.decl = decls.specptr; } - params.temporary_roots = nullptr; JL_GC_POP(); jl_gc_unsafe_leave(ct->ptls, gc_state); + queue.clear(); + } +} + +// Creating an llvm::Linker is very expensive, and the cost grows with the size +// of the module being linked into; do it only once and link every llvmcall +// module at once. +void emit_llvmcall_modules(jl_codegen_output_t &out) +{ + // link the dependent llvmcall modules, but switch their function's linkage to internal + // so that they don't conflict when they show up in the execution engine. 
+ if (!out.llvmcall_modules.empty()) { + auto &M = out.get_module(); + Linker L{M}; + for (auto &Mod : out.llvmcall_modules) { + SmallVector Exports; + for (const auto &F : Mod->functions()) + if (!F.isDeclaration()) + Exports.push_back(F.getName().str()); + bool error = L.linkInModule(std::move(Mod)); + assert(!error && "linking llvmcall modules failed"); + (void)error; + for (auto FN : Exports) + M.getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage); + } } } diff --git a/src/debug-registry.h b/src/debug-registry.h index 00e3445200361..693b5b053455e 100644 --- a/src/debug-registry.h +++ b/src/debug-registry.h @@ -127,17 +127,10 @@ class JITDebugInfoRegistry objectmap_t objectmap{}; rev_map> cimap{}; - // Maintain a mapping of unrealized function names -> linfo objects - // so that when we see it get emitted, we can add a link back to the linfo - // that it came from (providing name, type signature, file info, etc.) - Locked> codeinst_in_flight{}; - Locked> image_info{}; Locked objfilemap{}; - static std::string mangle(llvm::StringRef Name, const llvm::DataLayout &DL) JL_NOTSAFEPOINT; - public: JITDebugInfoRegistry() JL_NOTSAFEPOINT; @@ -145,10 +138,10 @@ class JITDebugInfoRegistry libc_frames_t libc_frames{}; - void add_code_in_flight(llvm::StringRef name, jl_code_instance_t *codeinst, const llvm::DataLayout &DL) JL_NOTSAFEPOINT; jl_code_instance_t *lookupCodeInstance(size_t pointer) JL_NOTSAFEPOINT; void registerJITObject(const llvm::object::ObjectFile &Object, - std::function getLoadAddress) JL_NOTSAFEPOINT; + std::function getLoadAddress, + const jl_linker_info_t &Info) JL_NOTSAFEPOINT; objectmap_t& getObjectMap() JL_NOTSAFEPOINT; void add_image_info(image_info_t info) JL_NOTSAFEPOINT; bool get_image_info(uint64_t base, image_info_t *info) const JL_NOTSAFEPOINT; diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index 752dc505092fa..f8e3042e89396 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -82,20 +82,6 @@ static void processFDEs(const 
char *EHFrameAddr, size_t EHFrameSize, callback f) } #endif -std::string JITDebugInfoRegistry::mangle(StringRef Name, const DataLayout &DL) -{ - std::string MangledName; - { - raw_string_ostream MangledNameStream(MangledName); - Mangler::getNameWithPrefix(MangledNameStream, Name, DL); - } - return MangledName; -} - -void JITDebugInfoRegistry::add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) { - (**codeinst_in_flight)[mangle(name, DL)] = codeinst; -} - jl_code_instance_t *JITDebugInfoRegistry::lookupCodeInstance(size_t pointer) { jl_lock_profile(); @@ -163,23 +149,7 @@ static void jl_profile_atomic(T f) JL_NOTSAFEPOINT jl_unlock_profile_wr(); } - // --- storing and accessing source location metadata --- -void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER -{ - // Non-opaque-closure MethodInstances are considered globally rooted - // through their methods, but for OC, we need to create a global root - // here. 
- jl_method_instance_t *mi = jl_get_ci_mi(codeinst); - if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) { - jl_task_t *ct = jl_current_task; - int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); - jl_as_global_root((jl_value_t*)mi, 1); - jl_gc_unsafe_leave(ct->ptls, gc_state); - } - getJITDebugRegistry().add_code_in_flight(name, codeinst, DL); -} - #if defined(_OS_WINDOWS_) static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnname, @@ -235,11 +205,26 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam } #endif -void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, - std::function getLoadAddress) +void JITDebugInfoRegistry::registerJITObject( + const object::ObjectFile &Object, + std::function getLoadAddress, + const jl_linker_info_t &Info) { object::section_iterator EndSection = Object.section_end(); + StringMap sym_to_ci; + for (auto &[ci, funcs] : Info.ci_funcs) { + // don't remember toplevel thunks because + // they may not be rooted in the gc for the life of the program, + // and the runtime doesn't notify us when the code becomes unreachable :( + if (!jl_is_method(jl_get_ci_mi(ci)->def.method)) + continue; + if (funcs.invoke) + sym_to_ci[*funcs.invoke] = ci; + if (funcs.specptr) + sym_to_ci[*funcs.specptr] = ci; + } + bool anyfunctions = false; for (const object::SymbolRef &sym_iter : Object.symbols()) { object::SymbolRef::Type SymbolType = cantFail(sym_iter.getType()); @@ -379,14 +364,9 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, (uint8_t*)(uintptr_t)SectionLoadAddr, (size_t)SectionSize, UnwindData); #endif jl_code_instance_t *codeinst = NULL; - { - auto lock = *this->codeinst_in_flight; - auto &codeinst_in_flight = *lock; - StringMap::iterator codeinst_it = codeinst_in_flight.find(sName); - if (codeinst_it != codeinst_in_flight.end()) { - codeinst = codeinst_it->second; - codeinst_in_flight.erase(codeinst_it); - } + auto 
it = sym_to_ci.find(sName); + if (it != sym_to_ci.end()) { + codeinst = it->second; } jl_profile_atomic([&]() JL_NOTSAFEPOINT { if (codeinst) @@ -405,9 +385,10 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, } void jl_register_jit_object(const object::ObjectFile &Object, - std::function getLoadAddress) + std::function getLoadAddress, + const jl_linker_info_t &Info) { - getJITDebugRegistry().registerJITObject(Object, getLoadAddress); + getJITDebugRegistry().registerJITObject(Object, getLoadAddress, Info); } // TODO: convert the safe names from aotcomile.cpp:makeSafeName back into symbols diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index eae266364932b..9ef9541df0a3b 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -38,6 +38,12 @@ #include #include +#include +#include +#include +#include +#include + // target machine computation #include #include @@ -151,40 +157,20 @@ void jl_dump_llvm_opt_impl(void *s) static void jl_decorate_module(Module &M) JL_NOTSAFEPOINT; -void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT -{ - ++LinkedGlobals; - Constant *P = literal_static_pointer_val(addr, GV->getValueType()); - GV->setInitializer(P); - GV->setDSOLocal(true); - if (jl_options.image_codegen) { - // If we are forcing imaging mode codegen for debugging, - // emit external non-const symbol to avoid LLVM optimizing the code - // similar to non-imaging mode. 
- assert(GV->hasExternalLinkage()); - } - else { - GV->setConstant(true); - GV->setLinkage(GlobalValue::PrivateLinkage); - GV->setVisibility(GlobalValue::DefaultVisibility); - GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - } -} - // convert local roots into global roots, if they are needed -static void jl_optimize_roots(jl_codegen_params_t &params, jl_method_instance_t *mi, Module &M) +static void jl_promote_method_roots(jl_codegen_output_t &out, jl_method_instance_t *mi, Module &M) { - JL_GC_PROMISE_ROOTED(params.temporary_roots); // rooted by caller - if (jl_array_dim0(params.temporary_roots) == 0) + JL_GC_PROMISE_ROOTED(out.temporary_roots); // rooted by caller + if (jl_array_dim0(out.temporary_roots) == 0) return; jl_method_t *m = mi->def.method; if (jl_is_method(m)) // the method might have a root for this already; use it if so JL_LOCK(&m->writelock); - for (size_t i = 0; i < jl_array_dim0(params.temporary_roots); i++) { - jl_value_t *val = jl_array_ptr_ref(params.temporary_roots, i); - auto ref = params.global_targets.find((void*)val); - if (ref == params.global_targets.end()) + for (size_t i = 0; i < jl_array_dim0(out.temporary_roots); i++) { + jl_value_t *val = jl_array_ptr_ref(out.temporary_roots, i); + auto ref = out.global_targets.find((void*)val); + if (ref == out.global_targets.end()) continue; auto get_global_root = [val, m]() { if (jl_is_globally_rooted(val)) @@ -203,14 +189,14 @@ static void jl_optimize_roots(jl_codegen_params_t &params, jl_method_instance_t jl_value_t *mval = get_global_root(); if (mval != val) { GlobalVariable *GV = ref->second; - params.global_targets.erase(ref); - auto mref = params.global_targets.find((void*)mval); - if (mref != params.global_targets.end()) { + out.global_targets.erase(ref); + auto mref = out.global_targets.find((void*)mval); + if (mref != out.global_targets.end()) { GV->replaceAllUsesWith(mref->second); GV->eraseFromParent(); } else { - params.global_targets[(void*)mval] = GV; + 
out.global_targets[(void*)mval] = GV; } } } @@ -218,43 +204,88 @@ static void jl_optimize_roots(jl_codegen_params_t &params, jl_method_instance_t JL_UNLOCK(&m->writelock); } -static void finish_params(Module *M, jl_codegen_params_t &params, SmallVector &sharedmodules) JL_NOTSAFEPOINT +StringRef jl_codegen_output_t::strip_linux(StringRef name) { - if (params._shared_module) { - sharedmodules.push_back(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); - } - - // In imaging mode, we can't inline global variable initializers in order to preserve - // the fiction that we don't know what loads from the global will return. Thus, we - // need to emit a separate module for the globals before any functions are compiled, - // to ensure that the globals are defined when they are compiled. - if (jl_options.image_codegen) { - if (!params.global_targets.empty()) { - void **globalslots = new void*[params.global_targets.size()]; - void **slot = globalslots; - for (auto &global : params.global_targets) { - auto GV = global.second; - *slot = global.first; - jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot); - slot++; - } -#ifdef __clang_analyzer__ - static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it -#endif - } + if (TargetTriple.isOSLinux()) { + if (name[0] == '@') + return name.drop_front(); + } + return name; +} + +std::string jl_codegen_output_t::make_name(jl_symbol_prefix_t type, jl_invoke_api_t api, + StringRef orig_name) +{ + return make_name(jl_symbol_prefix(type, api), orig_name); +} + +std::string jl_codegen_output_t::make_name(StringRef prefix, StringRef orig_name) +{ + return names(prefix, strip_linux(orig_name)); +} + +std::string jl_codegen_output_t::make_name(StringRef orig_name) +{ + return names(strip_linux(orig_name)); +} + +// TODO: Don't repeat so much work in this and `emit_call_specfun_other` +// TODO: just take jl_invoke_api_t argument 
instead of specsig? +StringRef jl_codegen_output_t::get_call_target(jl_code_instance_t *ci, bool specsig, + bool always_inline) +{ + jl_invoke_api_t api = specsig ? JL_INVOKE_SPECSIG : JL_INVOKE_ARGS; + auto it = call_targets.find({ci, api}); + if (it != call_targets.end()) { + it->second.external_linkage |= !always_inline; + it->second.private_linkage |= always_inline; + return it->second.decl->getName(); + } + std::string protoname = make_name(JL_SYMBOL_SPECPTR_PROTO, api, + name_from_method_instance(jl_get_ci_mi(ci))); + jl_codegen_call_target_t &target = call_targets[{ci, api}]; + target.external_linkage = !always_inline; + target.private_linkage = always_inline; + if (specsig) { + jl_method_instance_t *mi = jl_get_ci_mi(ci); + bool is_opaque_closure = + jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + jl_returninfo_t info = + get_specsig_function(*this, &get_module(), nullptr, protoname, get_ci_abi(ci), + ci->rettype, is_opaque_closure); + target.decl = cast(info.decl.getCallee()); } else { - StringMap NewGlobals; - for (auto &global : params.global_targets) { - NewGlobals[global.second->getName()] = global.first; - } - for (auto &GV : M->globals()) { - auto InitValue = NewGlobals.find(GV.getName()); - if (InitValue != NewGlobals.end()) { - jl_link_global(&GV, InitValue->second); - } - } + target.decl = get_or_emit_fptr1(protoname, &get_module()); + } + return target.decl->getName(); +} + +jl_emitted_output_t jl_codegen_output_t::finish(orc::SymbolStringPool &SSP) +{ + + auto info = std::make_unique(); + auto intern = [&](StringRef name) { + SmallString<128> buf; + Mangler::getNameWithPrefix(buf, name, DL); + return SSP.intern(buf); + }; + + // Mangle and intern each part of the linking metadata, before all the + // pointers to LLVM values are invalidated. + for (auto &[ci, funcs] : ci_funcs) { + info->ci_funcs[ci] = {funcs.invoke_api, + funcs.invoke ? intern(funcs.invoke->getName()) : nullptr, + funcs.specptr ?
intern(funcs.specptr->getName()) : nullptr}; } + for (auto &[call, target] : call_targets) + info->call_targets[call] = intern(target.decl->getName()); + for (auto [val, gv] : global_targets) { + info->global_targets[val] = intern(gv->getName()); + } + + unlock(); + return {std::move(TSM), std::move(info)}; } // Return a specptr that is ABI-compatible with `from_abi` which invokes `codeinst`. @@ -299,61 +330,49 @@ void *jl_jit_abi_converter_impl(jl_task_t *ct, jl_abi_t from_abi, orc::ThreadSafeModule result_m; std::string gf_thunk_name; + auto out = std::make_unique("gfthunk", + jl_ExecutionEngine->getDataLayout(), + jl_ExecutionEngine->getTargetTriple()); { - jl_codegen_params_t params(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context - params.getContext().setDiscardValueNames(true); - params.cache = true; - params.imaging_mode = 0; - result_m = jl_create_ts_module("gfthunk", params.tsctx, params.DL, params.TargetTriple); - Module *M = result_m.getModuleUnlocked(); + out->get_context().setDiscardValueNames(true); + out->imaging_mode = 0; + auto &ctx = out->get_context(); if (target) { - Value *llvmtarget = literal_static_pointer_val((void*)target, PointerType::get(M->getContext(), 0)); - gf_thunk_name = emit_abi_converter(M, params, from_abi, codeinst, llvmtarget, target_specsig); + Value *llvmtarget = literal_static_pointer_val((void*)target, PointerType::get(ctx, 0)); + gf_thunk_name = emit_abi_converter(*out, from_abi, codeinst, llvmtarget, target_specsig); } else if (invoke == jl_fptr_const_return_addr) { - gf_thunk_name = emit_abi_constreturn(M, params, from_abi, codeinst->rettype_const); + gf_thunk_name = emit_abi_constreturn(*out, from_abi, codeinst->rettype_const); } else { - Value *llvminvoke = invoke ? 
literal_static_pointer_val((void*)invoke, PointerType::get(M->getContext(), 0)) : nullptr; - gf_thunk_name = emit_abi_dispatcher(M, params, from_abi, codeinst, llvminvoke); + Value *llvminvoke = invoke ? literal_static_pointer_val((void*)invoke, PointerType::get(ctx, 0)) : nullptr; + gf_thunk_name = emit_abi_dispatcher(*out, from_abi, codeinst, llvminvoke); } - SmallVector sharedmodules; - finish_params(M, params, sharedmodules); - assert(sharedmodules.empty()); } int8_t gc_state = jl_gc_safe_enter(ct->ptls); - jl_ExecutionEngine->addModule(std::move(result_m)); + auto &ES = jl_ExecutionEngine->getExecutionSession(); + auto emitted = out->finish(*ES.getSymbolStringPool()); + jl_ExecutionEngine->addOutput(std::move(emitted)); uintptr_t Addr = jl_ExecutionEngine->getFunctionAddress(gf_thunk_name); jl_gc_safe_leave(ct->ptls, gc_state); assert(Addr); return (void*)Addr; } - // lock for places where only single threaded behavior is implemented, so we need GC support static jl_mutex_t jitlock; - // locks and barriers for this state +// engine_lock protects emitted_code, threads_in_compiler_phase, engine_wait static std::mutex engine_lock; static std::condition_variable engine_wait; static int threads_in_compiler_phase; - // the TSM for each codeinst -static SmallVector sharedmodules; -static DenseMap emittedmodules; - // the invoke and specsig function names in the JIT -static DenseMap invokenames; - // everything that any thread wants to compile right now -static DenseSet compileready; - // everything that any thread has compiled recently -static DenseSet linkready; - // a map from a codeinst to the outgoing edges needed before linking it -static DenseMap> complete_graph; - // the state for each codeinst and the number of unresolved edges (we don't - // really need this once JITLink is available everywhere, since every module - // is automatically complete, and we can emit any required fixups later as a - // separate module) -static DenseMap> incompletemodules; - // the 
set of incoming unresolved edges resolved by a codeinstance -static DenseMap> incomplete_rgraph; +// Life cycle of a CodeInstance in the JIT (each step is atomic): +// 1. Emitted to new jl_codegen_output_t +// 2. Added to emitted_code, CodeInstance.invoke = jl_fptr_wait_for_compiled +// 3. Removed from emitted_code, compilation begins (invoke unchanged) +// 4. Added to compiled_code, compilation finished +// 5. Removed from compiled_code, invoke and specptr published +static SmallVector> emitted_code; +static SmallVector compiled_code; // Lock hierarchy here: // jitlock is outermost, can contain others and allows GC @@ -368,504 +387,126 @@ static DenseMap> incompl // However, this guarantee relies on Julia releasing all TSC locks before causing any materialization units to be dispatched // as materialization may need to acquire TSC locks. - -static int jl_analyze_workqueue(jl_code_instance_t *callee, jl_codegen_params_t &params, bool forceall=false) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +static void jl_publish_compiled_ci(jl_code_instance_t *ci, + const jl_codeinst_funcs_t &addrs) JL_NOTSAFEPOINT { - jl_task_t *ct = jl_current_task; - jl_workqueue_t edges; - std::swap(params.workqueue, edges); - for (auto &it : edges) { - jl_code_instance_t *codeinst = it.first; - JL_GC_PROMISE_ROOTED(codeinst); - auto &proto = it.second; - if (proto.external_linkage || proto.decl->isDeclaration()) { // if it is not expected externally and has a definition locally, there is no need to patch this edge up - // try to emit code for this item from the workqueue - StringRef invokeName = ""; - StringRef preal_decl = ""; - bool preal_specsig = false; - jl_callptr_t invoke = nullptr; - bool isedge = false; - assert(params.cache); - // Checking the cache here is merely an optimization and not strictly required - // But it must be consistent with the following invokenames lookup, which is protected by the engine_lock - uint8_t specsigflags; - void *fptr; - void
jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) JL_NOTSAFEPOINT; // declare it is not a safepoint (or deadlock) in this file due to 0 parameter - jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); - //if (specsig ? specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED : invoke == jl_fptr_args_addr) - if (invoke == jl_fptr_args_addr) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - } - else if (specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - preal_specsig = true; - } - bool force = forceall || invoke != nullptr; - if (preal_decl.empty()) { - auto it = invokenames.find(codeinst); - if (it != invokenames.end()) { - auto &decls = it->second; - invokeName = decls.functionObject; - if (decls.functionObject == "jl_fptr_args") { - preal_decl = decls.specFunctionObject; - isedge = true; - } - else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { - preal_decl = decls.specFunctionObject; - preal_specsig = true; - isedge = true; - } - force = true; - } - } - if (preal_decl.empty()) { - // there may be an equivalent method already compiled (or at least registered with the JIT to compile), in which case we should be using that instead - jl_code_instance_t *compiled_ci = jl_get_ci_equiv(codeinst, 0); - if (compiled_ci != codeinst) { - codeinst = compiled_ci; - uint8_t specsigflags; - void *fptr; - jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); - //if (specsig ? 
specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED : invoke == jl_fptr_args_addr) - if (invoke == jl_fptr_args_addr) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - } - else if (specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); - preal_specsig = true; - } - if (preal_decl.empty()) { - auto it = invokenames.find(codeinst); - if (it != invokenames.end()) { - auto &decls = it->second; - invokeName = decls.functionObject; - if (decls.functionObject == "jl_fptr_args") { - preal_decl = decls.specFunctionObject; - isedge = true; - } - else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { - preal_decl = decls.specFunctionObject; - preal_specsig = true; - isedge = true; - } - } - } - } - } - if (!preal_decl.empty() || force) { - // if we have a prototype emitted, compare it to what we emitted earlier - Module *mod = proto.decl->getParent(); - Function *pinvoke = nullptr; - if (proto.decl->isDeclaration()) { - if (preal_decl.empty()) { - if (invoke != nullptr && invokeName.empty()) { - assert(invoke != jl_fptr_args_addr); - if (invoke == jl_fptr_sparam_addr) - invokeName = "jl_fptr_sparam"; - else if (invoke == jl_f_opaque_closure_call_addr) - invokeName = "jl_f_opaque_closure_call"; - else - invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); - } - pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); - if (!proto.specsig) { - proto.decl->replaceAllUsesWith(pinvoke); - proto.decl->eraseFromParent(); - proto.decl = pinvoke; - } - isedge = false; - } - if (proto.specsig && !preal_specsig) { - // get or build an fptr1 that can invoke codeinst - if (pinvoke == nullptr) - pinvoke = get_or_emit_fptr1(preal_decl, mod); - // emit specsig-to-(jl)invoke conversion - proto.decl->setLinkage(GlobalVariable::InternalLinkage); - 
//protodecl->setAlwaysInline(); - jl_init_function(proto.decl, params.TargetTriple); - // TODO: maybe this can be cached in codeinst->specfptr? - int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); // codegen may contain safepoints (such as jl_subtype calls) - jl_method_instance_t *mi = jl_get_ci_mi(codeinst); - size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed - bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; - emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke); - jl_gc_unsafe_leave(ct->ptls, gc_state); - preal_decl = ""; // no need to fixup the name - } - } - else if (proto.specsig && !preal_specsig) { - // privatize our definition, since for some reason we couldn't use the external one but have an internal one - proto.decl->setLinkage(GlobalValue::PrivateLinkage); - preal_decl = ""; // no need to fixup the name - } - if (!preal_decl.empty()) { - // merge and/or rename this prototype to the real function - if (Function *specfun = cast_or_null(mod->getNamedValue(preal_decl))) { - if (proto.decl != specfun) { - proto.decl->replaceAllUsesWith(specfun); - if (!proto.decl->isDeclaration() && specfun->isDeclaration()) - linkFunctionBody(*specfun, *proto.decl); - proto.decl->eraseFromParent(); - proto.decl = specfun; - } - } - else { - proto.decl->setName(preal_decl); - } - } - if (proto.oc) { // additionally, if we are dealing with an OC constructor, then we might also need to fix up the fptr1 reference too - assert(proto.specsig); - StringRef ocinvokeDecl = invokeName; - if (invoke != nullptr && ocinvokeDecl.empty()) { - // check for some special tokens used by opaque_closure.c and convert those to their real functions - assert(invoke != jl_fptr_args_addr); - assert(invoke != jl_fptr_sparam_addr); - if (invoke == jl_fptr_interpret_call_addr) - ocinvokeDecl = "jl_fptr_interpret_call"; - else if 
(invoke == jl_fptr_const_return_addr) - ocinvokeDecl = "jl_fptr_const_return"; - else if (invoke == jl_f_opaque_closure_call_addr) - ocinvokeDecl = "jl_f_opaque_closure_call"; - //else if (invoke == jl_interpret_opaque_closure_addr) - else - ocinvokeDecl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); - } - // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too - // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure - if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") { - if (pinvoke == nullptr) - ocinvokeDecl = get_or_emit_fptr1(preal_decl, mod)->getName(); - else - ocinvokeDecl = pinvoke->getName(); - } - assert(!ocinvokeDecl.empty()); - assert(ocinvokeDecl != "jl_fptr_args"); - assert(ocinvokeDecl != "jl_fptr_sparam"); - // merge and/or rename this prototype to the real function - if (Function *specfun = cast_or_null(mod->getNamedValue(ocinvokeDecl))) { - if (proto.oc != specfun) { - proto.oc->replaceAllUsesWith(specfun); - proto.oc->eraseFromParent(); - proto.oc = specfun; - } - } - else { - proto.oc->setName(ocinvokeDecl); - } - } - } - else { - isedge = true; - params.workqueue.push_back(it); - incomplete_rgraph[codeinst].push_back(callee); - } - if (isedge) - complete_graph[callee].push_back(codeinst); - } + void *spec = addrs.specptr; + jl_callptr_t invoke = addrs.invoke_api == JL_INVOKE_SPECSIG ? 
+ (jl_callptr_t)addrs.invoke : + jl_invoke_api_callptr(addrs.invoke_api); + + void *prev = nullptr; + if (jl_atomic_cmpswap_acqrel(&ci->specptr.fptr, &prev, spec)) { + // only set specsig and invoke if we were the first to set specptr + // Clear compilation state bits, then set SPECPTR_SPECIALIZED if needed + if (addrs.invoke_api == JL_INVOKE_SPECSIG) + jl_atomic_fetch_or_relaxed(&ci->flags, JL_CI_FLAGS_SPECPTR_SPECIALIZED); + // we might overwrite invokeptr here; that's ok, anybody who relied on the identity + // of invokeptr either assumes that specptr was null, doesn't care about specptr, or + // will wait until flags has 0b10 set before reloading invoke + jl_atomic_store_release(&ci->invoke, invoke); + // Set INVOKE_MATCHES_SPECPTR to signal completion + jl_atomic_fetch_or_relaxed(&ci->flags, JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR); } - return params.workqueue.size(); -} - -// move codeinst (and deps) from incompletemodules to emitted modules -// and populate compileready from complete_graph -static void prepare_compile(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER -{ - SmallVector workqueue; - workqueue.push_back(codeinst); - while (!workqueue.empty()) { - codeinst = workqueue.pop_back_val(); - if (!invokenames.count(codeinst)) { - // this means it should be compiled already while the callee was in stasis - assert(jl_is_compiled_codeinst(codeinst)); - continue; - } - // if this was incomplete, force completion now of it - auto it = incompletemodules.find(codeinst); - if (it != incompletemodules.end()) { - int waiting = 0; - auto &edges = complete_graph[codeinst]; - auto edges_end = std::remove_if(edges.begin(), edges.end(), [&waiting, codeinst] (jl_code_instance_t *edge) JL_NOTSAFEPOINT -> bool { - auto &redges = incomplete_rgraph[edge]; - // waiting += std::erase(redges, codeinst); - auto redges_end = std::remove(redges.begin(), redges.end(), codeinst); - if (redges_end != redges.end()) { - waiting += redges.end() - redges_end; - 
redges.erase(redges_end, redges.end()); - assert(!invokenames.count(edge)); - } - return !invokenames.count(edge); - }); - edges.erase(edges_end, edges.end()); - assert(waiting == std::get<1>(it->second)); - std::get<1>(it->second) = 0; - auto &params = std::get<0>(it->second); - params.tsctx_lock = params.tsctx.getLock(); - waiting = jl_analyze_workqueue(codeinst, params, true); // may safepoint - assert(!waiting); (void)waiting; - Module *M = emittedmodules[codeinst].getModuleUnlocked(); - finish_params(M, params, sharedmodules); - incompletemodules.erase(it); - } - // and then indicate this should be compiled now - if (!linkready.count(codeinst) && compileready.insert(codeinst).second) { - auto edges = complete_graph.find(codeinst); - if (edges != complete_graph.end()) { - workqueue.append(edges->second); - } - } + else { + // someone else beat us, don't commit any results + while (!(jl_atomic_load_acquire(&ci->flags) & JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR)) + jl_cpu_pause(); } } -// notify any other pending work that this edge now has code defined -static void complete_emit(jl_code_instance_t *edge) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +static void jl_do_dump_compile(jl_code_instance_t *codeinst, uint64_t time) { - auto notify = incomplete_rgraph.find(edge); - if (notify == incomplete_rgraph.end()) - return; - auto redges = std::move(notify->second); - incomplete_rgraph.erase(notify); - for (size_t i = 0; i < redges.size(); i++) { - jl_code_instance_t *callee = redges[i]; - auto it = incompletemodules.find(callee); - assert(it != incompletemodules.end()); - if (--std::get<1>(it->second) == 0) { - auto &params = std::get<0>(it->second); - params.tsctx_lock = params.tsctx.getLock(); - assert(callee == it->first); - orc::ThreadSafeModule &M = emittedmodules[callee]; - emit_always_inline(M, params); // may safepoint - int waiting = jl_analyze_workqueue(callee, params); // may safepoint - assert(!waiting); (void)waiting; - finish_params(M.getModuleUnlocked(), params,
sharedmodules); - incompletemodules.erase(it); + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + if (jl_is_method(mi->def.method)) { + auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); + if (stream) { + ios_printf(stream, "%" PRIu64 "\t\"", time); + jl_static_show((JL_STREAM *)stream, mi->specTypes); + ios_printf(stream, "\"\n"); } } } - // set the invoke field for codeinst (and all deps, and assist with other pending work from other threads) now static void jl_compile_codeinst_now(jl_code_instance_t *codeinst) { - jl_unique_gcsafe_lock lock(engine_lock); - if (!invokenames.count(codeinst)) + jl_unique_gcsafe_lock lock{engine_lock}; + if (jl_is_compiled_codeinst(codeinst)) return; + threads_in_compiler_phase++; - prepare_compile(codeinst); // may safepoint - while (1) { - // TODO: split up this work by ThreadSafeContext, so two threads don't need to get the same locks and stall - if (!sharedmodules.empty()) { - auto TSM = sharedmodules.pop_back_val(); - lock.native.unlock(); - { - auto Lock = TSM.getContext().getLock(); - jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint - } - jl_ExecutionEngine->addModule(std::move(TSM)); - lock.native.lock(); - } - else if (!compileready.empty()) { - // move a function from compileready to linkready then compile it - auto compilenext = compileready.begin(); - codeinst = *compilenext; - compileready.erase(compilenext); - auto TSMref = emittedmodules.find(codeinst); - assert(TSMref != emittedmodules.end()); - auto TSM = std::move(TSMref->second); - linkready.insert(codeinst); - emittedmodules.erase(TSMref); - lock.native.unlock(); - uint64_t start_time = jl_hrtime(); - { - auto Lock = TSM.getContext().getLock(); - jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint - } - jl_ExecutionEngine->addModule(std::move(TSM)); // may safepoint - // If logging of the compilation stream is enabled, - // then dump the method-instance specialization type to the stream - 
jl_method_instance_t *mi = jl_get_ci_mi(codeinst); - uint64_t end_time = jl_hrtime(); - if (jl_is_method(mi->def.method)) { - auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); - if (stream) { - ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); - jl_static_show((JL_STREAM*)stream, mi->specTypes); - ios_printf(stream, "\"\n"); - } - } - jl_atomic_store_relaxed(&codeinst->time_compile, - julia_double_to_half(julia_half_to_float(jl_atomic_load_relaxed(&codeinst->time_compile)) - + (end_time - start_time) * 1e-9)); - lock.native.lock(); - } - else { - break; - } + while (!emitted_code.empty()) { + auto [ci, out] = emitted_code.pop_back_val(); + lock.native.unlock(); + uint64_t start_time = jl_hrtime(); + jl_ExecutionEngine->addOutput(std::move(out)); + jl_do_dump_compile(ci, jl_hrtime() - start_time); + lock.native.lock(); + compiled_code.push_back(ci); } - codeinst = nullptr; - // barrier until all threads have finished calling addModule - if (--threads_in_compiler_phase == 0) { - // the last thread out will finish linking everything - // then release all of the other threads - // move the function pointers out from invokenames to the codeinst - - // batch compile job for all new functions - SmallVector NewDefs; - for (auto &this_code : linkready) { - auto it = invokenames.find(this_code); - assert(it != invokenames.end()); - jl_llvm_functions_t &decls = it->second; - assert(!decls.functionObject.empty()); - if (decls.functionObject != "jl_fptr_args" && - decls.functionObject != "jl_fptr_sparam" && - decls.functionObject != "jl_f_opaque_closure_call") - NewDefs.push_back(decls.functionObject); - if (!decls.specFunctionObject.empty()) - NewDefs.push_back(decls.specFunctionObject); - } - auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); - - size_t nextaddr = 0; - for (auto &this_code : linkready) { - auto it = invokenames.find(this_code); - assert(it != invokenames.end()); - jl_llvm_functions_t &decls = it->second; - jl_callptr_t addr; - 
bool isspecsig = false; - if (decls.functionObject == "jl_fptr_args") { - addr = jl_fptr_args_addr; - } - else if (decls.functionObject == "jl_fptr_sparam") { - addr = jl_fptr_sparam_addr; - } - else if (decls.functionObject == "jl_f_opaque_closure_call") { - addr = jl_f_opaque_closure_call_addr; - } - else { - assert(NewDefs[nextaddr] == decls.functionObject); - addr = (jl_callptr_t)Addrs[nextaddr++]; - assert(addr); - isspecsig = true; - } - if (!decls.specFunctionObject.empty()) { - void *prev_specptr = nullptr; - assert(NewDefs[nextaddr] == decls.specFunctionObject); - void *spec = (void*)Addrs[nextaddr++]; - assert(spec); - if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { - // only set specsig and invoke if we were the first to set specptr - // Clear compilation state bits, then set SPECPTR_SPECIALIZED if needed - if (isspecsig) - jl_atomic_fetch_or_relaxed(&this_code->flags, JL_CI_FLAGS_SPECPTR_SPECIALIZED); - // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr - // either assumes that specptr was null, doesn't care about specptr, - // or will wait until flags has 0b10 set before reloading invoke - jl_atomic_store_release(&this_code->invoke, addr); - // Set INVOKE_MATCHES_SPECPTR to signal completion - jl_atomic_fetch_or_relaxed(&this_code->flags, JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR); - } - else { - //someone else beat us, don't commit any results - while (!(jl_atomic_load_acquire(&this_code->flags) & JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR)) { - jl_cpu_pause(); - } - addr = jl_atomic_load_relaxed(&this_code->invoke); - } - } - else { - jl_callptr_t prev_invoke = nullptr; - // Allow replacing addr if it is either nullptr or our special waiting placeholder. 
- if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { - if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { - addr = prev_invoke; - //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other - //known lesser function) - } - } - } - invokenames.erase(it); - complete_graph.erase(this_code); - } - linkready.clear(); - engine_wait.notify_all(); - } - else while (threads_in_compiler_phase) { + + if (--threads_in_compiler_phase > 0) { lock.wait(engine_wait); + return; } -} -void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; + assert(emitted_code.empty()); + auto addrs = jl_ExecutionEngine->findCIs(compiled_code); + for (size_t i = 0; i < compiled_code.size(); i++) + jl_publish_compiled_ci(compiled_code[i], addrs[i]); + compiled_code.clear(); + engine_wait.notify_all(); +} -extern "C" JL_DLLEXPORT_CODEGEN -void jl_emit_codeinst_to_jit_impl( - jl_code_instance_t *codeinst, - jl_code_info_t *src) +extern "C" JL_DLLEXPORT_CODEGEN void +jl_emit_codeinst_to_jit_impl(jl_code_instance_t *codeinst, jl_code_info_t *src) { - if (jl_is_compiled_codeinst(codeinst)) + if (jl_atomic_load_relaxed(&codeinst->invoke)) return; - { // lock scope - jl_unique_gcsafe_lock lock(engine_lock); - if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) - return; - } + JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); - // emit the code in LLVM IR form to the new context - jl_codegen_params_t params(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context - params.getContext().setDiscardValueNames(true); - params.cache = true; - params.imaging_mode = 0; - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), params.tsctx, params.DL, params.TargetTriple); - 
params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); - JL_GC_PUSH1(&params.temporary_roots); - jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints - if (!result_m) { + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + jl_codegen_output_t out{name_from_method_instance(mi), + jl_ExecutionEngine->getDataLayout(), + jl_ExecutionEngine->getTargetTriple()}; + out.get_context().setDiscardValueNames(true); + out.imaging_mode = false; + out.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); + JL_GC_PUSH1(&out.temporary_roots); + + if (!jl_emit_codeinst(out, codeinst, src)) { // contains safepoints JL_GC_POP(); return; } - jl_optimize_roots(params, jl_get_ci_mi(codeinst), *result_m.getModuleUnlocked()); // contains safepoints - params.temporary_roots = nullptr; - params.temporary_roots_set.clear(); + + // contains safepoints + jl_promote_method_roots(out, mi, out.get_module()); + emit_always_inline(out); // contains safepoints + emit_llvmcall_modules(out); + out.temporary_roots = nullptr; + out.temporary_roots_set.clear(); JL_GC_POP(); - { // drop lock before acquiring engine_lock - auto release = std::move(params.tsctx_lock); - } - jl_unique_gcsafe_lock lock(engine_lock); - if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) - return; // destroy everything - const std::string &specf = decls.specFunctionObject; - const std::string &f = decls.functionObject; - assert(!f.empty()); - // Prepare debug info to receive this function - // record that this function name came from this linfo, - // so we can build a reverse mapping for debug-info.
- bool toplevel = !jl_is_method(jl_get_ci_mi(codeinst)->def.method); - if (!toplevel) { - // don't remember toplevel thunks because - // they may not be rooted in the gc for the life of the program, - // and the runtime doesn't notify us when the code becomes unreachable :( - if (!specf.empty()) - jl_add_code_in_flight(specf, codeinst, params.DL); - if (f != "jl_fptr_args" && f != "jl_fptr_sparam") - jl_add_code_in_flight(f, codeinst, params.DL); - } + + // Non-opaque-closure MethodInstances are considered globally rooted + // through their methods, but for OC, we need to create a global root + // here. + if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) + jl_as_global_root((jl_value_t*)mi, 1); + + auto &ES = jl_ExecutionEngine->getExecutionSession(); + jl_emitted_output_t emitted = out.finish(*ES.getSymbolStringPool()); + + jl_unique_gcsafe_lock lock{engine_lock}; + // Bail out and clean up if another thread has started or finished compiling + // this CI. jl_callptr_t expected = NULL; - jl_atomic_cmpswap_relaxed(&codeinst->invoke, &expected, jl_fptr_wait_for_compiled_addr); - invokenames[codeinst] = std::move(decls); - complete_emit(codeinst); - params.tsctx_lock = params.tsctx.getLock(); // re-acquire lock - emit_always_inline(result_m, params); - int waiting = jl_analyze_workqueue(codeinst, params); - if (waiting) { - auto release = std::move(params.tsctx_lock); // unlock again before moving from it - incompletemodules.try_emplace(codeinst, std::move(params), waiting); - } - else { - finish_params(result_m.getModuleUnlocked(), params, sharedmodules); - } - emittedmodules[codeinst] = std::move(result_m); + if (!jl_atomic_cmpswap_relaxed(&codeinst->invoke, &expected, + jl_fptr_wait_for_compiled_addr)) + return; + emitted_code.emplace_back(codeinst, std::move(emitted)); } - extern "C" JL_DLLEXPORT_CODEGEN int jl_compile_codeinst_impl(jl_code_instance_t *ci) { @@ -1026,124 +667,113 @@ static orc::ThreadSafeModule 
selectOptLevel(orc::ThreadSafeModule TSM, orc::Mate return selectOptLevel(std::move(TSM)); } -void jl_register_jit_object(const object::ObjectFile &debugObj, - std::function getLoadAddress); - -namespace { - -using namespace llvm::orc; +void jl_register_jit_object(const object::ObjectFile &Object, + std::function getLoadAddress, + const jl_linker_info_t &Info); -struct JITObjectInfo { - std::unique_ptr BackingBuffer; - std::unique_ptr Object; - StringMap SectionLoadAddresses; -}; - -class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { - std::mutex PluginMutex; - std::map> PendingObjs; +void JLDebuginfoPlugin::notifyMaterializingWithInfo( + orc::MaterializationResponsibility &MR, jitlink::LinkGraph &G, + MemoryBufferRef InputObject, std::unique_ptr LinkerInfo) +{ + auto NewBuffer = + MemoryBuffer::getMemBufferCopy(InputObject.getBuffer(), G.getName()); + // Re-parsing the InputObject is wasteful, but for now, this lets us + // reuse the existing debuginfo.cpp code. Should look into just + // directly pulling out all the information required in a JITLink pass + // and just keeping the required tables/DWARF sections around (perhaps + // using the LLVM DebuggerSupportPlugin as a reference). + auto NewObj = + cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef())); -public: - void notifyMaterializing(MaterializationResponsibility &MR, jitlink::LinkGraph &G, - jitlink::JITLinkContext &Ctx, - MemoryBufferRef InputObject) override { - auto NewBuffer = - MemoryBuffer::getMemBufferCopy(InputObject.getBuffer(), G.getName()); - // Re-parsing the InputObject is wasteful, but for now, this lets us - // reuse the existing debuginfo.cpp code. Should look into just - // directly pulling out all the information required in a JITLink pass - // and just keeping the required tables/DWARF sections around (perhaps - // using the LLVM DebuggerSupportPlugin as a reference). 
- auto NewObj = - cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef())); - - { - std::lock_guard lock(PluginMutex); - assert(PendingObjs.count(&MR) == 0); - PendingObjs[&MR] = std::unique_ptr(new JITObjectInfo{ - std::move(NewBuffer), std::move(NewObj), {}}); - } + std::lock_guard lock{PluginMutex}; + assert(PendingObjs.count(&MR) == 0); + PendingObjs[&MR] = std::unique_ptr(new JITObjectInfo{ + std::move(NewBuffer), std::move(NewObj), {}, std::move(LinkerInfo)}); } +} - Error notifyEmitted(MaterializationResponsibility &MR) override +Error JLDebuginfoPlugin::notifyEmitted(MaterializationResponsibility &MR) +{ { - { - std::lock_guard lock(PluginMutex); - auto It = PendingObjs.find(&MR); - if (It == PendingObjs.end()) - return Error::success(); - - auto NewInfo = PendingObjs[&MR].get(); - auto getLoadAddress = [NewInfo](const StringRef &Name) -> uint64_t { - auto result = NewInfo->SectionLoadAddresses.find(Name); - if (result == NewInfo->SectionLoadAddresses.end()) { - LLVM_DEBUG({ - dbgs() << "JLDebuginfoPlugin: No load address found for section '" - << Name << "'\n"; - }); - return 0; - } - return result->second; - }; - - jl_register_jit_object(*NewInfo->Object, getLoadAddress); - PendingObjs.erase(&MR); - } + std::lock_guard lock(PluginMutex); + auto It = PendingObjs.find(&MR); + if (It == PendingObjs.end()) + return Error::success(); - return Error::success(); - } + auto NewInfo = PendingObjs[&MR].get(); + auto getLoadAddress = [NewInfo](const StringRef &Name) -> uint64_t { + auto result = NewInfo->SectionLoadAddresses.find(Name); + if (result == NewInfo->SectionLoadAddresses.end()) { + LLVM_DEBUG({ + dbgs() << "JLDebuginfoPlugin: No load address found for section '" + << Name << "'\n"; + }); + return 0; + } + return result->second; + }; - Error notifyFailed(MaterializationResponsibility &MR) override - { - std::lock_guard lock(PluginMutex); + jl_register_jit_object(*NewInfo->Object, getLoadAddress, *NewInfo->LinkerInfo); 
PendingObjs.erase(&MR); - return Error::success(); } - Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override - { - return Error::success(); - } + return Error::success(); +} - void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey, - orc::ResourceKey SrcKey) override {} +Error JLDebuginfoPlugin::notifyFailed(MaterializationResponsibility &MR) +{ + std::lock_guard lock(PluginMutex); + PendingObjs.erase(&MR); + return Error::success(); +} - void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &, - jitlink::PassConfiguration &PassConfig) override - { - std::lock_guard lock(PluginMutex); - auto It = PendingObjs.find(&MR); - if (It == PendingObjs.end()) - return; +Error JLDebuginfoPlugin::notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) +{ + return Error::success(); +} + +void JLDebuginfoPlugin::notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey, + orc::ResourceKey SrcKey) {} - JITObjectInfo &Info = *It->second; - PassConfig.PostAllocationPasses.push_back([&Info, this](jitlink::LinkGraph &G) -> Error { - std::lock_guard lock(PluginMutex); - for (const jitlink::Section &Sec : G.sections()) { +void JLDebuginfoPlugin::modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &, + jitlink::PassConfiguration &PassConfig) +{ + std::lock_guard lock(PluginMutex); + auto It = PendingObjs.find(&MR); + if (It == PendingObjs.end()) + return; + + JITObjectInfo &Info = *It->second; + PassConfig.PostAllocationPasses.push_back([&Info, this](jitlink::LinkGraph &G) -> Error { + std::lock_guard lock(PluginMutex); + for (const jitlink::Section &Sec : G.sections()) { #if defined(_OS_DARWIN_) - // Canonical JITLink section names have the segment name included, e.g. - // "__TEXT,__text" or "__DWARF,__debug_str". There are some special internal - // sections without a comma separator, which we can just ignore. 
- size_t SepPos = Sec.getName().find(','); - if (SepPos >= 16 || (Sec.getName().size() - (SepPos + 1) > 16)) { - LLVM_DEBUG({ - dbgs() << "JLDebuginfoPlugin: Ignoring section '" << Sec.getName() - << "'\n"; - }); - continue; - } - auto SecName = Sec.getName().substr(SepPos + 1); + // Canonical JITLink section names have the segment name included, e.g. + // "__TEXT,__text" or "__DWARF,__debug_str". There are some special internal + // sections without a comma separator, which we can just ignore. + size_t SepPos = Sec.getName().find(','); + if (SepPos >= 16 || (Sec.getName().size() - (SepPos + 1) > 16)) { + LLVM_DEBUG({ + dbgs() << "JLDebuginfoPlugin: Ignoring section '" << Sec.getName() + << "'\n"; + }); + continue; + } + auto SecName = Sec.getName().substr(SepPos + 1); #else - auto SecName = Sec.getName(); + auto SecName = Sec.getName(); #endif - // https://github.com/llvm/llvm-project/commit/118e953b18ff07d00b8f822dfbf2991e41d6d791 - Info.SectionLoadAddresses[SecName] = jitlink::SectionRange(Sec).getStart().getValue(); - } - return Error::success(); - }); - } -}; + // https://github.com/llvm/llvm-project/commit/118e953b18ff07d00b8f822dfbf2991e41d6d791 + Info.SectionLoadAddresses[SecName] = jitlink::SectionRange(Sec).getStart().getValue(); + } + return Error::success(); + }); +} + +namespace { + +using namespace llvm::orc; class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { private: @@ -1223,6 +853,73 @@ std::unique_ptr createJITLinkMemoryManager() JL_N #endif } +class JLMaterializationUnit : public orc::MaterializationUnit { +public: + static JLMaterializationUnit Create(JuliaOJIT &JIT, ObjectLinkingLayer &OL, + std::unique_ptr Info, + std::unique_ptr Obj) JL_NOTSAFEPOINT + { + Interface I = ExitOnError()( + orc::getObjectFileInterface(JIT.getExecutionSession(), Obj->getMemBufferRef())); + + // Replace the original symbols for the compiled CodeInstances with the + // globally unique identifiers that will be used for linking. 
+ auto ReplaceSym = [&](SymbolStringPtr &Old, SymbolStringPtr &New) JL_NOTSAFEPOINT { + bool Replaced = I.SymbolFlags.erase(Old); + assert(Replaced); + (void)Replaced; + I.SymbolFlags[New] = JITSymbolFlags::Callable | JITSymbolFlags::Exported; + }; + for (auto &[CI, Funcs] : Info->ci_funcs) { + auto Unique = JIT.makeUniqueCIName(CI, Funcs); + if (Funcs.invoke) { + assert(Funcs.invoke_api == JL_INVOKE_SPECSIG); + ReplaceSym(Funcs.invoke, Unique.invoke); + } + if (Funcs.specptr) + ReplaceSym(Funcs.specptr, Unique.specptr); + } + + return JLMaterializationUnit{JIT, OL, std::move(Info), std::move(Obj), std::move(I)}; + } + + virtual StringRef getName() const override { return Obj->getBufferIdentifier(); } + + void materialize(std::unique_ptr R) override + { + auto G = jitlink::createLinkGraphFromObject( + Obj->getMemBufferRef(), JIT.getExecutionSession().getSymbolStringPool()); + if (!G) { + R->getExecutionSession().reportError(G.takeError()); + R->failMaterialization(); + return; + } + + JIT.linkOutput(*R, Obj->getMemBufferRef(), **G, std::move(Info)); + OL.emit(std::move(R), std::move(*G), std::move(Obj)); + } + + void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {} + +protected: + JLMaterializationUnit(JuliaOJIT &JIT, ObjectLinkingLayer &OL, + std::unique_ptr Info, + std::unique_ptr Obj, Interface I) JL_NOTSAFEPOINT + : orc::MaterializationUnit(I), + JIT(JIT), + OL(OL), + Info(std::move(Info)), + Obj(std::move(Obj)) + { + } + +private: + JuliaOJIT &JIT; + ObjectLinkingLayer &OL; + std::unique_ptr Info; + std::unique_ptr Obj; +}; + class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { public: Error registerEHFrames(orc::ExecutorAddrRange EHFrameSection) override { @@ -1286,33 +983,6 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { } }; -#ifndef JL_USE_JITLINK -static void registerRTDyldJITObject(orc::MaterializationResponsibility &MR, - const object::ObjectFile &Object, - const RuntimeDyld::LoadedObjectInfo &L) 
-{ - StringMap loadedSections; - for (const object::SectionRef &lSection : Object.sections()) { - auto sName = lSection.getName(); - if (sName) { - bool inserted = loadedSections.insert(std::make_pair(*sName, lSection)).second; - assert(inserted); - (void)inserted; - } - } - auto getLoadAddress = [loadedSections = std::move(loadedSections), - &L](const StringRef &sName) -> uint64_t { - auto search = loadedSections.find(sName); - if (search == loadedSections.end()) - return 0; - return L.getSectionLoadAddress(search->second); - }; - - auto DebugObject = L.getObjectForDebug(Object); // ELF requires us to make a copy to mutate the header with the section load addresses. On other platforms this is a no-op. - jl_register_jit_object(DebugObject.getBinary() ? *DebugObject.getBinary() : Object, getLoadAddress); -} -#endif - namespace { static std::unique_ptr createTargetMachine() JL_NOTSAFEPOINT { TargetOptions options = TargetOptions(); @@ -1496,7 +1166,7 @@ namespace { { if (*jl_ExecutionEngine->get_dump_llvm_opt_stream()) { for (auto &F : M.functions()) { - if (F.isDeclaration() || F.getName().starts_with("jfptr_")) { + if (F.isDeclaration() || F.getName().starts_with(JL_SYM_INVOKE_SPECSIG)) { continue; } // Each function is printed as a YAML object with several attributes @@ -1549,7 +1219,7 @@ namespace { // Print LLVM function statistics _after_ optimization ios_printf(stream, " after: \n"); for (auto &F : M.functions()) { - if (F.isDeclaration() || F.getName().starts_with("jfptr_")) { + if (F.isDeclaration() || F.getName().starts_with(JL_SYM_INVOKE_SPECSIG)) { continue; } Stat(F).dump(stream); @@ -1895,9 +1565,9 @@ struct JuliaOJIT::DLSymOptimizer { bool named; }; -void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { - JuliaOJIT::DLSymOptimizer(true)(M); -} +// void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { +// JuliaOJIT::DLSymOptimizer(true)(M); +// } void fixupTM(TargetMachine &TM) { auto TheTriple = 
TM.getTargetTriple(); @@ -1924,28 +1594,16 @@ JuliaOJIT::JuliaOJIT() JD(ES.createBareJITDylib("JuliaOJIT")), ExternalJD(ES.createBareJITDylib("JuliaExternal")), DLSymOpt(std::make_unique(false)), -#ifdef JL_USE_JITLINK MemMgr(createJITLinkMemoryManager()), ObjectLayer(ES, *MemMgr), -#else - MemMgr(createRTDyldMemoryManager()), - UnlockedObjectLayer( - ES, - [this]() { - std::unique_ptr result(new ForwardingMemoryManager(MemMgr)); - return result; - } - ), - ObjectLayer(UnlockedObjectLayer), -#endif CompileLayer(ES, ObjectLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), - JITPointers(std::make_unique(SharedBytes, RLST_mutex)), + JITPointers(std::make_unique(SharedBytes, SharedBytesMutex)), JITPointersLayer(ES, CompileLayer, IRTransformRef(*JITPointers)), Optimizers(std::make_unique(*TM, PrintLLVMTimers, llvm_printing_mutex)), OptimizeLayer(ES, JITPointersLayer, IRTransformRef(*Optimizers)), - OptSelLayer(ES, OptimizeLayer, static_cast(selectOptLevel)) + OptSelLayer(ES, OptimizeLayer, static_cast(selectOptLevel)), + DebuginfoPlugin(std::make_shared()) { -#ifdef JL_USE_JITLINK # if defined(LLVM_SHLIB) // When dynamically linking against LLVM, use our custom EH frame registration code // also used with RTDyld to inform both our and the libc copy of libunwind. 
@@ -1956,11 +1614,8 @@ JuliaOJIT::JuliaOJIT() ObjectLayer.addPlugin(std::make_unique( ES, std::move(ehRegistrar))); - ObjectLayer.addPlugin(std::make_unique()); + ObjectLayer.addPlugin(DebuginfoPlugin); ObjectLayer.addPlugin(std::make_unique(&jit_bytes_size)); -#else - UnlockedObjectLayer.setNotifyLoaded(registerRTDyldJITObject); -#endif std::string ErrorStr; @@ -2098,35 +1753,28 @@ void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr) cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), {ExecutorAddr::fromPtr((void*)Addr), JITSymbolFlags::Exported}}}))); } + +static void timing_print_module_names(ThreadSafeModule &TSM) JL_NOTSAFEPOINT +{ +#ifdef ENABLE_TIMINGS + TSM.withModuleDo([](Module &M) { + for (auto &f : M) { + if (!f.isDeclaration()){ + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, f.getName().str().c_str()); + } + } + }); +#endif +} + void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) { JL_TIMING(LLVM_JIT, JIT_Total); ++ModulesAdded; - TSM = selectOptLevel(std::move(TSM)); - TSM = (*Optimizers)(std::move(TSM)); - TSM = (*JITPointers)(std::move(TSM)); - auto Lock = TSM.getContext().getLock(); - Module &M = *TSM.getModuleUnlocked(); - - for (auto &f : M) { - if (!f.isDeclaration()){ - jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, f.getName().str().c_str()); - } - } - - // Treat this as if one of the passes might contain a safepoint - // even though that shouldn't be the case and might be unwise - Expected> Obj = CompileLayer.getCompiler()(M); - if (!Obj) { -#ifndef __clang_analyzer__ // reportError calls an arbitrary function, which the static analyzer thinks might be a safepoint - ES.reportError(Obj.takeError()); -#endif - errs() << "Failed to add module to JIT!\n"; - errs() << "Dumping failing module\n" << M << "\n"; - return; - } - { auto release = std::move(Lock); } - auto Err = JuliaOJIT::addObjectFile(JD, std::move(*Obj)); + TSM = optimizeModule(std::move(TSM)); + timing_print_module_names(TSM); + auto Obj = compileModule(std::move(TSM)); + auto 
Err = JuliaOJIT::addObjectFile(JD, std::move(Obj)); if (Err) { #ifndef __clang_analyzer__ // reportError calls an arbitrary function, which the static analyzer thinks might be a safepoint ES.reportError(std::move(Err)); @@ -2136,6 +1784,25 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) } } +void JuliaOJIT::addOutput(jl_emitted_output_t O) +{ + JL_TIMING(LLVM_JIT, JIT_Total); + ++ModulesAdded; + auto TSM = std::move(O.module); + LLVM_DEBUG({ + dbgs() << "Before optimization:\n"; + TSM.getModuleUnlocked()->dump(); + }); + + TSM = optimizeModule(std::move(TSM)); + timing_print_module_names(TSM); + auto Obj = compileModule(std::move(TSM)); + auto MU = std::make_unique( + JLMaterializationUnit::Create(*this, ObjectLayer, std::move(O.linker_info), std::move(Obj))); + ExitOnError check{"Failed to add objectfile to JIT!"}; + check(JD.define(MU, JD.getDefaultResourceTracker())); +} + Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize) { if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error { @@ -2221,36 +1888,39 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name) return addr->getAddress().getValue(); } -StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) +SmallVector> +JuliaOJIT::findCIs(ArrayRef CIs) { - std::lock_guard lock(RLST_mutex); - assert(Addr != (uint64_t)jl_fptr_wait_for_compiled_addr); - std::string *fname = &ReverseLocalSymbolTable[(void*)(uintptr_t)Addr]; - if (fname->empty()) { - std::string string_fname; - raw_string_ostream stream_fname(string_fname); - // try to pick an appropriate name that describes it - if (Addr == (uintptr_t)invoke) { - stream_fname << "jsysw_"; - } - else if (invoke == jl_fptr_args_addr) { - stream_fname << "jsys1_"; - } - else if (invoke == jl_fptr_sparam_addr) { - stream_fname << "jsys3_"; - } - else { - stream_fname << "jlsys_"; + orc::SymbolLookupSet Exports; + { + // Must release lock before 
materializing anything that may need to use + // the julia linker. + std::lock_guard Lock{LinkerMutex}; + for (auto CI : CIs) { + auto &CISym = CISymbols.at(CI); + if (CISym.invoke) + Exports.add(CISym.invoke); + if (CISym.specptr) + Exports.add(CISym.specptr); } - const char* unadorned_name = jl_symbol_name(jl_get_ci_mi(codeinst)->def.method->name); - stream_fname << unadorned_name << "_" << RLST_inc++; - *fname = std::move(stream_fname.str()); // store to ReverseLocalSymbolTable - addGlobalMapping(*fname, Addr); } - return *fname; + + SymbolMap Syms = cantFail( + ::safelookup(ES, orc::makeJITDylibSearchOrder(ArrayRef(&JD)), std::move(Exports))); + SmallVector> Addrs{CIs.size()}; + for (size_t i = 0; i < CIs.size(); i++) { + const auto &S = CISymbols.at(CIs[i]); + Addrs[i].invoke_api = S.invoke_api; + if (S.invoke) + Addrs[i].invoke = + (void *)Syms.at(S.invoke).getAddress().getValue(); + if (S.specptr) + Addrs[i].specptr = + (void *)Syms.at(S.specptr).getAddress().getValue(); + } + return Addrs; } -#ifdef JL_USE_JITLINK #define addAbsoluteToMap(map,name) \ (map[mangle(#name)] = {ExecutorAddr::fromPtr(&name), JITSymbolFlags::Exported | JITSymbolFlags::Callable}, orc::ExecutorAddr::fromPtr(&name)) @@ -2311,54 +1981,225 @@ void JuliaOJIT::enablePerfJITEventListener() } #endif } -#else -void JuliaOJIT::RegisterJITEventListener(JITEventListener *L) + +const DataLayout& JuliaOJIT::getDataLayout() const { - if (L) - UnlockedObjectLayer.registerJITEventListener(*L); + return DL; } -void JuliaOJIT::enableJITDebuggingSupport() + +std::string JuliaOJIT::getMangledName(StringRef Name) { - RegisterJITEventListener(JITEventListener::createGDBRegistrationListener()); + SmallString<128> FullName; + Mangler::getNameWithPrefix(FullName, Name, DL); + return FullName.str().str(); } -void JuliaOJIT::enableIntelJITEventListener() + +std::string JuliaOJIT::getMangledName(const GlobalValue *GV) { - RegisterJITEventListener(JITEventListener::createIntelJITEventListener()); + return 
getMangledName(GV->getName()); } -void JuliaOJIT::enableOProfileJITEventListener() + +CISymbolPtr JuliaOJIT::makeUniqueCIName(jl_code_instance_t *CI, const CISymbolPtr &Funcs) { - RegisterJITEventListener(JITEventListener::createOProfileJITEventListener()); -} -void JuliaOJIT::enablePerfJITEventListener() + std::lock_guard Lock{LinkerMutex}; + orc::SymbolStringPtr wrapper, specialized; + if (Funcs.invoke) + wrapper = ES.intern(Names(*Funcs.invoke)); + if (Funcs.specptr) + specialized = ES.intern(Names(*Funcs.specptr)); + CISymbolPtr Ret{Funcs.invoke_api, wrapper, specialized}; + CISymbols[CI] = Ret; + return Ret; +} + +// Convenience function to get a map from string pool symbols to symbols in this +// LinkGraph that participate in linking (defined and external). +static DenseMap +linkGraphSymbols(jitlink::LinkGraph &G) { - RegisterJITEventListener(JITEventListener::createPerfJITEventListener()); + DenseMap Syms; + auto AddSyms = [&](auto Symbols) { + for (auto S : Symbols) + if (S->getName()) + Syms[S->getName()] = S; + }; + AddSyms(G.defined_symbols()); + AddSyms(G.external_symbols()); + return Syms; } + +void JuliaOJIT::linkOutput(orc::MaterializationResponsibility &MR, MemoryBufferRef ObjBuf, + jitlink::LinkGraph &G, std::unique_ptr Info) +{ + auto Syms = linkGraphSymbols(G); + + // Rename the defined CI functions. + std::lock_guard Lock{LinkerMutex}; + auto RenameDef = [&](const SymbolStringPtr &Orig, const SymbolStringPtr &Dest) { + Syms.at(Orig)->setName(Dest); + }; + for (auto &[CI, Funcs] : Info->ci_funcs) { + auto &S = CISymbols.at(CI); + if (Funcs.invoke) + RenameDef(Funcs.invoke, S.invoke); + if (Funcs.specptr) + RenameDef(Funcs.specptr, S.specptr); + } + + // Rename referenced CIs in the workqueue. 
+ for (auto &[Call, T] : Info->call_targets) { + auto [CI, API] = Call; + if (!Syms.contains(T)) + continue; + JL_GC_PROMISE_ROOTED(CI); + Syms.at(T)->setName(linkCallTarget(CI, API)); + } + + // Rename globals and add mappings + // TODO: don't leak when we have a way to GC code +#ifdef __clang_analyzer__ + [[clang::suppress]] #endif + void **Ptrs = new void *[Info->global_targets.size()]; + size_t i = 0; + orc::SymbolMap GlobalSyms; + for (auto &[Addr, Orig] : Info->global_targets) { + auto Sym = ES.intern(Names(*Orig)); + auto It = Syms.find(Orig); + if (It == Syms.end()) + continue; + It->second->setName(Sym); + Ptrs[i] = Addr; + GlobalSyms[Sym] = {ExecutorAddr::fromPtr(Ptrs + i), JITSymbolFlags::Exported}; + ++i; + ++LinkedGlobals; + } + cantFail(JD.define(orc::absoluteSymbols(std::move(GlobalSyms)))); -const DataLayout& JuliaOJIT::getDataLayout() const + DebuginfoPlugin->notifyMaterializingWithInfo(MR, G, ObjBuf, std::move(Info)); +} + +// Must hold LinkerMutex. +orc::SymbolStringPtr JuliaOJIT::linkCallTarget(jl_code_instance_t *CI, jl_invoke_api_t API) { - return DL; + auto It = CISymbols.find(CI); + if (It != CISymbols.end()) + return It->second.specptr; + + CISymbolPtr *Sym = linkCISymbol(CI); + + orc::SymbolStringPtr Result; + CISymbolPtr Trampoline; + std::unique_ptr Out; + + if (!Sym) { + // The target CI was not compiled when we observed it, so generate a + // tojlinvoke trampoline that will cause it to be compiled. + // TODO: replace this with a GOT/PLT mechanism that avoids the jl_invoke + // after it has been compiled. 
+ Out = std::make_unique("tojlinvoke", getDataLayout(), + getTargetTriple()); + Function *F = emit_tojlinvoke(CI, StringRef(), *Out); + Sym = &Trampoline; + Trampoline.invoke_api = API; + + if (API == JL_INVOKE_SPECSIG) { + // codegen may contain safepoints (such as jl_subtype calls) + int8_t GCState = jl_gc_unsafe_enter(jl_current_task->ptls); + Function *G = emit_specsig_to_fptr1(*Out, CI, F); + G->setLinkage(GlobalValue::ExternalLinkage); + jl_gc_unsafe_leave(jl_current_task->ptls, GCState); + Trampoline.specptr = mangle(G->getName()); + } else if (API == JL_INVOKE_ARGS) { + F->setLinkage(GlobalValue::ExternalLinkage); + Trampoline.invoke_api = JL_INVOKE_ARGS; + Trampoline.specptr = mangle(F->getName()); + } else { + abort(); + } + } + + if (Sym && Sym->invoke_api == API) { + // If the CI has a compiled specptr, and this call site uses the same + // calling convention, link to it directly. + Result = Sym->specptr; + } + else { + // TODO: Check invariants to make sure this can't happen. + abort(); + } + + // Trampolines shouldn't generate code that invokes other CodeInstances. + if (Out) { + assert(Out->call_targets.empty()); + addOutput(Out->finish(*ES.getSymbolStringPool())); + } + + return Result; } -std::string JuliaOJIT::getMangledName(StringRef Name) +// Must hold LinkerMutex. +CISymbolPtr *JuliaOJIT::linkCISymbol(jl_code_instance_t *CI) { - SmallString<128> FullName; - Mangler::getNameWithPrefix(FullName, Name, DL); - return FullName.str().str(); + uint8_t Flags; + jl_callptr_t Invoke; + void *SpecPtr; + + // Tell the analyzer no safepoint is possible with waitcompile = 0 + void jl_read_codeinst_invoke(jl_code_instance_t *, uint8_t *, jl_callptr_t *, void **, int) JL_NOTSAFEPOINT; + jl_read_codeinst_invoke(CI, &Flags, &Invoke, &SpecPtr, 0); + + if (!(Flags & JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR)) + return nullptr; + + // TODO: Remove specialized CI flag entirely? 
+ jl_invoke_api_t API = jl_callptr_invoke_api(Invoke); + assert((API == JL_INVOKE_SPECSIG) == bool(Flags & JL_CI_FLAGS_SPECPTR_SPECIALIZED)); + + orc::SymbolStringPtr InvokeSym; + SymbolMap Symbols; + const char *Name = jl_symbol_name(jl_get_ci_mi(CI)->def.method->name); + + auto SpecSym = mangle(Names(jl_symbol_prefix(JL_SYMBOL_SPECPTR_IMG, API), Name)); + Symbols[SpecSym] = {ExecutorAddr::fromPtr(SpecPtr), JITSymbolFlags::Exported}; + if (API == JL_INVOKE_SPECSIG) { + InvokeSym = mangle(Names(jl_symbol_prefix(JL_SYMBOL_INVOKE_IMG, API), Name)); + Symbols[InvokeSym] = {ExecutorAddr::fromPtr(Invoke), JITSymbolFlags::Exported}; + } + cantFail(JD.define(orc::absoluteSymbols(Symbols))); + auto &CISym = CISymbols[CI] = {API, InvokeSym, SpecSym}; + return &CISym; } -std::string JuliaOJIT::getMangledName(const GlobalValue *GV) +orc::ThreadSafeModule JuliaOJIT::optimizeModule(orc::ThreadSafeModule TSM) { - return getMangledName(GV->getName()); + TSM = selectOptLevel(std::move(TSM)); + TSM = (*Optimizers)(std::move(TSM)); + TSM = (*JITPointers)(std::move(TSM)); + return TSM; +} + +std::unique_ptr JuliaOJIT::compileModule(orc::ThreadSafeModule TSM) +{ + auto Lock = TSM.getContext().getLock(); + Module &M = *TSM.getModuleUnlocked(); + // Treat this as if one of the passes might contain a safepoint + // even though that shouldn't be the case and might be unwise + Expected> Obj = CompileLayer.getCompiler()(M); + if (!Obj) { +#ifndef __clang_analyzer__ // reportError calls an arbitrary function, which the static analyzer thinks might be a safepoint + ES.reportError(Obj.takeError()); +#endif + errs() << "Failed to add module to JIT!\n"; + errs() << "Dumping failing module\n" << M << "\n"; + return {}; + } + return std::move(*Obj); } size_t JuliaOJIT::getTotalBytes() const { auto bytes = jl_atomic_load_relaxed(&jit_bytes_size); -#ifndef JL_USE_JITLINK - size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT; - bytes += 
getRTDyldMemoryManagerTotalBytes(MemMgr.get()); -#endif return bytes; } @@ -2375,9 +2216,10 @@ void JuliaOJIT::printTimers() reportAndResetTimings(); } -void JuliaOJIT::optimizeDLSyms(Module &M) { - (*DLSymOpt)(M); -} +// TODO: reintroduce this +// void JuliaOJIT::optimizeDLSyms(Module &M) { +// (*DLSymOpt)(M); +// } JuliaOJIT *jl_ExecutionEngine; diff --git a/src/jitlayers.h b/src/jitlayers.h index 331d9accc8fb8..9b61a1b345bbb 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -55,14 +55,6 @@ #endif // The sanitizers don't play well with our memory manager -#if defined(JL_FORCE_JITLINK) || defined(_CPU_AARCH64_) || defined(HAS_SANITIZER) -# define JL_USE_JITLINK -#endif - -#if defined(_CPU_RISCV64_) -# define JL_USE_JITLINK -#endif - # include # include # include @@ -178,16 +170,36 @@ struct jl_locked_stream { } }; -struct jl_llvm_functions_t { - std::string functionObject; // jlcall llvm Function name - std::string specFunctionObject; // specialized llvm Function name - jl_llvm_functions_t() JL_NOTSAFEPOINT = default; - jl_llvm_functions_t &operator=(const jl_llvm_functions_t&) JL_NOTSAFEPOINT = default; - jl_llvm_functions_t(const jl_llvm_functions_t &) JL_NOTSAFEPOINT = default; - jl_llvm_functions_t(jl_llvm_functions_t &&) JL_NOTSAFEPOINT = default; - ~jl_llvm_functions_t() JL_NOTSAFEPOINT = default; +// jl_codeinst_funcs_t holds the results of compiling a CodeInstance, which can +// produce one, two, or zero entrypoints. The `invoke_api` field determines +// what the CodeInstance's `invoke` should be set to, and whether `invoke` and +// `specptr` are compiled functions. 
+// +// JL_INVOKE_ARGS +// specptr: jl_fptr_args_t convention +// JL_INVOKE_CONST +// (no compiled functions) +// JL_INVOKE_SPARAM +// specptr: jl_fptr_sparam_t convention +// JL_INVOKE_INTERPRETED +// (not produced by compilation) +// JL_INVOKE_SPECSIG +// invoke: jfptr_* wrapper around specptr +// specptr: specsig function +template +struct jl_codeinst_funcs_t { + jl_invoke_api_t invoke_api; + T invoke; + T specptr; + jl_codeinst_funcs_t() JL_NOTSAFEPOINT = default; + jl_codeinst_funcs_t &operator=(const jl_codeinst_funcs_t&) JL_NOTSAFEPOINT = default; + jl_codeinst_funcs_t(const jl_codeinst_funcs_t &) JL_NOTSAFEPOINT = default; + jl_codeinst_funcs_t(jl_codeinst_funcs_t &&) JL_NOTSAFEPOINT = default; + ~jl_codeinst_funcs_t() JL_NOTSAFEPOINT = default; }; +using jl_llvm_functions_t = jl_codeinst_funcs_t; + struct jl_returninfo_t { llvm::FunctionCallee decl; llvm::AttributeList attrs; @@ -205,11 +217,7 @@ struct jl_returninfo_t { }; struct jl_codegen_call_target_t { - jl_returninfo_t::CallingConv cc; - unsigned return_roots; llvm::Function *decl; - llvm::Function *oc; - bool specsig; bool external_linkage; // whether codegen would like this edge to be externally-available bool private_linkage; // whether codegen would like this edge to be internally-available // external = ExternalLinkage (similar to "extern") @@ -224,31 +232,95 @@ struct cfunc_decl_t { llvm::GlobalVariable *cfuncdata; }; -typedef SmallVector, 0> jl_workqueue_t; +std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &ctx, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT; typedef std::list> CallFrames; -struct jl_codegen_params_t { - orc::ThreadSafeContext tsctx; - orc::ThreadSafeContext::Lock tsctx_lock; - DataLayout DL; - Triple TargetTriple; - inline LLVMContext &getContext() JL_NOTSAFEPOINT { - return *tsctx.getContext(); +class jl_name_counter_t { +public: + template + std::string operator()(Ts... 
args) JL_NOTSAFEPOINT + { + std::string name; + raw_string_ostream s{name}; + (s << ... << args); + unsigned n = counter[name]++; + s << "_" << n; + return name; } - typedef StringMap SymMapGV; + + jl_name_counter_t() JL_NOTSAFEPOINT = default; + jl_name_counter_t(jl_name_counter_t &&) JL_NOTSAFEPOINT = default; + ~jl_name_counter_t() JL_NOTSAFEPOINT = default; + +private: + StringMap counter; +}; + +struct jl_linker_info_t { + DenseMap> ci_funcs; + DenseMap, orc::SymbolStringPtr> + call_targets; + DenseMap global_targets; +}; + +struct jl_emitted_output_t { + orc::ThreadSafeModule module; + std::unique_ptr linker_info; + + jl_emitted_output_t() JL_NOTSAFEPOINT = default; + jl_emitted_output_t(jl_emitted_output_t &&) JL_NOTSAFEPOINT = default; + jl_emitted_output_t &operator=(jl_emitted_output_t &&) JL_NOTSAFEPOINT = default; + ~jl_emitted_output_t() JL_NOTSAFEPOINT = default; +}; + +// A jl_codegen_output_t is the target for LLVM IR generation, containing an +// LLVM module and the metadata for linking it into the current session or a +// system image. Many code instances can be emitted to a single codegen output. 
+class jl_codegen_output_t { +private: + orc::ThreadSafeModule owned_TSM; + orc::ThreadSafeModule &TSM; + orc::ThreadSafeContext::Lock tsctx_lock; + + jl_name_counter_t names; + +public: + LLVMContext &get_context() { return *TSM.getContext().getContext(); } + Module &get_module() { return *TSM.getModuleUnlocked(); } + orc::ThreadSafeModule &get_tsm() { return TSM; } + void lock() { tsctx_lock = TSM.getContext().getLock(); } + void unlock() { auto _ = std::move(tsctx_lock); } + + StringRef strip_linux(StringRef name); + std::string make_name(jl_symbol_prefix_t type, jl_invoke_api_t api, + StringRef orig_name); + std::string make_name(StringRef prefix, StringRef orig_name); + std::string make_name(StringRef orig_name); + + StringRef get_call_target(jl_code_instance_t *ci, bool specsig, bool always_inline); + + // Discard all the context that will be invalidated when we compile the + // module. Must hold the context lock. + jl_emitted_output_t finish(orc::SymbolStringPool &SSP) JL_NOTSAFEPOINT; + +public: // outputs - jl_workqueue_t workqueue; + DenseMap, jl_codegen_call_target_t> + call_targets; + DenseMap ci_funcs; + SmallVector, 0> external_fns; + SmallVector cfuncs; std::map global_targets; jl_array_t *temporary_roots = nullptr; SmallSet temporary_roots_set; - std::map, GlobalVariable*> external_fns; std::map ditypes; std::map llvmtypes; DenseMap mergedConstants; // Map from symbol name (in a certain library) to its GV in sysimg and the // DL handle address in the current session. 
+ typedef StringMap SymMapGV; StringMap> libMapGV; SymMapGV symMapDefault; // These symMaps are Windows-only @@ -264,53 +336,70 @@ struct jl_codegen_params_t { DenseMap, GlobalVariable*>> allPltMap; - std::unique_ptr _shared_module; - inline Module &shared_module(); + SmallVector, 0> llvmcall_modules; + // inputs + const DataLayout &DL; + Triple TargetTriple; const jl_cgparams_t *params = &jl_default_cgparams; - bool cache = false; bool external_linkage = false; - bool imaging_mode; + bool imaging_mode = true; bool safepoint_on_entry = true; bool use_swiftcc = true; - jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER - : tsctx(std::move(ctx)), - tsctx_lock(tsctx.getLock()), - DL(std::move(DL)), - TargetTriple(std::move(triple)), - imaging_mode(1) + + jl_codegen_output_t(orc::ThreadSafeModule &TSM) + : TSM(TSM), + tsctx_lock(TSM.getContext().getLock()), + DL(TSM.getModuleUnlocked()->getDataLayout()), + TargetTriple(this->TSM.getModuleUnlocked()->getTargetTriple()) + { + if (TargetTriple.isRISCV()) + use_swiftcc = false; + } + + static orc::ThreadSafeModule create_ts_module(StringRef name, const DataLayout &DL, + const Triple &triple) + { + auto ctx = std::make_unique(); + auto M = jl_create_llvm_module(name, *ctx, DL, triple); + return orc::ThreadSafeModule(std::move(M), std::move(ctx)); + } + + jl_codegen_output_t(StringRef name, const DataLayout &DL, const Triple &triple) + : owned_TSM(create_ts_module(name, DL, triple)), + TSM(owned_TSM), + tsctx_lock(TSM.getContext().getLock()), + DL(DL), + TargetTriple(triple) { - // LLVM's RISC-V back-end currently does not support the Swift calling convention if (TargetTriple.isRISCV()) use_swiftcc = false; } - jl_codegen_params_t(jl_codegen_params_t &&) JL_NOTSAFEPOINT = default; - ~jl_codegen_params_t() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; + + jl_codegen_output_t(jl_codegen_output_t &&) JL_NOTSAFEPOINT = default; + ~jl_codegen_output_t() 
JL_NOTSAFEPOINT = default; }; -const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params); +const char *jl_generate_ccallable(jl_codegen_output_t &out, jl_value_t *nameval, jl_value_t *declrt, jl_value_t *sigt); -jl_llvm_functions_t jl_emit_code( - orc::ThreadSafeModule &M, +std::optional jl_emit_code( + jl_codegen_output_t &out, jl_method_instance_t *mi, jl_code_info_t *src, jl_value_t *abi_at, - jl_value_t *abi_rt, - jl_codegen_params_t &params); + jl_value_t *abi_rt); -jl_llvm_functions_t jl_emit_codeinst( - orc::ThreadSafeModule &M, +std::optional jl_emit_codeinst( + jl_codegen_output_t &out, jl_code_instance_t *codeinst, - jl_code_info_t *src, - jl_codegen_params_t &params); + jl_code_info_t *src); jl_llvm_functions_t jl_emit_codedecls( - orc::ThreadSafeModule &M, - jl_code_instance_t *codeinst, - jl_codegen_params_t &params); + jl_codegen_output_t &out, + jl_code_instance_t *codeinst); -void linkFunctionBody(Function &Dst, Function &Src) JL_NOTSAFEPOINT; -void emit_always_inline(orc::ThreadSafeModule &result_m, jl_codegen_params_t &params) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; +void emit_always_inline(jl_codegen_output_t &out); +void emit_llvmcall_modules(jl_codegen_output_t &out); enum CompilationPolicy { Default = 0, @@ -318,28 +407,36 @@ enum CompilationPolicy { }; Function *jl_cfunction_object(jl_value_t *f, jl_value_t *rt, jl_tupletype_t *argt, - jl_codegen_params_t &params); + jl_codegen_output_t &out); extern "C" JL_DLLEXPORT_CODEGEN void *jl_jit_abi_convert(jl_task_t *ct, jl_abi_t from_abi, _Atomic(void*) *fptr, _Atomic(size_t) *last_world, void *data); -std::string emit_abi_dispatcher(Module *M, jl_codegen_params_t &params, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *invoke); -std::string emit_abi_converter(Module *M, jl_codegen_params_t &params, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *target, bool target_specsig); -std::string emit_abi_constreturn(Module *M,
jl_codegen_params_t &params, jl_abi_t from_abi, jl_value_t *rettype_const); -std::string emit_abi_constreturn(Module *M, jl_codegen_params_t &params, bool specsig, jl_code_instance_t *codeinst); +std::string emit_abi_dispatcher(jl_codegen_output_t &out, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *invoke); +std::string emit_abi_converter(jl_codegen_output_t &out, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *target, bool target_specsig); +std::string emit_abi_constreturn(jl_codegen_output_t &out, jl_abi_t from_abi, jl_value_t *rettype_const); +std::string emit_abi_constreturn(jl_codegen_output_t &out, bool specsig, jl_code_instance_t *codeinst); -Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t &params) JL_NOTSAFEPOINT; +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, jl_codegen_output_t &out) JL_NOTSAFEPOINT; void emit_specsig_to_fptr1( Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, size_t nargs, - jl_codegen_params_t &params, + jl_codegen_output_t &out, Function *target) JL_NOTSAFEPOINT; +Function *emit_specsig_to_fptr1(jl_codegen_output_t &out, jl_code_instance_t *ci, + Function *func) JL_NOTSAFEPOINT; Function *get_or_emit_fptr1(StringRef Name, Module *M) JL_NOTSAFEPOINT; void jl_init_function(Function *F, const Triple &TT) JL_NOTSAFEPOINT; +jl_returninfo_t get_specsig_function(jl_codegen_output_t &ctx, Module *M, Value *fval, + StringRef name, jl_value_t *sig, jl_value_t *jlrettype, + bool is_opaque_closure, + ArrayRef ArgNames = None, + unsigned nreq = 0); + void add_named_global(StringRef name, void *addr) JL_NOTSAFEPOINT; -Constant *literal_pointer_val_slot(jl_codegen_params_t &params, Module *M, jl_value_t *p); +Constant *literal_pointer_val_slot(jl_codegen_output_t &out, jl_value_t *p); static inline Constant *literal_static_pointer_val(const void *p, Type *T)
JL_NOTSAFEPOINT { @@ -358,6 +455,13 @@ static const inline char *name_from_method_instance(jl_method_instance_t *li) JL return jl_is_method(li->def.method) ? jl_symbol_name(li->def.method->name) : "top-level scope"; } +static inline jl_value_t *get_ci_abi(jl_code_instance_t *ci) +{ + if (jl_typeof(ci->def) == (jl_value_t*)jl_abioverride_type) + return ((jl_abi_override_t*)ci->def)->abi; + return jl_get_ci_mi(ci)->specTypes; +} + template class MaxAlignedAllocImpl : public AllocatorBase> { @@ -402,35 +506,43 @@ using CompilerResultT = Expected>; using OptimizerResultT = Expected; using SharedBytesT = StringSet::MapEntryTy)>>; +using CISymbolPtr = jl_codeinst_funcs_t; +using CISymbolMap = DenseMap; + +class JLMaterializationUnit; + +struct JITObjectInfo { + std::unique_ptr BackingBuffer; + std::unique_ptr Object; + StringMap SectionLoadAddresses; + std::unique_ptr LinkerInfo; +}; + +class JLDebuginfoPlugin : public orc::ObjectLinkingLayer::Plugin { + std::mutex PluginMutex; + std::map> PendingObjs; +public: + void notifyMaterializingWithInfo(orc::MaterializationResponsibility &MR, + jitlink::LinkGraph &G, MemoryBufferRef InputObject, + std::unique_ptr LinkerInfo); + Error notifyEmitted(orc::MaterializationResponsibility &MR) override; + Error notifyFailed(orc::MaterializationResponsibility &MR) override; + Error notifyRemovingResources(orc::JITDylib &JD, orc::ResourceKey K) override; + void notifyTransferringResources(orc::JITDylib &JD, orc::ResourceKey DstKey, + orc::ResourceKey SrcKey) override; + void modifyPassConfig(orc::MaterializationResponsibility &MR, jitlink::LinkGraph &, + jitlink::PassConfiguration &PassConfig) override; +}; + class JuliaOJIT { + friend JLMaterializationUnit; private: // any verification the user wants to do when adding an OwningResource to the pool template static void verifyResource(AnyT &resource) JL_NOTSAFEPOINT { } static void verifyResource(orc::ThreadSafeContext &context) JL_NOTSAFEPOINT { assert(context.getContext()); } public: 
-#ifdef JL_USE_JITLINK typedef orc::ObjectLinkingLayer ObjLayerT; -#else - typedef orc::RTDyldObjectLinkingLayer ObjLayerT; - struct LockLayerT : public orc::ObjectLayer { - - LockLayerT(orc::ObjectLayer &BaseLayer) JL_NOTSAFEPOINT : orc::ObjectLayer(BaseLayer.getExecutionSession()), BaseLayer(BaseLayer) {} - ~LockLayerT() JL_NOTSAFEPOINT = default; - - void emit(std::unique_ptr R, - std::unique_ptr O) override { - JL_TIMING(LLVM_JIT, JIT_Link); -#ifndef JL_USE_JITLINK - std::lock_guard lock(EmissionMutex); -#endif - BaseLayer.emit(std::move(R), std::move(O)); - } - private: - orc::ObjectLayer &BaseLayer; - std::recursive_mutex EmissionMutex; - }; -#endif typedef orc::IRCompileLayer CompileLayerT; typedef orc::IRTransformLayer JITPointersLayerT; typedef orc::IRTransformLayer OptimizeLayerT; @@ -558,10 +670,6 @@ class JuliaOJIT { struct OptimizerT; struct JITPointersT; -#ifndef JL_USE_JITLINK - void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT; -#endif - public: JuliaOJIT() JL_NOTSAFEPOINT; @@ -574,7 +682,8 @@ class JuliaOJIT { orc::SymbolStringPtr mangle(StringRef Name) JL_NOTSAFEPOINT; void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT; - void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; + void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT; + void addOutput(jl_emitted_output_t O) JL_NOTSAFEPOINT; //Methods for the C API Error addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, @@ -591,7 +700,12 @@ class JuliaOJIT { SmallVector findSymbols(ArrayRef Names) JL_NOTSAFEPOINT; uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT; - StringRef getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; + + // Look up the symbols for each CI in the array, all of which have been + // defined in a jl_emitted_output_t added with JuliaOJIT::addOutput. 
+ SmallVector> + findCIs(ArrayRef CIs) JL_NOTSAFEPOINT; + orc::ThreadSafeContext makeContext() JL_NOTSAFEPOINT; const DataLayout& getDataLayout() const JL_NOTSAFEPOINT; @@ -622,7 +736,40 @@ class JuliaOJIT { // Note that this is a potential safepoint due to jl_get_library_ and jl_dlsym calls // but may be called from inside safe-regions due to jit compilation locks - void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; + void optimizeDLSyms(Module &M); + +protected: // Called from JLMaterializationUnit + // Choose globally unique names for the functions defined by the given CI + // and register the mapping in CISymbols. + CISymbolPtr makeUniqueCIName(jl_code_instance_t *CI, + const CISymbolPtr &Funcs) JL_NOTSAFEPOINT; + + // void registerJITOutput(MemoryBufferRef Obj, const jl_linker_info_t &Info); + + // Rename LinkGraph symbols to match the previously chosen names and + // register debug info for defined symbols. + void linkOutput(orc::MaterializationResponsibility &MR, + MemoryBufferRef ObjBuf, jitlink::LinkGraph &G, + std::unique_ptr Info); + + // Return a symbol that should be linked to the call target. The origin of + // this symbol depends on the code instance: + // - If the call target is for a specialized function defined by a CI added + // to the JIT, return the symbol that was registered by makeUniqueCIName. + // - If the CI already exists and has code that matches the expected calling + // convention, generate a symbol for it and cache it in CISymbols. + // - If the CI exists but the code has the wrong calling convention (a + // specialized function is expected but only a jlcall exists, or neither + // exists and we should go through jl_invoke), emit the trampoline into a + // new module and return a symbol for it. 
+ orc::SymbolStringPtr linkCallTarget(jl_code_instance_t *CI, jl_invoke_api_t API); + + // Create an ORC symbol and entry in CISymbols for the CI's specptr, + // returning a pointer into CISymbols or NULL if the CI is not compiled. + CISymbolPtr *linkCISymbol(jl_code_instance_t *CI) JL_NOTSAFEPOINT; + + orc::ThreadSafeModule optimizeModule(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT; + std::unique_ptr compileModule(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT; private: @@ -633,12 +780,17 @@ class JuliaOJIT { orc::JITDylib &GlobalJD; orc::JITDylib &JD; orc::JITDylib &ExternalJD; - //Map and inc are guarded by RLST_mutex - std::mutex RLST_mutex{}; - int RLST_inc = 0; - DenseMap ReverseLocalSymbolTable; + std::mutex SharedBytesMutex{}; SharedBytesT SharedBytes; + // LinkerMutex protects CISymbols, Names + std::mutex LinkerMutex; + // CISymbols maps CodeInstance (weak) pointers to their ORC symbols. It is + // ok for a garbage collected CISymbol to remain as a key; it will be + // replaced when the address is reused for another CI.
+ CISymbolMap CISymbols; + jl_name_counter_t Names; + std::unique_ptr DLSymOpt; //Compilation streams @@ -651,37 +803,63 @@ class JuliaOJIT { _Atomic(size_t) jit_bytes_size{0}; _Atomic(size_t) jitcounter{0}; -#ifdef JL_USE_JITLINK const std::unique_ptr MemMgr; ObjLayerT ObjectLayer; -#else - const std::shared_ptr MemMgr; // shared_ptr protected by LockLayerT.EmissionMutex - ObjLayerT UnlockedObjectLayer; - LockLayerT ObjectLayer; -#endif CompileLayerT CompileLayer; std::unique_ptr JITPointers; JITPointersLayerT JITPointersLayer; std::unique_ptr Optimizers; OptimizeLayerT OptimizeLayer; OptSelLayerT OptSelLayer; + std::shared_ptr DebuginfoPlugin; }; extern JuliaOJIT *jl_ExecutionEngine; -std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &ctx, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT; + inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, orc::ThreadSafeContext ctx, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT { auto lock = ctx.getLock(); return orc::ThreadSafeModule(jl_create_llvm_module(name, *ctx.getContext(), DL, triple), ctx); } -Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT { - if (!_shared_module) { - _shared_module = jl_create_llvm_module("globals", getContext(), DL, TargetTriple); - } - return *_shared_module; -} void fixupTM(TargetMachine &TM) JL_NOTSAFEPOINT; -void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; +static inline const char *jl_symbol_prefix(jl_symbol_prefix_t type, + jl_invoke_api_t api) JL_NOTSAFEPOINT +{ + switch (type) { + case JL_SYMBOL_INVOKE_DEF: + switch (api) { + case JL_INVOKE_SPECSIG: return JL_SYM_INVOKE_SPECSIG; + default: jl_unreachable(); + }; + case JL_SYMBOL_INVOKE_IMG: + switch (api) { + case JL_INVOKE_SPECSIG: return JL_SYM_INVOKE_IMG_SPECSIG; + default: jl_unreachable(); + } + case JL_SYMBOL_SPECPTR_DEF: + switch (api) { + case JL_INVOKE_ARGS: return JL_SYM_SPECPTR_ARGS; + case JL_INVOKE_CONST: return JL_SYM_SPECPTR_CONST; + 
case JL_INVOKE_SPARAM: return JL_SYM_SPECPTR_SPARAM; + case JL_INVOKE_SPECSIG: return JL_SYM_SPECPTR_SPECSIG; + default: jl_unreachable(); + }; + case JL_SYMBOL_SPECPTR_PROTO: + switch (api) { + case JL_INVOKE_ARGS: return JL_SYM_PROTO_ARGS; + case JL_INVOKE_SPECSIG: return JL_SYM_PROTO_SPECSIG; + default: jl_unreachable(); + } + case JL_SYMBOL_SPECPTR_IMG: + switch (api) { + case JL_INVOKE_ARGS: return JL_SYM_SPECPTR_IMG_ARGS; + case JL_INVOKE_SPARAM: return JL_SYM_SPECPTR_IMG_SPARAM; + case JL_INVOKE_SPECSIG: return JL_SYM_SPECPTR_IMG_SPECSIG; + default: jl_unreachable(); + } + default: jl_unreachable(); + } +} // NewPM #include "passes.h" diff --git a/src/julia_internal.h b/src/julia_internal.h index 55b1eae1b0107..13ec47b2a35ae 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -433,19 +433,53 @@ typedef struct _jl_abi_t { // The compiler uses the specific integer values returned by jl_invoke_api typedef enum { - JL_INVOKE_ARGS = 1, // jl_fptr_args - JL_INVOKE_CONST = 2, // jl_fptr_const - JL_INVOKE_SPARAM = 3, // jl_fptr_sparam + JL_INVOKE_ARGS = 1, // jl_fptr_args + JL_INVOKE_CONST = 2, // jl_fptr_const + JL_INVOKE_SPARAM = 3, // jl_fptr_sparam JL_INVOKE_INTERPRETED = 4, // jl_fptr_interpret_call JL_INVOKE_SPECSIG = 5, // jfptr_* wrapper } jl_invoke_api_t; +// The symbol prefix for invoke -> specsig wrappers +#define JL_SYM_INVOKE_SPECSIG "jfptr_" +#define JL_SYM_INVOKE_IMG_SPECSIG "jsysw_" + +// Symbol prefixes for specptr functions +#define JL_SYM_SPECPTR_ARGS "japi1_" +#define JL_SYM_SPECPTR_CONST "jconst_" +#define JL_SYM_SPECPTR_SPARAM "japi3_" +#define JL_SYM_SPECPTR_SPECSIG "julia_" + +#define JL_SYM_SPECPTR_IMG_ARGS "jsys1_" +#define JL_SYM_SPECPTR_IMG_SPARAM "jsys3_" +#define JL_SYM_SPECPTR_IMG_SPECSIG "jlsys_" + +// Other defined symbols +#define JL_SYM_CFUNCTION "jlcapi_" + +// Symbol prefixes for pre-linking specptr function prototypes +#define JL_SYM_PROTO_ARGS "j1_" +#define JL_SYM_PROTO_SPECSIG "j_" + +// Symbol prefix for the GOT 
entry for a CodeInstance PLT +#define JL_SYM_JLPLT_GOT "jlpkg_got_" +// Symbol prefix for the PLT thunk for a CodeInstance +#define JL_SYM_JLPLT "jlpkg_" + +typedef enum { + JL_SYMBOL_INVOKE_DEF, + JL_SYMBOL_INVOKE_IMG, + JL_SYMBOL_SPECPTR_DEF, + JL_SYMBOL_SPECPTR_PROTO, + JL_SYMBOL_SPECPTR_IMG, +} jl_symbol_prefix_t; + static inline int jl_jlcall_specptr_is_native(jl_invoke_api_t type) { return type == JL_INVOKE_ARGS || type == JL_INVOKE_SPARAM || type == JL_INVOKE_SPECSIG; } -static inline jl_invoke_api_t jl_callptr_invoke_api(jl_callptr_t ptr) +static inline jl_invoke_api_t jl_callptr_invoke_api(jl_callptr_t ptr) JL_NOTSAFEPOINT { if (ptr == jl_fptr_args_addr) return JL_INVOKE_ARGS; @@ -458,7 +492,7 @@ static inline jl_invoke_api_t jl_callptr_invoke_api(jl_callptr_t ptr) return JL_INVOKE_SPECSIG; } -static inline jl_callptr_t jl_invoke_api_callptr(jl_invoke_api_t type) +static inline jl_callptr_t jl_invoke_api_callptr(jl_invoke_api_t type) JL_NOTSAFEPOINT { switch (type) { case JL_INVOKE_ARGS: return jl_fptr_args_addr; diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index ae1351ae41ca1..2e3d1139835fe 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2581,6 +2581,9 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(ArrayRef Colors, int PreAss } bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { + if (F.hasFnAttribute("thunk")) + return false; + initAll(*F.getParent()); smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc); LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n"); diff --git a/test/choosetests.jl b/test/choosetests.jl index ec1ee983a1f4c..7461a90da11ba 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -30,7 +30,7 @@ const TESTNAMES = [ "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap", "smallarrayshrink", "opaque_closure", "filesystem", "download", "scopedvalues", "compileall", "rebinding", - 
"faulty_constructor_method_should_not_cause_stack_overflows" + "faulty_constructor_method_should_not_cause_stack_overflows", "jit", ] const INTERNET_REQUIRED_LIST = [ diff --git a/test/jit.jl b/test/jit.jl new file mode 100644 index 0000000000000..9dc8e759f8d8c --- /dev/null +++ b/test/jit.jl @@ -0,0 +1,53 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Core: CodeInstance, MethodInstance +using Test + +struct TestOwner end +const owner = TestOwner() + +function compile_no_deps(f, argtypes) + @nospecialize + mi = Base.method_instance(f, argtypes) + source, _ = only(code_typed(f, argtypes)) + ci = CodeInstance( + mi, owner, source.rettype, #=exctype=#Any, #=inferred_const=#nothing, + #=inferred=#nothing, #=const_flags=#Int32(0), source.min_world, + #=max_world=#typemax(UInt), #=effects=#UInt32(0), + #=analysis_results=#nothing, source.debuginfo, source.edges + ) + ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), ci, source) + ci +end + +function check_edges_not_compiled(ci::CodeInstance, target) + @nospecialize + for e in ci.edges + e isa CodeInstance || continue + e.def isa MethodInstance || continue + e.def.def isa Method || continue + if e.def.def.sig <: Tuple{typeof(target), Vararg} + e.invoke == Ptr{Nothing}(0) || return false + e.specptr == Ptr{Nothing}(0) || return false + end + end + true +end + +# Test fptr1 -> tojlinvoke trampoline +module M1 + @noinline foo(xs...) 
= xs[2] + bar(x) = 2*foo(x, x, x, x, x, x) +end +ci = compile_no_deps(M1.bar, (Int,)) +@test check_edges_not_compiled(ci, M1.foo) +@test invoke(M1.bar, ci, 100) == 200 + +# Test specsig -> tojlinvoke trampoline +module M2 + @noinline foo(x) = x+100 + bar(x) = 2*foo(x) +end +ci = compile_no_deps(M2.bar, (Int,)) +@test check_edges_not_compiled(ci, M2.foo) +@test invoke(M2.bar, ci, 5) == 210 From da849ec632579ebf866e738f9bd134a4a9821f34 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Tue, 4 Nov 2025 10:27:31 -0800 Subject: [PATCH 4/5] Add CodegenParams.unique_names so they can be enabled in llvmpasses tests --- base/reflection.jl | 13 +++++++++++-- src/jitlayers.cpp | 16 ++++++++++++++++ src/julia.h | 1 + test/testhelpers/llvmpasses.jl | 7 ++++++- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/base/reflection.jl b/base/reflection.jl index b3252df78d1b8..9f99c1477e300 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -170,17 +170,26 @@ struct CodegenParams """ force_emit_all::Cint + """ + When enabled, generate names that are globally unique in this Julia session, + across all code generated with this flag set. Intended for llvmpasses + tests. 
+ """ + unique_names::Cint + function CodegenParams(; track_allocations::Bool=true, code_coverage::Bool=true, prefer_specsig::Bool=false, gnu_pubnames::Bool=true, debug_info_kind::Cint = default_debug_info_kind(), debug_info_level::Cint = Cint(JLOptions().debug_level), safepoint_on_entry::Bool=true, - gcstack_arg::Bool=true, use_jlplt::Bool=true, force_emit_all::Bool=false) + gcstack_arg::Bool=true, use_jlplt::Bool=true, force_emit_all::Bool=false, + unique_names::Bool=false) return new( Cint(track_allocations), Cint(code_coverage), Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, debug_info_level, Cint(safepoint_on_entry), - Cint(gcstack_arg), Cint(use_jlplt), Cint(force_emit_all)) + Cint(gcstack_arg), Cint(use_jlplt), Cint(force_emit_all), + Cint(unique_names)) end end diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 9ef9541df0a3b..1ec91b72b1b45 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -219,13 +219,29 @@ std::string jl_codegen_output_t::make_name(jl_symbol_prefix_t type, jl_invoke_ap return make_name(jl_symbol_prefix(type, api), orig_name); } +static std::atomic global_name_counter; + +template +static std::string make_name_unique(Ts... args) JL_NOTSAFEPOINT +{ + std::string name; + raw_string_ostream s{name}; + (s << ... 
<< args); + s << "_" << global_name_counter.fetch_add(1, memory_order_relaxed); + return name; +} + std::string jl_codegen_output_t::make_name(StringRef prefix, StringRef orig_name) { + if (params->unique_names) + return make_name_unique(prefix, strip_linux(orig_name)); return names(prefix, strip_linux(orig_name)); } std::string jl_codegen_output_t::make_name(StringRef orig_name) { + if (params->unique_names) + return make_name_unique(strip_linux(orig_name)); return names(strip_linux(orig_name)); } diff --git a/src/julia.h b/src/julia.h index e5cb3fd4e4a7a..25db70d6388c3 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2656,6 +2656,7 @@ typedef struct { int use_jlplt; // Whether to use the Julia PLT mechanism or emit symbols directly int force_emit_all; // Force emission of code for const return functions + int unique_names; // Emit globally unique names } jl_cgparams_t; extern JL_DLLEXPORT int jl_default_debug_info_kind; extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams; diff --git a/test/testhelpers/llvmpasses.jl b/test/testhelpers/llvmpasses.jl index 9900dd15b5d40..9e34fa6e49aa8 100644 --- a/test/testhelpers/llvmpasses.jl +++ b/test/testhelpers/llvmpasses.jl @@ -20,7 +20,12 @@ function emit(f, tt...) 
global counter name = nameof(f) open(joinpath(dir, @sprintf("%05d-%s.ll", counter, name)), "w") do io - code_llvm(io, f, tt, raw=true, optimize=optimize, dump_module=true, debuginfo=:none) + params = Base.CodegenParams( + # defaults for code_llvm + unique_names + debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=true, + gcstack_arg=true, unique_names=true, + ) + code_llvm(io, f, tt, raw=true, optimize=optimize, dump_module=true, debuginfo=:none, params=params) end counter+=1 end From f8ab8e993f591705432912f78f90fef5b9135693 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Tue, 4 Nov 2025 10:40:37 -0800 Subject: [PATCH 5/5] Fix timing_print_module_names not capturing JL_TIMING_DEFAULT_BLOCK --- src/jitlayers.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 1ec91b72b1b45..e7cc38c6395be 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -1770,25 +1770,28 @@ void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr) } -static void timing_print_module_names(ThreadSafeModule &TSM) JL_NOTSAFEPOINT -{ #ifdef ENABLE_TIMINGS - TSM.withModuleDo([](Module &M) { +static void timing_print_module_names(jl_timing_block_t *block, + ThreadSafeModule &TSM) JL_NOTSAFEPOINT +{ + TSM.withModuleDo([block](Module &M) { for (auto &f : M) { - if (!f.isDeclaration()){ - jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, f.getName().str().c_str()); + if (!f.isDeclaration()) { + jl_timing_puts(block, f.getName().str().c_str()); } } }); -#endif } +#endif void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) { JL_TIMING(LLVM_JIT, JIT_Total); ++ModulesAdded; TSM = optimizeModule(std::move(TSM)); - timing_print_module_names(TSM); +#ifdef ENABLE_TIMINGS + timing_print_module_names(JL_TIMING_DEFAULT_BLOCK, TSM); +#endif auto Obj = compileModule(std::move(TSM)); auto Err = JuliaOJIT::addObjectFile(JD, std::move(Obj)); if (Err) { @@ -1811,7 +1814,9 @@ void 
JuliaOJIT::addOutput(jl_emitted_output_t O) }); TSM = optimizeModule(std::move(TSM)); - timing_print_module_names(TSM); +#ifdef ENABLE_TIMINGS + timing_print_module_names(JL_TIMING_DEFAULT_BLOCK, TSM); +#endif auto Obj = compileModule(std::move(TSM)); auto MU = std::make_unique( JLMaterializationUnit::Create(*this, ObjectLayer, std::move(O.linker_info), std::move(Obj)));