From 3f2a3237e9de0f58c3cbc16bcbaceaa535dc4b42 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 4 Feb 2022 01:47:00 +0000 Subject: [PATCH] Add a DCE barrier builtin In #43852 we noticed that the compiler is getting good enough to completely DCE a number of our benchmarks. We need to add some sort of mechanism to prevent the compiler from doing so. This adds just such an intrinsic. The intrinsic itself doesn't do anything, but it is considered effectful by our optimizer, preventing it from being DCE'd. At the LLVM level, it turns into a volatile store to an alloca (or an llvm.sideeffect if the values passed to the `dcebarrier` do not have any actual LLVM-level representation). The docs for the new intrinsic are as follows: ``` dcebarrier(args...) This function prevents dead-code elimination (DCE) of itself and any arguments passed to it, but is otherwise the lightest barrier possible. In particular, it is not a GC safepoint, does model an observable heap effect, does not expand to any code itself and may be re-ordered with respect to other side effects (though the total number of executions may not change). A useful model for this function is that it hashes all memory `reachable` from args and escapes this information through some observable side-channel that does not otherwise impact program behavior. Of course that's just a model. The function does nothing and returns `nothing`. This is intended for use in benchmarks that want to guarantee that `args` are actually computed. (Otherwise DCE may see that the result of the benchmark is unused and delete the entire benchmark code). **Note**: `dcebarrier` does not affect constant folding. 
For example, in `dcebarrier(1+1)`, no add instruction needs to be executed at runtime and the code is semantically equivalent to `dcebarrier(2)`. *# Examples function loop() for i = 1:1000 # The compiler must guarantee that there are 1000 program points (in the correct # order) at which the value of `i` is in a register, but has otherwise # total control over the program. dcebarrier(i) end end ``` I believe the volatile store at the LLVM level is actually somewhat stronger than what we want here. Ideally the `dcebarrier` would not end up generating any machine code at all and would also be compatible with optimizations like SROA and vectorization. However, I think this is fine for now. --- base/compiler/tfuncs.jl | 3 +++ base/docs/basedocs.jl | 35 ++++++++++++++++++++++++++++++++ base/essentials.jl | 2 +- src/builtin_proto.h | 2 ++ src/builtins.c | 6 ++++++ src/codegen.cpp | 43 ++++++++++++++++++++++++++++++++++++++++ src/staticdata.c | 5 +++-- test/compiler/codegen.jl | 6 ++++++ 8 files changed, 99 insertions(+), 3 deletions(-) diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index d1df40f0471b0..d335995558d8f 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -527,6 +527,7 @@ add_tfunc(atomic_pointerset, 3, 3, (a, v, order) -> (@nospecialize; a), 5) add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_eltype(a)), 5) add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5) add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5) +add_tfunc(donotdelete, 0, INT_INF, (@nospecialize args...)->Nothing, 0) # more accurate typeof_tfunc for vararg tuples abstract only in length function typeof_concrete_vararg(t::DataType) @@ -1697,6 +1698,8 @@ function _builtin_nothrow(@nospecialize(f), argtypes::Array{Any,1}, @nospecializ return false elseif f === Core.get_binding_type return length(argtypes) == 2 + elseif f === donotdelete + return true end return false end diff --git 
a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 3cbe180233d9c..131f0f26562cd 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -2897,4 +2897,39 @@ See also [`"`](@ref \") """ kw"\"\"\"" +""" + donotdelete(args...) + +This function prevents dead-code elimination (DCE) of itself and any arguments +passed to it, but is otherwise the lightest barrier possible. In particular, +it is not a GC safepoint, does model an observable heap effect, does not expand +to any code itself and may be re-ordered with respect to other side effects +(though the total number of executions may not change). + +A useful model for this function is that it hashes all memory `reachable` from +args and escapes this information through some observable side-channel that does +not otherwise impact program behavior. Of course that's just a model. The +function does nothing and returns `nothing`. + +This is intended for use in benchmarks that want to guarantee that `args` are +actually computed. (Otherwise DCE may see that the result of the benchmark is +unused and delete the entire benchmark code). + +**Note**: `donotdelete` does not affect constant folding. For example, in + `donotdelete(1+1)`, no add instruction needs to be executed at runtime and + the code is semantically equivalent to `donotdelete(2)`. + +# Examples + +function loop() + for i = 1:1000 + # The compiler must guarantee that there are 1000 program points (in the correct + # order) at which the value of `i` is in a register, but has otherwise + # total control over the program. + donotdelete(i) + end +end +""" +Base.donotdelete + end diff --git a/base/essentials.jl b/base/essentials.jl index 1e4fea20bb4ae..c23294cb2c218 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -using Core: CodeInfo, SimpleVector +using Core: CodeInfo, SimpleVector, donotdelete const Callable = Union{Function,Type} diff --git a/src/builtin_proto.h b/src/builtin_proto.h index bc01c078de602..c7027f1b67f9e 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -53,6 +53,7 @@ DECLARE_BUILTIN(typeassert); DECLARE_BUILTIN(_typebody); DECLARE_BUILTIN(typeof); DECLARE_BUILTIN(_typevar); +DECLARE_BUILTIN(donotdelete); JL_CALLABLE(jl_f_invoke_kwsorter); #ifdef DEFINE_BUILTIN_GLOBALS @@ -67,6 +68,7 @@ JL_CALLABLE(jl_f__setsuper); JL_CALLABLE(jl_f__equiv_typedef); JL_CALLABLE(jl_f_get_binding_type); JL_CALLABLE(jl_f_set_binding_type); +JL_CALLABLE(jl_f_donotdelete); #ifdef __cplusplus } diff --git a/src/builtins.c b/src/builtins.c index 1fd26fd8ffbc4..1b4d35cb964ee 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1472,6 +1472,11 @@ JL_CALLABLE(jl_f__setsuper) return jl_nothing; } +JL_CALLABLE(jl_f_donotdelete) +{ + return jl_nothing; +} + static int equiv_field_types(jl_value_t *old, jl_value_t *ft) { size_t nf = jl_svec_len(ft); @@ -1874,6 +1879,7 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin_func("_equiv_typedef", jl_f__equiv_typedef); add_builtin_func("get_binding_type", jl_f_get_binding_type); add_builtin_func("set_binding_type!", jl_f_set_binding_type); + jl_builtin_donotdelete = add_builtin_func("donotdelete", jl_f_donotdelete); // builtin types add_builtin("Any", (jl_value_t*)jl_any_type); diff --git a/src/codegen.cpp b/src/codegen.cpp index 596693f3830f5..c754e7039b72b 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -473,6 +473,18 @@ static AttributeList get_func_attrs(LLVMContext &C) None); } +static AttributeList get_donotdelete_func_attrs(LLVMContext &C) +{ + AttributeSet FnAttrs = AttributeSet::get(C, makeArrayRef({Attribute::get(C, "thunk")})); + FnAttrs.addAttribute(C, Attribute::InaccessibleMemOnly); + FnAttrs.addAttribute(C, Attribute::WillReturn); + FnAttrs.addAttribute(C, 
Attribute::NoUnwind); + return AttributeList::get(C, + FnAttrs, + Attributes(C, {Attribute::NonNull}), + None); +} + static AttributeList get_attrs_noreturn(LLVMContext &C) { return AttributeList::get(C, @@ -3464,6 +3476,36 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } + else if (f == jl_builtin_donotdelete) { + // For now we emit this as a vararg call to the builtin + // (which doesn't look at the arguments). In the future, + // this should be an LLVM builtin. + auto it = builtin_func_map.find(jl_f_donotdelete); + if (it == builtin_func_map.end()) { + return false; + } + + *ret = mark_julia_const(ctx, jl_nothing); + FunctionType *Fty = FunctionType::get(getVoidTy(ctx.builder.getContext()), true); + Function *dnd = prepare_call(it->second); + SmallVector call_args; + + for (size_t i = 1; i <= nargs; ++i) { + const jl_cgval_t &obj = argv[i]; + if (obj.V) { + // TODO is this strong enough to constitute a read of any contained + // pointers? 
+ Value *V = obj.V; + if (obj.isboxed) { + V = emit_pointer_from_objref(ctx, V); + } + call_args.push_back(V); + } + } + ctx.builder.CreateCall(Fty, dnd, call_args); + return true; + } + return false; } @@ -8133,6 +8175,7 @@ extern "C" void jl_init_llvm(void) { jl_f_arrayset_addr, new JuliaFunction{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} }, { jl_f_arraysize_addr, new JuliaFunction{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} }, { jl_f_apply_type_addr, new JuliaFunction{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} }, + { jl_f_donotdelete_addr, new JuliaFunction{XSTR(jl_f_donotdelete), get_func_sig, get_donotdelete_func_attrs} } }; jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug; diff --git a/src/staticdata.c b/src/staticdata.c index 28fbdce09d2d3..fb42d9cdf23f9 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -26,7 +26,7 @@ extern "C" { // TODO: put WeakRefs on the weak_refs list during deserialization // TODO: handle finalizers -#define NUM_TAGS 152 +#define NUM_TAGS 153 // An array of references that need to be restored from the sysimg // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C. 
@@ -198,6 +198,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_builtin__expr); INSERT_TAG(jl_builtin_ifelse); INSERT_TAG(jl_builtin__typebody); + INSERT_TAG(jl_builtin_donotdelete); // All optional tags must be placed at the end, so that we // don't accidentally have a `NULL` in the middle @@ -252,7 +253,7 @@ static const jl_fptr_args_t id_to_fptrs[] = { &jl_f_applicable, &jl_f_invoke, &jl_f_sizeof, &jl_f__expr, &jl_f__typevar, &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype, &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type, - &jl_f_set_binding_type, &jl_f_opaque_closure_call, + &jl_f_set_binding_type, &jl_f_opaque_closure_call, &jl_f_donotdelete, NULL }; typedef struct { diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 7469dc74c8156..ec89ac9cd72a4 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -711,3 +711,9 @@ end @test !cmp43123(Ref{Function}(+), Ref{Union{typeof(+), typeof(-)}}(-)) @test cmp43123(Function[+], Union{typeof(+), typeof(-)}[+]) @test !cmp43123(Function[+], Union{typeof(+), typeof(-)}[-]) + +# Test that donotdelete survives through to LLVM time +f_donotdelete_input(x) = Base.donotdelete(x+1) +f_donotdelete_const() = Base.donotdelete(1+1) +@test occursin("call void (...) @jl_f_donotdelete(i64", get_llvm(f_donotdelete_input, Tuple{Int64}, true, false, false)) +@test occursin("call void (...) @jl_f_donotdelete()", get_llvm(f_donotdelete_const, Tuple{}, true, false, false)) \ No newline at end of file