llvm · wenju-he · Sep 9, 2025 · Sep 9, 2025 · Sep 9, 2025 · Sep 9, 2025
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
@@ -41,8 +41,6 @@ set( LIBCLC_MIN_LLVM 3.9.0 )
 set( LIBCLC_TARGETS_TO_BUILD "all"
     CACHE STRING "Semicolon-separated list of libclc targets to build, or 'all'." )
 
-option( ENABLE_RUNTIME_SUBNORMAL "Enable runtime linking of subnormal support." OFF )
-
 option(
   LIBCLC_USE_SPIRV_BACKEND "Build SPIR-V targets with the SPIR-V backend." OFF
 )
@@ -231,19 +229,6 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
 configure_file( libclc.pc.in libclc.pc @ONLY )
 install( FILES ${CMAKE_CURRENT_BINARY_DIR}/libclc.pc DESTINATION "${CMAKE_INSTALL_DATADIR}/pkgconfig" )
 
-if( ENABLE_RUNTIME_SUBNORMAL )
-  foreach( file IN ITEMS subnormal_use_default subnormal_disable )
-    link_bc(
-       TARGET ${file}
-       INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/${file}.ll
-    )
-    install(
-      FILES $<TARGET_PROPERTY:${file},TARGET_FILE>
-      DESTINATION "${CMAKE_INSTALL_DATADIR}/clc"
-    )
-  endforeach()
-endif()
-
 find_package( Python3 REQUIRED COMPONENTS Interpreter )
 file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/utils/gen_convert.py script_loc )
 add_custom_command(
@@ -371,9 +356,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
       list( APPEND opencl_gen_files clspv-convert.cl )
     else()
       list( APPEND opencl_gen_files convert.cl )
-      if ( NOT ENABLE_RUNTIME_SUBNORMAL )
-        list( APPEND opencl_lib_files opencl/lib/generic/subnormal_use_default.ll )
-      endif()
     endif()
   endif()
 
@@ -430,6 +412,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
       # Error on undefined macros
       -Werror=undef
       -fdiscard-value-names
+      -Xclang -fdenormal-fp-math-f32=dynamic
     )
 
     if( NOT "${cpu}" STREQUAL "" )

diff --git a/libclc/clc/include/clc/math/clc_subnormal_config.h b/libclc/clc/include/clc/math/clc_subnormal_config.h
@@ -10,7 +10,6 @@
 
 #include <clc/clcfunc.h>
 
-_CLC_DECL bool __clc_subnormals_disabled();
 _CLC_DECL bool __clc_fp16_subnormals_supported();
 _CLC_DECL bool __clc_fp32_subnormals_supported();
 _CLC_DECL bool __clc_fp64_subnormals_supported();

diff --git a/libclc/clc/include/clc/math/math.h b/libclc/clc/include/clc/math/math.h
@@ -11,7 +11,6 @@
 
 #include <clc/clc_as_type.h>
 #include <clc/clcfunc.h>
-#include <clc/math/clc_subnormal_config.h>
 
 #define SNAN 0x001
 #define QNAN 0x002
@@ -65,14 +64,12 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
 
 #define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32)
 
-_CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
-  int ix = __clc_as_int(x);
-  if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) &&
-      ((ix & MANTBITS_SP32) != 0)) {
-    ix &= SIGNBIT_SP32;
-    x = __clc_as_float(ix);
-  }
-  return x;
+_CLC_OVERLOAD _CLC_INLINE float __clc_soft_flush_denormal(float x) {
+  // Flush subnormals (|x| < 0x1p-126f, the smallest normal) to signed zero.
+  // Not using __clc_fp32_subnormals_supported: llvm.canonicalize quiets sNaN.
+  return __builtin_elementwise_abs(x) < 0x1p-126f
+             ? __builtin_elementwise_copysign(0.0f, x)
+             : x;
 }
 
 #ifdef cl_khr_fp64

diff --git a/libclc/clc/lib/clspv/math/clc_sw_fma.cl b/libclc/clc/lib/clspv/math/clc_sw_fma.cl
@@ -127,9 +127,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
     return c;
   }
 
-  a = __clc_flush_denormal_if_not_supported(a);
-  b = __clc_flush_denormal_if_not_supported(b);
-  c = __clc_flush_denormal_if_not_supported(c);
+  a = __clc_soft_flush_denormal(a);
+  b = __clc_soft_flush_denormal(b);
+  c = __clc_soft_flush_denormal(c);
 
   if (a == 0.0f || b == 0.0f) {
     return c;

diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
@@ -137,6 +137,7 @@ math/clc_sincos_helpers.cl
 math/clc_sinh.cl
 math/clc_sinpi.cl
 math/clc_sqrt.cl
+math/clc_subnormal_config.cl
 math/clc_sw_fma.cl
 math/clc_tables.cl
 math/clc_tan.cl

diff --git a/libclc/clc/lib/generic/math/clc_exp10.cl b/libclc/clc/lib/generic/math/clc_exp10.cl
@@ -11,7 +11,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
 #include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>
 #include <clc/relational/clc_isnan.h>

diff --git a/libclc/clc/lib/generic/math/clc_hypot.cl b/libclc/clc/lib/generic/math/clc_hypot.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_sqrt.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/relational/clc_isnan.h>
 #include <clc/shared/clc_clamp.h>

diff --git a/libclc/clc/lib/generic/math/clc_pow.cl b/libclc/clc/lib/generic/math/clc_pow.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
 #include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>
 #include <clc/relational/clc_select.h>

diff --git a/libclc/clc/lib/generic/math/clc_pown.cl b/libclc/clc/lib/generic/math/clc_pown.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
 #include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>
 #include <clc/relational/clc_select.h>

diff --git a/libclc/clc/lib/generic/math/clc_powr.cl b/libclc/clc/lib/generic/math/clc_powr.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
 #include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>
 #include <clc/relational/clc_select.h>

diff --git a/libclc/clc/lib/generic/math/clc_remquo.cl b/libclc/clc/lib/generic/math/clc_remquo.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_floor.h>
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/clc_trunc.h>
 #include <clc/math/math.h>
 #include <clc/shared/clc_max.h>

diff --git a/libclc/clc/lib/generic/math/clc_remquo.inc b/libclc/clc/lib/generic/math/clc_remquo.inc
@@ -8,8 +8,8 @@
 
 _CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y,
                                           __CLC_ADDRESS_SPACE int *quo) {
-  x = __clc_flush_denormal_if_not_supported(x);
-  y = __clc_flush_denormal_if_not_supported(y);
+  x = __clc_soft_flush_denormal(x);
+  y = __clc_soft_flush_denormal(y);
   int ux = __clc_as_int(x);
   int ax = ux & EXSIGNBIT_SP32;
   float xa = __clc_as_float(ax);

diff --git a/libclc/clc/lib/generic/math/clc_subnormal_config.cl b/libclc/clc/lib/generic/math/clc_subnormal_config.cl
@@ -0,0 +1,46 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/internal/clc.h>
+#include <clc/math/clc_subnormal_config.h>
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_DEF bool __clc_fp16_subnormals_supported() {
+#ifndef CLC_SPIRV
+  // Supported iff canonicalizing the smallest subnormal does not yield +0.
+  half canon = __builtin_elementwise_canonicalize(0x1p-24h);
+  return !__builtin_isfpclass(canon, __FPCLASS_POSZERO);
+#else
+  return false; // SPIR-V doesn't support llvm.canonicalize for now.
+#endif
+}
+#endif // cl_khr_fp16
+
+_CLC_DEF bool __clc_fp32_subnormals_supported() {
+#ifndef CLC_SPIRV
+  // Supported iff canonicalizing the smallest subnormal does not yield +0.
+  float canon = __builtin_elementwise_canonicalize(0x1p-149f);
+  return !__builtin_isfpclass(canon, __FPCLASS_POSZERO);
+#else
+  return false; // SPIR-V doesn't support llvm.canonicalize for now.
+#endif
+}
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_DEF bool __clc_fp64_subnormals_supported() {
+#ifndef CLC_SPIRV
+  // Supported iff canonicalizing the smallest subnormal does not yield +0.
+  double canon = __builtin_elementwise_canonicalize(0x1p-1074);
+  return !__builtin_isfpclass(canon, __FPCLASS_POSZERO);
+#else
+  return false; // SPIR-V doesn't support llvm.canonicalize for now.
+#endif
+}
+#endif // cl_khr_fp64
diff --git a/libclc/clc/lib/generic/math/clc_sw_fma.cl b/libclc/clc/lib/generic/math/clc_sw_fma.cl
@@ -36,9 +36,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
     return c;
   }
 
-  a = __clc_flush_denormal_if_not_supported(a);
-  b = __clc_flush_denormal_if_not_supported(b);
-  c = __clc_flush_denormal_if_not_supported(c);
+  a = __clc_soft_flush_denormal(a);
+  b = __clc_soft_flush_denormal(b);
+  c = __clc_soft_flush_denormal(c);
 
   if (c == 0) {
     return a * b;

diff --git a/libclc/opencl/lib/clspv/SOURCES b/libclc/opencl/lib/clspv/SOURCES
@@ -1,6 +1,5 @@
 math/fma.cl
 shared/vstore_half.cl
-subnormal_config.cl
 ../generic/geometric/distance.cl
 ../generic/geometric/length.cl
 ../generic/math/acos.cl

diff --git a/libclc/opencl/lib/clspv/subnormal_config.cl b/libclc/opencl/lib/clspv/subnormal_config.cl
diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES
@@ -1,5 +1,3 @@
-subnormal_config.cl
-subnormal_helper_func.ll
 async/async_work_group_copy.cl
 async/async_work_group_strided_copy.cl
 async/prefetch.cl

diff --git a/libclc/opencl/lib/generic/subnormal_config.cl b/libclc/opencl/lib/generic/subnormal_config.cl
diff --git a/libclc/opencl/lib/generic/subnormal_disable.ll b/libclc/opencl/lib/generic/subnormal_disable.ll
diff --git a/libclc/opencl/lib/generic/subnormal_helper_func.ll b/libclc/opencl/lib/generic/subnormal_helper_func.ll
diff --git a/libclc/opencl/lib/generic/subnormal_use_default.ll b/libclc/opencl/lib/generic/subnormal_use_default.ll
diff --git a/libclc/opencl/lib/spirv/SOURCES b/libclc/opencl/lib/spirv/SOURCES
@@ -1,4 +1,3 @@
-subnormal_config.cl
 ../generic/async/async_work_group_strided_copy.cl
 ../generic/async/wait_group_events.cl
 ../generic/common/degrees.cl

diff --git a/libclc/opencl/lib/spirv/subnormal_config.cl b/libclc/opencl/lib/spirv/subnormal_config.cl