Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 1 addition & 18 deletions libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ set( LIBCLC_MIN_LLVM 3.9.0 )
set( LIBCLC_TARGETS_TO_BUILD "all"
CACHE STRING "Semicolon-separated list of libclc targets to build, or 'all'." )

option( ENABLE_RUNTIME_SUBNORMAL "Enable runtime linking of subnormal support." OFF )

option(
LIBCLC_USE_SPIRV_BACKEND "Build SPIR-V targets with the SPIR-V backend." OFF
)
Expand Down Expand Up @@ -231,19 +229,6 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
configure_file( libclc.pc.in libclc.pc @ONLY )
install( FILES ${CMAKE_CURRENT_BINARY_DIR}/libclc.pc DESTINATION "${CMAKE_INSTALL_DATADIR}/pkgconfig" )

if( ENABLE_RUNTIME_SUBNORMAL )
foreach( file IN ITEMS subnormal_use_default subnormal_disable )
link_bc(
TARGET ${file}
INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/${file}.ll
)
install(
FILES $<TARGET_PROPERTY:${file},TARGET_FILE>
DESTINATION "${CMAKE_INSTALL_DATADIR}/clc"
)
endforeach()
endif()

find_package( Python3 REQUIRED COMPONENTS Interpreter )
file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/utils/gen_convert.py script_loc )
add_custom_command(
Expand Down Expand Up @@ -371,9 +356,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
list( APPEND opencl_gen_files clspv-convert.cl )
else()
list( APPEND opencl_gen_files convert.cl )
if ( NOT ENABLE_RUNTIME_SUBNORMAL )
list( APPEND opencl_lib_files opencl/lib/generic/subnormal_use_default.ll )
endif()
endif()
endif()

Expand Down Expand Up @@ -430,6 +412,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Error on undefined macros
-Werror=undef
-fdiscard-value-names
-Xclang -fdenormal-fp-math-f32=dynamic
)

if( NOT "${cpu}" STREQUAL "" )
Expand Down
1 change: 0 additions & 1 deletion libclc/clc/include/clc/math/clc_subnormal_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

#include <clc/clcfunc.h>

_CLC_DECL bool __clc_subnormals_disabled();
_CLC_DECL bool __clc_fp16_subnormals_supported();
_CLC_DECL bool __clc_fp32_subnormals_supported();
_CLC_DECL bool __clc_fp64_subnormals_supported();
Expand Down
15 changes: 6 additions & 9 deletions libclc/clc/include/clc/math/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

#include <clc/clc_as_type.h>
#include <clc/clcfunc.h>
#include <clc/math/clc_subnormal_config.h>

#define SNAN 0x001
#define QNAN 0x002
Expand Down Expand Up @@ -65,14 +64,12 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);

#define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32)

_CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
int ix = __clc_as_int(x);
if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) &&
((ix & MANTBITS_SP32) != 0)) {
ix &= SIGNBIT_SP32;
x = __clc_as_float(ix);
}
return x;
_CLC_OVERLOAD _CLC_INLINE float __clc_soft_flush_denormal(float x) {
// Avoid calling __clc_fp32_subnormals_supported here: it uses
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might have less trouble just using canonicalize for now and trying to relax it later

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might have less trouble just using canonicalize for now and trying to relax it later

Do you mean reverting __clc_soft_flush_denormal to use __clc_fp32_subnormals_supported (which uses llvm.canonicalize), or just replacing the use of __clc_fp32_subnormals_supported with __builtin_elementwise_canonicalize?

// llvm.canonicalize, which quiets sNaN.
return __builtin_elementwise_abs(x) < 0x1p-149f
? __builtin_elementwise_copysign(0.0f, x)
: x;
}

#ifdef cl_khr_fp64
Expand Down
6 changes: 3 additions & 3 deletions libclc/clc/lib/clspv/math/clc_sw_fma.cl
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
return c;
}

a = __clc_flush_denormal_if_not_supported(a);
b = __clc_flush_denormal_if_not_supported(b);
c = __clc_flush_denormal_if_not_supported(c);
a = __clc_soft_flush_denormal(a);
b = __clc_soft_flush_denormal(b);
c = __clc_soft_flush_denormal(c);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unconditionally forcing flush of denormals is not desirable. In this context I'm not sure why it's trying to flush in the first place.

The below code extracting the exponent can be replaced with frexp, and the return c on the above paths is missing a canonicalize.

But on a deeper level I don't think libclc should be trying to provide a software FMA implementation in the first place; that's a decision for the compiler when codegening llvm.fma, surely compiler-rt already has an implementation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unconditionally forcing flush of denormals is not desirable. In this context I'm not sure why it's trying to flush in the first place.

The below code extracting the exponent can be replaced with frexp, and the return c on the above paths is missing a canonicalize.

But on a deeper level I don't think libclc should be trying to provide a software FMA implementation in the first place; that's a decision for the compiler when codegening llvm.fma, surely compiler-rt already has an implementation?

Deleted clc_sw_fma in 7b290a2

Now clc_fma is implemented with __builtin_elementwise_fma

Copy link
Contributor

@Maetveis Maetveis Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

surely compiler-rt already has an implementation?

It doesn't. LLVM libc has one but it uses FP64, so I don't think it is of much help. I'd expect most targets that don't have hardware fma don't have fp64 either.

I think dropping sw fma would impact:

  • SPIR-V, which then starts generating the GLSL.std.450 extended instruction Fma. The problem there is that this instruction is (AFAICT) allowed to round intermediate products, but the OpenCL spec doesn't allow that. I'm not sure whether drivers actually implement it as fused or not.
    Arguably, lowering @llvm.fma to this instruction is a bug in LLVM, as @llvm.fma is specified to be fused when no fast-math flags are present.
  • Not all old R600 targets have FMA, I think this change would be breaking them. These are >10 years old GPUs at this point though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think r600 always had FMA; it's just not "fast" on all of them. In any case, the backend is obligated to implement llvm.fma correctly.


if (a == 0.0f || b == 0.0f) {
return c;
Expand Down
1 change: 1 addition & 0 deletions libclc/clc/lib/generic/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ math/clc_sincos_helpers.cl
math/clc_sinh.cl
math/clc_sinpi.cl
math/clc_sqrt.cl
math/clc_subnormal_config.cl
math/clc_sw_fma.cl
math/clc_tables.cl
math/clc_tan.cl
Expand Down
1 change: 0 additions & 1 deletion libclc/clc/lib/generic/math/clc_exp10.cl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_mad.h>
#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
#include <clc/relational/clc_isnan.h>
Expand Down
1 change: 0 additions & 1 deletion libclc/clc/lib/generic/math/clc_hypot.cl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_mad.h>
#include <clc/math/clc_sqrt.h>
#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/relational/clc_isnan.h>
#include <clc/shared/clc_clamp.h>
Expand Down
1 change: 0 additions & 1 deletion libclc/clc/lib/generic/math/clc_pow.cl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_mad.h>
#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
#include <clc/relational/clc_select.h>
Expand Down
1 change: 0 additions & 1 deletion libclc/clc/lib/generic/math/clc_pown.cl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_mad.h>
#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
#include <clc/relational/clc_select.h>
Expand Down
1 change: 0 additions & 1 deletion libclc/clc/lib/generic/math/clc_powr.cl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_mad.h>
#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
#include <clc/relational/clc_select.h>
Expand Down
1 change: 0 additions & 1 deletion libclc/clc/lib/generic/math/clc_remquo.cl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include <clc/math/clc_floor.h>
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_subnormal_config.h>
#include <clc/math/clc_trunc.h>
#include <clc/math/math.h>
#include <clc/shared/clc_max.h>
Expand Down
4 changes: 2 additions & 2 deletions libclc/clc/lib/generic/math/clc_remquo.inc
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

_CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y,
__CLC_ADDRESS_SPACE int *quo) {
x = __clc_flush_denormal_if_not_supported(x);
y = __clc_flush_denormal_if_not_supported(y);
x = __clc_soft_flush_denormal(x);
y = __clc_soft_flush_denormal(y);
Comment on lines +11 to +12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is necessary. In the rocm-device-libs version of this, I managed to delete the explicit canonicalizes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

@wenju-he wenju-he Oct 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/ROCm/llvm-project/blob/0e9e3946cb257d1ed7b119333db451805865b36b/amd/device-libs/ocml/src/remainderF_base.h#L47

See this series of patches: ROCm@b3beb93 ROCm@9a7bc19 ROCm@e9198f7

Should just copy what these did

I have tried porting both the libclc __clc_remquo and the OCML remquo2 implementations to replace the Intel GPU implementation at https://github.com/intel/intel-graphics-compiler/blob/fc97dc482697b320667a52914f1225556f0856e8/IGC/BiFModule/Implementation/Math/remquo.cl#L12-L104. However, the ported code does not pass the OpenCL CTS test `./test_bruteforce remquo` on Intel GPUs.
Can I copy the Intel GPU implementation to overwrite libclc's __clc_remquo?

int ux = __clc_as_int(x);
int ax = ux & EXSIGNBIT_SP32;
float xa = __clc_as_float(ax);
Expand Down
46 changes: 46 additions & 0 deletions libclc/clc/lib/generic/math/clc_subnormal_config.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clc/internal/clc.h>
#include <clc/math/clc_subnormal_config.h>

#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Returns true when canonicalizing the smallest positive half subnormal
// yields something other than +0, i.e. the value was not flushed to zero.
// Always returns false when CLC_SPIRV is defined.
_CLC_DEF bool __clc_fp16_subnormals_supported() {
#ifndef CLC_SPIRV
  // 0x1p-24h is the smallest positive subnormal representable in half.
  const half canonical = __builtin_elementwise_canonicalize(0x1p-24h);
  return !__builtin_isfpclass(canonical, __FPCLASS_POSZERO);
#else
  // SPIR-V doesn't support llvm.canonicalize for now.
  return false;
#endif
}
#endif // cl_khr_fp16

// Returns true when canonicalizing the smallest positive float subnormal
// yields something other than +0, i.e. the value was not flushed to zero.
// Always returns false when CLC_SPIRV is defined.
_CLC_DEF bool __clc_fp32_subnormals_supported() {
#ifndef CLC_SPIRV
  // 0x1p-149f is the smallest positive subnormal representable in float.
  const float canonical = __builtin_elementwise_canonicalize(0x1p-149f);
  return !__builtin_isfpclass(canonical, __FPCLASS_POSZERO);
#else
  // SPIR-V doesn't support llvm.canonicalize for now.
  return false;
#endif
}

#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_DEF bool __clc_fp64_subnormals_supported() {
#ifdef CLC_SPIRV
// SPIR-V doesn't support llvm.canonicalize for now.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you just fix that instead of special casing it here? It's not difficult to implement

return false;
#else
return !__builtin_isfpclass(__builtin_elementwise_canonicalize(0x1p-1074),
__FPCLASS_POSZERO);
#endif
}
#endif // cl_khr_fp64
6 changes: 3 additions & 3 deletions libclc/clc/lib/generic/math/clc_sw_fma.cl
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
return c;
}

a = __clc_flush_denormal_if_not_supported(a);
b = __clc_flush_denormal_if_not_supported(b);
c = __clc_flush_denormal_if_not_supported(c);
a = __clc_soft_flush_denormal(a);
b = __clc_soft_flush_denormal(b);
c = __clc_soft_flush_denormal(c);

if (c == 0) {
return a * b;
Expand Down
1 change: 0 additions & 1 deletion libclc/opencl/lib/clspv/SOURCES
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
math/fma.cl
shared/vstore_half.cl
subnormal_config.cl
../generic/geometric/distance.cl
../generic/geometric/length.cl
../generic/math/acos.cl
Expand Down
16 changes: 0 additions & 16 deletions libclc/opencl/lib/clspv/subnormal_config.cl

This file was deleted.

2 changes: 0 additions & 2 deletions libclc/opencl/lib/generic/SOURCES
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
subnormal_config.cl
subnormal_helper_func.ll
async/async_work_group_copy.cl
async/async_work_group_strided_copy.cl
async/prefetch.cl
Expand Down
18 changes: 0 additions & 18 deletions libclc/opencl/lib/generic/subnormal_config.cl

This file was deleted.

9 changes: 0 additions & 9 deletions libclc/opencl/lib/generic/subnormal_disable.ll

This file was deleted.

16 changes: 0 additions & 16 deletions libclc/opencl/lib/generic/subnormal_helper_func.ll

This file was deleted.

9 changes: 0 additions & 9 deletions libclc/opencl/lib/generic/subnormal_use_default.ll

This file was deleted.

1 change: 0 additions & 1 deletion libclc/opencl/lib/spirv/SOURCES
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
subnormal_config.cl
../generic/async/async_work_group_strided_copy.cl
../generic/async/wait_group_events.cl
../generic/common/degrees.cl
Expand Down
16 changes: 0 additions & 16 deletions libclc/opencl/lib/spirv/subnormal_config.cl

This file was deleted.

Loading