diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index c75f450d8d3ad..97896715e2712 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -41,8 +41,6 @@ set( LIBCLC_MIN_LLVM 3.9.0 ) set( LIBCLC_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of libclc targets to build, or 'all'." ) -option( ENABLE_RUNTIME_SUBNORMAL "Enable runtime linking of subnormal support." OFF ) - option( LIBCLC_USE_SPIRV_BACKEND "Build SPIR-V targets with the SPIR-V backend." OFF ) @@ -231,19 +229,6 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii configure_file( libclc.pc.in libclc.pc @ONLY ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/libclc.pc DESTINATION "${CMAKE_INSTALL_DATADIR}/pkgconfig" ) -if( ENABLE_RUNTIME_SUBNORMAL ) - foreach( file IN ITEMS subnormal_use_default subnormal_disable ) - link_bc( - TARGET ${file} - INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/${file}.ll - ) - install( - FILES $ - DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" - ) - endforeach() -endif() - find_package( Python3 REQUIRED COMPONENTS Interpreter ) file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/utils/gen_convert.py script_loc ) add_custom_command( @@ -371,9 +356,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) list( APPEND opencl_gen_files clspv-convert.cl ) else() list( APPEND opencl_gen_files convert.cl ) - if ( NOT ENABLE_RUNTIME_SUBNORMAL ) - list( APPEND opencl_lib_files opencl/lib/generic/subnormal_use_default.ll ) - endif() endif() endif() @@ -430,6 +412,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Error on undefined macros -Werror=undef -fdiscard-value-names + -fdenormal-fp-math=dynamic ) if( NOT "${cpu}" STREQUAL "" ) diff --git a/libclc/clc/include/clc/internal/math/clc_sw_fma.h b/libclc/clc/include/clc/internal/math/clc_sw_fma.h deleted file mode 100644 index 5d6c76879ceb9..0000000000000 --- a/libclc/clc/include/clc/internal/math/clc_sw_fma.h +++ /dev/null @@ -1,19 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef __CLC_INTERNAL_MATH_CLC_SW_FMA_H__ -#define __CLC_INTERNAL_MATH_CLC_SW_FMA_H__ - -#define __CLC_FUNCTION __clc_sw_fma -#define __CLC_BODY - -#include - -#undef __CLC_FUNCTION - -#endif // __CLC_INTERNAL_MATH_CLC_SW_FMA_H__ diff --git a/libclc/clc/include/clc/math/clc_subnormal_config.h b/libclc/clc/include/clc/math/clc_subnormal_config.h index 14693ed01e033..e44ec1958b101 100644 --- a/libclc/clc/include/clc/math/clc_subnormal_config.h +++ b/libclc/clc/include/clc/math/clc_subnormal_config.h @@ -10,7 +10,6 @@ #include -_CLC_DECL bool __clc_subnormals_disabled(); _CLC_DECL bool __clc_fp16_subnormals_supported(); _CLC_DECL bool __clc_fp32_subnormals_supported(); _CLC_DECL bool __clc_fp64_subnormals_supported(); diff --git a/libclc/clc/include/clc/math/math.h b/libclc/clc/include/clc/math/math.h index c2647f66b4006..cc4eb4ce3ec76 100644 --- a/libclc/clc/include/clc/math/math.h +++ b/libclc/clc/include/clc/math/math.h @@ -11,7 +11,6 @@ #include #include -#include #define SNAN 0x001 #define QNAN 0x002 @@ -24,15 +23,6 @@ #define PNOR 0x100 #define PINF 0x200 -#if (defined __AMDGCN__ || defined __R600__) && !defined __HAS_FMAF__ -#define __CLC_HAVE_HW_FMA32() (0) -#elif defined(CLC_SPIRV) -bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void); -#define __CLC_HAVE_HW_FMA32() __clc_runtime_has_hw_fma32() -#else -#define __CLC_HAVE_HW_FMA32() (1) -#endif - #define HAVE_BITALIGN() (0) #define HAVE_FAST_FMA32() (0) @@ -65,14 +55,12 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void); #define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32) -_CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) { - int ix = __clc_as_int(x); - if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) && - ((ix & MANTBITS_SP32) != 0)) { - ix &= SIGNBIT_SP32; - x = __clc_as_float(ix); - } - return x; +_CLC_OVERLOAD _CLC_INLINE float __clc_soft_flush_denormal(float x) { + // Avoid calling __clc_fp32_subnormals_supported here: it uses + // llvm.canonicalize, which quiets sNaN. + return __builtin_elementwise_abs(x) < 0x1p-149f + ? __builtin_elementwise_copysign(0.0f, x) + : x; } #ifdef cl_khr_fp64 diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES index b91b0e70a397d..2faea79cbc0bf 100644 --- a/libclc/clc/lib/clspv/SOURCES +++ b/libclc/clc/lib/clspv/SOURCES @@ -1,2 +1 @@ -math/clc_sw_fma.cl integer/clc_mul_hi.cl diff --git a/libclc/clc/lib/clspv/math/clc_sw_fma.cl b/libclc/clc/lib/clspv/math/clc_sw_fma.cl deleted file mode 100644 index c28b9441b05ff..0000000000000 --- a/libclc/clc/lib/clspv/math/clc_sw_fma.cl +++ /dev/null @@ -1,274 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This version is derived from the generic fma software implementation -// (__clc_sw_fma), but avoids the use of ulong in favor of uint2. The logic has -// been updated as appropriate. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct fp { - uint2 mantissa; - int exponent; - uint sign; -}; - -static uint2 u2_set(uint hi, uint lo) { - uint2 res; - res.lo = lo; - res.hi = hi; - return res; -} - -static uint2 u2_set_u(uint val) { return u2_set(0, val); } - -static uint2 u2_mul(uint a, uint b) { - uint2 res; - res.hi = __clc_mul_hi(a, b); - res.lo = a * b; - return res; -} - -static uint2 u2_sll(uint2 val, uint shift) { - if (shift == 0) - return val; - if (shift < 32) { - val.hi <<= shift; - val.hi |= val.lo >> (32 - shift); - val.lo <<= shift; - } else { - val.hi = val.lo << (shift - 32); - val.lo = 0; - } - return val; -} - -static uint2 u2_srl(uint2 val, uint shift) { - if (shift == 0) - return val; - if (shift < 32) { - val.lo >>= shift; - val.lo |= val.hi << (32 - shift); - val.hi >>= shift; - } else { - val.lo = val.hi >> (shift - 32); - val.hi = 0; - } - return val; -} - -static uint2 u2_or(uint2 a, uint b) { - a.lo |= b; - return a; -} - -static uint2 u2_and(uint2 a, uint2 b) { - a.lo &= b.lo; - a.hi &= b.hi; - return a; -} - -static uint2 u2_add(uint2 a, uint2 b) { - uint carry = (__clc_hadd(a.lo, b.lo) >> 31) & 0x1; - a.lo += b.lo; - a.hi += b.hi + carry; - return a; -} - -static uint2 u2_add_u(uint2 a, uint b) { return u2_add(a, u2_set_u(b)); } - -static uint2 u2_inv(uint2 a) { - a.lo = ~a.lo; - a.hi = ~a.hi; - return u2_add_u(a, 1); -} - -static uint u2_clz(uint2 a) { - uint leading_zeroes = __clc_clz(a.hi); - if (leading_zeroes == 32) { - leading_zeroes += __clc_clz(a.lo); - } - return leading_zeroes; -} - -static bool u2_eq(uint2 a, uint2 b) { return a.lo == b.lo && a.hi == b.hi; } - -static bool u2_zero(uint2 a) { return u2_eq(a, u2_set_u(0)); } - -static bool u2_gt(uint2 a, uint2 b) { - return a.hi > b.hi || (a.hi == b.hi && a.lo > b.lo); -} - -_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) { - /* special cases */ - if (__clc_isnan(a) || __clc_isnan(b) || __clc_isnan(c) || __clc_isinf(a) || - __clc_isinf(b)) { - return __clc_mad(a, b, c); - } - - /* If only c is inf, and both a,b are regular numbers, the result is c*/ - if (__clc_isinf(c)) { - return c; - } - - a = __clc_flush_denormal_if_not_supported(a); - b = __clc_flush_denormal_if_not_supported(b); - c = __clc_flush_denormal_if_not_supported(c); - - if (a == 0.0f || b == 0.0f) { - return c; - } - - if (c == 0) { - return a * b; - } - - struct fp st_a, st_b, st_c; - - st_a.exponent = a == .0f ? 0 : ((__clc_as_uint(a) & 0x7f800000) >> 23) - 127; - st_b.exponent = b == .0f ? 0 : ((__clc_as_uint(b) & 0x7f800000) >> 23) - 127; - st_c.exponent = c == .0f ? 0 : ((__clc_as_uint(c) & 0x7f800000) >> 23) - 127; - - st_a.mantissa = - u2_set_u(a == .0f ? 0 : (__clc_as_uint(a) & 0x7fffff) | 0x800000); - st_b.mantissa = - u2_set_u(b == .0f ? 0 : (__clc_as_uint(b) & 0x7fffff) | 0x800000); - st_c.mantissa = - u2_set_u(c == .0f ? 0 : (__clc_as_uint(c) & 0x7fffff) | 0x800000); - - st_a.sign = __clc_as_uint(a) & 0x80000000; - st_b.sign = __clc_as_uint(b) & 0x80000000; - st_c.sign = __clc_as_uint(c) & 0x80000000; - - // Multiplication. - // Move the product to the highest bits to maximize precision - // mantissa is 24 bits => product is 48 bits, 2bits non-fraction. - // Add one bit for future addition overflow, - // add another bit to detect subtraction underflow - struct fp st_mul; - st_mul.sign = st_a.sign ^ st_b.sign; - st_mul.mantissa = u2_sll(u2_mul(st_a.mantissa.lo, st_b.mantissa.lo), 14); - st_mul.exponent = - !u2_zero(st_mul.mantissa) ? st_a.exponent + st_b.exponent : 0; - - // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel - if (st_mul.exponent == 0 && u2_zero(st_mul.mantissa)) - return c; - -// Mantissa is 23 fractional bits, shift it the same way as product mantissa -#define C_ADJUST 37ul - - // both exponents are bias adjusted - int exp_diff = st_mul.exponent - st_c.exponent; - - st_c.mantissa = u2_sll(st_c.mantissa, C_ADJUST); - uint2 cutoff_bits = u2_set_u(0); - uint2 cutoff_mask = u2_add(u2_sll(u2_set_u(1), __clc_abs(exp_diff)), - u2_set(0xffffffff, 0xffffffff)); - if (exp_diff > 0) { - cutoff_bits = - exp_diff >= 64 ? st_c.mantissa : u2_and(st_c.mantissa, cutoff_mask); - st_c.mantissa = - exp_diff >= 64 ? u2_set_u(0) : u2_srl(st_c.mantissa, exp_diff); - } else { - cutoff_bits = -exp_diff >= 64 ? st_mul.mantissa - : u2_and(st_mul.mantissa, cutoff_mask); - st_mul.mantissa = - -exp_diff >= 64 ? u2_set_u(0) : u2_srl(st_mul.mantissa, -exp_diff); - } - - struct fp st_fma; - st_fma.sign = st_mul.sign; - st_fma.exponent = __clc_max(st_mul.exponent, st_c.exponent); - if (st_c.sign == st_mul.sign) { - st_fma.mantissa = u2_add(st_mul.mantissa, st_c.mantissa); - } else { - // cutoff bits borrow one - st_fma.mantissa = - u2_add(u2_add(st_mul.mantissa, u2_inv(st_c.mantissa)), - (!u2_zero(cutoff_bits) && (st_mul.exponent > st_c.exponent) - ? u2_set(0xffffffff, 0xffffffff) - : u2_set_u(0))); - } - - // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign - if (u2_gt(st_fma.mantissa, u2_set(0x7fffffff, 0xffffffff))) { - st_fma.mantissa = u2_inv(st_fma.mantissa); - st_fma.sign = st_mul.sign ^ 0x80000000; - } - - // detect overflow/underflow - int overflow_bits = 3 - u2_clz(st_fma.mantissa); - - // adjust exponent - st_fma.exponent += overflow_bits; - - // handle underflow - if (overflow_bits < 0) { - st_fma.mantissa = u2_sll(st_fma.mantissa, -overflow_bits); - overflow_bits = 0; - } - - // rounding - uint2 trunc_mask = u2_add(u2_sll(u2_set_u(1), C_ADJUST + overflow_bits), - u2_set(0xffffffff, 0xffffffff)); - uint2 trunc_bits = - u2_or(u2_and(st_fma.mantissa, trunc_mask), !u2_zero(cutoff_bits)); - uint2 last_bit = - u2_and(st_fma.mantissa, u2_sll(u2_set_u(1), C_ADJUST + overflow_bits)); - uint2 grs_bits = u2_sll(u2_set_u(4), C_ADJUST - 3 + overflow_bits); - - // round to nearest even - if (u2_gt(trunc_bits, grs_bits) || - (u2_eq(trunc_bits, grs_bits) && !u2_zero(last_bit))) { - st_fma.mantissa = - u2_add(st_fma.mantissa, u2_sll(u2_set_u(1), C_ADJUST + overflow_bits)); - } - - // Shift mantissa back to bit 23 - st_fma.mantissa = u2_srl(st_fma.mantissa, C_ADJUST + overflow_bits); - - // Detect rounding overflow - if (u2_gt(st_fma.mantissa, u2_set_u(0xffffff))) { - ++st_fma.exponent; - st_fma.mantissa = u2_srl(st_fma.mantissa, 1); - } - - if (u2_zero(st_fma.mantissa)) { - return 0.0f; - } - - // Flating point range limit - if (st_fma.exponent > 127) { - return __clc_as_float(__clc_as_uint(INFINITY) | st_fma.sign); - } - - // Flush denormals - if (st_fma.exponent <= -127) { - return __clc_as_float(st_fma.sign); - } - - return __clc_as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) | - ((uint)st_fma.mantissa.lo & 0x7fffff)); -} - -#define __CLC_FLOAT_ONLY -#define __CLC_FUNCTION __clc_sw_fma -#define __CLC_BODY -#include diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index ee4f771799e8e..ef35c43ce443e 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -137,7 +137,7 @@ math/clc_sincos_helpers.cl math/clc_sinh.cl math/clc_sinpi.cl math/clc_sqrt.cl -math/clc_sw_fma.cl +math/clc_subnormal_config.cl math/clc_tables.cl math/clc_tan.cl math/clc_tanh.cl diff --git a/libclc/clc/lib/generic/math/clc_exp10.cl b/libclc/clc/lib/generic/math/clc_exp10.cl index 0c394ee19475a..fb33367851fda 100644 --- a/libclc/clc/lib/generic/math/clc_exp10.cl +++ b/libclc/clc/lib/generic/math/clc_exp10.cl @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/libclc/clc/lib/generic/math/clc_fma.cl b/libclc/clc/lib/generic/math/clc_fma.cl index e69ef614e780f..27ea962af398d 100644 --- a/libclc/clc/lib/generic/math/clc_fma.cl +++ b/libclc/clc/lib/generic/math/clc_fma.cl @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #define __CLC_BODY diff --git a/libclc/clc/lib/generic/math/clc_fma.inc b/libclc/clc/lib/generic/math/clc_fma.inc index b23b6433d2922..a55e9c0f9b2b7 100644 --- a/libclc/clc/lib/generic/math/clc_fma.inc +++ b/libclc/clc/lib/generic/math/clc_fma.inc @@ -8,9 +8,5 @@ _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_fma(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) { -#if __CLC_FPSIZE == 32 - if (!__CLC_HAVE_HW_FMA32()) - return __clc_sw_fma(a, b, c); -#endif return __builtin_elementwise_fma(a, b, c); } diff --git a/libclc/clc/lib/generic/math/clc_hypot.cl b/libclc/clc/lib/generic/math/clc_hypot.cl index c934ab29da91b..fd046bccaed51 100644 --- a/libclc/clc/lib/generic/math/clc_hypot.cl +++ b/libclc/clc/lib/generic/math/clc_hypot.cl @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/libclc/clc/lib/generic/math/clc_pow.cl b/libclc/clc/lib/generic/math/clc_pow.cl index 70d3d614a8d36..c20d3829ea076 100644 --- a/libclc/clc/lib/generic/math/clc_pow.cl +++ b/libclc/clc/lib/generic/math/clc_pow.cl @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/libclc/clc/lib/generic/math/clc_pown.cl b/libclc/clc/lib/generic/math/clc_pown.cl index 5aa9560174b99..cfc415753fd1a 100644 --- a/libclc/clc/lib/generic/math/clc_pown.cl +++ b/libclc/clc/lib/generic/math/clc_pown.cl @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/libclc/clc/lib/generic/math/clc_powr.cl b/libclc/clc/lib/generic/math/clc_powr.cl index 0556ec97d6f3c..c35a3e2c382c5 100644 --- a/libclc/clc/lib/generic/math/clc_powr.cl +++ b/libclc/clc/lib/generic/math/clc_powr.cl @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/libclc/clc/lib/generic/math/clc_remquo.cl b/libclc/clc/lib/generic/math/clc_remquo.cl index fd83ead06d89a..cdebe4922baa0 100644 --- a/libclc/clc/lib/generic/math/clc_remquo.cl +++ b/libclc/clc/lib/generic/math/clc_remquo.cl @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/libclc/clc/lib/generic/math/clc_remquo.inc b/libclc/clc/lib/generic/math/clc_remquo.inc index 3a76ffed7f039..681020f501d65 100644 --- a/libclc/clc/lib/generic/math/clc_remquo.inc +++ b/libclc/clc/lib/generic/math/clc_remquo.inc @@ -8,8 +8,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y, __CLC_ADDRESS_SPACE int *quo) { - x = __clc_flush_denormal_if_not_supported(x); - y = __clc_flush_denormal_if_not_supported(y); + x = __clc_soft_flush_denormal(x); + y = __clc_soft_flush_denormal(y); int ux = __clc_as_int(x); int ax = ux & EXSIGNBIT_SP32; float xa = __clc_as_float(ax); diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc index bddc0998cf950..e902bf3830626 100644 --- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc @@ -97,19 +97,9 @@ _CLC_DEF _CLC_OVERLOAD void __clc_fullMulS(private __CLC_FLOATN *hi, private __CLC_FLOATN *lo, __CLC_FLOATN a, __CLC_FLOATN b, __CLC_FLOATN bh, __CLC_FLOATN bt) { - if (__CLC_HAVE_HW_FMA32()) { - __CLC_FLOATN ph = a * b; - *hi = ph; - *lo = __clc_fma(a, b, -ph); - } else { - __CLC_FLOATN ah = __CLC_AS_FLOATN(__CLC_AS_UINTN(a) & 0xfffff000U); - __CLC_FLOATN at = a - ah; - __CLC_FLOATN ph = a * b; - __CLC_FLOATN pt = __clc_mad( - at, bt, __clc_mad(at, bh, __clc_mad(ah, bt, __clc_mad(ah, bh, -ph)))); - *hi = ph; - *lo = pt; - } + __CLC_FLOATN ph = a * b; + *hi = ph; + *lo = __clc_fma(a, b, -ph); } _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_removePi2S(private __CLC_FLOATN *hi, @@ -280,20 +270,8 @@ _CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionLargeS( const __CLC_FLOATN pio2t = (__CLC_FLOATN)0xa22168 / 0x1.0p+47f; __CLC_FLOATN rh, rt; - - if (__CLC_HAVE_HW_FMA32()) { - rh = q1 * pio2h; - rt = __clc_fma(q0, pio2h, __clc_fma(q1, pio2t, __clc_fma(q1, pio2h, -rh))); - } else { - __CLC_FLOATN q1h = __CLC_AS_FLOATN(__CLC_AS_UINTN(q1) & 0xfffff000); - __CLC_FLOATN q1t = q1 - q1h; - rh = q1 * pio2h; - rt = __clc_mad( - q1t, pio2ht, - __clc_mad(q1t, pio2hh, - __clc_mad(q1h, pio2ht, __clc_mad(q1h, pio2hh, -rh)))); - rt = __clc_mad(q0, pio2h, __clc_mad(q1, pio2t, rt)); - } + rh = q1 * pio2h; + rt = __clc_fma(q0, pio2h, __clc_fma(q1, pio2t, __clc_fma(q1, pio2h, -rh))); __CLC_FLOATN t = rh + rt; rt = rt - (t - rh); diff --git a/libclc/clc/lib/generic/math/clc_subnormal_config.cl b/libclc/clc/lib/generic/math/clc_subnormal_config.cl new file mode 100644 index 0000000000000..55e28a35f6ce8 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_subnormal_config.cl @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +_CLC_DEF bool __clc_fp16_subnormals_supported() { +#ifdef CLC_SPIRV + // SPIR-V doesn't support llvm.canonicalize for now. + return false; +#else + return !__builtin_isfpclass(__builtin_elementwise_canonicalize(0x1p-24h), + __FPCLASS_POSZERO); +#endif +} +#endif // cl_khr_fp16 + +_CLC_DEF bool __clc_fp32_subnormals_supported() { +#ifdef CLC_SPIRV + // SPIR-V doesn't support llvm.canonicalize for now. + return false; +#else + return !__builtin_isfpclass(__builtin_elementwise_canonicalize(0x1p-149f), + __FPCLASS_POSZERO); +#endif +} + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_DEF bool __clc_fp64_subnormals_supported() { +#ifdef CLC_SPIRV + // SPIR-V doesn't support llvm.canonicalize for now. + return false; +#else + return !__builtin_isfpclass(__builtin_elementwise_canonicalize(0x1p-1074), + __FPCLASS_POSZERO); +#endif +} +#endif // cl_khr_fp64 diff --git a/libclc/clc/lib/generic/math/clc_sw_fma.cl b/libclc/clc/lib/generic/math/clc_sw_fma.cl deleted file mode 100644 index 606e4df320a89..0000000000000 --- a/libclc/clc/lib/generic/math/clc_sw_fma.cl +++ /dev/null @@ -1,165 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct fp { - ulong mantissa; - int exponent; - uint sign; -}; - -_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) { - /* special cases */ - if (__clc_isnan(a) || __clc_isnan(b) || __clc_isnan(c) || __clc_isinf(a) || - __clc_isinf(b)) { - return __clc_mad(a, b, c); - } - - /* If only c is inf, and both a,b are regular numbers, the result is c*/ - if (__clc_isinf(c)) { - return c; - } - - a = __clc_flush_denormal_if_not_supported(a); - b = __clc_flush_denormal_if_not_supported(b); - c = __clc_flush_denormal_if_not_supported(c); - - if (c == 0) { - return a * b; - } - - struct fp st_a, st_b, st_c; - - st_a.exponent = a == .0f ? 0 : ((__clc_as_uint(a) & 0x7f800000) >> 23) - 127; - st_b.exponent = b == .0f ? 0 : ((__clc_as_uint(b) & 0x7f800000) >> 23) - 127; - st_c.exponent = c == .0f ? 0 : ((__clc_as_uint(c) & 0x7f800000) >> 23) - 127; - - st_a.mantissa = a == .0f ? 0 : (__clc_as_uint(a) & 0x7fffff) | 0x800000; - st_b.mantissa = b == .0f ? 0 : (__clc_as_uint(b) & 0x7fffff) | 0x800000; - st_c.mantissa = c == .0f ? 0 : (__clc_as_uint(c) & 0x7fffff) | 0x800000; - - st_a.sign = __clc_as_uint(a) & 0x80000000; - st_b.sign = __clc_as_uint(b) & 0x80000000; - st_c.sign = __clc_as_uint(c) & 0x80000000; - - // Multiplication. - // Move the product to the highest bits to maximize precision - // mantissa is 24 bits => product is 48 bits, 2bits non-fraction. - // Add one bit for future addition overflow, - // add another bit to detect subtraction underflow - struct fp st_mul; - st_mul.sign = st_a.sign ^ st_b.sign; - st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul; - st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0; - - // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel - if (st_mul.exponent == 0 && st_mul.mantissa == 0) - return c; - -// Mantissa is 23 fractional bits, shift it the same way as product mantissa -#define C_ADJUST 37ul - - // both exponents are bias adjusted - int exp_diff = st_mul.exponent - st_c.exponent; - - st_c.mantissa <<= C_ADJUST; - ulong cutoff_bits = 0; - ulong cutoff_mask = (1ul << __clc_abs(exp_diff)) - 1ul; - if (exp_diff > 0) { - cutoff_bits = - exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask); - st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff); - } else { - cutoff_bits = - -exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask); - st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff); - } - - struct fp st_fma; - st_fma.sign = st_mul.sign; - st_fma.exponent = __clc_max(st_mul.exponent, st_c.exponent); - if (st_c.sign == st_mul.sign) { - st_fma.mantissa = st_mul.mantissa + st_c.mantissa; - } else { - // cutoff bits borrow one - st_fma.mantissa = - st_mul.mantissa - st_c.mantissa - - (cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0); - } - - // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign - if (st_fma.mantissa > LONG_MAX) { - st_fma.mantissa = 0 - st_fma.mantissa; - st_fma.sign = st_mul.sign ^ 0x80000000; - } - - // detect overflow/underflow - int overflow_bits = 3 - __clc_clz(st_fma.mantissa); - - // adjust exponent - st_fma.exponent += overflow_bits; - - // handle underflow - if (overflow_bits < 0) { - st_fma.mantissa <<= -overflow_bits; - overflow_bits = 0; - } - - // rounding - ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1; - ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0); - ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits)); - ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits)); - - // round to nearest even - if ((trunc_bits > grs_bits) || (trunc_bits == grs_bits && last_bit != 0)) { - st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits)); - } - - // Shift mantissa back to bit 23 - st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits)); - - // Detect rounding overflow - if (st_fma.mantissa > 0xffffff) { - ++st_fma.exponent; - st_fma.mantissa >>= 1; - } - - if (st_fma.mantissa == 0) { - return .0f; - } - - // Flating point range limit - if (st_fma.exponent > 127) { - return __clc_as_float(__clc_as_uint(INFINITY) | st_fma.sign); - } - - // Flush denormals - if (st_fma.exponent <= -127) { - return __clc_as_float(st_fma.sign); - } - - return __clc_as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) | - ((uint)st_fma.mantissa & 0x7fffff)); -} - -#define __CLC_FLOAT_ONLY -#define __CLC_FUNCTION __clc_sw_fma -#define __CLC_BODY -#include diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES index 07bc7aaead8e8..ed63fe6b7c529 100644 --- a/libclc/clc/lib/spirv/SOURCES +++ b/libclc/clc/lib/spirv/SOURCES @@ -1,3 +1,2 @@ math/clc_fmax.cl math/clc_fmin.cl -math/clc_runtime_has_hw_fma32.cl diff --git a/libclc/clc/lib/spirv/math/clc_runtime_has_hw_fma32.cl b/libclc/clc/lib/spirv/math/clc_runtime_has_hw_fma32.cl deleted file mode 100644 index 2f6ad2c5175dd..0000000000000 --- a/libclc/clc/lib/spirv/math/clc_runtime_has_hw_fma32.cl +++ /dev/null @@ -1,9 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -bool __clc_runtime_has_hw_fma32() { return false; } diff --git a/libclc/opencl/lib/clspv/SOURCES b/libclc/opencl/lib/clspv/SOURCES index 0a142ed3e6043..3d9f871ff57ca 100644 --- a/libclc/opencl/lib/clspv/SOURCES +++ b/libclc/opencl/lib/clspv/SOURCES @@ -1,6 +1,5 @@ math/fma.cl shared/vstore_half.cl -subnormal_config.cl ../generic/geometric/distance.cl ../generic/geometric/length.cl ../generic/math/acos.cl diff --git a/libclc/opencl/lib/clspv/math/fma.cl b/libclc/opencl/lib/clspv/math/fma.cl index 0e328903ba263..5b5b13d81cf68 100644 --- a/libclc/opencl/lib/clspv/math/fma.cl +++ b/libclc/opencl/lib/clspv/math/fma.cl @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #define __CLC_FLOAT_ONLY #define __CLC_FUNCTION fma -#define __CLC_IMPL_FUNCTION(x) __clc_sw_fma +#define __CLC_IMPL_FUNCTION(x) __clc_fma #define __CLC_BODY #include diff --git a/libclc/opencl/lib/clspv/subnormal_config.cl b/libclc/opencl/lib/clspv/subnormal_config.cl deleted file mode 100644 index 114aabb2e9435..0000000000000 --- a/libclc/opencl/lib/clspv/subnormal_config.cl +++ /dev/null @@ -1,16 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -_CLC_DEF bool __clc_fp16_subnormals_supported() { return false; } - -_CLC_DEF bool __clc_fp32_subnormals_supported() { return false; } - -_CLC_DEF bool __clc_fp64_subnormals_supported() { return false; } diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES index 61757efbcaad7..410fbdee2c71f 100644 --- a/libclc/opencl/lib/generic/SOURCES +++ b/libclc/opencl/lib/generic/SOURCES @@ -1,5 +1,3 @@ -subnormal_config.cl -subnormal_helper_func.ll async/async_work_group_copy.cl async/async_work_group_strided_copy.cl async/prefetch.cl diff --git a/libclc/opencl/lib/generic/subnormal_config.cl b/libclc/opencl/lib/generic/subnormal_config.cl deleted file mode 100644 index aa2e30935e5f0..0000000000000 --- a/libclc/opencl/lib/generic/subnormal_config.cl +++ /dev/null @@ -1,18 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -_CLC_DEF bool __clc_fp16_subnormals_supported() { return false; } - -_CLC_DEF bool __clc_fp32_subnormals_supported() { return false; } - -_CLC_DEF bool __clc_fp64_subnormals_supported() { - return !__clc_subnormals_disabled(); -} diff --git a/libclc/opencl/lib/generic/subnormal_disable.ll b/libclc/opencl/lib/generic/subnormal_disable.ll deleted file mode 100644 index 732d09ff09ab4..0000000000000 --- a/libclc/opencl/lib/generic/subnormal_disable.ll +++ /dev/null @@ -1,9 +0,0 @@ -;;===----------------------------------------------------------------------===;; -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. -; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; -;;===----------------------------------------------------------------------===;; - -@__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 true diff --git a/libclc/opencl/lib/generic/subnormal_helper_func.ll b/libclc/opencl/lib/generic/subnormal_helper_func.ll deleted file mode 100644 index 03beecf979260..0000000000000 --- a/libclc/opencl/lib/generic/subnormal_helper_func.ll +++ /dev/null @@ -1,16 +0,0 @@ -;;===----------------------------------------------------------------------===;; -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. -; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; -;;===----------------------------------------------------------------------===;; - -@__CLC_SUBNORMAL_DISABLE = external global i1 - -define i1 @__clc_subnormals_disabled() #0 { - %disable = load i1, i1* @__CLC_SUBNORMAL_DISABLE - ret i1 %disable -} - -attributes #0 = { alwaysinline } diff --git a/libclc/opencl/lib/generic/subnormal_use_default.ll b/libclc/opencl/lib/generic/subnormal_use_default.ll deleted file mode 100644 index c648cc0a8aded..0000000000000 --- a/libclc/opencl/lib/generic/subnormal_use_default.ll +++ /dev/null @@ -1,9 +0,0 @@ -;;===----------------------------------------------------------------------===;; -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. -; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; -;;===----------------------------------------------------------------------===;; - -@__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 false diff --git a/libclc/opencl/lib/spirv/SOURCES b/libclc/opencl/lib/spirv/SOURCES index 0aa923978e9f1..aa7fcee0c4f4a 100644 --- a/libclc/opencl/lib/spirv/SOURCES +++ b/libclc/opencl/lib/spirv/SOURCES @@ -1,4 +1,3 @@ -subnormal_config.cl ../generic/async/async_work_group_strided_copy.cl ../generic/async/wait_group_events.cl ../generic/common/degrees.cl diff --git a/libclc/opencl/lib/spirv/math/fma.cl b/libclc/opencl/lib/spirv/math/fma.cl index 0e328903ba263..5b5b13d81cf68 100644 --- a/libclc/opencl/lib/spirv/math/fma.cl +++ b/libclc/opencl/lib/spirv/math/fma.cl @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #define __CLC_FLOAT_ONLY #define __CLC_FUNCTION fma -#define __CLC_IMPL_FUNCTION(x) __clc_sw_fma +#define __CLC_IMPL_FUNCTION(x) __clc_fma #define __CLC_BODY #include diff --git a/libclc/opencl/lib/spirv/subnormal_config.cl b/libclc/opencl/lib/spirv/subnormal_config.cl deleted file mode 100644 index 114aabb2e9435..0000000000000 --- a/libclc/opencl/lib/spirv/subnormal_config.cl +++ /dev/null @@ -1,16 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -_CLC_DEF bool __clc_fp16_subnormals_supported() { return false; } - -_CLC_DEF bool __clc_fp32_subnormals_supported() { return false; } - -_CLC_DEF bool __clc_fp64_subnormals_supported() { return false; }