Commit 023bc75

Change CUDA_ARCHITECTURES for tests to all-major
1 parent 718c4c7 commit 023bc75

File tree (4 files changed, +21 -12 lines):
  include/kernel_float/bf16.h
  include/kernel_float/fp16.h
  include/kernel_float/macros.h
  tests/CMakeLists.txt


include/kernel_float/bf16.h
Lines changed: 5 additions & 8 deletions

@@ -29,9 +29,6 @@ using bfloat16_t = __hip_bfloat16;
 using bfloat16x2_t = __hip_bfloat162;
 #endif
 
-#if KERNEL_FLOAT_IS_CUDA && __CUDA_ARCH__ >= 800
-#define KERNEL_FLOAT_BF16_OPS_SUPPORTED 1
-#endif
 
 template<>
 struct preferred_vector_size<bfloat16_t> {
@@ -80,7 +77,7 @@ struct allow_float_fallback<bfloat16_t> {
 }; \
 }
 
-#if KERNEL_FLOAT_BF16_OPS_SUPPORTED
+#if KERNEL_FLOAT_BF16_OPS_AVAILABLE
 KERNEL_FLOAT_BF16_UNARY_FUN(sin, ::hsin, ::h2sin)
 KERNEL_FLOAT_BF16_UNARY_FUN(cos, ::hcos, ::h2cos)
 
@@ -156,7 +153,7 @@ KERNEL_FLOAT_BF16_UNARY_FUN(negate, hip_hneg, hip_hneg2)
 }; \
 }
 
-#if KERNEL_FLOAT_BF16_OPS_SUPPORTED
+#if KERNEL_FLOAT_BF16_OPS_AVAILABLE
 KERNEL_FLOAT_BF16_BINARY_FUN(add, __hadd, __hadd2)
 KERNEL_FLOAT_BF16_BINARY_FUN(subtract, __hsub, __hsub2)
 KERNEL_FLOAT_BF16_BINARY_FUN(multiply, __hmul, __hmul2)
@@ -172,7 +169,7 @@ KERNEL_FLOAT_BF16_BINARY_FUN(greater, __hgt, __hgt2)
 KERNEL_FLOAT_BF16_BINARY_FUN(greater_equal, __hge, __hgt2)
 #endif
 
-#if KERNEL_FLOAT_BF16_OPS_SUPPORTED
+#if KERNEL_FLOAT_BF16_OPS_AVAILABLE
 namespace ops {
 template<>
 struct fma<bfloat16_t> {
@@ -243,7 +240,7 @@ KERNEL_FLOAT_FAST_F32_MAP(KERNEL_FLOAT_FAST_BF16_DISPATCH)
 KERNEL_FLOAT_BF16_CAST(float, __float2bfloat16(input), __bfloat162float(input))
 KERNEL_FLOAT_BF16_CAST(double, __double2bfloat16(input), __bfloat162float(input))
 
-#if KERNEL_FLOAT_BF16_OPS_SUPPORTED
+#if KERNEL_FLOAT_BF16_OPS_AVAILABLE
 // clang-format off
 // there are no official char casts. Instead, cast to int and then to char
 KERNEL_FLOAT_BF16_CAST(char, __int2bfloat16_rn(input), (char)__bfloat162int_rz(input));
@@ -297,6 +294,6 @@ struct promote_type<half_t, bfloat16_t> {
 } // namespace kernel_float
 
 #endif // KERNEL_FLOAT_FP16_AVAILABLE
-#endif
+#endif // KERNEL_FLOAT_BF16_AVAILABLE
 
 #endif //KERNEL_FLOAT_BF16_H
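For context: the condition this commit folds into KERNEL_FLOAT_BF16_OPS_AVAILABLE (CUDA with __CUDA_ARCH__ >= 800, or HIP) is what decides between native bfloat16 intrinsics and a float fallback. A minimal standalone sketch of that gate, not taken from the library, using only cuda_bf16.h intrinsics:

// Illustrative only: the arch gate that KERNEL_FLOAT_BF16_OPS_AVAILABLE
// expresses. Native bf16 arithmetic intrinsics exist from sm_80 onward;
// older architectures round-trip through float instead.
#include <cuda_bf16.h>

__device__ __nv_bfloat16 add_bf16(__nv_bfloat16 a, __nv_bfloat16 b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
    return __hadd(a, b);  // native bfloat16 add (sm_80+)
#else
    // fallback: convert to float, add, convert back
    return __float2bfloat16(__bfloat162float(a) + __bfloat162float(b));
#endif
}

Centralizing this condition in one overridable macro (see macros.h below) keeps every such gate in the headers consistent instead of repeating the raw __CUDA_ARCH__ test.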

include/kernel_float/fp16.h
Lines changed: 6 additions & 2 deletions

@@ -72,6 +72,7 @@ struct allow_float_fallback<half_t> {
 #define KERNEL_FLOAT_FP16_UNARY_FUN(NAME, FUN1, FUN2)
 #endif
 
+#if KERNEL_FLOAT_FP16_OPS_AVAILABLE
 KERNEL_FLOAT_FP16_UNARY_FUN(sin, hsin, h2sin)
 KERNEL_FLOAT_FP16_UNARY_FUN(cos, hcos, h2cos)
 
@@ -92,6 +93,7 @@ KERNEL_FLOAT_FP16_UNARY_FUN(ceil, hceil, h2ceil)
 KERNEL_FLOAT_FP16_UNARY_FUN(rint, hrint, h2rint)
 KERNEL_FLOAT_FP16_UNARY_FUN(trunc, htrunc, h2trunc)
 KERNEL_FLOAT_FP16_UNARY_FUN(negate, __hneg, __hneg2)
+#endif // KERNEL_FLOAT_FP16_OPS_AVAILABLE
 
 #if KERNEL_FLOAT_IS_DEVICE
 #define KERNEL_FLOAT_FP16_BINARY_FUN(NAME, FUN1, FUN2) \
@@ -118,10 +120,11 @@ KERNEL_FLOAT_FP16_UNARY_FUN(negate, __hneg, __hneg2)
 #endif
 
 // There are not available in HIP
+#if KERNEL_FLOAT_FP16_OPS_AVAILABLE
 #if KERNEL_FLOAT_IS_CUDA
 KERNEL_FLOAT_FP16_BINARY_FUN(min, __hmin, __hmin2)
 KERNEL_FLOAT_FP16_BINARY_FUN(max, __hmax, __hmax2)
-#endif
+#endif // KERNEL_FLOAT_IS_CUDA
 
 KERNEL_FLOAT_FP16_BINARY_FUN(add, __hadd, __hadd2)
 KERNEL_FLOAT_FP16_BINARY_FUN(subtract, __hsub, __hsub2)
@@ -134,6 +137,7 @@ KERNEL_FLOAT_FP16_BINARY_FUN(less, __hlt, __hlt2)
 KERNEL_FLOAT_FP16_BINARY_FUN(less_equal, __hle, __hle2)
 KERNEL_FLOAT_FP16_BINARY_FUN(greater, __hgt, __hgt2)
 KERNEL_FLOAT_FP16_BINARY_FUN(greater_equal, __hge, __hgt2)
+#endif // KERNEL_FLOAT_FP16_OPS_AVAILABLE
 
 #if KERNEL_FLOAT_IS_DEVICE
 namespace ops {
@@ -236,6 +240,6 @@ KERNEL_FLOAT_VECTOR_ALIAS(half, half_t)
 
 } // namespace kernel_float
 
-#endif
+#endif // KERNEL_FLOAT_FP16_AVAILABLE
 
 #endif //KERNEL_FLOAT_FP16_H
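The fp16 gate is analogous but kicks in at compute capability 5.3, matching the 530 threshold defined in macros.h. A standalone sketch, again not library code, of the kind of condition KERNEL_FLOAT_FP16_OPS_AVAILABLE stands for on the CUDA side:

// Illustrative only: half-precision arithmetic intrinsics such as __hmul
// require __CUDA_ARCH__ >= 530; earlier architectures compute in float.
#include <cuda_fp16.h>

__device__ __half scale_half(__half x, __half s) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
    return __hmul(x, s);  // native half multiply (sm_53+)
#else
    return __float2half(__half2float(x) * __half2float(s));
#endif
}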

include/kernel_float/macros.h
Lines changed: 8 additions & 0 deletions

@@ -37,10 +37,18 @@
 #define KERNEL_FLOAT_FP16_AVAILABLE (1)
 #endif // KERNEL_FLOAT_FP16_AVAILABLE
 
+#ifndef KERNEL_FLOAT_FP16_OPS_AVAILABLE
+#define KERNEL_FLOAT_FP16_OPS_AVAILABLE ((KERNEL_FLOAT_IS_CUDA && __CUDA_ARCH__ >= 530) || KERNEL_FLOAT_IS_HIP)
+#endif
+
 #ifndef KERNEL_FLOAT_BF16_AVAILABLE
 #define KERNEL_FLOAT_BF16_AVAILABLE (1)
 #endif // KERNEL_FLOAT_BF16_AVAILABLE
 
+#ifndef KERNEL_FLOAT_BF16_OPS_AVAILABLE
+#define KERNEL_FLOAT_BF16_OPS_AVAILABLE ((KERNEL_FLOAT_IS_CUDA && __CUDA_ARCH__ >= 800) || KERNEL_FLOAT_IS_HIP)
+#endif
+
 #ifndef KERNEL_FLOAT_FP8_AVAILABLE
 #ifdef __CUDACC_VER_MAJOR__
 #define KERNEL_FLOAT_FP8_AVAILABLE (__CUDACC_VER_MAJOR__ >= 12)
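Because both new definitions are wrapped in #ifndef, a downstream project can predefine the macros to pick a path explicitly, for instance to exercise the float-fallback code on hardware that does have the native intrinsics. A hypothetical example; the top-level header name kernel_float.h and the -D flag shown are assumptions, not part of this commit:

// Hypothetical override: force the bfloat16 fallback path even on sm_80+,
// e.g. for testing. Either pass -DKERNEL_FLOAT_BF16_OPS_AVAILABLE=0 to the
// compiler, or define the macro before the first include:
#define KERNEL_FLOAT_BF16_OPS_AVAILABLE 0
#include "kernel_float.h"  // assumed single-header entry point of the library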

tests/CMakeLists.txt
Lines changed: 2 additions & 2 deletions

@@ -16,9 +16,9 @@ if(${KERNEL_FLOAT_LANGUAGE_CUDA})
 
 target_compile_options(kernel_float_tests PRIVATE "-ftime-report -ftime-report-details")
 target_compile_options(kernel_float_tests PRIVATE "--extended-lambda")
-set_target_properties(kernel_float_tests PROPERTIES CUDA_ARCHITECTURES "70;80")
+set_target_properties(kernel_float_tests PROPERTIES CUDA_ARCHITECTURES "all-major")
 endif()
 
 if(${KERNEL_FLOAT_LANGUAGE_HIP})
 set_source_files_properties(${FILES} PROPERTIES LANGUAGE HIP)
-endif()
+endif()
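Building the tests for CUDA_ARCHITECTURES "all-major" (rather than just "70;80") means both sides of the >= 530 and >= 800 gates above get compiled at least once. An illustrative sketch, not part of the test suite, of how the compiled path differs per architecture:

#include <cstdio>

// Each real architecture selects a different branch at compile time, so
// compiling for all major architectures gives compile coverage to every path.
__global__ void which_path() {
#if __CUDA_ARCH__ >= 800
    printf("native fp16 and bf16 intrinsic paths\n");
#elif __CUDA_ARCH__ >= 530
    printf("native fp16 path, float fallback for bf16\n");
#else
    printf("float fallback for both fp16 and bf16\n");
#endif
}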
