Commit 023bc75

Change CUDA_ARCHITECTURES for tests to all-major
1 parent 718c4c7 commit 023bc75

File tree (4 files changed, +21 -12 lines):
  include/kernel_float/bf16.h
  include/kernel_float/fp16.h
  include/kernel_float/macros.h
  tests/CMakeLists.txt


include/kernel_float/bf16.h
Lines changed: 5 additions & 8 deletions

@@ -29,9 +29,6 @@ using bfloat16_t = __hip_bfloat16;
 using bfloat16x2_t = __hip_bfloat162;
 #endif
 
-#if KERNEL_FLOAT_IS_CUDA && __CUDA_ARCH__ >= 800
-#define KERNEL_FLOAT_BF16_OPS_SUPPORTED 1
-#endif
 
 template<>
 struct preferred_vector_size<bfloat16_t> {
@@ -80,7 +77,7 @@ struct allow_float_fallback<bfloat16_t> {
 }; \
 }
 
-#if KERNEL_FLOAT_BF16_OPS_SUPPORTED
+#if KERNEL_FLOAT_BF16_OPS_AVAILABLE
 KERNEL_FLOAT_BF16_UNARY_FUN(sin, ::hsin, ::h2sin)
 KERNEL_FLOAT_BF16_UNARY_FUN(cos, ::hcos, ::h2cos)
 
@@ -156,7 +153,7 @@ KERNEL_FLOAT_BF16_UNARY_FUN(negate, hip_hneg, hip_hneg2)
 }; \
 }
 
-#if KERNEL_FLOAT_BF16_OPS_SUPPORTED
+#if KERNEL_FLOAT_BF16_OPS_AVAILABLE
 KERNEL_FLOAT_BF16_BINARY_FUN(add, __hadd, __hadd2)
 KERNEL_FLOAT_BF16_BINARY_FUN(subtract, __hsub, __hsub2)
 KERNEL_FLOAT_BF16_BINARY_FUN(multiply, __hmul, __hmul2)
@@ -172,7 +169,7 @@ KERNEL_FLOAT_BF16_BINARY_FUN(greater, __hgt, __hgt2)
 KERNEL_FLOAT_BF16_BINARY_FUN(greater_equal, __hge, __hgt2)
 #endif
 
-#if KERNEL_FLOAT_BF16_OPS_SUPPORTED
+#if KERNEL_FLOAT_BF16_OPS_AVAILABLE
 namespace ops {
 template<>
 struct fma<bfloat16_t> {
@@ -243,7 +240,7 @@ KERNEL_FLOAT_FAST_F32_MAP(KERNEL_FLOAT_FAST_BF16_DISPATCH)
 KERNEL_FLOAT_BF16_CAST(float, __float2bfloat16(input), __bfloat162float(input))
 KERNEL_FLOAT_BF16_CAST(double, __double2bfloat16(input), __bfloat162float(input))
 
-#if KERNEL_FLOAT_BF16_OPS_SUPPORTED
+#if KERNEL_FLOAT_BF16_OPS_AVAILABLE
 // clang-format off
 // there are no official char casts. Instead, cast to int and then to char
 KERNEL_FLOAT_BF16_CAST(char, __int2bfloat16_rn(input), (char)__bfloat162int_rz(input));
@@ -297,6 +294,6 @@ struct promote_type<half_t, bfloat16_t> {
 } // namespace kernel_float
 
 #endif // KERNEL_FLOAT_FP16_AVAILABLE
-#endif
+#endif // KERNEL_FLOAT_BF16_AVAILABLE
 
 #endif //KERNEL_FLOAT_BF16_H
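For context: the condition this commit folds into KERNEL_FLOAT_BF16_OPS_AVAILABLE (CUDA with __CUDA_ARCH__ >= 800, or HIP) is what decides between native bfloat16 intrinsics and a float fallback. A minimal standalone sketch of that gate, not taken from the library, using only cuda_bf16.h intrinsics:

// Illustrative only: the arch gate that KERNEL_FLOAT_BF16_OPS_AVAILABLE
// expresses. Native bf16 arithmetic intrinsics exist from sm_80 onward;
// older architectures round-trip through float instead.
#include <cuda_bf16.h>

__device__ __nv_bfloat16 add_bf16(__nv_bfloat16 a, __nv_bfloat16 b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
    return __hadd(a, b);  // native bfloat16 add (sm_80+)
#else
    // fallback: convert to float, add, convert back
    return __float2bfloat16(__bfloat162float(a) + __bfloat162float(b));
#endif
}

Centralizing this condition in one overridable macro (see macros.h below) keeps every such gate in the headers consistent instead of repeating the raw __CUDA_ARCH__ test.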

include/kernel_float/fp16.h
Lines changed: 6 additions & 2 deletions

@@ -72,6 +72,7 @@ struct allow_float_fallback<half_t> {
 #define KERNEL_FLOAT_FP16_UNARY_FUN(NAME, FUN1, FUN2)
 #endif
 
+#if KERNEL_FLOAT_FP16_OPS_AVAILABLE
 KERNEL_FLOAT_FP16_UNARY_FUN(sin, hsin, h2sin)
 KERNEL_FLOAT_FP16_UNARY_FUN(cos, hcos, h2cos)
 
@@ -92,6 +93,7 @@ KERNEL_FLOAT_FP16_UNARY_FUN(ceil, hceil, h2ceil)
 KERNEL_FLOAT_FP16_UNARY_FUN(rint, hrint, h2rint)
 KERNEL_FLOAT_FP16_UNARY_FUN(trunc, htrunc, h2trunc)
 KERNEL_FLOAT_FP16_UNARY_FUN(negate, __hneg, __hneg2)
+#endif // KERNEL_FLOAT_FP16_OPS_AVAILABLE
 
 #if KERNEL_FLOAT_IS_DEVICE
 #define KERNEL_FLOAT_FP16_BINARY_FUN(NAME, FUN1, FUN2) \
@@ -118,10 +120,11 @@ KERNEL_FLOAT_FP16_UNARY_FUN(negate, __hneg, __hneg2)
 #endif
 
 // There are not available in HIP
+#if KERNEL_FLOAT_FP16_OPS_AVAILABLE
 #if KERNEL_FLOAT_IS_CUDA
 KERNEL_FLOAT_FP16_BINARY_FUN(min, __hmin, __hmin2)
 KERNEL_FLOAT_FP16_BINARY_FUN(max, __hmax, __hmax2)
-#endif
+#endif // KERNEL_FLOAT_IS_CUDA
 
 KERNEL_FLOAT_FP16_BINARY_FUN(add, __hadd, __hadd2)
 KERNEL_FLOAT_FP16_BINARY_FUN(subtract, __hsub, __hsub2)
@@ -134,6 +137,7 @@ KERNEL_FLOAT_FP16_BINARY_FUN(less, __hlt, __hlt2)
 KERNEL_FLOAT_FP16_BINARY_FUN(less_equal, __hle, __hle2)
 KERNEL_FLOAT_FP16_BINARY_FUN(greater, __hgt, __hgt2)
 KERNEL_FLOAT_FP16_BINARY_FUN(greater_equal, __hge, __hgt2)
+#endif // KERNEL_FLOAT_FP16_OPS_AVAILABLE
 
 #if KERNEL_FLOAT_IS_DEVICE
 namespace ops {
@@ -236,6 +240,6 @@ KERNEL_FLOAT_VECTOR_ALIAS(half, half_t)
 
 } // namespace kernel_float
 
-#endif
+#endif // KERNEL_FLOAT_FP16_AVAILABLE
 
 #endif //KERNEL_FLOAT_FP16_H
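The fp16 gate is analogous but kicks in at compute capability 5.3, matching the 530 threshold defined in macros.h. A standalone sketch, again not library code, of the kind of condition KERNEL_FLOAT_FP16_OPS_AVAILABLE stands for on the CUDA side:

// Illustrative only: half-precision arithmetic intrinsics such as __hmul
// require __CUDA_ARCH__ >= 530; earlier architectures compute in float.
#include <cuda_fp16.h>

__device__ __half scale_half(__half x, __half s) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
    return __hmul(x, s);  // native half multiply (sm_53+)
#else
    return __float2half(__half2float(x) * __half2float(s));
#endif
}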

include/kernel_float/macros.h
Lines changed: 8 additions & 0 deletions

@@ -37,10 +37,18 @@
 #define KERNEL_FLOAT_FP16_AVAILABLE (1)
 #endif // KERNEL_FLOAT_FP16_AVAILABLE
 
+#ifndef KERNEL_FLOAT_FP16_OPS_AVAILABLE
+#define KERNEL_FLOAT_FP16_OPS_AVAILABLE ((KERNEL_FLOAT_IS_CUDA && __CUDA_ARCH__ >= 530) || KERNEL_FLOAT_IS_HIP)
+#endif
+
 #ifndef KERNEL_FLOAT_BF16_AVAILABLE
 #define KERNEL_FLOAT_BF16_AVAILABLE (1)
 #endif // KERNEL_FLOAT_BF16_AVAILABLE
 
+#ifndef KERNEL_FLOAT_BF16_OPS_AVAILABLE
+#define KERNEL_FLOAT_BF16_OPS_AVAILABLE ((KERNEL_FLOAT_IS_CUDA && __CUDA_ARCH__ >= 800) || KERNEL_FLOAT_IS_HIP)
+#endif
+
 #ifndef KERNEL_FLOAT_FP8_AVAILABLE
 #ifdef __CUDACC_VER_MAJOR__
 #define KERNEL_FLOAT_FP8_AVAILABLE (__CUDACC_VER_MAJOR__ >= 12)
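Because both new definitions are wrapped in #ifndef, a downstream project can predefine the macros to pick a path explicitly, for instance to exercise the float-fallback code on hardware that does have the native intrinsics. A hypothetical example; the top-level header name kernel_float.h and the -D flag shown are assumptions, not part of this commit:

// Hypothetical override: force the bfloat16 fallback path even on sm_80+,
// e.g. for testing. Either pass -DKERNEL_FLOAT_BF16_OPS_AVAILABLE=0 to the
// compiler, or define the macro before the first include:
#define KERNEL_FLOAT_BF16_OPS_AVAILABLE 0
#include "kernel_float.h"  // assumed single-header entry point of the library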

tests/CMakeLists.txt
Lines changed: 2 additions & 2 deletions

@@ -16,9 +16,9 @@ if(${KERNEL_FLOAT_LANGUAGE_CUDA})
 
 target_compile_options(kernel_float_tests PRIVATE "-ftime-report -ftime-report-details")
 target_compile_options(kernel_float_tests PRIVATE "--extended-lambda")
-set_target_properties(kernel_float_tests PROPERTIES CUDA_ARCHITECTURES "70;80")
+set_target_properties(kernel_float_tests PROPERTIES CUDA_ARCHITECTURES "all-major")
 endif()
 
 if(${KERNEL_FLOAT_LANGUAGE_HIP})
 set_source_files_properties(${FILES} PROPERTIES LANGUAGE HIP)
-endif()
+endif()
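Building the tests for CUDA_ARCHITECTURES "all-major" (rather than just "70;80") means both sides of the >= 530 and >= 800 gates above get compiled at least once. An illustrative sketch, not part of the test suite, of how the compiled path differs per architecture:

#include <cstdio>

// Each real architecture selects a different branch at compile time, so
// compiling for all major architectures gives compile coverage to every path.
__global__ void which_path() {
#if __CUDA_ARCH__ >= 800
    printf("native fp16 and bf16 intrinsic paths\n");
#elif __CUDA_ARCH__ >= 530
    printf("native fp16 path, float fallback for bf16\n");
#else
    printf("float fallback for both fp16 and bf16\n");
#endif
}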
