1616
1717// ================================================================================
1818// this file has been auto-generated, do not modify its contents!
19- // date: 2024-11-20 10:36:45.284577
20- // git hash: 76501fda40df9e396998d11840bc8f10b11ea47b
19+ // date: 2024-11-26 13:52:06.286983
20+ // git hash: c4c6ac09808d14b5407afb06ecdecd235cd50ed3
2121// ================================================================================
2222
2323#ifndef KERNEL_FLOAT_MACROS_H
@@ -1397,16 +1397,13 @@ KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(cos)
13971397KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (tan)
13981398
13991399KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (exp)
1400- KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (exp2)
14011400KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (log)
1402- KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (log2)
14031401
14041402KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (sqrt)
14051403KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (rcp)
14061404KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (rsqrt)
14071405
1408- // This PTX is only supported on CUDA
1409- #if KERNEL_FLOAT_IS_CUDA && KERNEL_FLOAT_IS_DEVICE
1406+ #if KERNEL_FLOAT_IS_DEVICE
14101407#define KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN (T, F, EXPR_F32 ) \
14111408 namespace detail { \
14121409 template <> \
@@ -1430,6 +1427,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, sin, __sinf(input))
14301427KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , cos, __cosf(input))
14311428KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , tan, __tanf(input))
14321429
1430+ // This PTX is only supported on CUDA
1431+ #if KERNEL_FLOAT_IS_CUDA
14331432#define KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX (T, F, INSTR, REG ) \
14341433 namespace detail { \
14351434 template <> \
@@ -1446,7 +1445,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(double, rsqrt, "rsqrt.approx.f64", "d")
14461445KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , sqrt, " sqrt.approx.f32" , " f" )
14471446KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , rcp, " rcp.approx.f32" , " f" )
14481447KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , rsqrt, " rsqrt.approx.f32" , " f" )
1449- KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , tanh, " tanh.approx.f32;" , " f" )
1448+ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , tanh, " tanh.approx.f32" , " f" )
1449+ #endif
14501450
14511451#define KERNEL_FLOAT_FAST_F32_MAP (F ) \
14521452 F (exp) F(exp2) F(exp10) F(log) F(log2) F(log10) F(sin) F(cos) F(tan) F(rcp) F(rsqrt) F(sqrt)
@@ -1455,7 +1455,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, tanh, "tanh.approx.f32;", "f")
14551455// KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, cos, "cos.approx.f32", "f")
14561456// KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, exp2, "ex2.approx.f32", "f")
14571457// KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, log2, "lg2.approx.f32", "f")
1458-
1458+ #else
1459+ #define KERNEL_FLOAT_FAST_F32_MAP (F )
14591460#endif
14601461
14611462} // namespace kernel_float
0 commit comments