16
16
17
17
// ================================================================================
18
18
// this file has been auto-generated, do not modify its contents!
19
- // date: 2024-11-20 10:36:45.284577
20
- // git hash: 76501fda40df9e396998d11840bc8f10b11ea47b
19
+ // date: 2024-11-26 13:52:06.286983
20
+ // git hash: c4c6ac09808d14b5407afb06ecdecd235cd50ed3
21
21
// ================================================================================
22
22
23
23
#ifndef KERNEL_FLOAT_MACROS_H
@@ -1397,16 +1397,13 @@ KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(cos)
1397
1397
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (tan)
1398
1398
1399
1399
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (exp)
1400
- KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (exp2)
1401
1400
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (log)
1402
- KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (log2)
1403
1401
1404
1402
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (sqrt)
1405
1403
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (rcp)
1406
1404
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST (rsqrt)
1407
1405
1408
- // This PTX is only supported on CUDA
1409
- #if KERNEL_FLOAT_IS_CUDA && KERNEL_FLOAT_IS_DEVICE
1406
+ #if KERNEL_FLOAT_IS_DEVICE
1410
1407
#define KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN (T, F, EXPR_F32 ) \
1411
1408
namespace detail { \
1412
1409
template <> \
@@ -1430,6 +1427,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, sin, __sinf(input))
1430
1427
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , cos, __cosf(input))
1431
1428
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , tan, __tanf(input))
1432
1429
1430
+ // This PTX is only supported on CUDA
1431
+ #if KERNEL_FLOAT_IS_CUDA
1433
1432
#define KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX (T, F, INSTR, REG ) \
1434
1433
namespace detail { \
1435
1434
template <> \
@@ -1446,7 +1445,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(double, rsqrt, "rsqrt.approx.f64", "d")
1446
1445
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , sqrt, " sqrt.approx.f32" , " f" )
1447
1446
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , rcp, " rcp.approx.f32" , " f" )
1448
1447
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , rsqrt, " rsqrt.approx.f32" , " f" )
1449
- KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , tanh, " tanh.approx.f32;" , " f" )
1448
+ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , tanh, " tanh.approx.f32" , " f" )
1449
+ #endif
1450
1450
1451
1451
#define KERNEL_FLOAT_FAST_F32_MAP (F ) \
1452
1452
F (exp) F(exp2) F(exp10) F(log) F(log2) F(log10) F(sin) F(cos) F(tan) F(rcp) F(rsqrt) F(sqrt)
@@ -1455,7 +1455,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, tanh, "tanh.approx.f32;", "f")
1455
1455
// KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, cos, "cos.approx.f32", "f")
1456
1456
// KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, exp2, "ex2.approx.f32", "f")
1457
1457
// KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, log2, "lg2.approx.f32", "f")
1458
-
1458
+ #else
1459
+ #define KERNEL_FLOAT_FAST_F32_MAP (F )
1459
1460
#endif
1460
1461
1461
1462
} // namespace kernel_float
0 commit comments