Skip to content

Commit 76c695a

Browse files
committed
Fix compilation error on HIP due to KERNEL_FLOAT_FAST_F32_MAP
1 parent 5c859b9 commit 76c695a

File tree

2 files changed

+16
-14
lines changed

2 files changed

+16
-14
lines changed

Diff for: include/kernel_float/unops.h

+7 -6
Original file line number | Diff line number | Diff line change
@@ -212,16 +212,13 @@ KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(cos)
212212
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(tan)
213213

214214
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(exp)
215-
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(exp2)
216215
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(log)
217-
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(log2)
218216

219217
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(sqrt)
220218
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(rcp)
221219
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(rsqrt)
222220

223-
// This PTX is only supported on CUDA
224-
#if KERNEL_FLOAT_IS_CUDA && KERNEL_FLOAT_IS_DEVICE
221+
#if KERNEL_FLOAT_IS_DEVICE
225222
#define KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(T, F, EXPR_F32) \
226223
namespace detail { \
227224
template<> \
@@ -245,6 +242,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, sin, __sinf(input))
245242
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, cos, __cosf(input))
246243
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, tan, __tanf(input))
247244

245+
// This PTX is only supported on CUDA
246+
#if KERNEL_FLOAT_IS_CUDA
248247
#define KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(T, F, INSTR, REG) \
249248
namespace detail { \
250249
template<> \
@@ -261,7 +260,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(double, rsqrt, "rsqrt.approx.f64", "d")
261260
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, sqrt, "sqrt.approx.f32", "f")
262261
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, rcp, "rcp.approx.f32", "f")
263262
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, rsqrt, "rsqrt.approx.f32", "f")
264-
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, tanh, "tanh.approx.f32;", "f")
263+
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, tanh, "tanh.approx.f32", "f")
264+
#endif
265265

266266
#define KERNEL_FLOAT_FAST_F32_MAP(F) \
267267
F(exp) F(exp2) F(exp10) F(log) F(log2) F(log10) F(sin) F(cos) F(tan) F(rcp) F(rsqrt) F(sqrt)
@@ -270,7 +270,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, tanh, "tanh.approx.f32;", "f")
270270
//KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, cos, "cos.approx.f32", "f")
271271
//KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, exp2, "ex2.approx.f32", "f")
272272
//KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, log2, "lg2.approx.f32", "f")
273-
273+
#else
274+
#define KERNEL_FLOAT_FAST_F32_MAP(F)
274275
#endif
275276

276277
} // namespace kernel_float

Diff for: single_include/kernel_float.h

+9 -8
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@
1616

1717
//================================================================================
1818
// this file has been auto-generated, do not modify its contents!
19-
// date: 2024-11-20 10:36:45.284577
20-
// git hash: 76501fda40df9e396998d11840bc8f10b11ea47b
19+
// date: 2024-11-26 13:52:06.286983
20+
// git hash: c4c6ac09808d14b5407afb06ecdecd235cd50ed3
2121
//================================================================================
2222

2323
#ifndef KERNEL_FLOAT_MACROS_H
@@ -1397,16 +1397,13 @@ KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(cos)
13971397
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(tan)
13981398

13991399
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(exp)
1400-
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(exp2)
14011400
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(log)
1402-
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(log2)
14031401

14041402
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(sqrt)
14051403
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(rcp)
14061404
KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(rsqrt)
14071405

1408-
// This PTX is only supported on CUDA
1409-
#if KERNEL_FLOAT_IS_CUDA && KERNEL_FLOAT_IS_DEVICE
1406+
#if KERNEL_FLOAT_IS_DEVICE
14101407
#define KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(T, F, EXPR_F32) \
14111408
namespace detail { \
14121409
template<> \
@@ -1430,6 +1427,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, sin, __sinf(input))
14301427
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, cos, __cosf(input))
14311428
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, tan, __tanf(input))
14321429

1430+
// This PTX is only supported on CUDA
1431+
#if KERNEL_FLOAT_IS_CUDA
14331432
#define KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(T, F, INSTR, REG) \
14341433
namespace detail { \
14351434
template<> \
@@ -1446,7 +1445,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(double, rsqrt, "rsqrt.approx.f64", "d")
14461445
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, sqrt, "sqrt.approx.f32", "f")
14471446
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, rcp, "rcp.approx.f32", "f")
14481447
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, rsqrt, "rsqrt.approx.f32", "f")
1449-
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, tanh, "tanh.approx.f32;", "f")
1448+
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, tanh, "tanh.approx.f32", "f")
1449+
#endif
14501450

14511451
#define KERNEL_FLOAT_FAST_F32_MAP(F) \
14521452
F(exp) F(exp2) F(exp10) F(log) F(log2) F(log10) F(sin) F(cos) F(tan) F(rcp) F(rsqrt) F(sqrt)
@@ -1455,7 +1455,8 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, tanh, "tanh.approx.f32;", "f")
14551455
//KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, cos, "cos.approx.f32", "f")
14561456
//KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, exp2, "ex2.approx.f32", "f")
14571457
//KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, log2, "lg2.approx.f32", "f")
1458-
1458+
#else
1459+
#define KERNEL_FLOAT_FAST_F32_MAP(F)
14591460
#endif
14601461

14611462
} // namespace kernel_float

0 commit comments

Comments
 (0)