From e949d79893d2c466713659304e6d6ecea3ec4c45 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 9 Jan 2025 22:24:35 +0100 Subject: [PATCH 1/3] Re-enabled reciprocal sub-test in math_brute_force --- .../binary_operator_float.cpp | 72 ++++++++++++++----- .../math_brute_force/function_list.cpp | 18 ++++- test_conformance/math_brute_force/main.cpp | 9 +-- .../math_brute_force/reference_math.cpp | 7 +- 4 files changed, 76 insertions(+), 30 deletions(-) diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 6f5a364521..c1f111cd3f 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -208,6 +208,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_float *s2 = 0; RoundingMode oldRoundMode; + bool reciprocal=strcmp(name, "reciprocal") == 0; + const float reciprocalArrayX [] = { 1.f }; + const float * specialValuesX = reciprocal ? reciprocalArrayX : specialValues; + size_t specialValuesCountX = reciprocal ? 
1 : specialValuesCount; + if (relaxedMode) { func = job->f->rfunc; @@ -239,7 +244,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; cl_uint idx = 0; - int totalSpecialValueCount = specialValuesCount * specialValuesCount; + int totalSpecialValueCount = specialValuesCountX * specialValuesCount; int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; if (job_id <= (cl_uint)lastSpecialJobIndex) @@ -247,15 +252,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // Insert special values uint32_t x, y; - x = (job_id * buffer_elements) % specialValuesCount; + x = (job_id * buffer_elements) % specialValuesCountX; y = (job_id * buffer_elements) / specialValuesCount; for (; idx < buffer_elements; idx++) { - p[idx] = ((cl_uint *)specialValues)[x]; + p[idx] = ((cl_uint *)specialValuesX)[x]; p2[idx] = ((cl_uint *)specialValues)[y]; ++x; - if (x >= specialValuesCount) + if (x >= specialValuesCountX) { x = 0; y++; @@ -269,13 +274,20 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000; if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000; } + else if (relaxedMode && reciprocal) + { + cl_uint p2j = p2[idx] & 0x7fffffff; + // Replace values outside [2^-126, 2^126] with QNaN + if (p2j < 0x00807d99 || p2j > 0x7e800000) + p2[idx] = 0x7fc00000; + } } } // Init any remaining values for (; idx < buffer_elements; idx++) { - p[idx] = genrand_int32(d); + p[idx] = reciprocal ? 
((cl_uint *)specialValuesX)[0] : genrand_int32(d); p2[idx] = genrand_int32(d); if (relaxedMode && strcmp(name, "divide") == 0) @@ -286,6 +298,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000; if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000; } + else if (relaxedMode && reciprocal) + { + cl_uint p2j = p2[idx] & 0x7fffffff; + // Replace values outside [2^-126, 2^126] with QNaN + if (p2j < 0x00807d99 || p2j > 0x7e800000) + p2[idx] = 0x7fc00000; + } } if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, @@ -402,18 +421,31 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) s2 = (float *)gIn2 + thread_id * buffer_elements; if (gInfNanSupport) { - for (size_t j = 0; j < buffer_elements; j++) - r[j] = (float)func.f_ff(s[j], s2[j]); + if (reciprocal) + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (float)func.f_f(s2[j]); + else + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (float)func.f_ff(s[j], s2[j]); } else { - for (size_t j = 0; j < buffer_elements; j++) - { - feclearexcept(FE_OVERFLOW); - r[j] = (float)func.f_ff(s[j], s2[j]); - overflow[j] = - FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); - } + if (reciprocal) + for (size_t j = 0; j < buffer_elements; j++) + { + feclearexcept(FE_OVERFLOW); + r[j] = (float)func.f_f(s2[j]); + overflow[j] = + FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } + else + for (size_t j = 0; j < buffer_elements; j++) + { + feclearexcept(FE_OVERFLOW); + r[j] = (float)func.f_ff(s[j], s2[j]); + overflow[j] = + FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } } if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat); @@ -448,7 +480,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (t[j] != q[j]) { float test = ((float *)q)[j]; - double correct = func.f_ff(s[j], s2[j]); + double correct = reciprocal ? 
func.f_f(s2[j]) : func.f_ff(s[j], s2[j]); // Per section 10 paragraph 6, accept any result if an input or // output is a infinity or NaN or overflow @@ -485,7 +517,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } // retry per section 6.5.3.3 - if (IsFloatSubnormal(s[j])) + if (!reciprocal && IsFloatSubnormal(s[j])) { double correct2, correct3; float err2, err3; @@ -591,8 +623,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (!gInfNanSupport) feclearexcept(FE_OVERFLOW); - correct2 = func.f_ff(s[j], 0.0); - correct3 = func.f_ff(s[j], -0.0); + correct2 = reciprocal ? func.f_f( 0.0) : func.f_ff(s[j], 0.0); + correct3 = reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0); // Per section 10 paragraph 6, accept any result if an // input or output is a infinity or NaN or overflow @@ -625,7 +657,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } } - if (fabsf(err) > tinfo->maxError) { tinfo->maxError = fabsf(err); @@ -688,6 +719,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, double maxErrorVal = 0.0; double maxErrorVal2 = 0.0; + // reciprocal differs from divide only in relaxed mode, skip otherwise + if ((strcmp(f->name, "reciprocal") == 0) && !relaxedMode) return CL_SUCCESS; + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp index 5f7b8ea0e2..d55cc6186b 100644 --- a/test_conformance/math_brute_force/function_list.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -78,6 +78,8 @@ #define reference_copysign NULL #define reference_sqrt NULL #define reference_sqrtl NULL +#define reference_relaxed_reciprocal NULL + #define reference_divide NULL #define reference_dividel NULL #define reference_relaxed_divide NULL @@ -346,7 +348,6 @@ const Func functionList[] = { ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i), ENTRY(powr, 16.0f, 16.0f, 4.0f, 
FTZ_OFF, binaryF), - //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF), ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF), ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i), ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF), @@ -418,6 +419,21 @@ const Func functionList[] = { // basic operations OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF), OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF), + //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF), + { "reciprocal", + "/", + { nullptr }, + { nullptr }, + { (void*)reference_relaxed_reciprocal }, + 2.5f, + 0.0f, + 0.0f, + 3.0f, + 2.5f, + INFINITY, + FTZ_OFF, + RELAXED_ON, + binaryOperatorOF }, { "divide", "/", { (void*)reference_divide }, diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index ec504e6030..a0c6e3f7a7 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -154,7 +154,7 @@ static int doTest(const char *name) exit(EXIT_FAILURE); } - if (func_data->func.p == NULL) + if (func_data->func.p == NULL && func_data->rfunc.p == NULL) { vlog("'%s' is missing implementation, skipping function.\n", func_data->name); @@ -308,9 +308,10 @@ static test_definition test_list[] = { ADD_TEST(half_log), ADD_TEST(half_log2), ADD_TEST(half_log10), ADD_TEST(half_powr), ADD_TEST(half_recip), ADD_TEST(half_rsqrt), ADD_TEST(half_sin), ADD_TEST(half_sqrt), ADD_TEST(half_tan), - ADD_TEST(add), ADD_TEST(subtract), ADD_TEST(divide), - ADD_TEST(divide_cr), ADD_TEST(multiply), ADD_TEST(assignment), - ADD_TEST(not ), ADD_TEST(erf), ADD_TEST(erfc), + ADD_TEST(add), ADD_TEST(subtract), ADD_TEST(reciprocal), + ADD_TEST(divide), ADD_TEST(divide_cr), ADD_TEST(multiply), + ADD_TEST(assignment), ADD_TEST(not ), ADD_TEST(erf), + ADD_TEST(erfc), }; #undef ADD_TEST diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 
049f2013b4..adf7f4c625 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -1856,8 +1856,6 @@ double reference_logb(double x) double reference_relaxed_reciprocal(double x) { return 1.0f / ((float)x); } -double reference_reciprocal(double x) { return 1.0 / x; } - double reference_remainder(double x, double y) { int i; @@ -3740,9 +3738,6 @@ long double reference_nanl(cl_ulong x) return (long double)u.f; } - -long double reference_reciprocall(long double x) { return 1.0L / x; } - long double reference_remainderl(long double x, long double y) { int i; @@ -5771,4 +5766,4 @@ long double reference_erfcl(long double x) { return erfc(x); } long double reference_erfl(long double x) { return erf(x); } double reference_erfc(double x) { return erfc(x); } -double reference_erf(double x) { return erf(x); } \ No newline at end of file +double reference_erf(double x) { return erf(x); } From 1092b95d3a515aab317dd83956b15e2953032900 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Fri, 10 Jan 2025 08:53:14 +0100 Subject: [PATCH 2/3] fixed clang format --- .../binary_operator_float.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index c1f111cd3f..cba638699b 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -208,9 +208,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_float *s2 = 0; RoundingMode oldRoundMode; - bool reciprocal=strcmp(name, "reciprocal") == 0; - const float reciprocalArrayX [] = { 1.f }; - const float * specialValuesX = reciprocal ? reciprocalArrayX : specialValues; + bool reciprocal = strcmp(name, "reciprocal") == 0; + const float reciprocalArrayX[] = { 1.f }; + const float *specialValuesX = reciprocal ? 
reciprocalArrayX : specialValues; size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount; if (relaxedMode) @@ -278,8 +278,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { cl_uint p2j = p2[idx] & 0x7fffffff; // Replace values outside [2^-126, 2^126] with QNaN - if (p2j < 0x00807d99 || p2j > 0x7e800000) - p2[idx] = 0x7fc00000; + if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000; } } } @@ -302,8 +301,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { cl_uint p2j = p2[idx] & 0x7fffffff; // Replace values outside [2^-126, 2^126] with QNaN - if (p2j < 0x00807d99 || p2j > 0x7e800000) - p2[idx] = 0x7fc00000; + if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000; } } @@ -480,7 +478,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (t[j] != q[j]) { float test = ((float *)q)[j]; - double correct = reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]); + double correct = + reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]); // Per section 10 paragraph 6, accept any result if an input or // output is a infinity or NaN or overflow @@ -623,8 +622,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (!gInfNanSupport) feclearexcept(FE_OVERFLOW); - correct2 = reciprocal ? func.f_f( 0.0) : func.f_ff(s[j], 0.0); - correct3 = reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0); + correct2 = + reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0); + correct3 = + reciprocal ? 
func.f_f(-0.0) : func.f_ff(s[j], -0.0); // Per section 10 paragraph 6, accept any result if an // input or output is a infinity or NaN or overflow From 645fa19fb6d0204cdd7806fc64b124d4d4645bc7 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 23 Jan 2025 16:40:09 +0100 Subject: [PATCH 3/3] Corrections related to code review: -reciprocal activated for relaxed float math -reciprocal test added for fp16 and fp64 --- .../binary_operator_double.cpp | 38 +++++++++++---- .../binary_operator_float.cpp | 3 -- .../math_brute_force/binary_operator_half.cpp | 48 ++++++++++++++----- .../math_brute_force/function_list.cpp | 8 ++-- .../math_brute_force/reference_math.cpp | 9 ++++ 5 files changed, 77 insertions(+), 29 deletions(-) diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 7600ab16a3..43cf7effb7 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -214,6 +214,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_double *s; cl_double *s2; + bool reciprocal = strcmp(name, "reciprocal") == 0; + const double reciprocalArrayX[] = { 1.0 }; + const double *specialValuesX = + reciprocal ? reciprocalArrayX : specialValues; + size_t specialValuesCountX = reciprocal ? 
1 : specialValuesCount; + Force64BitFPUPrecision(); cl_event e[VECTOR_SIZE_COUNT]; @@ -242,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements; cl_uint idx = 0; - int totalSpecialValueCount = specialValuesCount * specialValuesCount; + int totalSpecialValueCount = specialValuesCountX * specialValuesCount; int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; // Test edge cases @@ -252,14 +258,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_double *fp2 = (cl_double *)p2; uint32_t x, y; - x = (job_id * buffer_elements) % specialValuesCount; + x = (job_id * buffer_elements) % specialValuesCountX; y = (job_id * buffer_elements) / specialValuesCount; for (; idx < buffer_elements; idx++) { - fp[idx] = specialValues[x]; + fp[idx] = specialValuesX[x]; fp2[idx] = specialValues[y]; - if (++x >= specialValuesCount) + ++x; + if (x >= specialValuesCountX) { x = 0; y++; @@ -271,7 +278,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // Init any remaining values for (; idx < buffer_elements; idx++) { - p[idx] = genrand_int64(d); + p[idx] = + reciprocal ? ((cl_ulong *)specialValuesX)[0] : genrand_int64(d); p2[idx] = genrand_int64(d); } @@ -375,8 +383,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) r = (cl_double *)gOut_Ref + thread_id * buffer_elements; s = (cl_double *)gIn + thread_id * buffer_elements; s2 = (cl_double *)gIn2 + thread_id * buffer_elements; - for (size_t j = 0; j < buffer_elements; j++) - r[j] = (cl_double)func.f_ff(s[j], s2[j]); + + if (reciprocal) + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (cl_double)func.f_f(s2[j]); + else + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (cl_double)func.f_ff(s[j], s2[j]); // Read the data back -- no need to wait for the first N-1 buffers but wait // for the last buffer. This is an in order queue. 
@@ -406,7 +419,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (t[j] != q[j]) { cl_double test = ((cl_double *)q)[j]; - long double correct = func.f_ff(s[j], s2[j]); + long double correct = + reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]); + float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= ulps); @@ -479,8 +494,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } else if (IsDoubleSubnormal(s2[j])) { - long double correct2 = func.f_ff(s[j], 0.0); - long double correct3 = func.f_ff(s[j], -0.0); + long double correct2 = + reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0); + long double correct3 = + reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0); + float err2 = Bruteforce_Ulp_Error_Double(test, correct2); float err3 = diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index cba638699b..49cfe67080 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -720,9 +720,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, double maxErrorVal = 0.0; double maxErrorVal2 = 0.0; - // reciprocal differs from divide only in relaxed mode, skip otherwise - if ((strcmp(f->name, "reciprocal") == 0) && !relaxedMode) return CL_SUCCESS; - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info diff --git a/test_conformance/math_brute_force/binary_operator_half.cpp b/test_conformance/math_brute_force/binary_operator_half.cpp index b4abf49058..1a0776e399 100644 --- a/test_conformance/math_brute_force/binary_operator_half.cpp +++ b/test_conformance/math_brute_force/binary_operator_half.cpp @@ -120,6 +120,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) std::vector s(0), s2(0); RoundingMode oldRoundMode; + bool reciprocal = strcmp(name, "reciprocal") == 0; + const cl_half reciprocalArrayHalfX[] 
= { 0x3c00 }; + const cl_half *specialValuesHalfX = + reciprocal ? reciprocalArrayHalfX : specialValuesHalf; + size_t specialValuesHalfCountX = reciprocal ? 1 : specialValuesHalfCount; + cl_event e[VECTOR_SIZE_COUNT]; cl_half *out[VECTOR_SIZE_COUNT]; @@ -148,7 +154,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements; cl_uint idx = 0; int totalSpecialValueCount = - specialValuesHalfCount * specialValuesHalfCount; + specialValuesHalfCountX * specialValuesHalfCount; int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; if (job_id <= (cl_uint)lastSpecialJobIndex) @@ -156,14 +162,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) // Insert special values uint32_t x, y; - x = (job_id * buffer_elements) % specialValuesHalfCount; + x = (job_id * buffer_elements) % specialValuesHalfCountX; y = (job_id * buffer_elements) / specialValuesHalfCount; for (; idx < buffer_elements; idx++) { - p[idx] = specialValuesHalf[x]; + p[idx] = specialValuesHalfX[x]; p2[idx] = specialValuesHalf[y]; - if (++x >= specialValuesHalfCount) + ++x; + if (x >= specialValuesHalfCountX) { x = 0; y++; @@ -175,7 +182,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) // Init any remaining values for (; idx < buffer_elements; idx++) { - p[idx] = (cl_half)genrand_int32(d); + p[idx] = reciprocal ? 
((cl_half *)specialValuesHalfX)[0] + : (cl_half)genrand_int32(d); p2[idx] = (cl_half)genrand_int32(d); } if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, @@ -283,11 +291,23 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) s.resize(buffer_elements); s2.resize(buffer_elements); - for (size_t j = 0; j < buffer_elements; j++) + if (reciprocal) + { + for (size_t j = 0; j < buffer_elements; j++) + { + s[j] = HTF(p[j]); + s2[j] = HTF(p2[j]); + r[j] = HFF(func.f_f(s2[j])); + } + } + else { - s[j] = HTF(p[j]); - s2[j] = HTF(p2[j]); - r[j] = HFF(func.f_ff(s[j], s2[j])); + for (size_t j = 0; j < buffer_elements; j++) + { + s[j] = HTF(p[j]); + s2[j] = HTF(p2[j]); + r[j] = HFF(func.f_ff(s[j], s2[j])); + } } if (ftz) RestoreFPState(&oldMode); @@ -320,7 +340,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) if (r[j] != q[j]) { float test = HTF(q[j]); - float correct = func.f_ff(s[j], s2[j]); + float correct = + reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]); // Per section 10 paragraph 6, accept any result if an input or // output is a infinity or NaN or overflow @@ -446,9 +467,10 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) double correct2, correct3; float err2, err3; - correct2 = func.f_ff(s[j], 0.0); - correct3 = func.f_ff(s[j], -0.0); - + correct2 = + reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0); + correct3 = + reciprocal ? 
func.f_f(-0.0) : func.f_ff(s[j], -0.0); // Per section 10 paragraph 6, accept any result if an // input or output is a infinity or NaN or overflow diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp index d55cc6186b..1aee78aef0 100644 --- a/test_conformance/math_brute_force/function_list.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -78,6 +78,8 @@ #define reference_copysign NULL #define reference_sqrt NULL #define reference_sqrtl NULL +#define reference_reciprocal NULL +#define reference_reciprocall NULL #define reference_relaxed_reciprocal NULL #define reference_divide NULL @@ -422,8 +424,8 @@ const Func functionList[] = { //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF), { "reciprocal", "/", - { nullptr }, - { nullptr }, + { (void*)reference_reciprocal }, + { (void*)reference_reciprocall }, { (void*)reference_relaxed_reciprocal }, 2.5f, 0.0f, @@ -433,7 +435,7 @@ const Func functionList[] = { INFINITY, FTZ_OFF, RELAXED_ON, - binaryOperatorOF }, + binaryOperatorF }, { "divide", "/", { (void*)reference_divide }, diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index adf7f4c625..acde113602 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -1856,6 +1856,15 @@ double reference_logb(double x) double reference_relaxed_reciprocal(double x) { return 1.0f / ((float)x); } +long double reference_reciprocall(long double y) +{ + double dx = 1.0; + double dy = y; + return dx / dy; +} + +double reference_reciprocal(double x) { return 1.0 / x; } + double reference_remainder(double x, double y) { int i;