From e949d79893d2c466713659304e6d6ecea3ec4c45 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Thu, 9 Jan 2025 22:24:35 +0100
Subject: [PATCH 1/3] Re-enabled reciprocal sub-test in math_brute_force

---
 .../binary_operator_float.cpp                 | 72 ++++++++++++++-----
 .../math_brute_force/function_list.cpp        | 18 ++++-
 test_conformance/math_brute_force/main.cpp    |  9 +--
 .../math_brute_force/reference_math.cpp       |  7 +-
 4 files changed, 76 insertions(+), 30 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index 6f5a364521..c1f111cd3f 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -208,6 +208,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_float *s2 = 0;
     RoundingMode oldRoundMode;
 
+    bool reciprocal=strcmp(name, "reciprocal") == 0;
+    const float reciprocalArrayX [] = { 1.f };
+    const float * specialValuesX = reciprocal ? reciprocalArrayX : specialValues;
+    size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
+
     if (relaxedMode)
     {
         func = job->f->rfunc;
@@ -239,7 +244,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
     cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
     cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
+    int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
     if (job_id <= (cl_uint)lastSpecialJobIndex)
@@ -247,15 +252,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         // Insert special values
         uint32_t x, y;
 
-        x = (job_id * buffer_elements) % specialValuesCount;
+        x = (job_id * buffer_elements) % specialValuesCountX;
         y = (job_id * buffer_elements) / specialValuesCount;
 
         for (; idx < buffer_elements; idx++)
         {
-            p[idx] = ((cl_uint *)specialValues)[x];
+            p[idx] = ((cl_uint *)specialValuesX)[x];
             p2[idx] = ((cl_uint *)specialValues)[y];
             ++x;
-            if (x >= specialValuesCount)
+            if (x >= specialValuesCountX)
             {
                 x = 0;
                 y++;
@@ -269,13 +274,20 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
                 if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
             }
+            else if (relaxedMode && reciprocal)
+            {
+                cl_uint p2j = p2[idx] & 0x7fffffff;
+                // Replace values outside [2^-126, 2^126] with QNaN
+                if (p2j < 0x00807d99 || p2j > 0x7e800000)
+                    p2[idx] = 0x7fc00000;
+            }
         }
     }
 
     // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
-        p[idx] = genrand_int32(d);
+        p[idx] = reciprocal ? ((cl_uint *)specialValuesX)[0] : genrand_int32(d);
         p2[idx] = genrand_int32(d);
 
         if (relaxedMode && strcmp(name, "divide") == 0)
@@ -286,6 +298,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
             if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
         }
+        else if (relaxedMode && reciprocal)
+        {
+            cl_uint p2j = p2[idx] & 0x7fffffff;
+            // Replace values outside [2^-126, 2^126] with QNaN
+            if (p2j < 0x00807d99 || p2j > 0x7e800000)
+                p2[idx] = 0x7fc00000;
+        }
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -402,18 +421,31 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     s2 = (float *)gIn2 + thread_id * buffer_elements;
     if (gInfNanSupport)
     {
-        for (size_t j = 0; j < buffer_elements; j++)
-            r[j] = (float)func.f_ff(s[j], s2[j]);
+        if (reciprocal)
+            for (size_t j = 0; j < buffer_elements; j++)
+                r[j] = (float)func.f_f(s2[j]);
+        else
+            for (size_t j = 0; j < buffer_elements; j++)
+                r[j] = (float)func.f_ff(s[j], s2[j]);
     }
     else
     {
-        for (size_t j = 0; j < buffer_elements; j++)
-        {
-            feclearexcept(FE_OVERFLOW);
-            r[j] = (float)func.f_ff(s[j], s2[j]);
-            overflow[j] =
-                FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-        }
+        if (reciprocal)
+            for (size_t j = 0; j < buffer_elements; j++)
+            {
+                feclearexcept(FE_OVERFLOW);
+                r[j] = (float)func.f_f(s2[j]);
+                overflow[j] =
+                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
+        else
+            for (size_t j = 0; j < buffer_elements; j++)
+            {
+                feclearexcept(FE_OVERFLOW);
+                r[j] = (float)func.f_ff(s[j], s2[j]);
+                overflow[j] =
+                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
     }
 
     if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
@@ -448,7 +480,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 float test = ((float *)q)[j];
-                double correct = func.f_ff(s[j], s2[j]);
+                double correct = reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
 
                 // Per section 10 paragraph 6, accept any result if an input or
                 // output is a infinity or NaN or overflow
@@ -485,7 +517,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     }
 
                     // retry per section 6.5.3.3
-                    if (IsFloatSubnormal(s[j]))
+                    if (!reciprocal && IsFloatSubnormal(s[j]))
                     {
                         double correct2, correct3;
                         float err2, err3;
@@ -591,8 +623,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
                         if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
 
-                        correct2 = func.f_ff(s[j], 0.0);
-                        correct3 = func.f_ff(s[j], -0.0);
+                        correct2 = reciprocal ? func.f_f( 0.0) : func.f_ff(s[j], 0.0);
+                        correct3 = reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
 
                         // Per section 10 paragraph 6, accept any result if an
                         // input or output is a infinity or NaN or overflow
@@ -625,7 +657,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     }
                 }
 
-
                 if (fabsf(err) > tinfo->maxError)
                 {
                     tinfo->maxError = fabsf(err);
@@ -688,6 +719,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
     double maxErrorVal = 0.0;
     double maxErrorVal2 = 0.0;
 
+    // reciprocal differs from divide only in relaxed mode, skip otherwise
+    if ((strcmp(f->name, "reciprocal") == 0) && !relaxedMode) return CL_SUCCESS;
+
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     // Init test_info
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index 5f7b8ea0e2..d55cc6186b 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -78,6 +78,8 @@
 #define reference_copysign NULL
 #define reference_sqrt NULL
 #define reference_sqrtl NULL
+#define reference_relaxed_reciprocal NULL
+
 #define reference_divide NULL
 #define reference_dividel NULL
 #define reference_relaxed_divide NULL
@@ -346,7 +348,6 @@ const Func functionList[] = {
 
     ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
     ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
-    //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
     ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
     ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
     ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
@@ -418,6 +419,21 @@ const Func functionList[] = {
     // basic operations
     OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
     OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
+    { "reciprocal",
+      "/",
+      { nullptr },
+      { nullptr },
+      { (void*)reference_relaxed_reciprocal },
+      2.5f,
+      0.0f,
+      0.0f,
+      3.0f,
+      2.5f,
+      INFINITY,
+      FTZ_OFF,
+      RELAXED_ON,
+      binaryOperatorOF },
     { "divide",
       "/",
       { (void*)reference_divide },
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index ec504e6030..a0c6e3f7a7 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -154,7 +154,7 @@ static int doTest(const char *name)
         exit(EXIT_FAILURE);
     }
 
-    if (func_data->func.p == NULL)
+    if (func_data->func.p == NULL && func_data->rfunc.p == NULL)
     {
         vlog("'%s' is missing implementation, skipping function.\n",
              func_data->name);
@@ -308,9 +308,10 @@ static test_definition test_list[] = {
     ADD_TEST(half_log),      ADD_TEST(half_log2),  ADD_TEST(half_log10),
     ADD_TEST(half_powr),     ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
     ADD_TEST(half_sin),      ADD_TEST(half_sqrt),  ADD_TEST(half_tan),
-    ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(divide),
-    ADD_TEST(divide_cr),     ADD_TEST(multiply),   ADD_TEST(assignment),
-    ADD_TEST(not ),          ADD_TEST(erf),        ADD_TEST(erfc),
+    ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(reciprocal),
+    ADD_TEST(divide),        ADD_TEST(divide_cr),  ADD_TEST(multiply),
+    ADD_TEST(assignment),    ADD_TEST(not ),       ADD_TEST(erf),
+    ADD_TEST(erfc),
 };
 
 #undef ADD_TEST
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index 049f2013b4..adf7f4c625 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -1856,8 +1856,6 @@ double reference_logb(double x)
 
 double reference_relaxed_reciprocal(double x) { return 1.0f / ((float)x); }
 
-double reference_reciprocal(double x) { return 1.0 / x; }
-
 double reference_remainder(double x, double y)
 {
     int i;
@@ -3740,9 +3738,6 @@ long double reference_nanl(cl_ulong x)
     return (long double)u.f;
 }
 
-
-long double reference_reciprocall(long double x) { return 1.0L / x; }
-
 long double reference_remainderl(long double x, long double y)
 {
     int i;
@@ -5771,4 +5766,4 @@ long double reference_erfcl(long double x) { return erfc(x); }
 long double reference_erfl(long double x) { return erf(x); }
 
 double reference_erfc(double x) { return erfc(x); }
-double reference_erf(double x) { return erf(x); }
\ No newline at end of file
+double reference_erf(double x) { return erf(x); }

From 1092b95d3a515aab317dd83956b15e2953032900 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Fri, 10 Jan 2025 08:53:14 +0100
Subject: [PATCH 2/3] fixed clang format

---
 .../binary_operator_float.cpp                 | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index c1f111cd3f..cba638699b 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -208,9 +208,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_float *s2 = 0;
     RoundingMode oldRoundMode;
 
-    bool reciprocal=strcmp(name, "reciprocal") == 0;
-    const float reciprocalArrayX [] = { 1.f };
-    const float * specialValuesX = reciprocal ? reciprocalArrayX : specialValues;
+    bool reciprocal = strcmp(name, "reciprocal") == 0;
+    const float reciprocalArrayX[] = { 1.f };
+    const float *specialValuesX = reciprocal ? reciprocalArrayX : specialValues;
     size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
 
     if (relaxedMode)
@@ -278,8 +278,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             {
                 cl_uint p2j = p2[idx] & 0x7fffffff;
                 // Replace values outside [2^-126, 2^126] with QNaN
-                if (p2j < 0x00807d99 || p2j > 0x7e800000)
-                    p2[idx] = 0x7fc00000;
+                if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
             }
         }
     }
@@ -302,8 +301,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         {
             cl_uint p2j = p2[idx] & 0x7fffffff;
             // Replace values outside [2^-126, 2^126] with QNaN
-            if (p2j < 0x00807d99 || p2j > 0x7e800000)
-                p2[idx] = 0x7fc00000;
+            if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
         }
     }
 
@@ -480,7 +478,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 float test = ((float *)q)[j];
-                double correct = reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
+                double correct =
+                    reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
 
                 // Per section 10 paragraph 6, accept any result if an input or
                 // output is a infinity or NaN or overflow
@@ -623,8 +622,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
                         if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
 
-                        correct2 = reciprocal ? func.f_f( 0.0) : func.f_ff(s[j], 0.0);
-                        correct3 = reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
+                        correct2 =
+                            reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
+                        correct3 =
+                            reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
 
                         // Per section 10 paragraph 6, accept any result if an
                         // input or output is a infinity or NaN or overflow

From 645fa19fb6d0204cdd7806fc64b124d4d4645bc7 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Thu, 23 Jan 2025 16:40:09 +0100
Subject: [PATCH 3/3] Corrections related to code review:

- reciprocal also activated for non-relaxed float math
- reciprocal test added for fp16 and fp64
---
 .../binary_operator_double.cpp                | 38 +++++++++++----
 .../binary_operator_float.cpp                 |  3 --
 .../math_brute_force/binary_operator_half.cpp | 48 ++++++++++++++-----
 .../math_brute_force/function_list.cpp        |  8 ++--
 .../math_brute_force/reference_math.cpp       |  9 ++++
 5 files changed, 77 insertions(+), 29 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index 7600ab16a3..43cf7effb7 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -214,6 +214,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_double *s;
     cl_double *s2;
 
+    bool reciprocal = strcmp(name, "reciprocal") == 0;
+    const double reciprocalArrayX[] = { 1.0 };
+    const double *specialValuesX =
+        reciprocal ? reciprocalArrayX : specialValues;
+    size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
+
     Force64BitFPUPrecision();
 
     cl_event e[VECTOR_SIZE_COUNT];
@@ -242,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
     cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
     cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
+    int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
     // Test edge cases
@@ -252,14 +258,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         cl_double *fp2 = (cl_double *)p2;
         uint32_t x, y;
 
-        x = (job_id * buffer_elements) % specialValuesCount;
+        x = (job_id * buffer_elements) % specialValuesCountX;
         y = (job_id * buffer_elements) / specialValuesCount;
 
         for (; idx < buffer_elements; idx++)
         {
-            fp[idx] = specialValues[x];
+            fp[idx] = specialValuesX[x];
             fp2[idx] = specialValues[y];
-            if (++x >= specialValuesCount)
+            ++x;
+            if (x >= specialValuesCountX)
             {
                 x = 0;
                 y++;
@@ -271,7 +278,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
-        p[idx] = genrand_int64(d);
+        p[idx] =
+            reciprocal ? ((cl_ulong *)specialValuesX)[0] : genrand_int64(d);
         p2[idx] = genrand_int64(d);
     }
 
@@ -375,8 +383,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
     s = (cl_double *)gIn + thread_id * buffer_elements;
     s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
+    // reciprocal is unary (reads s2 only); keep full double precision.
+    if (reciprocal)
+        for (size_t j = 0; j < buffer_elements; j++)
+            r[j] = (cl_double)func.f_f(s2[j]);
+    else
+        for (size_t j = 0; j < buffer_elements; j++)
+            r[j] = (cl_double)func.f_ff(s[j], s2[j]);
 
     // Read the data back -- no need to wait for the first N-1 buffers but wait
     // for the last buffer. This is an in order queue.
@@ -406,7 +419,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
+                long double correct =
+                    reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
+
                 float err = Bruteforce_Ulp_Error_Double(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
@@ -479,8 +494,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     }
                     else if (IsDoubleSubnormal(s2[j]))
                     {
-                        long double correct2 = func.f_ff(s[j], 0.0);
-                        long double correct3 = func.f_ff(s[j], -0.0);
+                        long double correct2 =
+                            reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
+                        long double correct3 =
+                            reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
+
                         float err2 =
                             Bruteforce_Ulp_Error_Double(test, correct2);
                         float err3 =
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index cba638699b..49cfe67080 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -720,9 +720,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
     double maxErrorVal = 0.0;
     double maxErrorVal2 = 0.0;
 
-    // reciprocal differs from divide only in relaxed mode, skip otherwise
-    if ((strcmp(f->name, "reciprocal") == 0) && !relaxedMode) return CL_SUCCESS;
-
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     // Init test_info
diff --git a/test_conformance/math_brute_force/binary_operator_half.cpp b/test_conformance/math_brute_force/binary_operator_half.cpp
index b4abf49058..1a0776e399 100644
--- a/test_conformance/math_brute_force/binary_operator_half.cpp
+++ b/test_conformance/math_brute_force/binary_operator_half.cpp
@@ -120,6 +120,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     std::vector<float> s(0), s2(0);
     RoundingMode oldRoundMode;
 
+    bool reciprocal = strcmp(name, "reciprocal") == 0;
+    const cl_half reciprocalArrayHalfX[] = { 0x3c00 }; // 1.0 in fp16
+    const cl_half *specialValuesHalfX =
+        reciprocal ? reciprocalArrayHalfX : specialValuesHalf;
+    size_t specialValuesHalfCountX = reciprocal ? 1 : specialValuesHalfCount;
+
     cl_event e[VECTOR_SIZE_COUNT];
     cl_half *out[VECTOR_SIZE_COUNT];
 
@@ -148,7 +154,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements;
     cl_uint idx = 0;
     int totalSpecialValueCount =
-        specialValuesHalfCount * specialValuesHalfCount;
+        specialValuesHalfCountX * specialValuesHalfCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
     if (job_id <= (cl_uint)lastSpecialJobIndex)
@@ -156,14 +162,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
         // Insert special values
         uint32_t x, y;
 
-        x = (job_id * buffer_elements) % specialValuesHalfCount;
+        x = (job_id * buffer_elements) % specialValuesHalfCountX;
         y = (job_id * buffer_elements) / specialValuesHalfCount;
 
         for (; idx < buffer_elements; idx++)
         {
-            p[idx] = specialValuesHalf[x];
+            p[idx] = specialValuesHalfX[x];
             p2[idx] = specialValuesHalf[y];
-            if (++x >= specialValuesHalfCount)
+            ++x;
+            if (x >= specialValuesHalfCountX)
             {
                 x = 0;
                 y++;
@@ -175,7 +182,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
-        p[idx] = (cl_half)genrand_int32(d);
+        p[idx] = reciprocal ? ((cl_half *)specialValuesHalfX)[0]
+                            : (cl_half)genrand_int32(d);
         p2[idx] = (cl_half)genrand_int32(d);
     }
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -283,11 +291,23 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     s.resize(buffer_elements);
     s2.resize(buffer_elements);
 
-    for (size_t j = 0; j < buffer_elements; j++)
+    if (reciprocal)
+    {
+        for (size_t j = 0; j < buffer_elements; j++)
+        {
+            s[j] = HTF(p[j]);
+            s2[j] = HTF(p2[j]);
+            r[j] = HFF(func.f_f(s2[j]));
+        }
+    }
+    else
     {
-        s[j] = HTF(p[j]);
-        s2[j] = HTF(p2[j]);
-        r[j] = HFF(func.f_ff(s[j], s2[j]));
+        for (size_t j = 0; j < buffer_elements; j++)
+        {
+            s[j] = HTF(p[j]);
+            s2[j] = HTF(p2[j]);
+            r[j] = HFF(func.f_ff(s[j], s2[j]));
+        }
     }
 
     if (ftz) RestoreFPState(&oldMode);
@@ -320,7 +340,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             if (r[j] != q[j])
             {
                 float test = HTF(q[j]);
-                float correct = func.f_ff(s[j], s2[j]);
+                float correct =
+                    reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
 
                 // Per section 10 paragraph 6, accept any result if an input or
                 // output is a infinity or NaN or overflow
@@ -446,9 +467,10 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
                         double correct2, correct3;
                         float err2, err3;
 
-                        correct2 = func.f_ff(s[j], 0.0);
-                        correct3 = func.f_ff(s[j], -0.0);
-
+                        correct2 =
+                            reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
+                        correct3 =
+                            reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
 
                         // Per section 10 paragraph 6, accept any result if an
                         // input or output is a infinity or NaN or overflow
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index d55cc6186b..1aee78aef0 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -78,6 +78,8 @@
 #define reference_copysign NULL
 #define reference_sqrt NULL
 #define reference_sqrtl NULL
+#define reference_reciprocal NULL
+#define reference_reciprocall NULL
 #define reference_relaxed_reciprocal NULL
 
 #define reference_divide NULL
@@ -422,8 +424,8 @@ const Func functionList[] = {
     //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
     { "reciprocal",
       "/",
-      { nullptr },
-      { nullptr },
+      { (void*)reference_reciprocal },
+      { (void*)reference_reciprocall },
       { (void*)reference_relaxed_reciprocal },
       2.5f,
       0.0f,
@@ -433,7 +435,7 @@ const Func functionList[] = {
       INFINITY,
       FTZ_OFF,
       RELAXED_ON,
-      binaryOperatorOF },
+      binaryOperatorF },
     { "divide",
       "/",
       { (void*)reference_divide },
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index adf7f4c625..acde113602 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -1856,6 +1856,15 @@ double reference_logb(double x)
 
 double reference_relaxed_reciprocal(double x) { return 1.0f / ((float)x); }
 
+long double reference_reciprocall(long double y)
+{
+    double dx = 1.0;
+    double dy = y;
+    return dx / dy;
+}
+
+double reference_reciprocal(double x) { return 1.0 / x; }
+
 double reference_remainder(double x, double y)
 {
     int i;