From 782e2a7939afb989169510bd072ddb7ed59d58e3 Mon Sep 17 00:00:00 2001
From: Yunsong Wang <yunsongw@nvidia.com>
Date: Mon, 6 Jan 2025 11:03:31 -0800
Subject: [PATCH] Enable binaryop build without relying on relaxed constexpr
 (#17598)

Contributes to #7795

This PR updates `binaryop` to build without depending on nvcc's relaxed constexpr build option (`--expt-relaxed-constexpr`).

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/17598
---
 cpp/include/cudf/fixed_point/fixed_point.hpp | 44 ++++++++++++++------
 cpp/src/binaryop/compiled/binary_ops.cuh     | 40 +++++++++---------
 2 files changed, 52 insertions(+), 32 deletions(-)
diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp
index ea2f5d4b6ca..5edbb322231 100644
--- a/cpp/include/cudf/fixed_point/fixed_point.hpp
+++ b/cpp/include/cudf/fixed_point/fixed_point.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -60,7 +60,7 @@ enum class Radix : int32_t { BASE_2 = 2, BASE_10 = 10 };
  * @return `true` if the type is supported by `fixed_point` implementation
  */
 template <typename T>
-constexpr inline auto is_supported_representation_type()
+CUDF_HOST_DEVICE constexpr inline auto is_supported_representation_type()
 {
   return cuda::std::is_same_v<T, int32_t> ||  //
          cuda::std::is_same_v<T, int64_t> ||  //
@@ -72,6 +72,24 @@ constexpr inline auto is_supported_representation_type()
 // Helper functions for `fixed_point` type
 namespace detail {
 
+/**
+ * @brief Returns the smaller of the given scales
+ *
+ * Uses a single comparison on every compilation path, so the result does not depend on
+ * whether `__CUDA_ARCH__` is defined and no `min` overload has to be found by name lookup.
+ *
+ * @param a The left-hand side value to compare
+ * @param b The right-hand side value to compare
+ * @return The smaller of the given scales; `a` if the two compare equal
+ */
+CUDF_HOST_DEVICE constexpr inline scale_type min(scale_type const& a, scale_type const& b)
+{
+  // TODO This is a temporary workaround because <cuda/std/functional> is not self-contained when
+  // built with NVRTC 11.8. Replace this with cuda::std::min once the underlying issue is resolved.
+  // Same selection rule as std::min: pick `b` only when it is strictly smaller than `a`.
+  return b < a ? b : a;
+}
+
 /**
  * @brief A function for integer exponentiation by squaring.
  *
@@ -267,12 +285,12 @@ class fixed_point {
    * @return The `fixed_point` number in base 10 (aka human readable format)
    */
   template <typename U, typename cuda::std::enable_if_t<cuda::std::is_integral_v<U>>* = nullptr>
-  explicit constexpr operator U() const
+  CUDF_HOST_DEVICE explicit constexpr operator U() const
   {
     // Cast to the larger of the two types (of U and Rep) before converting to Rep because in
     // certain cases casting to U before shifting will result in integer overflow (i.e. if U =
     // int32_t, Rep = int64_t and _value > 2 billion)
-    auto const value = std::common_type_t<U, Rep>(_value);
+    auto const value = cuda::std::common_type_t<U, Rep>(_value);
     return static_cast<U>(detail::shift<Rep, Rad>(value, scale_type{-_scale}));
   }
 
@@ -669,7 +687,7 @@ template <typename Rep1, Radix Rad1>
 CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1> operator+(fixed_point<Rep1, Rad1> const& lhs,
                                                           fixed_point<Rep1, Rad1> const& rhs)
 {
-  auto const scale = std::min(lhs._scale, rhs._scale);
+  auto const scale = detail::min(lhs._scale, rhs._scale);
   auto const sum   = lhs.rescaled(scale)._value + rhs.rescaled(scale)._value;
 
 #if defined(__CUDACC_DEBUG__)
@@ -687,7 +705,7 @@ template <typename Rep1, Radix Rad1>
 CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1> operator-(fixed_point<Rep1, Rad1> const& lhs,
                                                           fixed_point<Rep1, Rad1> const& rhs)
 {
-  auto const scale = std::min(lhs._scale, rhs._scale);
+  auto const scale = detail::min(lhs._scale, rhs._scale);
   auto const diff  = lhs.rescaled(scale)._value - rhs.rescaled(scale)._value;
 
 #if defined(__CUDACC_DEBUG__)
@@ -735,7 +753,7 @@ template <typename Rep1, Radix Rad1>
 CUDF_HOST_DEVICE inline bool operator==(fixed_point<Rep1, Rad1> const& lhs,
                                         fixed_point<Rep1, Rad1> const& rhs)
 {
-  auto const scale = std::min(lhs._scale, rhs._scale);
+  auto const scale = detail::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value == rhs.rescaled(scale)._value;
 }
 
@@ -744,7 +762,7 @@ template <typename Rep1, Radix Rad1>
 CUDF_HOST_DEVICE inline bool operator!=(fixed_point<Rep1, Rad1> const& lhs,
                                         fixed_point<Rep1, Rad1> const& rhs)
 {
-  auto const scale = std::min(lhs._scale, rhs._scale);
+  auto const scale = detail::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value != rhs.rescaled(scale)._value;
 }
 
@@ -753,7 +771,7 @@ template <typename Rep1, Radix Rad1>
 CUDF_HOST_DEVICE inline bool operator<=(fixed_point<Rep1, Rad1> const& lhs,
                                         fixed_point<Rep1, Rad1> const& rhs)
 {
-  auto const scale = std::min(lhs._scale, rhs._scale);
+  auto const scale = detail::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value <= rhs.rescaled(scale)._value;
 }
 
@@ -762,7 +780,7 @@ template <typename Rep1, Radix Rad1>
 CUDF_HOST_DEVICE inline bool operator>=(fixed_point<Rep1, Rad1> const& lhs,
                                         fixed_point<Rep1, Rad1> const& rhs)
 {
-  auto const scale = std::min(lhs._scale, rhs._scale);
+  auto const scale = detail::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value >= rhs.rescaled(scale)._value;
 }
 
@@ -771,7 +789,7 @@ template <typename Rep1, Radix Rad1>
 CUDF_HOST_DEVICE inline bool operator<(fixed_point<Rep1, Rad1> const& lhs,
                                        fixed_point<Rep1, Rad1> const& rhs)
 {
-  auto const scale = std::min(lhs._scale, rhs._scale);
+  auto const scale = detail::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value < rhs.rescaled(scale)._value;
 }
 
@@ -780,7 +798,7 @@ template <typename Rep1, Radix Rad1>
 CUDF_HOST_DEVICE inline bool operator>(fixed_point<Rep1, Rad1> const& lhs,
                                        fixed_point<Rep1, Rad1> const& rhs)
 {
-  auto const scale = std::min(lhs._scale, rhs._scale);
+  auto const scale = detail::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value > rhs.rescaled(scale)._value;
 }
 
@@ -789,7 +807,7 @@ template <typename Rep1, Radix Rad1>
 CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1> operator%(fixed_point<Rep1, Rad1> const& lhs,
                                                           fixed_point<Rep1, Rad1> const& rhs)
 {
-  auto const scale     = std::min(lhs._scale, rhs._scale);
+  auto const scale     = detail::min(lhs._scale, rhs._scale);
   auto const remainder = lhs.rescaled(scale)._value % rhs.rescaled(scale)._value;
   return fixed_point<Rep1, Rad1>{scaled_integer<Rep1>{remainder, scale}};
 }
diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh
index 0e31a0b6cf5..2f255e7a07c 100644
--- a/cpp/src/binaryop/compiled/binary_ops.cuh
+++ b/cpp/src/binaryop/compiled/binary_ops.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,6 +27,8 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/type_traits>
+
 namespace cudf {
 namespace binops {
 namespace compiled {
@@ -51,7 +53,7 @@ struct type_casted_accessor {
   {
     if constexpr (column_device_view::has_element_accessor<Element>()) {
       auto const element = col.element<Element>(is_scalar ? 0 : i);
-      if constexpr (std::is_convertible_v<Element, CastType>) {
+      if constexpr (cuda::std::is_convertible_v<Element, CastType>) {
         return static_cast<CastType>(element);
       } else if constexpr (is_fixed_point<Element>() && cuda::std::is_floating_point_v<CastType>) {
         return convert_fixed_to_floating<CastType>(element);
@@ -75,7 +77,7 @@ struct typed_casted_writer {
                                     FromType val) const
   {
     if constexpr (mutable_column_device_view::has_element_accessor<Element>() and
-                  std::is_constructible_v<Element, FromType>) {
+                  cuda::std::is_constructible_v<Element, FromType>) {
       col.element<Element>(i) = static_cast<Element>(val);
     } else if constexpr (is_fixed_point<Element>()) {
       auto const scale = numeric::scale_type{col.type().scale()};
@@ -109,18 +111,18 @@ struct ops_wrapper {
   template <typename TypeCommon>
   __device__ void operator()(size_type i)
   {
-    if constexpr (std::is_invocable_v<BinaryOperator, TypeCommon, TypeCommon>) {
+    if constexpr (cuda::std::is_invocable_v<BinaryOperator, TypeCommon, TypeCommon>) {
       TypeCommon x =
         type_dispatcher(lhs.type(), type_casted_accessor<TypeCommon>{}, i, lhs, is_lhs_scalar);
       TypeCommon y =
         type_dispatcher(rhs.type(), type_casted_accessor<TypeCommon>{}, i, rhs, is_rhs_scalar);
       auto result = [&]() {
-        if constexpr (std::is_same_v<BinaryOperator, ops::NullEquals> or
-                      std::is_same_v<BinaryOperator, ops::NullNotEquals> or
-                      std::is_same_v<BinaryOperator, ops::NullLogicalAnd> or
-                      std::is_same_v<BinaryOperator, ops::NullLogicalOr> or
-                      std::is_same_v<BinaryOperator, ops::NullMax> or
-                      std::is_same_v<BinaryOperator, ops::NullMin>) {
+        if constexpr (cuda::std::is_same_v<BinaryOperator, ops::NullEquals> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullNotEquals> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullLogicalAnd> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullLogicalOr> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullMax> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullMin>) {
           bool output_valid = false;
           auto result       = BinaryOperator{}.template operator()<TypeCommon, TypeCommon>(
             x,
@@ -134,7 +136,7 @@ struct ops_wrapper {
           return BinaryOperator{}.template operator()<TypeCommon, TypeCommon>(x, y);
         }
         // To suppress nvcc warning
-        return std::invoke_result_t<BinaryOperator, TypeCommon, TypeCommon>{};
+        return cuda::std::invoke_result_t<BinaryOperator, TypeCommon, TypeCommon>{};
       }();
       if constexpr (is_bool_result<BinaryOperator, TypeCommon, TypeCommon>())
         out.element<decltype(result)>(i) = result;
@@ -161,16 +163,16 @@ struct ops2_wrapper {
   __device__ void operator()(size_type i)
   {
     if constexpr (!has_common_type_v<TypeLhs, TypeRhs> and
-                  std::is_invocable_v<BinaryOperator, TypeLhs, TypeRhs>) {
+                  cuda::std::is_invocable_v<BinaryOperator, TypeLhs, TypeRhs>) {
       TypeLhs x   = lhs.element<TypeLhs>(is_lhs_scalar ? 0 : i);
       TypeRhs y   = rhs.element<TypeRhs>(is_rhs_scalar ? 0 : i);
       auto result = [&]() {
-        if constexpr (std::is_same_v<BinaryOperator, ops::NullEquals> or
-                      std::is_same_v<BinaryOperator, ops::NullNotEquals> or
-                      std::is_same_v<BinaryOperator, ops::NullLogicalAnd> or
-                      std::is_same_v<BinaryOperator, ops::NullLogicalOr> or
-                      std::is_same_v<BinaryOperator, ops::NullMax> or
-                      std::is_same_v<BinaryOperator, ops::NullMin>) {
+        if constexpr (cuda::std::is_same_v<BinaryOperator, ops::NullEquals> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullNotEquals> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullLogicalAnd> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullLogicalOr> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullMax> or
+                      cuda::std::is_same_v<BinaryOperator, ops::NullMin>) {
           bool output_valid = false;
           auto result       = BinaryOperator{}.template operator()<TypeLhs, TypeRhs>(
             x,
@@ -184,7 +186,7 @@ struct ops2_wrapper {
           return BinaryOperator{}.template operator()<TypeLhs, TypeRhs>(x, y);
         }
         // To suppress nvcc warning
-        return std::invoke_result_t<BinaryOperator, TypeLhs, TypeRhs>{};
+        return cuda::std::invoke_result_t<BinaryOperator, TypeLhs, TypeRhs>{};
       }();
       if constexpr (is_bool_result<BinaryOperator, TypeLhs, TypeRhs>())
         out.element<decltype(result)>(i) = result;