Expose streams in binaryop APIs (rapidsai#14187)
Contributes to rapidsai#925

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)

URL: rapidsai#14187
vyasr authored Oct 4, 2023
1 parent 29556a2 commit d87e181
Showing 5 changed files with 147 additions and 6 deletions.
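
For context (not part of this diff), a minimal usage sketch of the new parameter: the stream argument now precedes the memory resource and defaults to cudf::get_default_stream(), so existing callers compile unchanged while new callers can pass their own stream. The helper name and INT32 types below are illustrative only.

#include <cudf/binaryop.hpp>
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <memory>

// Sketch: run an elementwise ADD on a caller-provided stream.
std::unique_ptr<cudf::column> add_columns_on(cudf::column_view const& lhs,
                                             cudf::column_view const& rhs,
                                             rmm::cuda_stream_view stream)
{
  return cudf::binary_operation(
    lhs, rhs, cudf::binary_operator::ADD, cudf::data_type{cudf::type_id::INT32}, stream);
}
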
8 changes: 8 additions & 0 deletions cpp/include/cudf/binaryop.hpp
@@ -102,6 +102,7 @@ enum class binary_operator : int32_t {
* @param rhs The right operand column
* @param op The binary operator
* @param output_type The desired data type of the output column
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Output column of `output_type` type containing the result of
* the binary operation
@@ -115,6 +116,7 @@ std::unique_ptr<column> binary_operation(
column_view const& rhs,
binary_operator op,
data_type output_type,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
@@ -131,6 +133,7 @@ std::unique_ptr<column> binary_operation(
* @param rhs The right operand scalar
* @param op The binary operator
* @param output_type The desired data type of the output column
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Output column of `output_type` type containing the result of
* the binary operation
@@ -144,6 +147,7 @@ std::unique_ptr<column> binary_operation(
scalar const& rhs,
binary_operator op,
data_type output_type,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
@@ -158,6 +162,7 @@ std::unique_ptr<column> binary_operation(
* @param rhs The right operand column
* @param op The binary operator
* @param output_type The desired data type of the output column
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Output column of `output_type` type containing the result of
* the binary operation
@@ -172,6 +177,7 @@ std::unique_ptr<column> binary_operation(
column_view const& rhs,
binary_operator op,
data_type output_type,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
@@ -189,6 +195,7 @@ std::unique_ptr<column> binary_operation(
* @param output_type The desired data type of the output column. It is assumed
* that output_type is compatible with the output data type
* of the function in the PTX code
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Output column of `output_type` type containing the result of
* the binary operation
@@ -201,6 +208,7 @@ std::unique_ptr<column> binary_operation(
column_view const& rhs,
std::string const& ptx,
data_type output_type,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
12 changes: 8 additions & 4 deletions cpp/src/binaryop/binaryop.cpp
@@ -405,38 +405,42 @@ std::unique_ptr<column> binary_operation(scalar const& lhs,
column_view const& rhs,
binary_operator op,
data_type output_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr);
return detail::binary_operation(lhs, rhs, op, output_type, stream, mr);
}
std::unique_ptr<column> binary_operation(column_view const& lhs,
scalar const& rhs,
binary_operator op,
data_type output_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr);
return detail::binary_operation(lhs, rhs, op, output_type, stream, mr);
}
std::unique_ptr<column> binary_operation(column_view const& lhs,
column_view const& rhs,
binary_operator op,
data_type output_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr);
return detail::binary_operation(lhs, rhs, op, output_type, stream, mr);
}

std::unique_ptr<column> binary_operation(column_view const& lhs,
column_view const& rhs,
std::string const& ptx,
data_type output_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::binary_operation(lhs, rhs, ptx, output_type, cudf::get_default_stream(), mr);
return detail::binary_operation(lhs, rhs, ptx, output_type, stream, mr);
}

} // namespace cudf
6 changes: 4 additions & 2 deletions cpp/src/binaryop/compiled/binary_ops.cu
@@ -47,14 +47,16 @@ namespace {
struct scalar_as_column_view {
using return_type = typename std::pair<column_view, std::unique_ptr<column>>;
template <typename T, CUDF_ENABLE_IF(is_fixed_width<T>())>
return_type operator()(scalar const& s, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
return_type operator()(scalar const& s,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource*)
{
auto& h_scalar_type_view = static_cast<cudf::scalar_type_t<T>&>(const_cast<scalar&>(s));
auto col_v = column_view(s.type(),
1,
h_scalar_type_view.data(),
reinterpret_cast<bitmask_type const*>(s.validity_data()),
!s.is_valid());
!s.is_valid(stream));
return std::pair{col_v, std::unique_ptr<column>(nullptr)};
}
template <typename T, CUDF_ENABLE_IF(!is_fixed_width<T>())>
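
A note on the is_valid change above (a sketch only, relying on cudf::scalar's existing stream-taking is_valid overload, not on anything new in this diff): the scalar's validity flag lives in device memory, so reading it issues a device-to-host copy on the given stream; forwarding the caller's stream keeps that read stream-ordered with the rest of the operation instead of touching the default stream.

#include <cudf/scalar/scalar.hpp>

#include <rmm/cuda_stream_view.hpp>

// Sketch: the validity check itself is stream-ordered.
bool scalar_is_valid_on(cudf::scalar const& s, rmm::cuda_stream_view stream)
{
  return s.is_valid(stream);  // reads the device-side flag using `stream`
}
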
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
@@ -622,6 +622,7 @@ ConfigureTest(
STREAM_IDENTIFICATION_TEST identify_stream_usage/test_default_stream_identification.cu
)

ConfigureTest(STREAM_BINARYOP_TEST streams/binaryop_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_COPYING_TEST streams/copying_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing)
126 changes: 126 additions & 0 deletions cpp/tests/streams/binaryop_test.cpp
@@ -0,0 +1,126 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <tests/binaryop/util/runtime_support.h>

#include <cudf/binaryop.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/scalar/scalar.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/default_stream.hpp>

class BinaryopTest : public cudf::test::BaseFixture {};

TEST_F(BinaryopTest, ColumnColumn)
{
cudf::test::fixed_width_column_wrapper<int32_t> lhs{10, 20, 30, 40, 50};
cudf::test::fixed_width_column_wrapper<int32_t> rhs{15, 25, 35, 45, 55};

cudf::binary_operation(lhs,
rhs,
cudf::binary_operator::ADD,
cudf::data_type(cudf::type_to_id<int32_t>()),
cudf::test::get_default_stream());
}

TEST_F(BinaryopTest, ColumnScalar)
{
cudf::test::fixed_width_column_wrapper<int32_t> lhs{10, 20, 30, 40, 50};
cudf::numeric_scalar<int32_t> rhs{23, true, cudf::test::get_default_stream()};

cudf::binary_operation(lhs,
rhs,
cudf::binary_operator::ADD,
cudf::data_type(cudf::type_to_id<int32_t>()),
cudf::test::get_default_stream());
}

TEST_F(BinaryopTest, ScalarColumn)
{
cudf::numeric_scalar<int32_t> lhs{42, true, cudf::test::get_default_stream()};
cudf::test::fixed_width_column_wrapper<int32_t> rhs{15, 25, 35, 45, 55};

cudf::binary_operation(lhs,
rhs,
cudf::binary_operator::ADD,
cudf::data_type(cudf::type_to_id<int32_t>()),
cudf::test::get_default_stream());
}

class BinaryopPTXTest : public BinaryopTest {
protected:
void SetUp() override
{
if (!can_do_runtime_jit()) { GTEST_SKIP() << "Skipping tests that require 11.5 runtime"; }
}
};

TEST_F(BinaryopPTXTest, ColumnColumnPTX)
{
cudf::test::fixed_width_column_wrapper<int32_t> lhs{10, 20, 30, 40, 50};
cudf::test::fixed_width_column_wrapper<int64_t> rhs{15, 25, 35, 45, 55};

// c = a*a*a + b*b
char const* ptx =
R"***(
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-24817639
// Cuda compilation tools, release 10.0, V10.0.130
// Based on LLVM 3.4svn
//
.version 6.3
.target sm_70
.address_size 64
// .globl _ZN8__main__7add$241Eix
.common .global .align 8 .u64 _ZN08NumbaEnv8__main__7add$241Eix;
.common .global .align 8 .u64 _ZN08NumbaEnv5numba7targets7numbers14int_power_impl12$3clocals$3e13int_power$242Exx;
.visible .func (.param .b32 func_retval0) _ZN8__main__7add$241Eix(
.param .b64 _ZN8__main__7add$241Eix_param_0,
.param .b32 _ZN8__main__7add$241Eix_param_1,
.param .b64 _ZN8__main__7add$241Eix_param_2
)
{
.reg .b32 %r<3>;
.reg .b64 %rd<8>;
ld.param.u64 %rd1, [_ZN8__main__7add$241Eix_param_0];
ld.param.u32 %r1, [_ZN8__main__7add$241Eix_param_1];
ld.param.u64 %rd2, [_ZN8__main__7add$241Eix_param_2];
cvt.s64.s32 %rd3, %r1;
mul.wide.s32 %rd4, %r1, %r1;
mul.lo.s64 %rd5, %rd4, %rd3;
mul.lo.s64 %rd6, %rd2, %rd2;
add.s64 %rd7, %rd6, %rd5;
st.u64 [%rd1], %rd7;
mov.u32 %r2, 0;
st.param.b32 [func_retval0+0], %r2;
ret;
}
)***";

cudf::binary_operation(
lhs, rhs, ptx, cudf::data_type(cudf::type_to_id<int32_t>()), cudf::test::get_default_stream());
cudf::binary_operation(lhs, rhs, ptx, cudf::data_type(cudf::type_to_id<int64_t>()));
}
