Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into hll
Browse files Browse the repository at this point in the history
  • Loading branch information
Chong Gao committed Dec 18, 2024
2 parents e29d5a1 + 7a0d39d commit 9f7ec44
Show file tree
Hide file tree
Showing 37 changed files with 1,592 additions and 577 deletions.
8 changes: 4 additions & 4 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,12 +235,12 @@ and build inside WSL2, e.g.
### Testing
Java tests are in the `src/test` directory and c++ tests are in the `src/main/cpp/tests` directory.
The c++ tests are built with the `-DBUILD_TESTS` command line option and will build into the
`target/cmake-build/gtests/` directory. Due to building inside the docker container, it is possible
`target/jni/cmake-build/gtests/` directory. Due to building inside the docker container, it is possible
that the host environment does not match the container well enough to run these executables, resulting
in errors finding libraries. The script `build/run-in-docker` was created to help with this
situation. A test can be run directly using this script or the script can be run without any
arguments to get into an interactive shell inside the container.
```build/run-in-docker target/cmake-build/gtests/ROW_CONVERSION```
```build/run-in-docker target/jni/cmake-build/gtests/ROW_CONVERSION```

#### Testing with Compute Sanitizer
[Compute Sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) is a
Expand Down Expand Up @@ -311,12 +311,12 @@ in the cuDF [CONTRIBUTING](thirdparty/cudf/CONTRIBUTING.md) guide.
### Benchmarks
C++ benchmarks are written using NVBench and are in the `src/main/cpp/benchmarks` directory.
Building these benchmarks requires the `-DBUILD_BENCHMARKS` build option. Once built, the benchmarks
can be found in the `target/cmake-build/benchmarks/` directory. Due to building inside the docker
can be found in the `target/jni/cmake-build/benchmarks/` directory. Due to building inside the docker
container, it is possible that the host environment does not match the container well enough to
run these executables, resulting in errors finding libraries. The script `build/run-in-docker`
was created to help with this situation. A benchmark can be run directly using this script or the
script can be run without any arguments to get into an interactive shell inside the container.
```build/run-in-docker target/cmake-build/benchmarks/ROW_CONVERSION_BENCH```
```build/run-in-docker target/jni/cmake-build/benchmarks/ROW_CONVERSION_BENCH```
## Code contributions

### Your first issue
Expand Down
144 changes: 144 additions & 0 deletions build/buildcpp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/bin/bash
#
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Script to build native code in cudf and spark-rapids-jni
#

set -e

# When Maven launches this script (FROM_MAVEN=true), print a hint showing how
# to repeat the native build by hand from inside the build container.
if [[ "$FROM_MAVEN" == "true" ]]; then
  # Unquoted heredoc: "$0" expands to this script's path, a "$" followed by a
  # space stays literal. The trailing empty line reproduces the blank line
  # that ends the message.
  cat <<EOF
Building native libraries. To rerun outside Maven enter the build environment via
$ ./build/run-in-docker
then run
$ REUSE_ENV=true $0

EOF
fi

# Disable items on arm64 due to missing dependencies in the CUDA toolkit.
# These assignments happen before the defaults are applied further down, so
# they take precedence over any value supplied by the caller.
case "$(uname -m)" in
  aarch64)
    USE_GDS="OFF"        # cuFile RDMA libraries are missing
    BUILD_FAULTINJ="OFF" # libcupti_static.a is missing
    ;;
esac

# Environment variables to control the build.
#
# Unless REUSE_ENV=true, the effective settings (caller-provided values or the
# defaults below) are written to "$PROJECT_BUILD_DIR/buildcpp-env.sh" and then
# sourced. With REUSE_ENV=true the file from a previous run is sourced as-is,
# which lets the build be repeated outside Maven with identical settings.
PROJECT_BASE_DIR=${PROJECT_BASE_DIR:-$(realpath "$(dirname "$0")/..")}
PROJECT_BUILD_DIR=${PROJECT_BUILD_DIR:-$PROJECT_BASE_DIR/target}
BUILD_ENV_FILE="$PROJECT_BUILD_DIR/buildcpp-env.sh"

# Print one "NAME=value" assignment. The value is shell-quoted (%q) so that
# semicolon-separated lists (e.g. GPU_ARCHS=75;86), generator names containing
# spaces (e.g. "Unix Makefiles") and paths with spaces survive being sourced.
write_build_setting() {
  printf '%s=%q\n' "$1" "$2"
}

if [[ "$REUSE_ENV" != "true" ]]; then
  # The build directory may not exist yet on a clean checkout.
  mkdir -p "$PROJECT_BUILD_DIR"
  {
    write_build_setting BUILD_BENCHMARKS "${BUILD_BENCHMARKS:-ON}"
    write_build_setting BUILD_CUDF_BENCHMARKS "${BUILD_CUDF_BENCHMARKS:-OFF}"
    write_build_setting BUILD_CUDF_TESTS "${BUILD_CUDF_TESTS:-OFF}"
    write_build_setting BUILD_FAULTINJ "${BUILD_FAULTINJ:-ON}"
    write_build_setting BUILD_PROFILER "${BUILD_PROFILER:-ON}"
    write_build_setting BUILD_TESTS "${BUILD_TESTS:-ON}"
    # CMAKE_GENERATOR is exported because cmake reads it from the environment.
    write_build_setting "export CMAKE_GENERATOR" "${CMAKE_GENERATOR:-Ninja}"
    write_build_setting CPP_PARALLEL_LEVEL "${CPP_PARALLEL_LEVEL:-10}"
    write_build_setting CUDF_BUILD_TYPE "${CUDF_BUILD_TYPE:-Release}"
    write_build_setting CUDF_PATH "${CUDF_PATH:-$PROJECT_BASE_DIR/thirdparty/cudf}"
    write_build_setting CUDF_PIN_PATH "${CUDF_PIN_PATH:-$PROJECT_BASE_DIR/thirdparty/cudf-pins}"
    write_build_setting CUDF_USE_PER_THREAD_DEFAULT_STREAM "${CUDF_USE_PER_THREAD_DEFAULT_STREAM:-ON}"
    write_build_setting GPU_ARCHS "${GPU_ARCHS:-RAPIDS}"
    write_build_setting LIBCUDF_BUILD_CONFIGURE "${LIBCUDF_BUILD_CONFIGURE:-false}"
    write_build_setting LIBCUDF_BUILD_PATH "${LIBCUDF_BUILD_PATH:-$PROJECT_BUILD_DIR/libcudf/cmake-build}"
    write_build_setting LIBCUDF_DEPENDENCY_MODE "${LIBCUDF_DEPENDENCY_MODE:-pinned}"
    write_build_setting LIBCUDF_INSTALL_PATH "${LIBCUDF_INSTALL_PATH:-$PROJECT_BUILD_DIR/libcudf-install}"
    write_build_setting LIBCUDFJNI_BUILD_PATH "${LIBCUDFJNI_BUILD_PATH:-$PROJECT_BUILD_DIR/libcudfjni}"
    write_build_setting SPARK_JNI_BUILD_PATH "${SPARK_JNI_BUILD_PATH:-$PROJECT_BUILD_DIR/jni/cmake-build}"
    write_build_setting RMM_LOGGING_LEVEL "${RMM_LOGGING_LEVEL:-OFF}"
    write_build_setting USE_GDS "${USE_GDS:-OFF}"
  } > "$BUILD_ENV_FILE"
elif [[ ! -f "$BUILD_ENV_FILE" ]]; then
  # REUSE_ENV only makes sense after a previous run has produced the file.
  echo "REUSE_ENV=true but $BUILD_ENV_FILE does not exist; run the build once without REUSE_ENV first" >&2
  exit 1
fi

source "$BUILD_ENV_FILE"

#
# libcudf build
#
mkdir -p "$LIBCUDF_INSTALL_PATH" "$LIBCUDF_BUILD_PATH"
cd "$LIBCUDF_BUILD_PATH"

# The explicit configure step runs only when requested through
# LIBCUDF_BUILD_CONFIGURE=true or when the build directory has no
# CMakeCache.txt yet.
if [[ $LIBCUDF_BUILD_CONFIGURE == true ]] || [[ ! -f $LIBCUDF_BUILD_PATH/CMakeCache.txt ]]; then
  echo "Configuring cudf native libs"
  # Arguments are kept in the same order as they are handed to cmake.
  libcudf_configure_args=(
    "$CUDF_PATH/cpp"
    "-DBUILD_BENCHMARKS=$BUILD_CUDF_BENCHMARKS"
    "-DBUILD_SHARED_LIBS=OFF"
    "-DBUILD_TESTS=$BUILD_CUDF_TESTS"
    "-DCMAKE_BUILD_TYPE=$CUDF_BUILD_TYPE"
    "-DCMAKE_CUDA_ARCHITECTURES=$GPU_ARCHS"
    "-DCMAKE_INSTALL_PREFIX=$LIBCUDF_INSTALL_PATH"
    "-DCUDF_DEPENDENCY_PIN_MODE=$LIBCUDF_DEPENDENCY_MODE"
    "-DCUDA_STATIC_CUFILE=ON"
    "-DCUDA_STATIC_RUNTIME=ON"
    "-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$CUDF_USE_PER_THREAD_DEFAULT_STREAM"
    "-DCUDF_KVIKIO_REMOTE_IO=OFF"
    "-DCUDF_LARGE_STRINGS_DISABLED=ON"
    "-DLIBCUDF_LOGGING_LEVEL=$RMM_LOGGING_LEVEL"
    "-DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL"
    "-C=$CUDF_PIN_PATH/setup.cmake"
  )
  cmake "${libcudf_configure_args[@]}"
fi

# Build and install into LIBCUDF_INSTALL_PATH (the configured install prefix).
echo "Building cudf native libs"
cmake --build "$LIBCUDF_BUILD_PATH" --target install "-j$CPP_PARALLEL_LEVEL"

#
# libcudfjni build
#
mkdir -p "$LIBCUDFJNI_BUILD_PATH"
cd "$LIBCUDFJNI_BUILD_PATH"

echo "Configuring cudfjni native libs"
# Arguments are kept in the same order as they are handed to cmake.
cudfjni_configure_args=(
  "$CUDF_PATH/java/src/main/native"
  "-DBUILD_SHARED_LIBS=OFF"
  "-DCUDA_STATIC_CUFILE=ON"
  "-DCUDA_STATIC_RUNTIME=ON"
  "-DCUDF_DEPENDENCY_PIN_MODE=pinned"
  "-DCUDF_JNI_LIBCUDF_STATIC=ON"
  "-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$CUDF_USE_PER_THREAD_DEFAULT_STREAM"
  "-DGPU_ARCHS=$GPU_ARCHS"
  "-DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL"
  "-DUSE_GDS=$USE_GDS"
  "-C=$CUDF_PIN_PATH/setup.cmake"
)
# The libcudf build directory and the cudf source root are passed to the
# configure step through the environment of this one cmake invocation.
CUDF_CPP_BUILD_DIR="$LIBCUDF_BUILD_PATH" \
CUDF_ROOT="$CUDF_PATH" \
  cmake "${cudfjni_configure_args[@]}"

echo "Building cudfjni native libs"
cmake --build "$LIBCUDFJNI_BUILD_PATH" "-j$CPP_PARALLEL_LEVEL"

#
# sparkjni build
#
mkdir -p "$SPARK_JNI_BUILD_PATH"
cd "$SPARK_JNI_BUILD_PATH"

echo "Configuring spark-rapids-jni native libs"
# Arguments are kept in the same order as they are handed to cmake.
sparkjni_configure_args=(
  "$PROJECT_BASE_DIR/src/main/cpp"
  "-DBUILD_BENCHMARKS=$BUILD_BENCHMARKS"
  "-DBUILD_FAULTINJ=$BUILD_FAULTINJ"
  "-DBUILD_PROFILER=$BUILD_PROFILER"
  "-DBUILD_TESTS=$BUILD_TESTS"
  "-DCUDF_DEPENDENCY_PIN_MODE=pinned"
  "-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$CUDF_USE_PER_THREAD_DEFAULT_STREAM"
  "-DGPU_ARCHS=$GPU_ARCHS"
  "-DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL"
  "-DUSE_GDS=$USE_GDS"
  "-C=$CUDF_PIN_PATH/setup.cmake"
)
# Locations of the cudf sources, the libcudf build and install trees, and the
# cudfjni build tree are passed through the environment of this one cmake
# invocation.
CUDF_CPP_BUILD_DIR="$LIBCUDF_BUILD_PATH" \
CUDF_ROOT="$CUDF_PATH" \
CUDF_INSTALL_DIR="$LIBCUDF_INSTALL_PATH" \
CUDFJNI_BUILD_DIR="$LIBCUDFJNI_BUILD_PATH" \
  cmake "${sparkjni_configure_args[@]}"

echo "Building spark-rapids-jni native libs"
cmake --build "$SPARK_JNI_BUILD_PATH" "-j$CPP_PARALLEL_LEVEL"
2 changes: 1 addition & 1 deletion ci/submodule-sync.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ set +e
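# Don't do a full build. Just try to update/build CUDF with no patches on top of it.
# Calling the antrun directly skips applying patches. Note that the buildcpp execution
# runs build/buildcpp.sh, which builds libcudf, libcudfjni and the spark-rapids-jni native libs.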
${MVN} antrun:run@build-libcudf ${MVN_MIRROR} \
${MVN} antrun:run@buildcpp ${MVN_MIRROR} \
-DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
-Dlibcudf.build.configure=true \
-Dlibcudf.dependency.mode=latest \
Expand Down
139 changes: 26 additions & 113 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -430,123 +430,36 @@
</goals>
</execution>
<execution>
<id>build-libcudf</id>
<phase>validate</phase>
<configuration>
<target xmlns:if="ant:if">
<condition property="needConfigure">
<or>
<istrue value="${libcudf.build.configure}"/>
<not>
<available file="${libcudf.build.path}/CMakeCache.txt"/>
</not>
</or>
</condition>
<mkdir dir="${libcudf.install.path}"/>
<mkdir dir="${libcudf.build.path}"/>
<exec dir="${libcudf.build.path}"
failonerror="true"
executable="cmake"
if:true="${needConfigure}">
<arg value="${cudf.path}/cpp"/>
<arg value="-DCMAKE_BUILD_TYPE=${CUDF_BUILD_TYPE}"/>
<arg value="-DBUILD_SHARED_LIBS=OFF"/>
<arg value="-DBUILD_TESTS=${BUILD_CUDF_TESTS}"/>
<arg value="-DBUILD_BENCHMARKS=${BUILD_CUDF_BENCHMARKS}"/>
<arg value="-DCMAKE_CUDA_ARCHITECTURES=${GPU_ARCHS}"/>
<arg value="-DCMAKE_INSTALL_PREFIX=${libcudf.install.path}"/>
<arg value="-DCUDF_DEPENDENCY_PIN_MODE=${libcudf.dependency.mode}"/>
<arg value="-DCUDA_STATIC_CUFILE=ON"/>
<arg value="-DCUDA_STATIC_RUNTIME=ON"/>
<arg value="-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${CUDF_USE_PER_THREAD_DEFAULT_STREAM}" />
<arg value="-DCUDF_LARGE_STRINGS_DISABLED=ON"/>
<arg value="-DCUDF_KVIKIO_REMOTE_IO=OFF"/>
<arg value="-DLIBCUDF_LOGGING_LEVEL=${RMM_LOGGING_LEVEL}" />
<arg value="-DRMM_LOGGING_LEVEL=${RMM_LOGGING_LEVEL}" />
<arg value="-C=${cudf.pin.path}/setup.cmake"/>
</exec>
<exec dir="${libcudf.build.path}"
failonerror="true"
executable="cmake">
<arg value="--build"/>
<arg value="${libcudf.build.path}"/>
<arg value="--target"/>
<arg value="install"/>
<arg value="-j${CPP_PARALLEL_LEVEL}"/>
</exec>
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
<execution>
<id>build-libcudfjni</id>
<phase>validate</phase>
<configuration>
<target>
<mkdir dir="${libcudfjni.build.path}"/>
<exec dir="${libcudfjni.build.path}"
failonerror="true"
executable="cmake">
<env key="CUDF_CPP_BUILD_DIR" value="${libcudf.build.path}"/>
<env key="CUDF_ROOT" value="${cudf.path}"/>
<arg value="${cudf.path}/java/src/main/native"/>
<arg value="-DBUILD_SHARED_LIBS=OFF"/>
<arg value="-DCUDA_STATIC_CUFILE=ON"/>
<arg value="-DCUDA_STATIC_RUNTIME=ON"/>
<arg value="-DCUDF_JNI_LIBCUDF_STATIC=ON"/>
<arg value="-DGPU_ARCHS=${GPU_ARCHS}"/>
<arg value="-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${CUDF_USE_PER_THREAD_DEFAULT_STREAM}"/>
<arg value="-DRMM_LOGGING_LEVEL=${RMM_LOGGING_LEVEL}"/>
<arg value="-DUSE_GDS=${USE_GDS}"/>
<arg value="-DCUDF_DEPENDENCY_PIN_MODE=pinned"/>
<arg value="-C=${cudf.pin.path}/setup.cmake"/>
</exec>
<exec dir="${libcudfjni.build.path}"
failonerror="true"
executable="cmake">
<arg value="--build"/>
<arg value="${libcudfjni.build.path}"/>
<arg value="-j${CPP_PARALLEL_LEVEL}"/>
</exec>
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
<execution>
<id>build-sparkrapidsjni</id>
<id>buildcpp</id>
<phase>validate</phase>
<configuration>
<target>
<mkdir dir="${native.build.path}"/>
<exec dir="${native.build.path}"
failonerror="true"
executable="cmake">
<env key="CUDF_CPP_BUILD_DIR" value="${libcudf.build.path}"/>
<env key="CUDF_ROOT" value="${cudf.path}"/>
<env key="CUDF_INSTALL_DIR" value="${libcudf.install.path}"/>
<env key="CUDFJNI_BUILD_DIR" value="${libcudfjni.build.path}"/>
<arg value="${basedir}/src/main/cpp"/>
<arg value="-DGPU_ARCHS=${GPU_ARCHS}"/>
<arg value="-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${CUDF_USE_PER_THREAD_DEFAULT_STREAM}"/>
<arg value="-DRMM_LOGGING_LEVEL=${RMM_LOGGING_LEVEL}"/>
<arg value="-DUSE_GDS=${USE_GDS}"/>
<arg value="-DBUILD_TESTS=${BUILD_TESTS}"/>
<arg value="-DBUILD_BENCHMARKS=${BUILD_BENCHMARKS}"/>
<arg value="-DBUILD_FAULTINJ=${BUILD_FAULTINJ}"/>
<arg value="-DBUILD_PROFILER=${BUILD_PROFILER}"/>
<arg value="-DCUDF_DEPENDENCY_PIN_MODE=pinned"/>
<arg value="-C=${cudf.pin.path}/setup.cmake"/>
</exec>
<exec dir="${native.build.path}"
<exec dir="${project.basedir}"
failonerror="true"
executable="cmake">
<arg value="--build"/>
<arg value="${native.build.path}"/>
<arg value="-j${CPP_PARALLEL_LEVEL}"/>
executable="build/buildcpp.sh">
<env key="BUILD_BENCHMARKS" value="${BUILD_BENCHMARKS}"/>
<env key="BUILD_CUDF_BENCHMARKS" value="${BUILD_CUDF_BENCHMARKS}"/>
<env key="BUILD_CUDF_TESTS" value="${BUILD_CUDF_TESTS}"/>
<env key="BUILD_FAULTINJ" value="${BUILD_FAULTINJ}"/>
<env key="BUILD_PROFILER" value="${BUILD_PROFILER}"/>
<env key="BUILD_TESTS" value="${BUILD_TESTS}"/>
<env key="CPP_PARALLEL_LEVEL" value="${CPP_PARALLEL_LEVEL}"/>
<env key="CUDF_BUILD_TYPE" value="${CUDF_BUILD_TYPE}"/>
<env key="CUDF_PATH" value="${cudf.path}"/>
<env key="CUDF_PIN_PATH" value="${cudf.pin.path}"/>
<env key="CUDF_USE_PER_THREAD_DEFAULT_STREAM" value="${CUDF_USE_PER_THREAD_DEFAULT_STREAM}"/>
<env key="FROM_MAVEN" value="true"/>
<env key="GPU_ARCHS" value="${GPU_ARCHS}"/>
<env key="LIBCUDF_BUILD_CONFIGURE" value="${libcudf.build.configure}"/>
<env key="LIBCUDF_BUILD_PATH" value="${libcudf.build.path}"/>
<env key="LIBCUDF_DEPENDENCY_MODE" value="${libcudf.dependency.mode}"/>
<env key="LIBCUDF_INSTALL_PATH" value="${libcudf.install.path}"/>
<env key="LIBCUDFJNI_BUILD_PATH" value="${libcudfjni.build.path}"/>
<env key="PROJECT_BASE_DIR" value="${project.basedir}"/>
<env key="PROJECT_BUILD_DIR" value="${project.build.directory}"/>
<env key="SPARK_JNI_BUILD_PATH" value="${native.build.path}"/>
<env key="RMM_LOGGING_LEVEL" value="${RMM_LOGGING_LEVEL}"/>
<env key="USE_GDS" value="${USE_GDS}"/>
</exec>
</target>
</configuration>
Expand Down
9 changes: 6 additions & 3 deletions src/main/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ include(${CUDF_DIR}/cpp/cmake/thirdparty/get_nvtx.cmake)
# find CCCL
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_cccl.cmake)

# find spdlog. The helper is resolved relative to this CMakeLists.txt rather than
# CMAKE_SOURCE_DIR, so the path stays correct if this project is ever added as a
# subdirectory of a larger build.
include(${CMAKE_CURRENT_LIST_DIR}/cmake/get_spdlog.cmake)

# JNI
find_package(JNI REQUIRED)
if(JNI_FOUND)
Expand Down Expand Up @@ -190,7 +193,7 @@ add_library(
src/BloomFilterJni.cpp
src/CaseWhenJni.cpp
src/CastStringJni.cpp
src/DateTimeRebaseJni.cpp
src/DateTimeUtilsJni.cpp
src/DecimalUtilsJni.cpp
src/GpuTimeZoneDBJni.cpp
src/HashJni.cpp
Expand All @@ -212,6 +215,7 @@ add_library(
src/cast_string.cu
src/cast_string_to_float.cu
src/datetime_rebase.cu
src/datetime_truncate.cu
src/decimal_utils.cu
src/format_float.cu
src/from_json_to_raw_map.cu
Expand Down Expand Up @@ -280,12 +284,11 @@ target_link_libraries(
${CUDFJNI_LIB}
cudf::cudf
nvtx3::nvtx3-cpp
spdlog::spdlog_header_only
-Wl,--no-whole-archive
${ARROW_LIB}
${PARQUET_LIB}
${THRIFT_LIB}
PUBLIC rmm::rmm
PRIVATE $<TARGET_NAME_IF_EXISTS:rmm::rmm_logger_impl>
)
rapids_cuda_set_runtime(spark_rapids_jni USE_STATIC ON)
set_target_properties(spark_rapids_jni PROPERTIES LINK_LANGUAGE "CXX")
Expand Down
Loading

0 comments on commit 9f7ec44

Please sign in to comment.