Skip to content

Commit

Permalink
Merge pull request #2675 from NVIDIA/merge-branch-24.12-to-main
Browse files Browse the repository at this point in the history
Merge branch-24.12 into main [skip ci]
  • Loading branch information
YanxuanLiu authored Dec 13, 2024
2 parents 432f851 + 64626e6 commit dc40a2d
Show file tree
Hide file tree
Showing 63 changed files with 7,361 additions and 1,534 deletions.
55 changes: 55 additions & 0 deletions .github/workflows/license-header-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# A workflow to check copyright/license header
name: license header check

on:
pull_request:
types: [opened, synchronize, reopened]

jobs:
license-header-check:
runs-on: ubuntu-latest
if: "!contains(github.event.pull_request.title, '[bot]')"
steps:
- name: Get checkout depth
run: |
echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 10 ))" >> $GITHUB_ENV
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: ${{ env.PR_FETCH_DEPTH }}

- name: license-header-check
uses: NVIDIA/spark-rapids-common/license-header-check@main
with:
included_file_patterns: |
*.cpp,
*.hpp,
*.cu,
*.cuh,
*.java,
*.sh,
*Dockerfile*,
*Jenkinsfile*,
*.yml,
*.yaml,
*.txt,
*.xml,
*.fbs,
build/*
excluded_file_patterns: |
thirdparty/*
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "thirdparty/cudf"]
path = thirdparty/cudf
url = https://github.com/rapidsai/cudf.git
branch = branch-24.10
branch = branch-24.12
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ $ ./build/build-in-docker install ...
```

Now cd to ~/repos/NVIDIA/spark-rapids and build with one of the options from
[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-24.10/CONTRIBUTING.md#building-from-source).
[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-24.12/CONTRIBUTING.md#building-from-source).

```bash
$ ./build/buildall
Expand Down
49 changes: 42 additions & 7 deletions build/apply-patches
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
# limitations under the License.
#

# Run a command in a Docker container with devtoolset

set -e

BASE_DIR=$( git rev-parse --show-toplevel )
Expand All @@ -26,14 +24,51 @@ PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}

CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}

# Applying patches to CUDF is problematic in a number of ways. But ultimately it comes down to
# making sure that a user can do development work in spark-rapids-jni without the patches
# getting in the way
# The operations I really want to support no matter what state CUDF is in are
# 1) Build the repo from scratch
# 2) Rebuild the repo without having to clean and start over
# 3) upmerge to a new version of the plugin including updating the cudf submodule
#
# Building from scratch is simple. We want clean to unapply any patches and
# build to apply them. But if we want to rebuild without a clean we need to know what
# state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
# is to save some state files about what happened. But a user could mess with CUDF directly
# so we want to have ways to double check that they are indeed correct.

FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"

pushd "$CUDF_DIR"
if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
echo "Error: CUDF repository has uncommitted changes. No patches will be applied..."
exit 1

PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)

if [ -z "$PATCH_FILES" ] ; then
echo "No patches to apply"
exit 0
fi

CHANGED_FILES=$(git status --porcelain --untracked-files=no)

# Fast path: when the saved patch file is non-empty and tracked files are
# modified, check whether that saved patch reverse-applies cleanly. If it does,
# the patches are already in place and there is nothing left to do.
# Two separate `[ ]` tests joined with `&&` are used instead of `-a` with
# parentheses, whose parsing POSIX leaves unspecified for this many arguments.
if [ -s "$FULLY_PATCHED_FILE" ] && [ -n "$CHANGED_FILES" ] ; then
  if git apply -R --check "$FULLY_PATCHED_FILE" ; then
    echo "Patches appear to have been applied already"
    exit 0
  fi
fi

# Refuse to patch when tracked files are still modified at this point: abort
# with an error rather than apply patches on top of unknown edits.
if [ -n "$CHANGED_FILES" ] ; then
  echo "Error: CUDF repository has uncommitted changes. No patches will be applied. Please clean the repository so we can try and add the needed patches"
  # Show which tracked files are modified so the user knows what to clean.
  echo "$CHANGED_FILES"
  exit 1
fi

find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV | while IFS= read -r -d '' file; do
echo "patching with: $file"
patch --no-backup-if-mismatch -f -t --reject-file=- -p1 -i "$file"
echo "patching with: $file"
git apply -v "$file"
done

git diff > "$FULLY_PATCHED_FILE"

popd
66 changes: 52 additions & 14 deletions build/unapply-patches
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,67 @@
# limitations under the License.
#

# Run a command in a Docker container with devtoolset

set -e

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
BASE_DIR=$( git rev-parse --show-toplevel )

PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}

PATCH_DIR=${PATCH_DIR:-$(realpath "$SCRIPT_DIR/../patches/")}
CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}

CUDF_DIR=${CUDF_DIR:-$(realpath "$SCRIPT_DIR/../thirdparty/cudf/")}
# Applying patches to CUDF is problematic in a number of ways. But ultimately it comes down to
# making sure that a user can do development work in spark-rapids-jni without the patches
# getting in the way
# The operations I really want to support no matter what state CUDF is in are
# 1) Build the repo from scratch
# 2) Rebuild the repo without having to clean and start over
# 3) upmerge to a new version of the plugin including updating the cudf submodule
#
# Building from scratch is simple. We want clean to unapply any patches and
# build to apply them. But if we want to rebuild without a clean we need to know what
# state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
# is to save some state files about what happened. But a user could mess with CUDF directly
# so we want to have ways to double check that they are indeed correct.

FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"

pushd "$CUDF_DIR"
if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
#only try to remove patches if it looks like something was changed
find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV -r | while IFS= read -r -d '' file; do
echo "patching with: $file"
patch -R --no-backup-if-mismatch --reject-file=- -f -t -p1 -i "$file"
done

PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)

if [ -z "$PATCH_FILES" ] ; then
echo "No patches to remove"
exit 0
fi

# Check for modifications
if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
echo "Error: CUDF repository has uncommitted changes. You might want to clean in manually if you know that is expected"
CHANGED_FILES=$(git status --porcelain --untracked-files=no)

# When the saved patch file is non-empty and tracked files are modified, try to
# undo the patches by reverse-applying that saved file.
#   - If it reverse-applies cleanly: reverse it, delete the saved file, then
#     verify the tree is clean (exit 0) or report what is still modified
#     (exit 1).
#   - If it does not: the tree was changed in some other way; report the saved
#     patch file that failed and exit 1.
# Two separate `[ ]` tests joined with `&&` are used instead of `-a` with
# parentheses, whose parsing POSIX leaves unspecified for this many arguments.
if [ -s "$FULLY_PATCHED_FILE" ] && [ -n "$CHANGED_FILES" ] ; then
  if git apply --check -R "$FULLY_PATCHED_FILE"; then
    echo "Patches appear to have been applied, so going to remove them"
    git apply -R -v "$FULLY_PATCHED_FILE"
    rm -f "$FULLY_PATCHED_FILE"

    # Check for modifications, again
    if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
      echo "Error: CUDF repository has uncommitted changes. You might want to clean in manually if you know that is expected"
      git status --porcelain --untracked-files=no
      exit 1
    fi

    exit 0
  else
    # Name the saved patch file that could not be reversed.
    echo "Files are changed, but in a way where the full patch file does not apply to remove them $FULLY_PATCHED_FILE"
    exit 1
  fi
fi

# Tracked files are modified but were not handled as applied patches above:
# report them and fail. Otherwise there is nothing to remove.
if [ -n "$CHANGED_FILES" ] ; then
  echo "Error: CUDF repository has uncommitted changes, but does not appear to have been patched. Please clean it and try again."
  # Show which tracked files are modified so the user knows what to clean.
  echo "$CHANGED_FILES"
  exit 1
else
  echo "No changes in CUDF repository to remove"
fi

popd
2 changes: 1 addition & 1 deletion ci/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSI
RUN mkdir -m 777 /usr/local/rapids /rapids

# 3.22.3: CUDA architecture 'native' support + flexible CMAKE_<LANG>_*_LAUNCHER for ccache
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_VERSION=3.28.6
# default x86_64 from x86 build, aarch64 cmake for arm build
ARG CMAKE_ARCH=x86_64
RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \
Expand Down
43 changes: 25 additions & 18 deletions ci/submodule-sync.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,26 +57,29 @@ if [ -n "$CUDF_TAG" ]; then
else
git submodule update --remote --merge
fi

cudf_pins_only=false
cudf_sha=$(git -C thirdparty/cudf rev-parse HEAD)
if [[ "${cudf_sha}" == "${cudf_prev_sha}" ]]; then
echo "Submodule is up to date."
exit 0
echo "cuDF submodule is up to date. Try update cudf-pins..."
cudf_pins_only=true
else
echo "Try update cudf submodule to ${cudf_sha}..."
git add .
git commit -s -m "Update submodule cudf to ${cudf_sha}"
fi

echo "Try update cudf submodule to ${cudf_sha}..."
git add .

echo "Test against ${cudf_sha}..."

echo "Build libcudf only to update pinned versions..."
MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B"
set +e
# Don't do a full build. Just try to update/build CUDF with no patches on top of it.
${MVN} validate ${MVN_MIRROR} \
# calling the antrun directly skips applying patches and also only builds
# libcudf
${MVN} antrun:run@build-libcudf ${MVN_MIRROR} \
-DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
-Dlibcudf.build.configure=true \
-Dlibcudf.dependency.mode=latest \
-Dsubmodule.patch.skip \
-DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \
-DUSE_GDS=ON \
-DBUILD_TESTS=ON \
-DUSE_SANITIZER=ON
validate_status=$?
Expand All @@ -88,21 +91,25 @@ rapids_cmake_sha=$(git -C ${LIBCUDF_BUILD_PATH}/_deps/rapids-cmake-src/ rev-pars
echo "Update rapids-cmake pinned SHA1 to ${rapids_cmake_sha}"
echo "${rapids_cmake_sha}" > thirdparty/cudf-pins/rapids-cmake.sha

# Bash the wrong nvcomp version to the correct version until
# nvcomp version mismatch is fixed. https://github.com/rapidsai/cudf/issues/16772.
echo "Revert nvcomp to 3.0.6"
sed -i -e 's/4\.0\.1\.0/3.0.6/' \
-e 's|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz|' \
-e 's|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz|' \
thirdparty/cudf-pins/versions.json
echo "Workaround for https://github.com/NVIDIA/spark-rapids-jni/issues/2582"
cudf_patch_path="cudf/cpp/cmake/thirdparty/patches"
sed -i "s|\${current_json_dir}|\${current_json_dir}/../${cudf_patch_path}|g" thirdparty/cudf-pins/versions.json

# Do the git add after the build so that we get
# the updated versions.json generated by the build
echo "Update cudf submodule to ${cudf_sha} with updated pinned versions"
git add .
git diff-index --quiet HEAD || git commit -s -m "Update submodule cudf to ${cudf_sha}"
if ! git diff-index --quiet HEAD; then
# We perform a squash merge for submodule-sync commits
git commit -s -m "Update pinned versions for cudf ${cudf_sha}"
elif ${cudf_pins_only}; then
echo "No changes to commit. Exit early..."
exit 0
fi

sha=$(git rev-parse HEAD)

echo "Test against ${cudf_sha}..."
set +e
# now build and test everything with the patches in place
${MVN} clean verify ${MVN_MIRROR} \
Expand Down
Empty file added patches/noop.patch
Empty file.
Loading

0 comments on commit dc40a2d

Please sign in to comment.