Skip to content

Commit

Permalink
Merge branch 'rapidsai:branch-24.10' into benchmark-io
Browse files Browse the repository at this point in the history
  • Loading branch information
kingcrimsontianyu authored Sep 24, 2024
2 parents b315d6c + 6badd6b commit ccb8ed0
Show file tree
Hide file tree
Showing 15 changed files with 88 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
arch: "amd64"
branch: ${{ inputs.branch }}
build_type: ${{ inputs.build_type || 'branch' }}
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
date: ${{ inputs.date }}
node_type: "gpu-v100-latest-1"
run_script: "ci/build_docs.sh"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_java.sh"
static-configure:
needs: checks
Expand All @@ -181,7 +181,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
Expand All @@ -191,7 +191,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/build_docs.sh"
wheel-build-libcudf:
needs: checks
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_cpp_memcheck.sh"
static-configure:
secrets: inherit
Expand Down Expand Up @@ -81,7 +81,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_java.sh"
conda-notebook-tests:
secrets: inherit
Expand All @@ -93,7 +93,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_notebooks.sh"
wheel-tests-cudf:
secrets: inherit
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ dependencies:
- doxygen=1.9.1
- fastavro>=0.22.9
- flatbuffers==24.3.25
- fmt>=10.1.1,<11
- fmt>=11.0.2,<12
- fsspec>=0.6.0
- gcc_linux-64=11.*
- hypothesis
Expand Down Expand Up @@ -84,7 +84,7 @@ dependencies:
- s3fs>=2022.3.0
- scikit-build-core>=0.10.0
- scipy
- spdlog>=1.12.0,<1.13
- spdlog>=1.14.1,<1.15
- sphinx
- sphinx-autobuild
- sphinx-copybutton
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies:
- doxygen=1.9.1
- fastavro>=0.22.9
- flatbuffers==24.3.25
- fmt>=10.1.1,<11
- fmt>=11.0.2,<12
- fsspec>=0.6.0
- gcc_linux-64=11.*
- hypothesis
Expand Down Expand Up @@ -82,7 +82,7 @@ dependencies:
- s3fs>=2022.3.0
- scikit-build-core>=0.10.0
- scipy
- spdlog>=1.12.0,<1.13
- spdlog>=1.14.1,<1.15
- sphinx
- sphinx-autobuild
- sphinx-copybutton
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ librdkafka_version:
- ">=2.5.0,<2.6.0a0"

fmt_version:
- ">=10.1.1,<11"
- ">=11.0.2,<12"

flatbuffers_version:
- "=24.3.25"

spdlog_version:
- ">=1.12.0,<1.13"
- ">=1.14.1,<1.15"

nvcomp_version:
- "=4.0.1"
Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,7 @@ add_dependencies(cudf jitify_preprocess_run)
# Specify the target module library dependencies
target_link_libraries(
cudf
PUBLIC CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
PUBLIC CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool> spdlog::spdlog_header_only
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp
kvikio::kvikio $<TARGET_NAME_IF_EXISTS:cuFile_interface> nanoarrow
)
Expand Down
21 changes: 6 additions & 15 deletions cpp/cmake/thirdparty/get_spdlog.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand All @@ -16,21 +16,12 @@
function(find_and_configure_spdlog)

include(${rapids-cmake-dir}/cpm/spdlog.cmake)
rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET cudf-exports)
rapids_export_package(BUILD spdlog cudf-exports)
rapids_cpm_spdlog(
FMT_OPTION "EXTERNAL_FMT_HO"
INSTALL_EXPORT_SET cudf-exports
BUILD_EXPORT_SET cudf-exports
)

if(spdlog_ADDED)
rapids_export(
BUILD spdlog
EXPORT_SET spdlog
GLOBAL_TARGETS spdlog spdlog_header_only
NAMESPACE spdlog::
)
include("${rapids-cmake-dir}/export/find_package_root.cmake")
rapids_export_find_package_root(
BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports
)
endif()
endfunction()

find_and_configure_spdlog()
4 changes: 2 additions & 2 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -350,12 +350,12 @@ dependencies:
common:
- output_types: conda
packages:
- fmt>=10.1.1,<11
- fmt>=11.0.2,<12
- flatbuffers==24.3.25
- librdkafka>=2.5.0,<2.6.0a0
# Align nvcomp version with rapids-cmake
- nvcomp==4.0.1
- spdlog>=1.12.0,<1.13
- spdlog>=1.14.1,<1.15
rapids_build_skbuild:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
12 changes: 6 additions & 6 deletions docs/cudf/source/cudf_polars/index.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cuDF-based GPU backend for Polars [Open Beta]
=============================================
Polars GPU engine
=================

cuDF supports an in-memory, GPU-accelerated execution engine for Python users of the Polars Lazy API.
cuDF provides an in-memory, GPU-accelerated execution engine for Python users of the Polars Lazy API.
The engine supports most of the core expressions and data types as well as a growing set of more advanced dataframe manipulations
and data file formats. When using the GPU engine, Polars will convert expressions into an optimized query plan and determine
whether the plan is supported on the GPU. If it is not, the execution will transparently fall back to the standard Polars engine
Expand All @@ -16,7 +16,7 @@ We reproduced the `Polars Decision Support (PDS) <https://github.com/pola-rs/pol



You can see up to 13x speedup using the GPU backend on the compute-heavy PDS queries involving complex aggregation and join operations. Below are the speedups for the top performing queries:
You can see up to 13x speedup using the GPU engine on the compute-heavy PDS queries involving complex aggregation and join operations. Below are the speedups for the top performing queries:


.. figure:: ../_static/compute_heavy_queries_polars.png
Expand All @@ -29,7 +29,7 @@ You can reproduce the results by visiting the `Polars Decision Support (PDS) Git
Learn More
----------

The GPU backend for Polars is now available in Open Beta and the engine is undergoing rapid development. To learn more, visit the `GPU Support page <https://docs.pola.rs/user-guide/gpu-support/>`__ on the Polars website.
The GPU engine for Polars is now available in Open Beta and the engine is undergoing rapid development. To learn more, visit the `GPU Support page <https://docs.pola.rs/user-guide/gpu-support/>`__ on the Polars website.

Launch on Google Colab
----------------------
Expand All @@ -38,4 +38,4 @@ Launch on Google Colab
:width: 200px
:target: https://colab.research.google.com/github/rapidsai-community/showcase/blob/main/accelerated_data_processing_examples/polars_gpu_engine_demo.ipynb

Take the cuDF backend for Polars for a test-drive in a free GPU-enabled notebook environment using your Google account by `launching on Colab <https://colab.research.google.com/github/rapidsai-community/showcase/blob/main/accelerated_data_processing_examples/polars_gpu_engine_demo.ipynb>`__.
Try out the GPU engine for Polars in a free GPU notebook environment. Sign in with your Google account and `launch the demo on Colab <https://colab.research.google.com/github/rapidsai-community/showcase/blob/main/accelerated_data_processing_examples/polars_gpu_engine_demo.ipynb>`__.
16 changes: 16 additions & 0 deletions java/src/main/java/ai/rapids/cudf/JSONOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public final class JSONOptions extends ColumnFilterOptions {
private final boolean allowLeadingZeros;
private final boolean allowNonNumericNumbers;
private final boolean allowUnquotedControlChars;
private final byte lineDelimiter;

private JSONOptions(Builder builder) {
super(builder);
Expand All @@ -52,6 +53,11 @@ private JSONOptions(Builder builder) {
allowLeadingZeros = builder.allowLeadingZeros;
allowNonNumericNumbers = builder.allowNonNumericNumbers;
allowUnquotedControlChars = builder.allowUnquotedControlChars;
lineDelimiter = builder.lineDelimiter;
}

public byte getLineDelimiter() {
return lineDelimiter;
}

public boolean isDayFirst() {
Expand Down Expand Up @@ -123,6 +129,16 @@ public static final class Builder extends ColumnFilterOptions.Builder<JSONOptio
private boolean mixedTypesAsStrings = false;
private boolean keepQuotes = false;

private byte lineDelimiter = '\n';

public Builder withLineDelimiter(char delimiter) {
if (delimiter > Byte.MAX_VALUE) {
throw new IllegalArgumentException("Only basic ASCII values are supported as line delimiters " + delimiter);
}
lineDelimiter = (byte)delimiter;
return this;
}

/**
* Should json validation be strict or not
*/
Expand Down
19 changes: 14 additions & 5 deletions java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,8 @@ private static native long readJSON(int[] numChildren, String[] columnNames,
boolean strictValidation,
boolean allowLeadingZeros,
boolean allowNonNumericNumbers,
boolean allowUnquotedControl) throws CudfException;
boolean allowUnquotedControl,
byte lineDelimiter) throws CudfException;

private static native long readJSONFromDataSource(int[] numChildren, String[] columnNames,
int[] dTypeIds, int[] dTypeScales,
Expand All @@ -272,6 +273,7 @@ private static native long readJSONFromDataSource(int[] numChildren, String[] co
boolean allowLeadingZeros,
boolean allowNonNumericNumbers,
boolean allowUnquotedControl,
byte lineDelimiter,
long dsHandle) throws CudfException;

private static native long readAndInferJSONFromDataSource(boolean dayFirst, boolean lines,
Expand All @@ -284,6 +286,7 @@ private static native long readAndInferJSONFromDataSource(boolean dayFirst, bool
boolean allowLeadingZeros,
boolean allowNonNumericNumbers,
boolean allowUnquotedControl,
byte lineDelimiter,
long dsHandle) throws CudfException;

private static native long readAndInferJSON(long address, long length,
Expand All @@ -297,7 +300,8 @@ private static native long readAndInferJSON(long address, long length,
boolean strictValidation,
boolean allowLeadingZeros,
boolean allowNonNumericNumbers,
boolean allowUnquotedControl) throws CudfException;
boolean allowUnquotedControl,
byte lineDelimiter) throws CudfException;

/**
* Read in Parquet formatted data.
Expand Down Expand Up @@ -1321,7 +1325,8 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
opts.strictValidation(),
opts.leadingZerosAllowed(),
opts.nonNumericNumbersAllowed(),
opts.unquotedControlChars()))) {
opts.unquotedControlChars(),
opts.getLineDelimiter()))) {

return gatherJSONColumns(schema, twm, -1);
}
Expand Down Expand Up @@ -1404,7 +1409,8 @@ public static TableWithMeta readJSON(JSONOptions opts, HostMemoryBuffer buffer,
opts.strictValidation(),
opts.leadingZerosAllowed(),
opts.nonNumericNumbersAllowed(),
opts.unquotedControlChars()));
opts.unquotedControlChars(),
opts.getLineDelimiter()));
}

/**
Expand All @@ -1426,6 +1432,7 @@ public static TableWithMeta readAndInferJSON(JSONOptions opts, DataSource ds) {
opts.leadingZerosAllowed(),
opts.nonNumericNumbersAllowed(),
opts.unquotedControlChars(),
opts.getLineDelimiter(),
dsHandle));
return twm;
} finally {
Expand Down Expand Up @@ -1479,7 +1486,8 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
opts.strictValidation(),
opts.leadingZerosAllowed(),
opts.nonNumericNumbersAllowed(),
opts.unquotedControlChars()))) {
opts.unquotedControlChars(),
opts.getLineDelimiter()))) {
return gatherJSONColumns(schema, twm, emptyRowCount);
}
}
Expand Down Expand Up @@ -1518,6 +1526,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int
opts.leadingZerosAllowed(),
opts.nonNumericNumbersAllowed(),
opts.unquotedControlChars(),
opts.getLineDelimiter(),
dsHandle))) {
return gatherJSONColumns(schema, twm, emptyRowCount);
} finally {
Expand Down
12 changes: 10 additions & 2 deletions java/src/main/native/src/TableJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1627,6 +1627,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env,
jboolean allow_leading_zeros,
jboolean allow_nonnumeric_numbers,
jboolean allow_unquoted_control,
jbyte line_delimiter,
jlong ds_handle)
{
JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0);
Expand All @@ -1646,6 +1647,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env,
.normalize_single_quotes(static_cast<bool>(normalize_single_quotes))
.normalize_whitespace(static_cast<bool>(normalize_whitespace))
.mixed_types_as_string(mixed_types_as_string)
.delimiter(static_cast<char>(line_delimiter))
.strict_validation(strict_validation)
.keep_quotes(keep_quotes);
if (strict_validation) {
Expand Down Expand Up @@ -1676,7 +1678,8 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env,
jboolean strict_validation,
jboolean allow_leading_zeros,
jboolean allow_nonnumeric_numbers,
jboolean allow_unquoted_control)
jboolean allow_unquoted_control,
jbyte line_delimiter)
{
JNI_NULL_CHECK(env, buffer, "buffer cannot be null", 0);
if (buffer_length <= 0) {
Expand All @@ -1700,6 +1703,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env,
.normalize_whitespace(static_cast<bool>(normalize_whitespace))
.strict_validation(strict_validation)
.mixed_types_as_string(mixed_types_as_string)
.delimiter(static_cast<char>(line_delimiter))
.keep_quotes(keep_quotes);
if (strict_validation) {
opts.numeric_leading_zeros(allow_leading_zeros)
Expand Down Expand Up @@ -1814,6 +1818,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
jboolean allow_leading_zeros,
jboolean allow_nonnumeric_numbers,
jboolean allow_unquoted_control,
jbyte line_delimiter,
jlong ds_handle)
{
JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0);
Expand Down Expand Up @@ -1848,6 +1853,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
.normalize_single_quotes(static_cast<bool>(normalize_single_quotes))
.normalize_whitespace(static_cast<bool>(normalize_whitespace))
.mixed_types_as_string(mixed_types_as_string)
.delimiter(static_cast<char>(line_delimiter))
.strict_validation(strict_validation)
.keep_quotes(keep_quotes);
if (strict_validation) {
Expand Down Expand Up @@ -1908,7 +1914,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
jboolean strict_validation,
jboolean allow_leading_zeros,
jboolean allow_nonnumeric_numbers,
jboolean allow_unquoted_control)
jboolean allow_unquoted_control,
jbyte line_delimiter)
{
bool read_buffer = true;
if (buffer == 0) {
Expand Down Expand Up @@ -1957,6 +1964,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
.normalize_single_quotes(static_cast<bool>(normalize_single_quotes))
.normalize_whitespace(static_cast<bool>(normalize_whitespace))
.mixed_types_as_string(mixed_types_as_string)
.delimiter(static_cast<char>(line_delimiter))
.strict_validation(strict_validation)
.keep_quotes(keep_quotes);
if (strict_validation) {
Expand Down
Loading

0 comments on commit ccb8ed0

Please sign in to comment.