
Commit bc976c3

fix polars pyproject.toml merge conflict

2 parents ab3fbdf + f0a3dfe

90 files changed (+942 -454 lines)


.github/workflows/build.yaml
+4 -4

@@ -90,7 +90,7 @@ jobs:
       package-name: libcudf
       package-type: cpp
   wheel-build-pylibcudf:
-    needs: [wheel-publish-libcudf]
+    needs: [wheel-build-libcudf]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
@@ -111,7 +111,7 @@ jobs:
       package-name: pylibcudf
       package-type: python
   wheel-build-cudf:
-    needs: wheel-publish-pylibcudf
+    needs: wheel-build-pylibcudf
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
@@ -132,7 +132,7 @@ jobs:
       package-name: cudf
       package-type: python
   wheel-build-dask-cudf:
-    needs: wheel-publish-cudf
+    needs: wheel-build-cudf
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
@@ -155,7 +155,7 @@ jobs:
       package-name: dask_cudf
       package-type: python
   wheel-build-cudf-polars:
-    needs: wheel-publish-pylibcudf
+    needs: wheel-build-pylibcudf
    secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
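
These four edits re-point each wheel-build job's `needs` at the preceding build job instead of its publish job, so the build chain no longer waits on publishing. As a rough illustration (not part of the commit), the resulting `needs` graph resolves to a build order like this; the job names come from the diff and `graphlib` is in the Python standard library:

# Sketch: how the updated `needs` graph resolves into an execution order.
from graphlib import TopologicalSorter

needs = {
    "wheel-build-pylibcudf": {"wheel-build-libcudf"},
    "wheel-build-cudf": {"wheel-build-pylibcudf"},
    "wheel-build-dask-cudf": {"wheel-build-cudf"},
    "wheel-build-cudf-polars": {"wheel-build-pylibcudf"},
}

# static_order() yields predecessors first, e.g. wheel-build-libcudf before
# wheel-build-pylibcudf; jobs with no mutual dependency may appear in any
# valid order.
print(list(TopologicalSorter(needs).static_order()))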

.pre-commit-config.yaml
+2 -11

@@ -41,13 +41,6 @@ repos:
           "python/cudf_polars/cudf_polars",
           "python/dask_cudf/dask_cudf"]
         pass_filenames: false
-  - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.9.1
-    hooks:
-      - id: nbqa-isort
-        # Use the cudf_kafka isort orderings in notebooks so that dask
-        # and RAPIDS packages have their own sections.
-        args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
   - repo: https://github.com/pre-commit/mirrors-clang-format
     rev: v16.0.6
     hooks:
@@ -153,13 +146,11 @@ repos:
           ^CHANGELOG.md$
         )
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.0
+    rev: v0.9.3
     hooks:
       - id: ruff
         args: ["--fix"]
-        files: python/.*$
      - id: ruff-format
-        files: python/.*$
   - repo: https://github.com/rapidsai/pre-commit-hooks
     rev: v0.4.0
     hooks:
@@ -173,7 +164,7 @@ repos:
         )
       - id: verify-alpha-spec
   - repo: https://github.com/rapidsai/dependency-file-generator
-    rev: v1.16.0
+    rev: v1.17.0
     hooks:
       - id: rapids-dependency-file-generator
         args: ["--clean"]
@@ -1,24 +1,34 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+
+import argparse
 
 import requests
-from packaging.version import Version
 from packaging.specifiers import SpecifierSet
-import argparse
+from packaging.version import Version
+
 
 def get_pandas_versions(pandas_range):
     url = "https://pypi.org/pypi/pandas/json"
     response = requests.get(url)
     data = response.json()
-    versions = [Version(v) for v in data['releases']]
+    versions = [Version(v) for v in data["releases"]]
     specifier = SpecifierSet(pandas_range.lstrip("pandas"))
     matching_versions = [v for v in versions if v in specifier]
-    matching_minors = sorted(set(".".join((str(v.major), str(v.minor))) for v in matching_versions), key=Version)
+    matching_minors = sorted(
+        set(".".join((str(v.major), str(v.minor))) for v in matching_versions),
+        key=Version,
+    )
     return matching_minors
 
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Filter pandas versions by prefix.")
-    parser.add_argument("pandas_range", type=str, help="The version prefix to filter by.")
+    parser = argparse.ArgumentParser(
+        description="Filter pandas versions by prefix."
+    )
+    parser.add_argument(
+        "pandas_range", type=str, help="The version prefix to filter by."
+    )
     args = parser.parse_args()
 
     versions = get_pandas_versions(args.pandas_range)
-    print(','.join(versions))
+    print(",".join(versions))

ci/cudf_pandas_scripts/pandas-tests/job-summary.py
+22 -8

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
@@ -68,17 +68,27 @@ def emoji_failed(x):
 pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
 main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
 total_usage = main_df["_slow_function_call"] + main_df["_fast_function_call"]
-main_df["CPU Usage"] = ((main_df["_slow_function_call"] / total_usage) * 100.0).round(1)
-main_df["GPU Usage"] = ((main_df["_fast_function_call"] / total_usage) * 100.0).round(1)
+main_df["CPU Usage"] = (
+    (main_df["_slow_function_call"] / total_usage) * 100.0
+).round(1)
+main_df["GPU Usage"] = (
+    (main_df["_fast_function_call"] / total_usage) * 100.0
+).round(1)
 
 total_usage = pr_df["_slow_function_call"] + pr_df["_fast_function_call"]
-pr_df["CPU Usage"] = ((pr_df["_slow_function_call"] / total_usage) * 100.0).round(1)
-pr_df["GPU Usage"] = ((pr_df["_fast_function_call"] / total_usage) * 100.0).round(1)
+pr_df["CPU Usage"] = (
+    (pr_df["_slow_function_call"] / total_usage) * 100.0
+).round(1)
+pr_df["GPU Usage"] = (
+    (pr_df["_fast_function_call"] / total_usage) * 100.0
+).round(1)
 
 cpu_usage_mean = pr_df["CPU Usage"].mean().round(2)
 gpu_usage_mean = pr_df["GPU Usage"].mean().round(2)
 
-gpu_usage_rate_change = abs(pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean())
+gpu_usage_rate_change = abs(
+    pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean()
+)
 pr_df["CPU Usage"] = pr_df["CPU Usage"].fillna(0)
 pr_df["GPU Usage"] = pr_df["GPU Usage"].fillna(0)
 main_df["CPU Usage"] = main_df["CPU Usage"].fillna(0)
@@ -92,8 +102,12 @@ def emoji_failed(x):
 pr_df["CPU Usage"] = pr_df["CPU Usage"].astype(str) + "%"
 pr_df["GPU Usage"] = pr_df["GPU Usage"].astype(str) + "%"
 
-pr_df = pr_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
-diff_df = diff_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
+pr_df = pr_df[
+    ["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]
+]
+diff_df = diff_df[
+    ["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]
+]
 diff_df.columns = diff_df.columns + "_diff"
 diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
 diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed)

ci/utils/nbtestlog2junitxml.py
+18 -16

@@ -1,15 +1,16 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 # Generate a junit-xml file from parsing a nbtest log
 
 import re
-from xml.etree.ElementTree import Element, ElementTree
-from os import path
 import string
 from enum import Enum
-
+from os import path
+from xml.etree.ElementTree import Element, ElementTree
 
 startingPatt = re.compile(r"^STARTING: ([\w\.\-]+)$")
-skippingPatt = re.compile(r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$")
+skippingPatt = re.compile(
+    r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$"
+)
 exitCodePatt = re.compile(r"^EXIT CODE: (\d+)$")
 folderPatt = re.compile(r"^FOLDER: ([\w\.\-]+)$")
 timePatt = re.compile(r"^real\s+([\d\.ms]+)$")
@@ -37,12 +38,8 @@ def makeFailureElement(outputLines):
 
 
 def setFileNameAttr(attrDict, fileName):
-    attrDict.update(file=fileName,
-                    classname="",
-                    line="",
-                    name="",
-                    time=""
-                    )
+    attrDict.update(file=fileName, classname="", line="", name="", time="")
+
 
 def setClassNameAttr(attrDict, className):
     attrDict["classname"] = className
@@ -76,11 +73,12 @@ def parseLog(logFile, testSuiteElement):
     testSuiteElement.attrib["timestamp"] = ""
 
     attrDict = {}
-    #setFileNameAttr(attrDict, logFile)
+    # setFileNameAttr(attrDict, logFile)
     setFileNameAttr(attrDict, "nbtest")
 
-    parserStateEnum = Enum("parserStateEnum",
-                           "newTest startingLine finishLine exitCode")
+    parserStateEnum = Enum(
+        "parserStateEnum", "newTest startingLine finishLine exitCode"
+    )
     parserState = parserStateEnum.newTest
 
     testOutput = ""
@@ -98,7 +96,9 @@ def parseLog(logFile, testSuiteElement):
             setTimeAttr(attrDict, "0m0s")
             skippedElement = makeTestCaseElement(attrDict)
             message = m.group(3) or ""
-            skippedElement.append(Element("skipped", message=message, type=""))
+            skippedElement.append(
+                Element("skipped", message=message, type="")
+            )
             testSuiteElement.append(skippedElement)
             incrNumAttr(testSuiteElement, "skipped")
             incrNumAttr(testSuiteElement, "tests")
@@ -160,4 +160,6 @@ def parseLog(logFile, testSuiteElement):
     testSuiteElement = Element("testsuite", name="nbtest", hostname="")
     parseLog(sys.argv[1], testSuiteElement)
     testSuitesElement.append(testSuiteElement)
-    ElementTree(testSuitesElement).write(sys.argv[1]+".xml", xml_declaration=True)
+    ElementTree(testSuitesElement).write(
+        sys.argv[1] + ".xml", xml_declaration=True
+    )
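
For reference, a minimal sketch of the ElementTree pattern this script uses — a testsuites tree with one skipped test case, written with an XML declaration. The case name and skip message here are made up:

from xml.etree.ElementTree import Element, ElementTree

testSuitesElement = Element("testsuites")
testSuiteElement = Element("testsuite", name="nbtest", hostname="")

# One skipped test case, mirroring how the parser records SKIPPING lines.
case = Element("testcase", classname="", name="notebook-example", time="0m0s")
case.append(Element("skipped", message="no GPU", type=""))
testSuiteElement.append(case)
testSuitesElement.append(testSuiteElement)

ElementTree(testSuitesElement).write("nbtest.log.xml", xml_declaration=True)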

conda/environments/all_cuda-118_arch-x86_64.yaml
+1 -1

@@ -67,7 +67,7 @@ dependencies:
 - pandas
 - pandas>=2.0,<2.2.4dev0
 - pandoc
-- polars>=1.11,<1.18
+- polars>=1.20,<1.22
 - pre-commit
 - ptxcompiler
 - pyarrow>=14.0.0,<20.0.0a0

conda/environments/all_cuda-125_arch-x86_64.yaml
+1 -1

@@ -65,7 +65,7 @@ dependencies:
 - pandas
 - pandas>=2.0,<2.2.4dev0
 - pandoc
-- polars>=1.11,<1.18
+- polars>=1.20,<1.22
 - pre-commit
 - pyarrow>=14.0.0,<20.0.0a0
 - pydata-sphinx-theme>=0.15.4

conda/recipes/cudf-polars/meta.yaml
+1 -1

@@ -43,7 +43,7 @@ requirements:
   run:
     - python
     - pylibcudf ={{ version }}
-    - polars >=1.11,<1.18
+    - polars >=1.20,<1.22
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
 
 test:
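
The same polars pin bump (>=1.11,<1.18 to >=1.20,<1.22) lands in both conda environments and this recipe. A quick way to sanity-check an installed environment against the new pin (a sketch, not part of the commit; only the specifier string comes from the diff):

import polars
from packaging.specifiers import SpecifierSet
from packaging.version import Version

pin = SpecifierSet(">=1.20,<1.22")  # the pin introduced by this commit
installed = Version(polars.__version__)
print(f"polars {installed} satisfies '{pin}': {installed in pin}")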

cpp/benchmarks/CMakeLists.txt
+4 -1

@@ -428,7 +428,10 @@ ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp)
 # ##################################################################################################
 # * rolling benchmark
 # ---------------------------------------------------------------------------------
-ConfigureNVBench(ROLLING_NVBENCH rolling/grouped_rolling_sum.cpp rolling/rolling_sum.cpp)
+ConfigureNVBench(
+  ROLLING_NVBENCH rolling/grouped_range_rolling_sum.cu rolling/grouped_rolling_sum.cpp
+  rolling/range_rolling_sum.cu rolling/rolling_sum.cpp
+)
 
 add_custom_target(
   run_benchmarks

cpp/benchmarks/io/parquet/parquet_reader_input.cpp
+5

@@ -121,6 +121,10 @@ void BM_parquet_read_long_strings(nvbench::state& state)
     cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, profile);  // THIS
   auto const view = tbl->view();
 
+  // set smaller threshold to reduce file size and execution time
+  auto const threshold = 1;
+  setenv("LIBCUDF_LARGE_STRINGS_THRESHOLD", std::to_string(threshold).c_str(), 1);
+
   cudf::io::parquet_writer_options write_opts =
     cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
       .compression(compression);
@@ -129,6 +133,7 @@ void BM_parquet_read_long_strings(nvbench::state& state)
   }();
 
   parquet_read_common(num_rows_written, num_cols, source_sink, state);
+  unsetenv("LIBCUDF_LARGE_STRINGS_THRESHOLD");
 }
 
 template <data_type DataType>
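
The benchmark temporarily lowers LIBCUDF_LARGE_STRINGS_THRESHOLD via setenv and clears it with unsetenv once the read completes. A Python sketch (illustrative, not from the commit) of the same set-then-restore pattern, wrapped in a context manager so the restore also runs on error:

import os
from contextlib import contextmanager

@contextmanager
def temp_env(name, value):
    old = os.environ.get(name)
    os.environ[name] = value  # analogous to setenv(name, value, 1)
    try:
        yield
    finally:
        if old is None:
            del os.environ[name]  # analogous to unsetenv(name)
        else:
            os.environ[name] = old

# The variable name comes from the diff; the body is a placeholder.
with temp_env("LIBCUDF_LARGE_STRINGS_THRESHOLD", "1"):
    pass  # run the benchmark here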
