Skip to content

Commit

Permalink
Pin polars for 24.10 and update polars test suite xfail list (rapidsa…
Browse files Browse the repository at this point in the history
…i#16886)

For releases, since the polars release cadence is considerably faster than that of RAPIDS, we propose hard-pinning to a known-good version: in this case, 1.8.x.

At the same time, remove the pin in the CI scripts and update the list of xfailing tests in the polars test suite.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - James Lamb (https://github.com/jameslamb)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: rapidsai#16886
  • Loading branch information
wence- authored and Matt711 committed Sep 25, 2024
1 parent cbfff22 commit 8927358
Show file tree
Hide file tree
Showing 11 changed files with 50 additions and 53 deletions.
2 changes: 1 addition & 1 deletion ci/run_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ python -m pytest \
-m "" \
-p cudf_polars.testing.plugin \
-v \
--tb=short \
--tb=native \
${DESELECTED_TESTS} \
"$@" \
py-polars/tests
3 changes: 1 addition & 2 deletions ci/test_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl
rapids-logger "Install cudf_polars"
python -m pip install $(echo ./dist/cudf_polars*.whl)

# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
TAG="py-1.7.0"
TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
rapids-logger "Clone polars to ${TAG}"
git clone https://github.com/pola-rs/polars.git --branch ${TAG} --depth 1

Expand Down
5 changes: 1 addition & 4 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,14 @@ if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
# echo to expand wildcard before adding `[test]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"

rapids-logger "Pin to 1.7.0 Temporarily"
python -m pip install polars==1.7.0

rapids-logger "Run cudf_polars tests"

function set_exitcode()
Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=1.6
- polars>=1.8,<1.9
run_dask_cudf:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
8 changes: 5 additions & 3 deletions python/cudf_polars/cudf_polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@

from __future__ import annotations

# Check we have a supported polars version
import cudf_polars.utils.versions as v
from cudf_polars._version import __git_commit__, __version__
from cudf_polars.callback import execute_with_cudf
from cudf_polars.dsl.translate import translate_ir

del v
# Check we have a supported polars version
from cudf_polars.utils.versions import _ensure_polars_version

_ensure_polars_version()
del _ensure_polars_version

__all__: list[str] = [
"execute_with_cudf",
Expand Down
8 changes: 0 additions & 8 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,6 @@ def _(
cloud_options = None
else:
reader_options, cloud_options = map(json.loads, options)
if (
typ == "csv"
and visitor.version()[0] == 1
and reader_options["schema"] is not None
):
reader_options["schema"] = {
"fields": reader_options["schema"]["inner"]
} # pragma: no cover; CI tests 1.7
file_options = node.file_options
with_columns = file_options.with_columns
n_rows = file_options.n_rows
Expand Down
14 changes: 9 additions & 5 deletions python/cudf_polars/cudf_polars/testing/asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,11 @@ def assert_collect_raises(
cudf-polars.
Useful for controlling optimization settings.
polars_except
Exception or exceptions polars CPU is expected to raise.
Exception or exceptions polars CPU is expected to raise. If
None, CPU is not expected to raise an exception.
cudf_except
Exception or exceptions polars GPU is expected to raise.
Exception or exceptions polars GPU is expected to raise. If
None, GPU is not expected to raise an exception.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Expand Down Expand Up @@ -203,7 +205,8 @@ def assert_collect_raises(
f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}"
) from e
else:
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")
if polars_except != ():
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")

engine = GPUEngine(raise_on_fail=True)
try:
Expand All @@ -212,7 +215,8 @@ def assert_collect_raises(
pass
except Exception as e:
raise AssertionError(
f"GPU execution RAISED {type(e)}, EXPECTED {polars_except}"
f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}"
) from e
else:
raise AssertionError(f"GPU execution DID NOT RAISE {polars_except}")
if cudf_except != ():
raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}")
4 changes: 4 additions & 0 deletions python/cudf_polars/cudf_polars/testing/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,15 @@ def pytest_configure(config: pytest.Config):
"tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
"tests/unit/io/test_lazy_count_star.py::test_count_compressed_csv_18057": "Need to determine if file is compressed",
"tests/unit/io/test_lazy_csv.py::test_scan_csv_slice_offset_zero": "Integer overflow in sliced read",
"tests/unit/io/test_lazy_parquet.py::test_dsl2ir_cached_metadata[False]": "cudf-polars doesn't use metadata read by rust preprocessing",
"tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match",
"tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match",
"tests/unit/io/test_lazy_parquet.py::test_parquet_different_schema[False]": "Needs cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_schema_mismatch_panic_17067[False]": "Needs cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_slice_pushdown_non_zero_offset[False]": "Thrift data not handled correctly/slice pushdown wrong?",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read[False]": "Incomplete handling of projected reads with mismatching schemas, cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_dtype_mismatch[False]": "Different exception raised, but correctly raises an exception",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_missing_cols_from_first[False]": "Different exception raised, but correctly raises an exception",
"tests/unit/io/test_parquet.py::test_read_parquet_only_loads_selected_columns_15098": "Memory usage won't be correct due to GPU",
"tests/unit/io/test_scan.py::test_scan[single-csv-async]": "Debug output on stderr doesn't match",
"tests/unit/io/test_scan.py::test_scan_with_limit[single-csv-async]": "Debug output on stderr doesn't match",
Expand Down
16 changes: 8 additions & 8 deletions python/cudf_polars/cudf_polars/utils/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@

POLARS_VERSION = parse(__version__)

POLARS_VERSION_GE_16 = POLARS_VERSION >= parse("1.6")
POLARS_VERSION_GT_16 = POLARS_VERSION > parse("1.6")
POLARS_VERSION_LT_16 = POLARS_VERSION < parse("1.6")

if POLARS_VERSION_LT_16:
raise ImportError(
"cudf_polars requires py-polars v1.6 or greater."
) # pragma: no cover
POLARS_VERSION_LT_18 = POLARS_VERSION < parse("1.8")


# Raise ImportError when the installed polars is older than 1.8, as recorded
# in the module-level POLARS_VERSION_LT_18 flag. Returns None otherwise.
def _ensure_polars_version():
if POLARS_VERSION_LT_18:
raise ImportError(
"cudf_polars requires py-polars v1.8 or greater."
) # pragma: no cover
6 changes: 5 additions & 1 deletion python/cudf_polars/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ def test_groupby_nan_minmax_raises(op):
"expr",
[
pl.lit(1).alias("value"),
pl.lit([[4, 5, 6]]).alias("value"),
pytest.param(
pl.lit([[4, 5, 6]]).alias("value"),
marks=pytest.mark.xfail(reason="Need to expose OtherScalar in rust IR"),
),
pl.Series("value", [[4, 5, 6]], dtype=pl.List(pl.Int32)),
pl.col("float") * (1 - pl.col("int")),
[pl.lit(2).alias("value"), pl.col("float") * 2],
],
Expand Down
35 changes: 15 additions & 20 deletions python/cudf_polars/tests/testing/test_asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

import polars as pl

from cudf_polars.containers import DataFrame
from cudf_polars.dsl.ir import Select
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
Expand Down Expand Up @@ -38,14 +36,24 @@ class E(Exception):
assert_ir_translation_raises(unsupported, E)


def test_collect_assert_raises(monkeypatch):
def test_collect_assert_raises():
df = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

with pytest.raises(AssertionError):
# This should raise, because polars CPU can run this query
with pytest.raises(AssertionError, match="CPU execution DID NOT RAISE"):
# This should raise, because polars CPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=(),
)

with pytest.raises(AssertionError, match="GPU execution DID NOT RAISE"):
# This should raise, because polars GPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=(),
cudf_except=pl.exceptions.InvalidOperationError,
)

Expand All @@ -60,31 +68,18 @@ def test_collect_assert_raises(monkeypatch):
cudf_except=pl.exceptions.InvalidOperationError,
)

with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match="GPU execution RAISED"):
# This should raise because the expected GPU error is wrong
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=NotImplementedError,
)

with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match="CPU execution RAISED"):
# This should raise because the expected CPU error is wrong
assert_collect_raises(
q,
polars_except=NotImplementedError,
cudf_except=pl.exceptions.InvalidOperationError,
)

with monkeypatch.context() as m:
m.setattr(Select, "evaluate", lambda self, cache: DataFrame([]))
# This query should fail, but we monkeypatch a bad
# implementation of Select which "succeeds" to check that our
# assertion notices this case.
q = df.select(pl.col("a") + pl.Series([1, 2]))
with pytest.raises(AssertionError):
assert_collect_raises(
q,
polars_except=pl.exceptions.ComputeError,
cudf_except=pl.exceptions.ComputeError,
)

0 comments on commit 8927358

Please sign in to comment.