diff --git a/ci/run_cudf_polars_polars_tests.sh b/ci/run_cudf_polars_polars_tests.sh index 52a827af94c..95f78f17f2f 100755 --- a/ci/run_cudf_polars_polars_tests.sh +++ b/ci/run_cudf_polars_polars_tests.sh @@ -21,7 +21,7 @@ python -m pytest \ -m "" \ -p cudf_polars.testing.plugin \ -v \ - --tb=short \ + --tb=native \ ${DESELECTED_TESTS} \ "$@" \ py-polars/tests diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh index 6c728a9537f..bfc8fd37565 100755 --- a/ci/test_cudf_polars_polars_tests.sh +++ b/ci/test_cudf_polars_polars_tests.sh @@ -33,8 +33,7 @@ python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl rapids-logger "Install cudf_polars" python -m pip install $(echo ./dist/cudf_polars*.whl) -# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")') -TAG="py-1.7.0" +TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")') rapids-logger "Clone polars to ${TAG}" git clone https://github.com/pola-rs/polars.git --branch ${TAG} --depth 1 diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh index a36e8734adc..05f882a475b 100755 --- a/ci/test_wheel_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -39,7 +39,7 @@ if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then | tee ./constraints.txt fi -# echo to expand wildcard before adding `[extra]` requires for pip +# echo to expand wildcard before adding `[test]` requires for pip python -m pip install \ -v \ --constraint ./constraints.txt \ @@ -47,9 +47,6 @@ python -m pip install \ "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" -rapids-logger "Pin to 1.7.0 Temporarily" -python -m pip install polars==1.7.0 - rapids-logger "Run cudf_polars tests" function set_exitcode() diff --git a/dependencies.yaml b/dependencies.yaml index 9c95b9f399f..911c443d294 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -663,7 +663,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.6 + - polars>=1.8,<1.9 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/cudf_polars/__init__.py b/python/cudf_polars/cudf_polars/__init__.py index c1317e8f467..66c15f694ee 100644 --- a/python/cudf_polars/cudf_polars/__init__.py +++ b/python/cudf_polars/cudf_polars/__init__.py @@ -10,13 +10,15 @@ from __future__ import annotations -# Check we have a supported polars version -import cudf_polars.utils.versions as v from cudf_polars._version import __git_commit__, __version__ from cudf_polars.callback import execute_with_cudf from cudf_polars.dsl.translate import translate_ir -del v +# Check we have a supported polars version +from cudf_polars.utils.versions import _ensure_polars_version + +_ensure_polars_version() +del _ensure_polars_version __all__: list[str] = [ "execute_with_cudf", diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 45881afe0c8..a0291037f01 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -93,14 +93,6 @@ def _( cloud_options = None else: reader_options, cloud_options = map(json.loads, options) - if ( - typ == "csv" - and visitor.version()[0] == 1 - and reader_options["schema"] is not None - ): - reader_options["schema"] = { - "fields": reader_options["schema"]["inner"] - } # pragma: no cover; CI tests 1.7 file_options = node.file_options with_columns = file_options.with_columns n_rows = file_options.n_rows diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index a79d45899cd..7b6f3848fc4 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -164,9 +164,11 @@ def assert_collect_raises( cudf-polars. Useful for controlling optimization settings. polars_except - Exception or exceptions polars CPU is expected to raise. + Exception or exceptions polars CPU is expected to raise. If + None, CPU is not expected to raise an exception. cudf_except - Exception or exceptions polars GPU is expected to raise. + Exception or exceptions polars GPU is expected to raise. If + None, GPU is not expected to raise an exception. collect_kwargs Common keyword arguments to pass to collect for both polars CPU and cudf-polars. @@ -203,7 +205,8 @@ def assert_collect_raises( f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}" ) from e else: - raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}") + if polars_except != (): + raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}") engine = GPUEngine(raise_on_fail=True) try: @@ -212,7 +215,8 @@ def assert_collect_raises( pass except Exception as e: raise AssertionError( - f"GPU execution RAISED {type(e)}, EXPECTED {polars_except}" + f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}" ) from e else: - raise AssertionError(f"GPU execution DID NOT RAISE {polars_except}") + if cudf_except != (): + raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}") diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index c40d59e6d33..05b76d76808 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -49,11 +49,15 @@ def pytest_configure(config: pytest.Config): "tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU", "tests/unit/io/test_lazy_count_star.py::test_count_compressed_csv_18057": "Need to determine if file is compressed", "tests/unit/io/test_lazy_csv.py::test_scan_csv_slice_offset_zero": "Integer overflow in sliced read", + "tests/unit/io/test_lazy_parquet.py::test_dsl2ir_cached_metadata[False]": "cudf-polars doesn't use metadata read by rust preprocessing", "tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match", "tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match", "tests/unit/io/test_lazy_parquet.py::test_parquet_different_schema[False]": "Needs cudf#16394", "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_mismatch_panic_17067[False]": "Needs cudf#16394", "tests/unit/io/test_lazy_parquet.py::test_parquet_slice_pushdown_non_zero_offset[False]": "Thrift data not handled correctly/slice pushdown wrong?", + "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read[False]": "Incomplete handling of projected reads with mismatching schemas, cudf#16394", + "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_dtype_mismatch[False]": "Different exception raised, but correctly raises an exception", + "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_missing_cols_from_first[False]": "Different exception raised, but correctly raises an exception", "tests/unit/io/test_parquet.py::test_read_parquet_only_loads_selected_columns_15098": "Memory usage won't be correct due to GPU", "tests/unit/io/test_scan.py::test_scan[single-csv-async]": "Debug output on stderr doesn't match", "tests/unit/io/test_scan.py::test_scan_with_limit[single-csv-async]": "Debug output on stderr doesn't match", diff --git a/python/cudf_polars/cudf_polars/utils/versions.py b/python/cudf_polars/cudf_polars/utils/versions.py index 2e6efde968c..4a7ad6b3cf2 100644 --- a/python/cudf_polars/cudf_polars/utils/versions.py +++ b/python/cudf_polars/cudf_polars/utils/versions.py @@ -12,11 +12,11 @@ POLARS_VERSION = parse(__version__) -POLARS_VERSION_GE_16 = POLARS_VERSION >= parse("1.6") -POLARS_VERSION_GT_16 = POLARS_VERSION > parse("1.6") -POLARS_VERSION_LT_16 = POLARS_VERSION < parse("1.6") - -if POLARS_VERSION_LT_16: - raise ImportError( - "cudf_polars requires py-polars v1.6 or greater." - ) # pragma: no cover +POLARS_VERSION_LT_18 = POLARS_VERSION < parse("1.8") + + +def _ensure_polars_version(): + if POLARS_VERSION_LT_18: + raise ImportError( + "cudf_polars requires py-polars v1.8 or greater." + ) # pragma: no cover diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index 6f996e0e0ec..74bf8b9e4e2 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -168,7 +168,11 @@ def test_groupby_nan_minmax_raises(op): "expr", [ pl.lit(1).alias("value"), - pl.lit([[4, 5, 6]]).alias("value"), + pytest.param( + pl.lit([[4, 5, 6]]).alias("value"), + marks=pytest.mark.xfail(reason="Need to expose OtherScalar in rust IR"), + ), + pl.Series("value", [[4, 5, 6]], dtype=pl.List(pl.Int32)), pl.col("float") * (1 - pl.col("int")), [pl.lit(2).alias("value"), pl.col("float") * 2], ], diff --git a/python/cudf_polars/tests/testing/test_asserts.py b/python/cudf_polars/tests/testing/test_asserts.py index 8e7f1a09d9b..ace1c6b8648 100644 --- a/python/cudf_polars/tests/testing/test_asserts.py +++ b/python/cudf_polars/tests/testing/test_asserts.py @@ -7,8 +7,6 @@ import polars as pl -from cudf_polars.containers import DataFrame -from cudf_polars.dsl.ir import Select from cudf_polars.testing.asserts import ( assert_collect_raises, assert_gpu_result_equal, @@ -38,14 +36,24 @@ class E(Exception): assert_ir_translation_raises(unsupported, E) -def test_collect_assert_raises(monkeypatch): +def test_collect_assert_raises(): df = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) - with pytest.raises(AssertionError): - # This should raise, because polars CPU can run this query + with pytest.raises(AssertionError, match="CPU execution DID NOT RAISE"): + # This should raise, because polars CPU can run this query, + # but we expect an error. assert_collect_raises( df, polars_except=pl.exceptions.InvalidOperationError, + cudf_except=(), + ) + + with pytest.raises(AssertionError, match="GPU execution DID NOT RAISE"): + # This should raise, because polars GPU can run this query, + # but we expect an error. + assert_collect_raises( + df, + polars_except=(), cudf_except=pl.exceptions.InvalidOperationError, ) @@ -60,7 +68,7 @@ def test_collect_assert_raises(monkeypatch): cudf_except=pl.exceptions.InvalidOperationError, ) - with pytest.raises(AssertionError): + with pytest.raises(AssertionError, match="GPU execution RAISED"): # This should raise because the expected GPU error is wrong assert_collect_raises( q, @@ -68,23 +76,10 @@ def test_collect_assert_raises(monkeypatch): cudf_except=NotImplementedError, ) - with pytest.raises(AssertionError): + with pytest.raises(AssertionError, match="CPU execution RAISED"): # This should raise because the expected CPU error is wrong assert_collect_raises( q, polars_except=NotImplementedError, cudf_except=pl.exceptions.InvalidOperationError, ) - - with monkeypatch.context() as m: - m.setattr(Select, "evaluate", lambda self, cache: DataFrame([])) - # This query should fail, but we monkeypatch a bad - # implementation of Select which "succeeds" to check that our - # assertion notices this case. - q = df.select(pl.col("a") + pl.Series([1, 2])) - with pytest.raises(AssertionError): - assert_collect_raises( - q, - polars_except=pl.exceptions.ComputeError, - cudf_except=pl.exceptions.ComputeError, - )