From 056ca24fe1cb87785dcb4539f650de9e5cbcba6a Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 19 Dec 2024 16:48:16 +0000 Subject: [PATCH 1/9] [WIP] Upgrade Polars version to 1.17 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf-polars/meta.yaml | 2 +- dependencies.yaml | 2 +- python/cudf_polars/pyproject.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 33fc2f651c6..b1c4fe5298c 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -66,7 +66,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.15 +- polars>=1.11,<1.17 - pre-commit - ptxcompiler - pyarrow>=14.0.0,<19.0.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index c290a83a37f..425553dda8d 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -64,7 +64,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.15 +- polars>=1.11,<1.17 - pre-commit - pyarrow>=14.0.0,<19.0.0a0 - pydata-sphinx-theme!=0.14.2 diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml index b6c03dc1bc2..79d62e6f041 100644 --- a/conda/recipes/cudf-polars/meta.yaml +++ b/conda/recipes/cudf-polars/meta.yaml @@ -43,7 +43,7 @@ requirements: run: - python - pylibcudf ={{ version }} - - polars >=1.11,<1.15 + - polars >=1.11,<1.17 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: diff --git a/dependencies.yaml b/dependencies.yaml index 7a83efc6e3d..3558157cb47 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -747,7 +747,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.11,<1.15 + - polars>=1.11,<1.17 run_cudf_polars_experimental: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index b781b13ec10..dadc9440541 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "polars>=1.11,<1.15", + "polars>=1.11,<1.17", "pylibcudf==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ From b952e158f32820435377bb5c7db94fc660bba814 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 19 Dec 2024 15:37:59 -0800 Subject: [PATCH 2/9] xfail polars tests --- .../cudf_polars/dsl/expressions/binaryop.py | 6 +++++- python/cudf_polars/cudf_polars/testing/plugin.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py index 245bdbefe88..fad74aede95 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py @@ -97,14 +97,18 @@ def do_evaluate( ) lop = left.obj rop = right.obj + print(left, right) + print(lop, rop) if left.obj.size() != right.obj.size(): if left.is_scalar: lop = left.obj_scalar elif right.is_scalar: rop = right.obj_scalar - return Column( + res = Column( plc.binaryop.binary_operation(lop, rop, self.op, self.dtype), ) + print(res) + return res def collect_agg(self, *, depth: int) -> AggInfo: """Collect information about aggregations in groupbys.""" diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 7a759eea2e9..0285c1b7b07 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -140,6 +140,22 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype", From 0d702fed6c8bb14044c638b3747cacd0b82e9cc7 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 19 Dec 2024 15:45:32 -0800 Subject: [PATCH 3/9] clean up --- python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py index fad74aede95..245bdbefe88 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py @@ -97,18 +97,14 @@ def do_evaluate( ) lop = left.obj rop = right.obj - print(left, right) - print(lop, rop) if left.obj.size() != right.obj.size(): if left.is_scalar: lop = left.obj_scalar elif right.is_scalar: rop = right.obj_scalar - res = Column( + return Column( plc.binaryop.binary_operation(lop, rop, self.op, self.dtype), ) - print(res) - return res def collect_agg(self, *, depth: int) -> AggInfo: """Collect information about aggregations in groupbys.""" From e07cef8b41f8f04166e89b68607ec5246171c749 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri, 20 Dec 2024 07:46:03 -0500 Subject: [PATCH 4/9] Update all_cuda-118_arch-x86_64.yaml --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index b1c4fe5298c..694ab9a2b92 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -66,7 +66,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.17 +- polars>=1.11,<1.18 - pre-commit - ptxcompiler - pyarrow>=14.0.0,<19.0.0a0 From 300574517a50051b2d649d83772db70693dd813a Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri, 20 Dec 2024 07:46:09 -0500 Subject: [PATCH 5/9] Update all_cuda-125_arch-x86_64.yaml --- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 425553dda8d..53bfa73fa4e 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -64,7 +64,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.17 +- polars>=1.11,<1.18 - pre-commit - pyarrow>=14.0.0,<19.0.0a0 - pydata-sphinx-theme!=0.14.2 From 26957355e2f4d5f4b901648a8a7c56e3b28b4343 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri, 20 Dec 2024 07:46:16 -0500 Subject: [PATCH 6/9] Update meta.yaml --- conda/recipes/cudf-polars/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml index 79d62e6f041..9722e529b59 100644 --- a/conda/recipes/cudf-polars/meta.yaml +++ b/conda/recipes/cudf-polars/meta.yaml @@ -43,7 +43,7 @@ requirements: run: - python - pylibcudf ={{ version }} - - polars >=1.11,<1.17 + - polars >=1.11,<1.18 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: From 01007fb4eb1d3f781ea0fa3beb60fcc12d3fd592 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri, 20 Dec 2024 07:46:21 -0500 Subject: [PATCH 7/9] Update dependencies.yaml --- dependencies.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index 3558157cb47..47ac6c7c9cf 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -747,7 +747,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.11,<1.17 + - polars>=1.11,<1.18 run_cudf_polars_experimental: common: - output_types: [conda, requirements, pyproject] From e01aa2eaf394e6b54e8d5ebfd0487e8c45a2359d Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri, 20 Dec 2024 07:46:28 -0500 Subject: [PATCH 8/9] Update pyproject.toml --- python/cudf_polars/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index dadc9440541..2b42a0fdaad 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "polars>=1.11,<1.17", + "polars>=1.11,<1.18", "pylibcudf==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ From 7238d1647768501646be9b63d1456278b9a2352e Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 20 Dec 2024 12:09:55 -0800 Subject: [PATCH 9/9] update ir --- python/cudf_polars/cudf_polars/dsl/ir.py | 38 ++++++++++++++++--- .../cudf_polars/cudf_polars/dsl/translate.py | 2 +- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 1c1d4860eec..72539de37a7 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -34,9 +34,11 @@ from cudf_polars.utils.versions import POLARS_VERSION_GT_112 if TYPE_CHECKING: - from collections.abc import Callable, Hashable, MutableMapping, Sequence + from collections.abc import Callable, Hashable, Iterable, MutableMapping, Sequence from typing import Literal + from polars.polars import _expr_nodes as pl_expr + from cudf_polars.typing import Schema @@ -1019,7 +1021,27 @@ class ConditionalJoin(IR): __slots__ = ("ast_predicate", "options", "predicate") _non_child = ("schema", "predicate", "options") predicate: expr.Expr - options: tuple + """Expression predicate to join on""" + options: tuple[ + tuple[ + str, + pl_expr.Operator | Iterable[pl_expr.Operator], + ], + bool, + tuple[int, int] | None, + str, + bool, + Literal["none", "left", "right", "left_right", "right_left"], + ] + """ + tuple of options: + - predicates: tuple of ir join type (eg. ie_join) and (In)Equality conditions + - join_nulls: do nulls compare equal? + - slice: optional slice to perform after joining. + - suffix: string suffix for right columns if names match + - coalesce: should key columns be coalesced (only makes sense for outer joins) + - maintain_order: which DataFrame row order to preserve, if any + """ def __init__( self, schema: Schema, predicate: expr.Expr, options: tuple, left: IR, right: IR @@ -1029,15 +1051,16 @@ def __init__( self.options = options self.children = (left, right) self.ast_predicate = to_ast(predicate) - _, join_nulls, zlice, suffix, coalesce = self.options + _, join_nulls, zlice, suffix, coalesce, maintain_order = self.options # Preconditions from polars assert not join_nulls assert not coalesce + assert maintain_order == "none" if self.ast_predicate is None: raise NotImplementedError( f"Conditional join with predicate {predicate}" ) # pragma: no cover; polars never delivers expressions we can't handle - self._non_child_args = (self.ast_predicate, zlice, suffix) + self._non_child_args = (self.ast_predicate, zlice, suffix, maintain_order) @classmethod def do_evaluate( @@ -1045,6 +1068,7 @@ def do_evaluate( predicate: plc.expressions.Expression, zlice: tuple[int, int] | None, suffix: str, + maintain_order: Literal["none", "left", "right", "left_right", "right_left"], left: DataFrame, right: DataFrame, ) -> DataFrame: @@ -1088,6 +1112,7 @@ class Join(IR): tuple[int, int] | None, str, bool, + Literal["none", "left", "right", "left_right", "right_left"], ] """ tuple of options: @@ -1096,6 +1121,7 @@ class Join(IR): - slice: optional slice to perform after joining. - suffix: string suffix for right columns if names match - coalesce: should key columns be coalesced (only makes sense for outer joins) + - maintain_order: which DataFrame row order to preserve, if any """ def __init__( @@ -1222,12 +1248,14 @@ def do_evaluate( tuple[int, int] | None, str, bool, + Literal["none", "left", "right", "left_right", "right_left"], ], left: DataFrame, right: DataFrame, ) -> DataFrame: """Evaluate and return a dataframe.""" - how, join_nulls, zlice, suffix, coalesce = options + how, join_nulls, zlice, suffix, coalesce, maintain_order = options + # TODO: Implement maintain_order param if how == "cross": # Separate implementation, since cross_join returns the # result, not the gather maps diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 37cf36dc4dd..51fd3223c5b 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -84,7 +84,7 @@ def translate_ir(self, *, n: int | None = None) -> ir.IR: # IR is versioned with major.minor, minor is bumped for backwards # compatible changes (e.g. adding new nodes), major is bumped for # incompatible changes (e.g. renaming nodes). - if (version := self.visitor.version()) >= (4, 0): + if (version := self.visitor.version()) >= (4, 3): e = NotImplementedError( f"No support for polars IR {version=}" ) # pragma: no cover; no such version for now.