From b46633f31ddb79d712c6115639660bf157ca4515 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 21:21:58 +0000 Subject: [PATCH 01/21] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.3 → v0.4.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.3...v0.4.4) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d2dd579f..4fcbb07b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.3 + rev: v0.4.4 hooks: - id: ruff From 9fb921abfd6c937f5c2870029cf2361fe7f7bd41 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 15 May 2024 09:34:09 -0400 Subject: [PATCH 02/21] Implement dak.nan_to_num --- src/dask_awkward/lib/structure.py | 24 +++++++++--------------- tests/test_structure.py | 9 +++++++++ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index 6b9fafad..ae5f104f 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -600,26 +600,20 @@ def mask( @borrow_docstring(ak.nan_to_num) def nan_to_num( array: Array, - copy: bool = True, nan: float = 0.0, posinf: Any | None = None, neginf: Any | None = None, - highlevel: bool = True, behavior: Any | None = None, - attrs: Mapping[str, Any] | None = None, ) -> Array: - # return map_partitions( - # ak.nan_to_num, - # array, - # output_partitions=1, - # copy=copy, - # nan=nan, - # posinf=posinf, - # neginf=neginf, - # highlevel=highlevel, - # behavior=behavior, - # ) - raise DaskAwkwardNotImplemented("TODO") + return map_partitions( + ak.nan_to_num, + array, + nan=nan, + posinf=posinf, + neginf=neginf, + highlevel=True, + behavior=behavior, + ) def _numaxis0(*integers): diff --git a/tests/test_structure.py b/tests/test_structure.py index 180e922e..d196ee35 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -157,6 +157,15 @@ def test_drop_none(axis: int) -> None: assert_eq(d, e) +def test_nan_to_num(): + a = [[1, 2, np.nan], [], [np.nan], [5, 6, 7, np.nan], [1, 2], np.nan] + b = [[np.nan, 2, 1], [np.nan], [], np.nan, [7, 6, np.nan, 5], [np.nan, np.nan]] + c = dak.from_lists([a, b]) + d = dak.nan_to_num(c, nan=5) + e = ak.nan_to_num(ak.from_iter(a + b), nan=5) + assert_eq(d, e) + + @pytest.mark.parametrize("axis", [0, 1, -1]) def test_is_none(axis: int) -> None: a: list[Any] = [[1, 2, None], None, None, [], [None], [5, 6, 7, None], [1, 2], None] From 77b7ea1824c9f7b1eeef2c38ba336aa91e64ba5e Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 15 May 2024 09:43:38 -0400 Subject: [PATCH 03/21] lint --- tests/test_optimize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_optimize.py b/tests/test_optimize.py index 7f67d836..80fb975f 100644 --- a/tests/test_optimize.py +++ b/tests/test_optimize.py @@ -28,7 +28,7 @@ def test_multiple_computes(ndjson_points_file: str) -> None: assert len(things3[1]) < len(things3[0]) things = dask.compute(ds1.points, ds2.points.x, ds2.points.y, ds1.points.y, ds3) - assert things[-1].tolist() == ak.Array(lists[0] + lists[1]).tolist() # type: ignore + assert things[-1].tolist() == ak.Array(lists[0] + lists[1]).tolist() def identity(x): From cc00adda140b96320595057856db92b33b47b166 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 21:21:56 +0000 Subject: [PATCH 04/21] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.4 → v0.4.5](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.4...v0.4.5) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4fcbb07b..4b24a5f5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.4 + rev: v0.4.5 hooks: - id: ruff From a0e75e48744b542b90bf74a6ef2a0d75aadc5c0d Mon Sep 17 00:00:00 2001 From: Iason Krommydas Date: Fri, 31 May 2024 15:22:22 +0200 Subject: [PATCH 05/21] add without_field, untested yet --- src/dask_awkward/__init__.py | 1 + src/dask_awkward/lib/__init__.py | 1 + src/dask_awkward/lib/structure.py | 41 +++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/src/dask_awkward/__init__.py b/src/dask_awkward/__init__.py index 9b205092..34b5c4f5 100644 --- a/src/dask_awkward/__init__.py +++ b/src/dask_awkward/__init__.py @@ -100,6 +100,7 @@ with_field, with_name, with_parameter, + without_field, without_parameters, zeros_like, zip, diff --git a/src/dask_awkward/lib/__init__.py b/src/dask_awkward/lib/__init__.py index 90835ff7..74d16d6c 100644 --- a/src/dask_awkward/lib/__init__.py +++ b/src/dask_awkward/lib/__init__.py @@ -86,6 +86,7 @@ with_field, with_name, with_parameter, + without_field, without_parameters, zeros_like, zip, diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index ae5f104f..d7e52ed1 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -70,6 +70,7 @@ "values_astype", "where", "with_field", + "without_field", "with_name", "with_parameter", "without_parameters", @@ -1083,6 +1084,46 @@ def with_field( ) +class _WithoutFieldFn: + def __init__( + self, + highlevel: bool, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, + ) -> None: + self.highlevel = highlevel + self.behavior = behavior + self.attrs = attrs + + def __call__(self, array: ak.Array, where: str) -> ak.Array: + return ak.without_field( + array, where=where, behavior=self.behavior, attrs=self.attrs + ) + + +@borrow_docstring(ak.without_field) +def without_field( + base: Array, + where: str, + highlevel: bool = True, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, +) -> Array: + if not highlevel: + raise ValueError("Only highlevel=True is supported") + + if not isinstance(base, Array): + raise ValueError("Base argument in without_field must be a dask_awkward.Array") + + return map_partitions( + _WithoutFieldFn(highlevel=highlevel, behavior=behavior, attrs=attrs), + base, + where, + label="without-field", + output_divisions=1, + ) + + class _WithNameFn: def __init__( self, From 3a38bf614981831ed11b3d1eb8404201318fcd46 Mon Sep 17 00:00:00 2001 From: Iason Krommydas Date: Fri, 31 May 2024 15:51:14 +0200 Subject: [PATCH 06/21] some testing --- tests/test_structure.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/tests/test_structure.py b/tests/test_structure.py index d196ee35..f602e11e 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -212,20 +212,20 @@ def test_pad_none(axis: int, target: int) -> None: def test_with_field(caa: ak.Array, daa: dak.Array) -> None: - assert_eq( - ak.with_field(caa["points"], caa["points"]["x"], where="xx"), - dak.with_field(daa["points"], daa["points"]["x"], where="xx"), - ) + new_caa = ak.with_field(caa["points"], caa["points"]["x"], where="xx") + new_daa = dak.with_field(daa["points"], daa["points"]["x"], where="xx") + assert_eq(new_caa, new_daa) + assert_eq(ak.without_field(new_caa, "xx"), ak.without_field(new_daa, "xx")) - assert_eq( - ak.with_field(caa["points"], 1, where="xx"), - dak.with_field(daa["points"], 1, where="xx"), - ) + new_caa = ak.with_field(caa["points"], 1, where="xx") + new_daa = dak.with_field(daa["points"], 1, where="xx") + assert_eq(new_caa, new_daa) + assert_eq(ak.without_field(new_caa, "xx"), ak.without_field(new_daa, "xx")) - assert_eq( - ak.with_field(caa["points"], 1.0, where="xx"), - dak.with_field(daa["points"], 1.0, where="xx"), - ) + new_caa = ak.with_field(caa["points"], 1.0, where="xx") + new_daa = dak.with_field(daa["points"], 1.0, where="xx") + assert_eq(new_caa, new_daa) + assert_eq(ak.without_field(new_caa, "xx"), ak.without_field(new_daa, "xx")) with pytest.raises( ValueError, @@ -233,6 +233,15 @@ def test_with_field(caa: ak.Array, daa: dak.Array) -> None: ): _ = dak.with_field([{"foo": 1.0}, {"foo": 2.0}], daa.points.x, where="x") + with pytest.raises( + ValueError, + match="Base argument in without_field must be a dask_awkward.Array", + ): + _ = dak.without_field( + [{"foo": [1.0, 2.0], "bar": [3.0, 4.0]}], + "bar", + ) + with pytest.raises( ValueError, match="with_field cannot accept string, bytes, list, or dict values yet", From ca3ad7a706b2612af1ff709a92213edc9790507f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 21:35:59 +0000 Subject: [PATCH 07/21] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.5 → v0.4.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.5...v0.4.7) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4b24a5f5..84dde665 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.5 + rev: v0.4.7 hooks: - id: ruff From fde5c808ea6f6135308d5e9b5fb7e42040c64973 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Jun 2024 21:44:22 +0000 Subject: [PATCH 08/21] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.7 → v0.4.8](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.7...v0.4.8) - [github.com/asottile/pyupgrade: v3.15.2 → v3.16.0](https://github.com/asottile/pyupgrade/compare/v3.15.2...v3.16.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 84dde665..cf0308b5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.7 + rev: v0.4.8 hooks: - id: ruff @@ -35,7 +35,7 @@ repos: language_version: python3 - repo: https://github.com/asottile/pyupgrade - rev: v3.15.2 + rev: v3.16.0 hooks: - id: pyupgrade args: From 21eca160a5bf4032167bd973e9789420266b6c70 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 12 Jun 2024 21:30:50 -0400 Subject: [PATCH 09/21] Make simple types of repartition --- src/dask_awkward/lib/core.py | 84 ++++++++++++++++++++++++------- src/dask_awkward/lib/structure.py | 43 ++++++++++++++++ tests/test_structure.py | 6 +++ 3 files changed, 114 insertions(+), 19 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 40898b11..204ccc3f 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -942,29 +942,75 @@ def repartition( npartitions: int | None = None, divisions: tuple[int, ...] | None = None, rows_per_partition: int | None = None, + one_to_n: int | None = None, + n_to_one: int | None = None, ) -> Array: + """Restructure the partitioning of the whole array + + Various schemes are possible, with one of the mutually exclusive + optional arguments for each. Of these, the first three require + knowledge of the number of rows in each existing partition, which + will be eagerly computed if not already known, and some shuffling of + data between partitions. + + - npartitions: split all the rows as evenly as possible into this + many output partitions. + - divisions: exact row count offsets of each output partition + - rows_per_partition: each partition will have this many rows, + except the last, which will have this number or fewer + - one_to_n: each input partition becomes n output partitions + - n_to_one: every n adjacent input partitions becomes one + output partition. Note that exactly one output partition + (npartitions=1) is a special case of this. + """ from dask_awkward.layers import AwkwardMaterializedLayer - from dask_awkward.lib.structure import repartition_layer + from dask_awkward.lib.structure import ( + repartition_layer, + simple_repartition_layer, + ) - if sum(bool(_) for _ in [npartitions, divisions, rows_per_partition]) != 1: + if ( + sum( + bool(_) + for _ in ( + npartitions, + divisions, + rows_per_partition, + one_to_n, + n_to_one, + ) + ) + != 1 + ): raise ValueError("Please specify exactly one of the inputs") - if not self.known_divisions: - self.eager_compute_divisions() - nrows = self.defined_divisions[-1] - new_divisions: tuple[int, ...] = tuple() - if divisions: - new_divisions = divisions - elif npartitions: - rows_per_partition = math.ceil(nrows / npartitions) - if rows_per_partition: - new_divs = list(range(0, nrows, rows_per_partition)) - new_divs.append(nrows) - new_divisions = tuple(new_divs) - - token = tokenize(self, divisions) - key = f"repartition-{token}" - - new_layer_raw = repartition_layer(self, key, new_divisions) + new_divisions: tuple[int, ...] = () + if npartitions and npartitions == 1: + npartitions, n_to_one = None, self.npartitions + if n_to_one or one_to_n: + token = tokenize(self, n_to_one, one_to_n) + key = f"repartition-{token}" + new_layer_raw, new_divisions = simple_repartition_layer( + self, n_to_one, one_to_n, key + ) + else: + if not self.known_divisions: + self.eager_compute_divisions() + nrows = self.defined_divisions[-1] + if divisions: + if divisions == self.divisions: + # noop + return self + new_divisions = divisions + elif npartitions: + rows_per_partition = math.ceil(nrows / npartitions) + if rows_per_partition: + new_divs = list(range(0, nrows, rows_per_partition)) + new_divs.append(nrows) + new_divisions = tuple(new_divs) + token = tokenize(self, divisions) + key = f"repartition-{token}" + new_layer_raw = repartition_layer(self, key, new_divisions) + new_layer = AwkwardMaterializedLayer( new_layer_raw, previous_layer_names=[self.name], diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index ae5f104f..84b5ea8a 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -1350,6 +1350,49 @@ def repartition_layer(arr: Array, key: str, divisions: tuple[int, ...]) -> dict: return layer +def _subpart(data: ak.Array, parts: int, part: int) -> ak.Array: + from dask_awkward.lib.core import is_typetracer + + if is_typetracer(data): + return data + rows_per = len(data) // parts + return data[ + part * rows_per : None if part == (parts - 1) else (part + 1) * rows_per + ] + + +def _subcat(*arrs: tuple[ak.Array, ...]) -> ak.Array: + return ak.concatenate(arrs) + + +def simple_repartition_layer( + arr: Array, n_to_one: int | None, one_to_n: int | None, key: str +) -> tuple[dict, tuple[Any, ...]]: + layer: dict[tuple[str, int], tuple[Any, ...]] = {} + new_divisions: tuple[Any, ...] + if n_to_one: + for i in range(0, arr.npartitions, n_to_one): + layer[(key, i)] = (_subcat,) + tuple( + (arr.name, part) + for part in range(i, min(i + n_to_one, arr.npartitions)) + ) + new_divisions = arr.divisions[::n_to_one] + elif one_to_n: + for i in range(arr.npartitions): + for part in range(one_to_n): + layer[(key, (i * one_to_n + part))] = ( + _subpart, + (arr.name, i), + one_to_n, + part, + ) + # TODO: if arr.known_divisions: + new_divisions = (None,) * (arr.npartitions * one_to_n + 1) + else: + raise ValueError + return layer, new_divisions + + @borrow_docstring(ak.enforce_type) def enforce_type( array: Array, diff --git a/tests/test_structure.py b/tests/test_structure.py index d196ee35..3c9b1673 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -539,6 +539,12 @@ def test_repartition_whole(daa): assert_eq(daa, daa1, check_divisions=False) +def test_repartition_one_to_n(daa): + daa1 = daa.repartition(one_to_n=2) + assert daa1.npartitions == daa.npartitions * 2 + assert_eq(daa, daa1, check_divisions=False) + + def test_repartition_no_change(daa): daa1 = daa.repartition(divisions=(0, 5, 10, 15)) assert daa1.npartitions == 3 From 3ff54e57f3c4a426d4a4d197d677c4bddd1c3068 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 21:44:28 +0000 Subject: [PATCH 10/21] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.8 → v0.4.9](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.8...v0.4.9) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cf0308b5..4c06f6eb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.8 + rev: v0.4.9 hooks: - id: ruff From c492ac42fee1c17f46501d93305a637649d5a6bf Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 19 Jun 2024 09:29:34 -0400 Subject: [PATCH 11/21] Respect inverse in scalar binop --- src/dask_awkward/lib/core.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 204ccc3f..e52e43af 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -502,9 +502,17 @@ def f(self, other): if is_dask_collection(other): task = (op, self.key, *other.__dask_keys__()) deps.append(other) - plns.append(other.name) + if inv: + plns.insert(0, other.name) + else: + plns.append(other.name) else: - task = (op, self.key, other) + if inv: + task = (op, other, self.key) + else: + task = (op, self.key, other) + if inv: + plns.reverse() graph = HighLevelGraph.from_collections( name, layer=AwkwardMaterializedLayer( @@ -514,10 +522,16 @@ def f(self, other): ), dependencies=tuple(deps), ) - if isinstance(other, Scalar): - meta = op(self._meta, other._meta) + if isinstance(other, (Scalar, Array)): + if inv: + meta = op(other._meta, self._meta) + else: + meta = op(self._meta, other._meta) else: - meta = op(self._meta, other) + if inv: + meta = op(other, self._meta) + else: + meta = op(self._meta, other) return new_scalar_object(graph, name, meta=meta) return f From 46b0a9f2bdeea7d4fc3c789ee753c2128fb6ea70 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 19 Jun 2024 09:34:30 -0400 Subject: [PATCH 12/21] Add test --- tests/test_core.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 369cdbb4..8dd82b9f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -115,7 +115,7 @@ def test_len(ndjson_points_file: str) -> None: assert len(daa) == 10 daa.eager_compute_divisions() assert daa.known_divisions - assert len(daa) == 10 # type: ignore + assert len(daa) == 10 def test_meta_exists(daa: Array) -> None: @@ -157,7 +157,7 @@ def test_partitions_divisions(ndjson_points_file: str) -> None: assert not t1.known_divisions t2 = daa.partitions[1] assert t2.known_divisions - assert t2.divisions == (0, divs[2] - divs[1]) # type: ignore + assert t2.divisions == (0, divs[2] - divs[1]) def test_array_rebuild(ndjson_points_file: str) -> None: @@ -537,7 +537,7 @@ def test_compatible_partitions_after_slice() -> None: assert_eq(lazy, ccrt) # sanity - assert dak.compatible_partitions(lazy, lazy + 2) # type: ignore + assert dak.compatible_partitions(lazy, lazy + 2) assert dak.compatible_partitions(lazy, dak.num(lazy, axis=1) > 2) assert not dak.compatible_partitions(lazy[:-2], lazy) @@ -646,6 +646,14 @@ def test_scalar_divisions(daa: Array) -> None: assert s.divisions == (None, None) +def test_scalar_binop_inv() -> None: + # GH #515 + x = dak.from_lists([[1]]) + y = x[0] # scalar + assert (0 - y) == -1 + assert (y - 0) == 1 + + def test_array_persist(daa: Array) -> None: daa2 = daa["points"]["x"].persist() assert_eq(daa["points"]["x"], daa2) @@ -886,7 +894,7 @@ def test_shape_only_ops(fn: Callable, tmp_path_factory: pytest.TempPathFactory) p = tmp_path_factory.mktemp("zeros-like-flat") ak.to_parquet(a, str(p / "file.parquet")) lazy = dak.from_parquet(str(p)) - result = fn(lazy.b) # type: ignore + result = fn(lazy.b) with dask.config.set({"awkward.optimization.enabled": True}): result.compute() @@ -898,7 +906,7 @@ def test_assign_behavior() -> None: with pytest.raises( TypeError, match="'mappingproxy' object does not support item assignment" ): - dx.behavior["should_fail"] = None # type: ignore + dx.behavior["should_fail"] = None assert dx.behavior == behavior @@ -909,7 +917,7 @@ def test_assign_attrs() -> None: with pytest.raises( TypeError, match="'mappingproxy' object does not support item assignment" ): - dx.attrs["should_fail"] = None # type: ignore + dx.attrs["should_fail"] = None assert dx.attrs == attrs From ca75020a9fddae210bde5422bf6e6d4e5b426dda Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 19 Jun 2024 09:44:04 -0400 Subject: [PATCH 13/21] no mypy for test files --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 56ab6f1e..7527ba14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,7 @@ src_paths = ["src", "tests"] [tool.mypy] python_version = "3.9" -files = ["src", "tests"] +files = ["src"] strict = false warn_unused_configs = true show_error_codes = true From 442d44136a9f56a05197f628c0ff22fac3c96685 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 19 Jun 2024 10:22:45 -0400 Subject: [PATCH 14/21] Don't mypy tests (again) --- .pre-commit-config.yaml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4c06f6eb..226b54f1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -62,6 +62,7 @@ repos: rev: v1.10.0 hooks: - id: mypy + files: "src/" args: [--ignore-missing-imports] additional_dependencies: - dask diff --git a/pyproject.toml b/pyproject.toml index 7527ba14..4bbafc23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,6 +112,7 @@ src_paths = ["src", "tests"] [tool.mypy] python_version = "3.9" files = ["src"] +exclude = ["tests/"] strict = false warn_unused_configs = true show_error_codes = true From 0803b43a1a545d9e383d3c6a7764f75a9d6d930d Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 19 Jun 2024 10:36:03 -0400 Subject: [PATCH 15/21] Fix n_to_one repartition --- src/dask_awkward/lib/structure.py | 9 +++++++-- tests/test_structure.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index 7afba822..af72d527 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -1412,12 +1412,17 @@ def simple_repartition_layer( layer: dict[tuple[str, int], tuple[Any, ...]] = {} new_divisions: tuple[Any, ...] if n_to_one: - for i in range(0, arr.npartitions, n_to_one): - layer[(key, i)] = (_subcat,) + tuple( + for i0, i in enumerate(range(0, arr.npartitions, n_to_one)): + layer[(key, i0)] = (_subcat,) + tuple( (arr.name, part) for part in range(i, min(i + n_to_one, arr.npartitions)) ) new_divisions = arr.divisions[::n_to_one] + if arr.npartitions % n_to_one: + new_divisions = new_divisions + (arr.divisions[-1],) + layer[(key, i0 + 1)] = (_subcat,) + tuple( + (arr.name, part) for part in range(new_divisions[-2], new_divisions[-1]) + ) elif one_to_n: for i in range(arr.npartitions): for part in range(one_to_n): diff --git a/tests/test_structure.py b/tests/test_structure.py index 8e003d97..22a3d500 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -554,6 +554,19 @@ def test_repartition_one_to_n(daa): assert_eq(daa, daa1, check_divisions=False) +def test_repartition_n_to_one(): + daa = dak.from_lists([[[1, 2, 3], [], [4, 5]]] * 52) + daa2 = daa.repartition(n_to_one=52) + assert daa2.npartitions == 1 + assert daa.compute().to_list() == daa2.compute().to_list() + daa2 = daa.repartition(n_to_one=53) + assert daa2.npartitions == 1 + assert daa.compute().to_list() == daa2.compute().to_list() + daa2 = daa.repartition(n_to_one=2) + assert daa2.npartitions == 26 + assert daa.compute().to_list() == daa2.compute().to_list() + + def test_repartition_no_change(daa): daa1 = daa.repartition(divisions=(0, 5, 10, 15)) assert daa1.npartitions == 3 From 5a0af7b9f4f82470c66534f4313e4511960e754b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 19 Jun 2024 10:38:06 -0400 Subject: [PATCH 16/21] test unever case --- tests/test_structure.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_structure.py b/tests/test_structure.py index 22a3d500..9b761339 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -565,6 +565,9 @@ def test_repartition_n_to_one(): daa2 = daa.repartition(n_to_one=2) assert daa2.npartitions == 26 assert daa.compute().to_list() == daa2.compute().to_list() + daa2 = daa.repartition(n_to_one=10) + assert daa2.npartitions == 6 + assert daa.compute().to_list() == daa2.compute().to_list() def test_repartition_no_change(daa): From 188483e80810e28f567888d44a358b68f894792e Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 19 Jun 2024 11:09:42 -0400 Subject: [PATCH 17/21] fix: allow for Nones in repartition n_to_one --- src/dask_awkward/lib/structure.py | 2 +- tests/test_structure.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index af72d527..6b70c8b9 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -1421,7 +1421,7 @@ def simple_repartition_layer( if arr.npartitions % n_to_one: new_divisions = new_divisions + (arr.divisions[-1],) layer[(key, i0 + 1)] = (_subcat,) + tuple( - (arr.name, part) for part in range(new_divisions[-2], new_divisions[-1]) + (arr.name, part0) for part0 in range(len(layer), arr.npartitions) ) elif one_to_n: for i in range(arr.npartitions): diff --git a/tests/test_structure.py b/tests/test_structure.py index 9b761339..9f86b747 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -555,7 +555,7 @@ def test_repartition_one_to_n(daa): def test_repartition_n_to_one(): - daa = dak.from_lists([[[1, 2, 3], [], [4, 5]]] * 52) + daa = dak.from_lists([[[1, 2, 3], [], [4, 5]] * 2] * 52) daa2 = daa.repartition(n_to_one=52) assert daa2.npartitions == 1 assert daa.compute().to_list() == daa2.compute().to_list() @@ -568,6 +568,9 @@ def test_repartition_n_to_one(): daa2 = daa.repartition(n_to_one=10) assert daa2.npartitions == 6 assert daa.compute().to_list() == daa2.compute().to_list() + daa._divisions = (None,) * len(daa.divisions) + assert daa2.npartitions == 6 + assert daa.compute().to_list() == daa2.compute().to_list() def test_repartition_no_change(daa): From 3e3c884a6fdb762f36eb3716629f51132abd2148 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Jun 2024 21:37:05 +0000 Subject: [PATCH 18/21] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.9 → v0.4.10](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.9...v0.4.10) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 226b54f1..4de485d5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.9 + rev: v0.4.10 hooks: - id: ruff From ebe17fd5ddc6204ac4e64b09b63eecafb58ac6f5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 23:17:05 +0000 Subject: [PATCH 19/21] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.10 → v0.5.0](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.10...v0.5.0) - [github.com/adamchainz/blacken-docs: 1.16.0 → 1.18.0](https://github.com/adamchainz/blacken-docs/compare/1.16.0...1.18.0) - [github.com/pre-commit/mirrors-mypy: v1.10.0 → v1.10.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.10.0...v1.10.1) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4de485d5..7984064c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.10 + rev: v0.5.0 hooks: - id: ruff @@ -52,14 +52,14 @@ repos: - id: yesqa - repo: https://github.com/adamchainz/blacken-docs - rev: 1.16.0 + rev: 1.18.0 hooks: - id: blacken-docs additional_dependencies: - black - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.10.1 hooks: - id: mypy files: "src/" From 49bf2d792442cf8bae065271c18022bf3813be37 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Jul 2024 21:16:43 +0000 Subject: [PATCH 20/21] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.0 → v0.5.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.0...v0.5.1) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7984064c..e76d580a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.0 + rev: v0.5.1 hooks: - id: ruff From 77ccde61ed82a037691c5ab48dc3c78fa04dc89d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jul 2024 21:48:38 +0000 Subject: [PATCH 21/21] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.1 → v0.5.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.1...v0.5.2) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e76d580a..b35527a1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.1 + rev: v0.5.2 hooks: - id: ruff