From d97292c2ed91f9e4c87a0936dc7df86a471fefcd Mon Sep 17 00:00:00 2001 From: pfackeldey Date: Fri, 13 Dec 2024 12:23:06 -0500 Subject: [PATCH] simplify loop to populate touched columns from all_layers --- pyproject.toml | 128 +++++++++++++++---------------- src/dask_awkward/lib/optimize.py | 12 +-- 2 files changed, 63 insertions(+), 77 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ff4722bd..f37d7535 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,40 +6,40 @@ build-backend = "hatchling.build" name = "dask-awkward" description = "Awkward Array meets Dask" readme = "README.md" -license = {text = "BSD-3-Clause"} +license = { text = "BSD-3-Clause" } requires-python = ">=3.8" authors = [ - { name = "Doug Davis", email = "ddavis@ddavis.io" }, - { name = "Martin Durant", email = "mdurant@anaconda.com" }, + { name = "Doug Davis", email = "ddavis@ddavis.io" }, + { name = "Martin Durant", email = "mdurant@anaconda.com" }, ] maintainers = [ - { name = "Doug Davis", email = "ddavis@ddavis.io" }, - { name = "Martin Durant", email = "mdurant@anaconda.com" }, + { name = "Doug Davis", email = "ddavis@ddavis.io" }, + { name = "Martin Durant", email = "mdurant@anaconda.com" }, ] classifiers = [ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Information Technology", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Scientific/Engineering :: Physics", - "Topic :: Software Development", + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Physics", + "Topic :: Software Development", ] dependencies = [ - "awkward >=2.5.1", - "dask >=2023.04.0", - "cachetools", - "typing_extensions >=4.8.0", + "awkward >=2.6.7", + "dask >=2023.04.0", + "cachetools", + "typing_extensions >=4.8.0", ] dynamic = ["version"] @@ -48,33 +48,29 @@ Homepage = "https://github.com/dask-contrib/dask-awkward" "Bug Tracker" = "https://github.com/dask-contrib/dask-awkward/issues" [project.optional-dependencies] -io = [ - "pyarrow", -] -complete = [ - "dask-awkward[io]", -] +io = ["pyarrow"] +complete = ["dask-awkward[io]"] # `docs` and `test` are separate from user installs docs = [ - "dask-awkward[complete]", - "sphinx-book-theme", - "sphinx-design", - "sphinx-codeautolink", - # broken see PR 451 - # "dask-sphinx-theme", + "dask-awkward[complete]", + "sphinx-book-theme", + "sphinx-design", + "sphinx-codeautolink", + # broken see PR 451 + # "dask-sphinx-theme", ] test = [ - "aiohttp;python_version<\"3.12\"", - "dask[dataframe]", - "dask-awkward[complete]", - "dask-histogram", - "distributed", - "hist", - "pandas", - "pytest >=6.0,<8", - "pytest-cov >=3.0.0", - "requests", - "uproot >=5.1.0", + "aiohttp;python_version<\"3.12\"", + "dask[dataframe]", + "dask-awkward[complete]", + "dask-histogram", + "distributed", + "hist", + "pandas", + "pytest >=6.0,<8", + "pytest-cov >=3.0.0", + "requests", + "uproot >=5.1.0", ] [project.entry-points."dask.sizeof"] @@ -130,7 +126,7 @@ warn_unused_ignores = true warn_unreachable = true [[tool.mypy.overrides]] - module = [ +module = [ "awkward.*", "IPython.*", "fsspec.*", @@ -138,9 +134,9 @@ warn_unreachable = true "tlz.*", "uproot.*", "cloudpickle.*", - "cachetools.*" - ] - ignore_missing_imports = true + "cachetools.*", +] +ignore_missing_imports = true [tool.pyright] include = ["src"] @@ -149,27 +145,27 @@ reportPrivateImportUsage = false [tool.coverage.report] exclude_lines = [ - "pragma: no cover", - "if TYPE_CHECKING:", - "except ImportError:", - "NotImplementedError", - "DaskAwkwardNotImplemented", - "_ipython_key_completions_", - "Only highlevel=True is supported", - "\\.\\.\\.$", + "pragma: no cover", + "if TYPE_CHECKING:", + "except ImportError:", + "NotImplementedError", + "DaskAwkwardNotImplemented", + "_ipython_key_completions_", + "Only highlevel=True is supported", + "\\.\\.\\.$", ] fail_under = 90 show_missing = true [tool.coverage.run] omit = [ - "*/dask_awkward/lib/unproject_layout.py", - "*/tests/test_*.py", - "*/tests/__init__.py", - "*/version.py", + "*/dask_awkward/lib/unproject_layout.py", + "*/tests/test_*.py", + "*/tests/__init__.py", + "*/version.py", ] source = ["src/"] [tool.ruff] lint.ignore = ["E501", "E402"] -lint.per-file-ignores = {"__init__.py" = ["E402", "F401"]} +lint.per-file-ignores = { "__init__.py" = ["E402", "F401"] } diff --git a/src/dask_awkward/lib/optimize.py b/src/dask_awkward/lib/optimize.py index 6922c3e4..7c28ce56 100644 --- a/src/dask_awkward/lib/optimize.py +++ b/src/dask_awkward/lib/optimize.py @@ -146,17 +146,7 @@ def _optimize_columns(dsk, all_layers): if not rep: continue rep = first(rep) # each meta of an IO layer should have just one report - cols = set() - # this loop not required after next ak release - for ln in all_layers: - try: - cols.update(rep.data_touched_in((ln,))) - except KeyError: - pass - try: - cols.update(rep.shape_touched_in((ln,))) - except KeyError: - pass + cols = rep.data_touched_in(all_layers) if cols: yield k, lay, cols