Skip to content

Commit

Permalink
Avoid clearing cached_plan for every dataset (#286)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Sep 12, 2023
1 parent cd1dfaf commit 753ffbf
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions dask_expr/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@

# Module-level cache of dataset info; entries are popped in
# _control_cached_dataset_info.
_cached_dataset_info = {}
# Presumably the size limit applied to _cached_dataset_info -- confirm against
# _control_cached_dataset_info.
_CACHED_DATASET_SIZE = 10
# TODO: Allow _cached_plan to contain >1 item?
# Size threshold that _control_cached_plan compares len(_cached_plan) against
# before evicting an entry.
_CACHED_PLAN_SIZE = 10
# Module-level cache of plans; _plan stores a dict (with "func", "parts", ...)
# under each dataset token.
_cached_plan = {}


Expand All @@ -65,6 +65,12 @@ def _control_cached_dataset_info(key):
_cached_dataset_info.pop(key_to_pop)


def _control_cached_plan(key):
    """Make room in ``_cached_plan`` before ``key`` is inserted.

    If ``key`` is already cached nothing is evicted, because the caller will
    overwrite the existing entry and the cache will not grow. Otherwise, when
    the cache already holds ``_CACHED_PLAN_SIZE`` entries, the entry that was
    inserted first is dropped so the cache never exceeds the limit once the
    caller adds ``key``.

    Parameters
    ----------
    key
        The cache key that is about to be written to ``_cached_plan``.

    Returns
    -------
    None
        ``_cached_plan`` is modified in place.
    """
    if key in _cached_plan:
        return
    # ``>=`` (not ``>``): the caller inserts one entry right after this call,
    # so evicting only once the limit is exceeded would let the cache reach
    # ``_CACHED_PLAN_SIZE + 1`` entries.
    # The emptiness guard keeps a limit of zero from raising on an empty cache.
    if _cached_plan and len(_cached_plan) >= _CACHED_PLAN_SIZE:
        # Dicts preserve insertion order, so the first key is the oldest one.
        _cached_plan.pop(next(iter(_cached_plan)))


@normalize_token.register(pa_ds.Dataset)
def normalize_pa_ds(ds):
    """Return the token for a ``pa_ds.Dataset``: its ``files`` and ``schema``."""
    files, schema = ds.files, ds.schema
    return (files, schema)
Expand Down Expand Up @@ -602,7 +608,7 @@ def _plan(self):
common_kwargs,
)

_cached_plan.clear()
_control_cached_plan(dataset_token)
_cached_plan[dataset_token] = {
"func": io_func,
"parts": parts,
Expand Down

0 comments on commit 753ffbf

Please sign in to comment.