Skip to content

Commit

Permalink
Fallback use for existing cluster helper file if no parquet is present (#41)
Browse files Browse the repository at this point in the history

* Use fallback for cache loading if cluster helper file exists

* Use constant for column names
  • Loading branch information
dobraczka authored Apr 11, 2024
1 parent fd01baa commit 0427602
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions sylloge/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,16 @@ def _read_ch_or_df_links(
raise ValueError(
"Need to supply read_parquet_fn if not using ClusterHelper"
)
parquet_path = f"{path}_parquet"
if not os.path.exists(parquet_path) and os.path.exists(path):
logger.info(
f"Did not find {parquet_path}, but ClusterHelper file. Creating parquet file from ClusterHelper (no intra-dataset links are used!)"
)
assert ds_prefixes is not None
ch = PrefixedClusterHelper.from_file(path, ds_prefixes=ds_prefixes) # type: ignore[return-value]
pd.DataFrame(list(ch.all_pairs_no_intra()), columns=EA_SIDES).to_parquet(
parquet_path
)
return read_parquet_fn(f"{path}_parquet", **kwargs) # type: ignore[return-value]

@classmethod
Expand Down

0 comments on commit 0427602

Please sign in to comment.