def add_storage_options(*, filepath: PathType, kwargs: dict[str, Any]) -> None:
    """Copy a universal path's storage options into pandas keyword arguments.

    If ``filepath`` is a ``UPath`` and ``kwargs`` carries no usable
    ``storage_options`` entry, a plain-dict copy of ``filepath.storage_options``
    is stored under that key. ``kwargs`` is modified in place.

    Args:
        filepath: Path that is about to be handed to pandas.
        kwargs: Keyword arguments that will be forwarded to pandas; may be mutated.
    """
    # Only universal paths carry storage options; anything else is left untouched.
    if not isinstance(filepath, UPath):
        return
    # An explicit ``storage_options=None`` is the same as pandas' own default and
    # carries no configuration, so it must not block the options attached to the
    # path. Any other caller-supplied value takes precedence and is kept as-is.
    if kwargs.get(STORAGE_OPTIONS) is None:
        kwargs[STORAGE_OPTIONS] = dict(filepath.storage_options)
def test_add_storage_options() -> None:
    """Test the add_storage_options function."""
    add_storage_options = pd_io.df_parquet.add_storage_options
    key = pd_io.df_parquet.STORAGE_OPTIONS

    # Universal paths, remote or local, get their storage options injected.
    for upath in (UPath("s3://bucket/data.parquet"), UPath("data.parquet")):
        injected = {}
        add_storage_options(filepath=upath, kwargs=injected)
        assert key in injected
        assert injected[key] == dict(upath.storage_options)

    # Storage options supplied by the caller are never overwritten.
    supplied = {key: {"anon": True}}
    add_storage_options(filepath=UPath("s3://bucket/data.parquet"), kwargs=supplied)
    assert supplied == {key: {"anon": True}}

    # Plain pathlib paths leave the keyword arguments untouched.
    untouched = {}
    add_storage_options(filepath=Path("data.parquet"), kwargs=untouched)
    assert key not in untouched
} +dependencies = [ + { name = "universal-pathlib" }, +] [package.dev-dependencies] dev = [ @@ -144,6 +147,7 @@ extras = [ ] [package.metadata] +requires-dist = [{ name = "universal-pathlib", specifier = ">=0.2.5" }] [package.metadata.requires-dev] dev = [ @@ -826,6 +830,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, ] +[[package]] +name = "universal-pathlib" +version = "0.2.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fsspec" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/67/6c31ba464eafda05c677628dd7859ed4904597a78694d9cc81b593c6bad2/universal_pathlib-0.2.5.tar.gz", hash = "sha256:ea5d4fb8178c2ab469cf4fa46d0ceb16ccb378da46dbbc28a8b9c1eebdccc655", size = 174755 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/d9/289d308f889aac33639703a60906e3a0f3ec97419b7ca5bedaddc77648fd/universal_pathlib-0.2.5-py3-none-any.whl", hash = "sha256:a634f700eca827b4ad03bfa0267e51161560dd1de83b051cf0fccf39b3e56b32", size = 49892 }, +] + [[package]] name = "virtualenv" version = "20.27.1"