Skip to content

Commit

Permalink
support universal pathlib
Browse files Browse the repository at this point in the history
  • Loading branch information
zkurtz committed Dec 11, 2024
1 parent 1738845 commit d9b667c
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 3 deletions.
4 changes: 3 additions & 1 deletion dummio/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from pathlib import Path
from typing import Any, TypeAlias

PathType: TypeAlias = str | Path
from upath import UPath

PathType: TypeAlias = str | Path | UPath
AnyDict: TypeAlias = dict[Any, Any]

DEFAULT_ENCODING = "utf-8"
Expand Down
12 changes: 12 additions & 0 deletions dummio/pandas/df_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,19 @@
from typing import Any

import pandas as pd
from upath import UPath

from dummio.constants import PathType

# Keyword-argument name under which add_storage_options stores a UPath's storage options.
STORAGE_OPTIONS = "storage_options"


def add_storage_options(*, filepath: PathType, kwargs: dict[str, Any]) -> None:
    """Copy a universal path's storage options into ``kwargs`` when absent.

    Args:
        filepath: Path being read or written. Only ``UPath`` instances cause
            any change.
        kwargs: Keyword arguments to update. Mutated in place; an existing
            ``storage_options`` entry is left untouched.
    """
    # Nothing to do for non-universal paths or when the caller already chose options.
    if not isinstance(filepath, UPath) or STORAGE_OPTIONS in kwargs:
        return
    kwargs[STORAGE_OPTIONS] = dict(filepath.storage_options)


def save(
data: pd.DataFrame,
Expand All @@ -20,6 +30,7 @@ def save(
filepath: Path to save the data.
**kwargs: Additional keyword arguments for pandas.DataFrame.to_parquet
"""
add_storage_options(filepath=filepath, kwargs=kwargs)
data.to_parquet(filepath, **kwargs)


Expand All @@ -30,4 +41,5 @@ def load(filepath: PathType, **kwargs: Any) -> pd.DataFrame:
filepath: Path to read the data.
**kwargs: Additional keyword arguments for pandas.read_parquet
"""
add_storage_options(filepath=filepath, kwargs=kwargs)
return pd.read_parquet(filepath, **kwargs)
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
[project]
name = "dummio"
version = "1.4.0"
version = "1.5.0"
description = "Easiest-possible IO for basic file types."
authors = [{ name = "Zach Kurtz", email = "[email protected]" }]
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"universal-pathlib>=0.2.5",
]

[dependency-groups]
dev = [
Expand Down
18 changes: 18 additions & 0 deletions tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from types import ModuleType

import pandas as pd
from upath import UPath

from dummio import pandas as pd_io

Expand Down Expand Up @@ -29,3 +30,20 @@ def test_df_io(tmp_path: Path) -> None:
path=tmp_path / "data",
module=module,
)


def test_add_storage_options() -> None:
    """Test the add_storage_options function.

    Covers universal paths (remote and local), caller-supplied storage
    options, and plain ``pathlib`` paths.
    """
    add_storage_options = pd_io.df_parquet.add_storage_options
    key = pd_io.df_parquet.STORAGE_OPTIONS

    # Universal paths, remote or local, get their storage options copied in.
    for raw_path in ("s3://bucket/data.parquet", "data.parquet"):
        path = UPath(raw_path)
        kwargs = {}
        add_storage_options(filepath=path, kwargs=kwargs)
        assert key in kwargs
        assert kwargs[key] == dict(path.storage_options)

    # Storage options supplied by the caller must not be overwritten.
    explicit = {"anon": True}
    kwargs = {key: explicit}
    add_storage_options(filepath=UPath("s3://bucket/data.parquet"), kwargs=kwargs)
    assert kwargs[key] is explicit

    # Plain pathlib paths leave kwargs untouched.
    kwargs = {}
    add_storage_options(filepath=Path("data.parquet"), kwargs=kwargs)
    assert key not in kwargs
    assert kwargs == {}
18 changes: 17 additions & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d9b667c

Please sign in to comment.