-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from zkurtz/add-parquet
add pandas dataframe support
- Loading branch information
Showing
11 changed files
with
475 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,41 @@ | ||
# dummio | ||
|
||
IO for dummies! We make IO as easy as possible by implementing the most common and recommendable default options. (Users may pass additional keyword arguments to the underlying IO methods.) For example, instead of | ||
``` | ||
import json | ||
IO for dummies! We make IO as easy as possible by providing a unified `save`/`load` interface, using the most common and recommendable default options for IO between various object types and file types. (Users may pass additional keyword arguments to the underlying IO methods.) | ||
|
||
## Simple IO calls | ||
|
||
dummio simplifies IO calls for some file types. For example, instead of | ||
``` | ||
with open(file_path, 'r', encoding='utf-8') as file: | ||
data = json.load(file) | ||
``` | ||
you can simply | ||
``` | ||
import dummio | ||
data = dummio.json.load(file_path) | ||
``` | ||
|
||
## Standardized IO interface | ||
|
||
In some coding applications it is desirable to pass an IO module as an argument to a function. Here it is convenient to pass a dummio submodule, since all dummio submodules have the same `save` and `load` interface, having equivalent signatures (except for differences hidden in `**kwargs`). | ||
|
||
## Supported object and file types | ||
|
||
So far we support: | ||
- text | ||
- json | ||
- yaml | ||
- simple dictionaries: | ||
- json | ||
- yaml | ||
- pandas dataframes: | ||
- csv | ||
- parquet | ||
|
||
Note that `yaml` is not a required dependency; you may install `dummio` and use it for `json` without bothering with `yaml` installation. Any other IO modules to be added will similarly be optional. | ||
## Dependencies | ||
|
||
dummio has no required dependencies. For example, running `from dummio.pandas import df_parquet` will raise an error with a helpful message asking you to install pandas if you have not already done so. | ||
|
||
## Examples | ||
|
||
Basic IO methods can be accessed directly as `dummio.text`, `dummio.json`, etc.: | ||
``` | ||
import dummio | ||
|
@@ -34,10 +47,6 @@ path = "io_example_file" | |
dummio.text.save(text, path=path) | ||
assert text == dummio.text.load(path) | ||
# JSON | ||
dummio.json.save(data) | ||
assert data == dummio.json.load(path) | ||
# YAML | ||
dummio.yaml.save(data) | ||
assert data == dummio.yaml.load(path) | ||
|
@@ -53,7 +62,7 @@ We're [on pypi](https://pypi.org/project/dummio/), so `pip install dummio`. | |
git clone [email protected]:zkurtz/dummio.git | ||
cd dummio | ||
pip install uv | ||
uv sync | ||
uv sync --group extras | ||
source .venv/bin/activate | ||
pre-commit install | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,3 @@ | ||
"""dummio initialization.""" | ||
|
||
from importlib.metadata import version | ||
|
||
__version__ = version("dummio") | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Fail fast with an actionable message when the optional pandas dependency is missing.
try:
    import pandas

    # Only the import check matters here; do not leave `pandas` bound in this package's namespace.
    del pandas
except ImportError as err:
    # Chain explicitly so the original import failure stays attached as the direct cause.
    raise ImportError("Please install pandas to use dummio.pandas") from err
|
||
from dummio.pandas import df_csv as df_csv | ||
from dummio.pandas import df_parquet as df_parquet |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
"""Pandas data frames to/from csv.""" | ||
|
||
from typing import Any | ||
|
||
import pandas as pd | ||
|
||
from dummio.constants import PathType | ||
|
||
|
||
def save(
    data: pd.DataFrame,
    *,
    filepath: PathType,
    **kwargs: Any,
) -> None:
    """Save a pandas data frame as a csv file.

    Args:
        data: Data frame to save.
        filepath: Path to save the data.
        **kwargs: Additional keyword arguments for pandas.DataFrame.to_csv. Unless the caller passes `index`
            explicitly, the index is written only when at least one of its levels is named.
    """
    # Check every level name rather than `index.name`: a MultiIndex reports `.name` as None even when its
    # levels are named, which would otherwise silently drop a meaningful index from the output.
    if "index" not in kwargs and not any(data.index.names):
        kwargs["index"] = False
    data.to_csv(filepath, **kwargs)
|
||
|
||
def load(filepath: PathType, **kwargs: Any) -> pd.DataFrame:
    """Read a csv file into a pandas data frame.

    Args:
        filepath: Path to read the data.
        **kwargs: Additional keyword arguments for pandas.read_csv

    Returns:
        The data frame produced by pandas.read_csv.
    """
    return pd.read_csv(filepath, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
"""Pandas data frames to/from parquet.""" | ||
|
||
from typing import Any | ||
|
||
import pandas as pd | ||
|
||
from dummio.constants import PathType | ||
|
||
|
||
def save(
    data: pd.DataFrame,
    *,
    filepath: PathType,
    **kwargs: Any,
) -> None:
    """Save a pandas data frame as a parquet file.

    Args:
        data: Data frame to save.
        filepath: Path to save the data.
        **kwargs: Additional keyword arguments for pandas.DataFrame.to_parquet
    """
    data.to_parquet(filepath, **kwargs)
|
||
|
||
def load(filepath: PathType, **kwargs: Any) -> pd.DataFrame:
    """Read a parquet file into a pandas data frame.

    Args:
        filepath: Path to read the data.
        **kwargs: Additional keyword arguments for pandas.read_parquet

    Returns:
        The data frame produced by pandas.read_parquet.
    """
    return pd.read_parquet(filepath, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[project] | ||
name = "dummio" | ||
version = "0.2.0" | ||
version = "1.0.0" | ||
description = "Easiest-possible IO for basic file types." | ||
authors = [{ name = "Zach Kurtz", email = "[email protected]" }] | ||
readme = "README.md" | ||
|
@@ -14,7 +14,9 @@ dev = [ | |
"pytest >=8.3.2", | ||
] | ||
extras = [ | ||
"pyyaml >=6.0.2", | ||
"fastparquet>=2024.11.0", | ||
"pandas>=1.5.0", | ||
"pyyaml>=6.0.2", | ||
] | ||
|
||
[project.urls] | ||
|
@@ -26,8 +28,31 @@ package = true | |
[tool.pytest.ini_options] | ||
testpaths = ["tests",] | ||
|
||
[tool.pyright] | ||
include = ["dummio", "tests"] | ||
|
||
[tool.ruff] | ||
line-length = 120 | ||
|
||
[tool.pyright] | ||
include = ["dummio", "tests"] | ||
[tool.ruff.lint] | ||
select = [ | ||
# ruff defaults: | ||
"E4", "E7", "E9", "F", | ||
"I", # isort | ||
"TID", # flake8-tidy-imports | ||
"D", # google-style docstrings | ||
] | ||
ignore = [ | ||
"D202", # would require one blank line after the last section of a multi-line docstring | ||
"D203", # would require one blank line at start of class docstring | ||
"D213", # would require multi-line docstring to start with a hard return | ||
"D401", # would require imperative mood in docstring | ||
"D413", # would put blank line at end of each multiline docstring | ||
] | ||
|
||
[tool.ruff.lint.flake8-tidy-imports] | ||
ban-relative-imports = "all" | ||
|
||
[tool.ruff.lint.per-file-ignores] | ||
"__init__.py" = ["D104"] # would require module-level documentation | ||
"test_*.py" = ["D"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
"""Test IO methods for tabular data types.""" | ||
|
||
from pathlib import Path | ||
from types import ModuleType | ||
|
||
import pandas as pd | ||
|
||
from dummio import pandas as pd_io | ||
|
||
|
||
def dataframe() -> pd.DataFrame:
    """Build the small two-column integer data frame used as the round-trip fixture."""
    return pd.DataFrame({"a": [1, 2], "b": [3, 4]})
|
||
|
||
def _assert_cycle(*, data: pd.DataFrame, path: Path, module: ModuleType) -> None:
    """Apply the module save/load cycle on the data and assert that the reloaded data matches the input data.

    Args:
        data: Data frame to round-trip.
        path: File location handed to both `module.save` and `module.load`.
        module: IO module exposing `save(data, filepath=...)` and `load(filepath)`.

    Raises:
        AssertionError: If the reloaded frame differs from `data`, per pandas.testing.assert_frame_equal.
    """
    module.save(data, filepath=path)
    loaded_data = module.load(path)
    pd.testing.assert_frame_equal(data, loaded_data)
|
||
|
||
def test_df_io(tmp_path: Path) -> None:
    """Test the dummio.pandas IO modules with a save/load round trip of tabular data."""
    modules = [
        pd_io.df_parquet,
        pd_io.df_csv,
    ]
    for module in modules:
        _assert_cycle(
            data=dataframe(),
            # Every module writes to the same extension-less path; each cycle saves before it loads.
            path=tmp_path / "data",
            module=module,
        )
Oops, something went wrong.