From 43b6caeb3526a5107eaddfd6b1763dc5bf7d3840 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 30 Sep 2024 13:38:01 +1000 Subject: [PATCH 1/3] added load save implementation for text files --- extras/fileformats/extras/text/__init__.py | 1 + extras/fileformats/extras/text/load_save.py | 35 +++++++++++++++++++++ fileformats/core/fileset.py | 2 ++ fileformats/field/__init__.py | 4 ++- fileformats/text.py | 2 +- 5 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 extras/fileformats/extras/text/__init__.py create mode 100644 extras/fileformats/extras/text/load_save.py diff --git a/extras/fileformats/extras/text/__init__.py b/extras/fileformats/extras/text/__init__.py new file mode 100644 index 00000000..22dfdc8a --- /dev/null +++ b/extras/fileformats/extras/text/__init__.py @@ -0,0 +1 @@ +from . import load_save # noqa: F401 diff --git a/extras/fileformats/extras/text/load_save.py b/extras/fileformats/extras/text/load_save.py new file mode 100644 index 00000000..0f643aeb --- /dev/null +++ b/extras/fileformats/extras/text/load_save.py @@ -0,0 +1,35 @@ +import typing as ty +from fileformats.core import extra_implementation, FileSet +from fileformats.text import Plain # , Csv, Tsv + +# import pandas as pd + + +@extra_implementation(FileSet.load) +def load_text_file(text: Plain, **kwargs: ty.Any) -> Plain: + return text.raw_contents # type: ignore[no-any-return] + + +@extra_implementation(FileSet.save) +def save_text_file(text: Plain, data: ty.Any, **kwargs: ty.Any) -> None: + text.fspath.write_text(data) + + +# @extra_implementation(FileSet.load) +# def load_csv_file(csv_file: Csv, **kwargs: ty.Any) -> pd.DataFrame: +# return pd.read_csv(csv_file.fspath, **kwargs) + + +# @extra_implementation(FileSet.save) +# def save_csv_file(csv_file: Csv, data: pd.DataFrame, **kwargs: ty.Any) -> None: +# data.to_csv(csv_file.fspath, index=False, **kwargs) + + +# @extra_implementation(FileSet.load) +# def load_tsv_file(tsv_file: Tsv, **kwargs: ty.Any) -> pd.DataFrame: +# return pd.read_csv(tsv_file.fspath, sep="\t", **kwargs) + + +# @extra_implementation(FileSet.save) +# def save_tsv_file(tsv_file: Tsv, data: pd.DataFrame, **kwargs: ty.Any) -> None: +# data.to_csv(tsv_file.fspath, sep="\t", index=False, **kwargs) diff --git a/fileformats/core/fileset.py b/fileformats/core/fileset.py index 478ac3ea..13902d45 100644 --- a/fileformats/core/fileset.py +++ b/fileformats/core/fileset.py @@ -194,6 +194,7 @@ def load(self, **kwargs: ty.Any) -> ty.Any: Any the data loaded from the file in an type to the format """ + raise NotImplementedError @extra def save(self, data: ty.Any, **kwargs: ty.Any) -> None: @@ -207,6 +208,7 @@ def save(self, data: ty.Any, **kwargs: ty.Any) -> None: **kwargs : Any any format-specific keyword arguments to pass to the saver """ + raise NotImplementedError @classmethod def new(cls, fspath: ty.Union[str, Path], data: ty.Any, **kwargs: ty.Any) -> Self: diff --git a/fileformats/field/__init__.py b/fileformats/field/__init__.py index 03ea4dc9..f8ea5003 100644 --- a/fileformats/field/__init__.py +++ b/fileformats/field/__init__.py @@ -126,7 +126,9 @@ def __init__(self, value: ty.Any): if isinstance(value, Decimal): self.value = value.value try: - self.value = decimal.Decimal(value) + self.value = ( + value.value if isinstance(value, Decimal) else decimal.Decimal(value) + ) except decimal.InvalidOperation as e: raise FormatMismatchError(str(e)) from None diff --git a/fileformats/text.py b/fileformats/text.py index 7f76f173..ad37a059 100644 --- a/fileformats/text.py +++ b/fileformats/text.py @@ -12,7 +12,7 @@ class Plain(Text, UnicodeFile): iana_mime = "text/plain" -class TextFile(Text, UnicodeFile): +class TextFile(Plain, UnicodeFile): ext = ".txt" From 0c7123a8ed4e174af8055860904b8ad17a3838f0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 30 Sep 2024 13:47:51 +1000 Subject: [PATCH 2/3] changed Foo base class to text.Plain from Unicode --- fileformats/testing/basic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fileformats/testing/basic.py b/fileformats/testing/basic.py index 130581e4..7875eb77 100644 --- a/fileformats/testing/basic.py +++ b/fileformats/testing/basic.py @@ -1,21 +1,21 @@ -from fileformats.generic import UnicodeFile +from fileformats.text import Plain -class Foo(UnicodeFile): +class Foo(Plain): ext = ".foo" -class Bar(UnicodeFile): +class Bar(Plain): ext = ".bar" -class Baz(UnicodeFile): +class Baz(Plain): ext = ".baz" -class Qux(UnicodeFile): +class Qux(Plain): ext = ".qux" From 2b8b53e05815aeb26c2bdd933e751a750a735054 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 30 Sep 2024 13:52:46 +1000 Subject: [PATCH 3/3] fixed up copy constraints unittests --- fileformats/core/tests/test_fs_mount_identifier.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fileformats/core/tests/test_fs_mount_identifier.py b/fileformats/core/tests/test_fs_mount_identifier.py index 7e11a482..96a38cab 100644 --- a/fileformats/core/tests/test_fs_mount_identifier.py +++ b/fileformats/core/tests/test_fs_mount_identifier.py @@ -324,7 +324,7 @@ def test_copy_constraints(tmp_path): mode=copy_modes, ) - assert new_ext4_file.contents == ext4_file.contents + assert new_ext4_file.raw_contents == ext4_file.raw_contents assert os.path.islink(new_ext4_file) # Symlinks not supported on CIFS @@ -332,7 +332,7 @@ def test_copy_constraints(tmp_path): cifs_mnt / "dest", mode=copy_modes, ) - assert new_cifs_file.contents == cifs_file.contents + assert new_cifs_file.raw_contents == cifs_file.raw_contents assert not os.path.islink(new_cifs_file) assert os.stat(new_cifs_file).st_ino == os.stat(cifs_file).st_ino # Hardlink @@ -340,7 +340,7 @@ def test_copy_constraints(tmp_path): new_ext4_file2 = ext4_file.copy( ext4_mnt2 / "dest", mode=File.CopyMode.copy | File.CopyMode.hardlink ) - assert new_ext4_file2.contents == ext4_file.contents + assert new_ext4_file2.raw_contents == ext4_file.raw_contents assert not os.path.islink(new_ext4_file2) assert ( os.stat(ext4_file).st_ino != os.stat(new_ext4_file2).st_ino @@ -351,7 +351,7 @@ def test_copy_constraints(tmp_path): cifs_mnt / "dest", mode=copy_modes, ) - assert ext4_file_on_cifs.contents == ext4_file.contents + assert ext4_file_on_cifs.raw_contents == ext4_file.raw_contents assert not os.path.islink(ext4_file_on_cifs) assert ( os.stat(ext4_file).st_ino != os.stat(ext4_file_on_cifs).st_ino