From af7145ddf0715bae1dd6dcd0c8cda5cd51a7e9b7 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Mon, 29 Jul 2024 19:19:35 +0800 Subject: [PATCH 1/6] rename test file to 3000_lines_gz.txt.gz --- .../{3000_lines.txt.gz => 3000_lines_gz.txt.gz} | Bin tests/test_io.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/test_files/{3000_lines.txt.gz => 3000_lines_gz.txt.gz} (100%) diff --git a/tests/test_files/3000_lines.txt.gz b/tests/test_files/3000_lines_gz.txt.gz similarity index 100% rename from tests/test_files/3000_lines.txt.gz rename to tests/test_files/3000_lines_gz.txt.gz diff --git a/tests/test_io.py b/tests/test_io.py index 9daa17be..91326d52 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -110,7 +110,7 @@ def test_reverse_readfile_gz(self): Make sure a file containing line numbers is read in reverse order, i.e. the first line that is read corresponds to the last line number. """ - fname = os.path.join(TEST_DIR, "3000_lines.txt.gz") + fname = os.path.join(TEST_DIR, "3000_lines_gz.txt.gz") for idx, line in enumerate(reverse_readfile(fname)): assert int(line) == self.NUMLINES - idx From cf84f7b7cbcdae2744634f1dbc130a3142ee23da Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Tue, 30 Jul 2024 11:30:36 +0800 Subject: [PATCH 2/6] gzip_dir skip gzip when gzipped ver exists --- src/monty/shutil.py | 4 ++++ tests/test_shutil.py | 25 ++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/monty/shutil.py b/src/monty/shutil.py index 37d28a35..b258652d 100644 --- a/src/monty/shutil.py +++ b/src/monty/shutil.py @@ -59,6 +59,10 @@ def gzip_dir(path: str | Path, compresslevel: int = 6) -> None: for f in files: full_f = Path(root, f).resolve() if Path(f).suffix.lower() != ".gz" and not full_f.is_dir(): + if full_f.with_suffix(".gz").exists(): + warnings.warn(f"Both {f} and {f}.gz exist.", stacklevel=2) + continue + with ( open(full_f, "rb") as f_in, GzipFile( diff --git a/tests/test_shutil.py b/tests/test_shutil.py index 98ddc1ac..16cd9aa7 100644 --- a/tests/test_shutil.py +++ b/tests/test_shutil.py @@ -127,7 +127,7 @@ def setup_method(self): self.mtime = os.path.getmtime(os.path.join(test_dir, "gzip_dir", "tempfile")) - def test_gzip(self): + def test_gzip_dir(self): full_f = os.path.join(test_dir, "gzip_dir", "tempfile") gzip_dir(os.path.join(test_dir, "gzip_dir")) @@ -139,6 +139,29 @@ def test_gzip(self): assert os.path.getmtime(f"{full_f}.gz") == pytest.approx(self.mtime, 4) + def test_gzip_dir_file_coexist(self): + """Test case where both file and file.gz exist.""" + full_f = os.path.join(test_dir, "gzip_dir", "temptestfile") + gz_f = f"{full_f}.gz" + + # Create both the file and its gzipped version + with open(full_f, "w") as f: + f.write("not gzipped") + with GzipFile(gz_f, "wb") as g: + g.write(b"gzipped") + + with pytest.warns( + UserWarning, match="Both temptestfile and temptestfile.gz exist." + ): + gzip_dir(os.path.join(test_dir, "gzip_dir")) + + # Verify contents of the files + with open(full_f, "r") as f: + assert f.read() == "not gzipped" + + with GzipFile(gz_f, "rb") as g: + assert g.read() == b"gzipped" + def test_handle_sub_dirs(self): sub_dir = os.path.join(test_dir, "gzip_dir", "sub_dir") sub_file = os.path.join(sub_dir, "new_tempfile") From 7741d54b3b1e930892025a8d59ff1ed3636c4c77 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Tue, 30 Jul 2024 11:33:16 +0800 Subject: [PATCH 3/6] Revert "rename test file to 3000_lines_gz.txt.gz" This reverts commit af7145ddf0715bae1dd6dcd0c8cda5cd51a7e9b7. --- .../{3000_lines_gz.txt.gz => 3000_lines.txt.gz} | Bin tests/test_io.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/test_files/{3000_lines_gz.txt.gz => 3000_lines.txt.gz} (100%) diff --git a/tests/test_files/3000_lines_gz.txt.gz b/tests/test_files/3000_lines.txt.gz similarity index 100% rename from tests/test_files/3000_lines_gz.txt.gz rename to tests/test_files/3000_lines.txt.gz diff --git a/tests/test_io.py b/tests/test_io.py index 91326d52..9daa17be 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -110,7 +110,7 @@ def test_reverse_readfile_gz(self): Make sure a file containing line numbers is read in reverse order, i.e. the first line that is read corresponds to the last line number. """ - fname = os.path.join(TEST_DIR, "3000_lines_gz.txt.gz") + fname = os.path.join(TEST_DIR, "3000_lines.txt.gz") for idx, line in enumerate(reverse_readfile(fname)): assert int(line) == self.NUMLINES - idx From 909ae3e3eb393c052375e5618ada4e30533dcccb Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 2 Aug 2024 10:46:56 +0800 Subject: [PATCH 4/6] fix duplicate check --- src/monty/shutil.py | 5 ++--- tests/test_tempfile.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/monty/shutil.py b/src/monty/shutil.py index b258652d..98047f57 100644 --- a/src/monty/shutil.py +++ b/src/monty/shutil.py @@ -54,12 +54,11 @@ def gzip_dir(path: str | Path, compresslevel: int = 6) -> None: compresslevel (int): Level of compression, 1-9. 9 is default for GzipFile, 6 is default for gzip. """ - path = Path(path) - for root, _, files in os.walk(path): + for root, _, files in os.walk(Path(path)): for f in files: full_f = Path(root, f).resolve() if Path(f).suffix.lower() != ".gz" and not full_f.is_dir(): - if full_f.with_suffix(".gz").exists(): + if os.path.exists(f"{full_f}.gz"): warnings.warn(f"Both {f} and {f}.gz exist.", stacklevel=2) continue diff --git a/tests/test_tempfile.py b/tests/test_tempfile.py index 4bce4089..c7faf245 100644 --- a/tests/test_tempfile.py +++ b/tests/test_tempfile.py @@ -49,7 +49,7 @@ def test_with_copy_gzip(self): # We write a pre-scratch file. with open("pre_scratch_text", "w") as f: f.write("write") - init_gz = [f for f in os.listdir(os.getcwd()) if f.endswith(".gz")] + init_gz_files = [f for f in os.listdir(os.getcwd()) if f.endswith(".gz")] with ( ScratchDir( self.scratch_root, @@ -65,7 +65,7 @@ def test_with_copy_gzip(self): # Make sure the stratch_text.gz exists assert "scratch_text.gz" in files for f in files: - if f.endswith(".gz") and f not in init_gz: + if f.endswith(".gz") and f not in init_gz_files: os.remove(f) os.remove("pre_scratch_text") From 5d072196efd6ac8dfcb12a525bc51a85aca409e1 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 2 Aug 2024 10:48:27 +0800 Subject: [PATCH 5/6] check warn msg --- tests/test_tempfile.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/test_tempfile.py b/tests/test_tempfile.py index c7faf245..ce013568 100644 --- a/tests/test_tempfile.py +++ b/tests/test_tempfile.py @@ -3,6 +3,7 @@ import os import shutil +import pytest from monty.tempfile import ScratchDir TEST_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files") @@ -50,16 +51,17 @@ def test_with_copy_gzip(self): with open("pre_scratch_text", "w") as f: f.write("write") init_gz_files = [f for f in os.listdir(os.getcwd()) if f.endswith(".gz")] - with ( - ScratchDir( - self.scratch_root, - copy_from_current_on_enter=True, - copy_to_current_on_exit=True, - gzip_on_exit=True, - ), - open("scratch_text", "w") as f, - ): - f.write("write") + with pytest.warns(match="Both 3000_lines.txt and 3000_lines.txt.gz exist."): + with ( + ScratchDir( + self.scratch_root, + copy_from_current_on_enter=True, + copy_to_current_on_exit=True, + gzip_on_exit=True, + ), + open("scratch_text", "w") as f, + ): + f.write("write") files = os.listdir(os.getcwd()) # Make sure the stratch_text.gz exists From 8aa64f9ae53cbcc62129e8c0c9c57159d3fd1ac4 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 2 Aug 2024 10:49:38 +0800 Subject: [PATCH 6/6] fix typo --- tests/test_tempfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_tempfile.py b/tests/test_tempfile.py index ce013568..4dfd3b17 100644 --- a/tests/test_tempfile.py +++ b/tests/test_tempfile.py @@ -64,7 +64,7 @@ def test_with_copy_gzip(self): f.write("write") files = os.listdir(os.getcwd()) - # Make sure the stratch_text.gz exists + # Make sure the scratch_text.gz exists assert "scratch_text.gz" in files for f in files: if f.endswith(".gz") and f not in init_gz_files: