From e09bb09d5cf621ca818e3438fbd89e0e307130c0 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Sat, 23 Apr 2022 16:17:39 +0100 Subject: [PATCH 1/4] Add functionality to list duplicated files --- README.md | 3 ++- deduplify/cli.py | 5 +++++ deduplify/compare_files.py | 12 +++++++++--- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index aec4363..915ecc9 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ deduplify compare # Compare the filenames again. The code should return **Command line usage:** ```bash -usage: deduplify compare [-h] [-c COUNT] [-v] [-f INFILE] [--purge] +usage: deduplify compare [-h] [-c COUNT] [-v] [-f INFILE] [--list-files] [--purge] optional arguments: -h, --help show this help message and exit @@ -133,6 +133,7 @@ optional arguments: -v, --verbose Print logging messages to the console -f INFILE, --infile INFILE Database to analyse. Must be a JSON file. Default: file_hashes.json + --list-files List duplicated files. Default: False --purge Deletes duplicated files. Default: False ``` diff --git a/deduplify/cli.py b/deduplify/cli.py index 7898818..96e591b 100644 --- a/deduplify/cli.py +++ b/deduplify/cli.py @@ -109,6 +109,11 @@ def parse_args(args): default="file_hashes.json", help="Database to analyse. Must be a JSON file. Default: file_hashes.json", ) + parser_compare.add_argument( + "--list-files", + action="store_true", + help="List duplicated files. Default: False", + ) parser_compare.add_argument( "--purge", action="store_true", help="Deletes duplicated files. Default: False" ) diff --git a/deduplify/compare_files.py b/deduplify/compare_files.py index ac3bf33..94cf9e5 100644 --- a/deduplify/compare_files.py +++ b/deduplify/compare_files.py @@ -8,9 +8,9 @@ files. 
Python version: >= 3.7 (developed with 3.8) -Packages: tqdm +Packages: jmespath, rich, tinydb, tqdm ->>> pip install tqdm +>>> pip install jmespath rich tinydb tqdm """ import logging @@ -20,6 +20,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed import jmespath +from rich import print_json from tinydb import TinyDB, where from tqdm import tqdm @@ -100,11 +101,12 @@ def delete_files(files: list, workers: int): logger.info("Deletion complete!") -def run_compare(infile: str, purge: bool, count: int, **kwargs): +def run_compare(infile: str, list_files: bool, purge: bool, count: int, **kwargs): """Compare files for duplicated hashes Args: infile (str): JSON location of filepaths and hashes + list_files (bool): Print list of duplicated files to the console purge (bool): Delete duplicated files count (int): Number of threads to parallelise over """ @@ -123,5 +125,9 @@ def run_compare(infile: str, purge: bool, count: int, **kwargs): logger.info("Number of files that can be safely deleted: %s" % len(files_to_delete)) + if list_files: + print("Duplicated files:") + print_json(data=files_to_delete) + if purge: delete_files(files_to_delete, count) From 1b6b1781e7db2eab8b1bbb77503fa40f105ea24a Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Sat, 23 Apr 2022 16:20:04 +0100 Subject: [PATCH 2/4] Add rich to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index bf1f766..eeb9e3b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ incremental jmespath==1.0.0 +rich==12.2.0 tinydb==4.7.0 tqdm twisted From 59ffff360b89380e896d71c35eb1442f454e0899 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Sat, 23 Apr 2022 16:21:50 +0100 Subject: [PATCH 3/4] Bump package minor version --- deduplify/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deduplify/_version.py b/deduplify/_version.py index 7e83a58..f62ccdb 100644 --- a/deduplify/_version.py +++ b/deduplify/_version.py @@ -7,5 +7,5 @@ 
from incremental import Version -__version__ = Version("deduplify", 0, 4, 2) +__version__ = Version("deduplify", 0, 5, 0) __all__ = ["__version__"] From bd736de7d6985177b78eced42ac93ad7be4cbe59 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Sun, 24 Apr 2022 14:03:57 +0100 Subject: [PATCH 4/4] Fix tests --- tests/test_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_compare.py b/tests/test_compare.py index 089688b..55da22a 100644 --- a/tests/test_compare.py +++ b/tests/test_compare.py @@ -43,7 +43,7 @@ def test_run_compare_and_purge(mock): call("path/to/test/file.txt"), ] - run_compare(infile, True, 1) + run_compare(infile, False, True, 1) assert mock.call_count == 2 mock.assert_has_calls(test_calls)