Skip to content

Commit

Permalink
Merge pull request #52 from sgibson91/list-files
Browse files: browse the repository at this point in the history
Add functionality to list duplicated files
  • Loading branch information
sgibson91 authored Apr 24, 2022
2 parents 2570f1d + bd736de commit f4a9700
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 6 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ deduplify compare # Compare the filenames again. The code should return
**Command line usage:**
```bash
usage: deduplify compare [-h] [-c COUNT] [-v] [-f INFILE] [--purge]
usage: deduplify compare [-h] [-c COUNT] [-v] [-f INFILE] [--list-files] [--purge]
optional arguments:
-h, --help show this help message and exit
Expand All @@ -133,6 +133,7 @@ optional arguments:
-v, --verbose Print logging messages to the console
-f INFILE, --infile INFILE
Database to analyse. Must be a JSON file. Default: file_hashes.json
--list-files List duplicated files. Default: False
--purge Deletes duplicated files. Default: False
```
Expand Down
2 changes: 1 addition & 1 deletion deduplify/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@

from incremental import Version

__version__ = Version("deduplify", 0, 4, 2)
__version__ = Version("deduplify", 0, 5, 0)
__all__ = ["__version__"]
5 changes: 5 additions & 0 deletions deduplify/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ def parse_args(args):
default="file_hashes.json",
help="Database to analyse. Must be a JSON file. Default: file_hashes.json",
)
parser_compare.add_argument(
"--list-files",
action="store_true",
help="List duplicated files. Default: False",
)
parser_compare.add_argument(
"--purge", action="store_true", help="Deletes duplicated files. Default: False"
)
Expand Down
12 changes: 9 additions & 3 deletions deduplify/compare_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
files.
Python version: >= 3.7 (developed with 3.8)
Packages: tqdm
Packages: tqdm, rich
>>> pip install tqdm
>>> pip install tqdm rich
"""

import logging
Expand All @@ -20,6 +20,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed

import jmespath
from rich import print_json
from tinydb import TinyDB, where
from tqdm import tqdm

Expand Down Expand Up @@ -100,11 +101,12 @@ def delete_files(files: list, workers: int):
logger.info("Deletion complete!")


def run_compare(infile: str, purge: bool, count: int, **kwargs):
def run_compare(infile: str, list_files: bool, purge: bool, count: int, **kwargs):
"""Compare files for duplicated hashes
Args:
infile (str): JSON location of filepaths and hashes
list_files (bool): Print list of duplicated files to the console
purge (bool): Delete duplicated files
count (int): Number of threads to parallelise over
"""
Expand All @@ -123,5 +125,9 @@ def run_compare(infile: str, purge: bool, count: int, **kwargs):

logger.info("Number of files that can be safely deleted: %s" % len(files_to_delete))

if list_files:
print("Duplicated files:")
print_json(data=files_to_delete)

if purge:
delete_files(files_to_delete, count)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
incremental
jmespath==1.0.0
rich==12.2.0
tinydb==4.7.0
tqdm
twisted
2 changes: 1 addition & 1 deletion tests/test_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_run_compare_and_purge(mock):
call("path/to/test/file.txt"),
]

run_compare(infile, True, 1)
run_compare(infile, False, True, 1)

assert mock.call_count == 2
mock.assert_has_calls(test_calls)

0 comments on commit f4a9700

Please sign in to comment.