Skip to content

Commit

Permalink
add --parallel-actions option to execute actions in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
lene committed Jan 17, 2024
1 parent 1d6c148 commit ce354b3
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 13 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Changelog

## [0.10.8] - 2024-01-16
## [0.10.8] - 2024-01-17

### Added
- optional argument to specify the number of threads with `--parallel`
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ with the `--group` argument may be more than two images considered equal.
Use the `--parallel` option to utilize all free cores on your system for calculating image hashes.
Optionally, you can specify the number of processes to use with `--parallel $N`.

To execute the `--on-equal` actions in parallel, use the `--parallel-actions` option, which can
also take an optional number of threads to use as its argument.

### Excluding subfolders

Use the `--exclude-dir` option to exclude subfolders of `$IMAGE_ROOT` from the search. The argument
Expand Down
21 changes: 15 additions & 6 deletions duplicate_images/duplicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import re
from argparse import Namespace
from multiprocessing.pool import ThreadPool
from os import walk, access, R_OK
from pathlib import Path
from typing import Callable, List, Optional
Expand All @@ -12,7 +13,7 @@
from pillow_heif import register_heif_opener

from duplicate_images.common import path_with_parent, log_execution_time
from duplicate_images.function_types import Results
from duplicate_images.function_types import Results, ImageGroup, ActionFunction
from duplicate_images.hash_store import FileHashStore
from duplicate_images.image_pair_finder import ImagePairFinder, PairFinderOptions
from duplicate_images.log import setup_logging
Expand Down Expand Up @@ -81,11 +82,19 @@ def get_matches(

def execute_actions(matches: Results, args: Namespace) -> None:
    """Run the action selected with ``--on-equal`` on every group in ``matches``.

    The action is looked up in ``ACTIONS_ON_EQUALITY`` by ``args.on_equal`` and
    applied to the groups in sorted order. If ``args.parallel_actions`` is
    truthy, a thread pool with that many workers runs the actions concurrently;
    otherwise they run one after another in the calling thread.

    :param matches: groups of images that were found to be equal
    :param args: parsed command line; ``on_equal`` and ``parallel_actions`` are read
    """
    action_equal = ACTIONS_ON_EQUALITY[args.on_equal]
    # Sort once for both branches so the order in which groups are handed out
    # is deterministic regardless of whether threads are used.
    groups = sorted(matches)
    if args.parallel_actions:
        with ThreadPool(args.parallel_actions) as pool:
            # map() blocks until every group has been processed.
            pool.map(lambda group: execute_action(action_equal, group, args), groups)
    else:
        for group in groups:
            execute_action(action_equal, group, args)


def execute_action(action: ActionFunction, group: ImageGroup, args: Namespace) -> None:
    """Apply ``action`` to a single ``group``, ignoring files that no longer exist.

    :param action: callable invoked as ``action(args, group)``
    :param group: the group of images handed to the action
    :param args: parsed command line, passed through to the action
    """
    try:
        action(args, group)
    except FileNotFoundError:
        # Deliberately best-effort: presumably a file of this group was already
        # removed or moved by an earlier action - TODO confirm.
        return


def set_max_image_pixels(args: Namespace) -> None:
Expand Down
6 changes: 5 additions & 1 deletion duplicate_images/parse_commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,11 @@ def parse_command_line(args: Optional[List[str]] = None) -> Namespace:
)
parser.add_argument(
'--parallel', nargs='?', type=int, default=None, const=cpu_count(),
help='Calculate hashes using PARALLEL threads (default: number of cores in the system)'
help=f'Calculate hashes using PARALLEL threads (default: {cpu_count()})'
)
parser.add_argument(
'--parallel-actions', nargs='?', type=int, default=None, const=cpu_count(),
help=f'Execute actions on equal images using PARALLEL threads (default: {cpu_count()})'
)
group = parser.add_mutually_exclusive_group()
group.add_argument(
Expand Down
51 changes: 49 additions & 2 deletions tests/unit/test_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

import shlex
from argparse import Namespace
from datetime import datetime, timedelta
from math import factorial
from pathlib import Path
from tempfile import TemporaryDirectory
from tempfile import TemporaryDirectory, NamedTemporaryFile
from typing import List, Generator, Tuple
from unittest.mock import Mock, patch

Expand All @@ -15,7 +17,7 @@
from duplicate_images.methods import IMAGE_HASH_ALGORITHM, quote
from duplicate_images.pair_finder_options import PairFinderOptions
from duplicate_images.parse_commandline import parse_command_line
from .conftest import create_jpg_and_png, create_half_jpg
from .conftest import create_jpg_and_png, create_half_jpg, create_image, IMAGE_WIDTH

HASH_ALGORITHM = IMAGE_HASH_ALGORITHM['phash']

Expand All @@ -33,6 +35,22 @@ def fixture_equal_images(
file.unlink(missing_ok=True)


@pytest.fixture(name='many_equal_images')
def fixture_many_equal_images(
        top_directory: TemporaryDirectory, num_images: int
) -> Generator[List[Path], None, None]:
    """Create ``num_images`` image files inside ``top_directory`` and yield their paths.

    Every file is written by ``create_image`` with the same ``IMAGE_WIDTH``.
    After the test has finished, all created files are removed again.
    """
    images = []
    for _ in range(num_images):
        # Reserve a unique file name and close the handle before the image is
        # written. delete=False keeps the placeholder on disk, so finalizing
        # the temporary file object can never remove the image afterwards.
        # NOTE(review): assumes create_image overwrites an existing file.
        with NamedTemporaryFile(
                dir=top_directory.name, prefix='jpeg_', suffix='.jpg', delete=False
        ) as placeholder:
            file_name = Path(placeholder.name)
        create_image(file_name, IMAGE_WIDTH)
        images.append(file_name)
    yield images
    for file in images:
        file.unlink(missing_ok=True)


def get_equals(equal_images: List[Path], group: bool) -> List[Tuple[Path, ...]]:
equals = ImagePairFinder.create(
equal_images, HASH_ALGORITHM, options=PairFinderOptions(group=group)
Expand Down Expand Up @@ -193,6 +211,35 @@ def test_symlink(equal_images: List[Path], option: str, group: bool):
assert path.resolve() == relevant


@pytest.mark.parametrize('num_images', [7])
@pytest.mark.parametrize('parallel', [4, 10, 20])
@pytest.mark.parametrize('sleep_time', [0.005])
def test_parallel_actions(
        many_equal_images: List[Path], num_images: int, parallel: int, sleep_time: float
) -> None:
    """Executing the actions with ``--parallel-actions`` is faster than without it.

    The number of groups found is first checked against the number of
    unordered pairs that can be formed from ``num_images`` images.
    """
    finder = ImagePairFinder.create(
        many_equal_images, HASH_ALGORITHM, options=PairFinderOptions(group=False)
    )
    equals = finder.get_equal_groups()
    # "num_images choose 2": the count of distinct pairs among the images
    expected_pairs = factorial(num_images) / (factorial(2) * factorial(num_images - 2))
    assert len(equals) == expected_pairs

    sequential_duration = actions_execution_time(equals, sleep_time, [])
    threaded_duration = actions_execution_time(
        equals, sleep_time, ['--parallel-actions', str(parallel)]
    )
    assert threaded_duration < sequential_duration


def actions_execution_time(equals: Results, sleep_time: float, extra_args: List[str]) -> timedelta:
    """Return how long ``duplicate.execute_actions`` needs to process ``equals``.

    The command line is built so that the ``exec`` action runs
    ``sleep <sleep_time>`` for each group; ``extra_args`` is appended to it.

    :param equals: groups passed to ``duplicate.execute_actions``
    :param sleep_time: argument given to the ``sleep`` command
    :param extra_args: additional command line arguments, e.g. ``--parallel-actions``
    :return: the elapsed time as a ``timedelta``
    """
    # Imported locally so the block does not depend on a file-level import.
    # perf_counter is monotonic, unlike the wall clock read by datetime.now(),
    # so clock adjustments during the run cannot distort the measurement.
    from time import perf_counter

    args = parse_command_line(
        ['.', '--on-equal', 'exec', '--exec', f'sleep {sleep_time}'] + extra_args
    )
    started = perf_counter()
    duplicate.execute_actions(equals, args)
    return timedelta(seconds=perf_counter() - started)


@pytest.mark.parametrize('option', ['unknown-option'])
def test_unknown_option(option: str) -> None:
with pytest.raises(SystemExit):
Expand Down
23 changes: 20 additions & 3 deletions tests/unit/test_parse_commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,26 @@ def test_parallel_default_arg() -> None:
assert args.parallel == cpu_count()


def test_parallel_explicit_arg() -> None:
args = parse_command_line(['.', '--parallel', '2'])
assert args.parallel == 2
@pytest.mark.parametrize('parallel', ['1', '2', '4', '8', '16'])
def test_parallel_explicit_arg(parallel) -> None:
    """A value given to ``--parallel`` is parsed to the corresponding integer."""
    expected = int(parallel)
    parsed = parse_command_line(['.', '--parallel', parallel])
    assert parsed.parallel == expected


def test_parallel_actions_unspecified() -> None:
    """Without ``--parallel-actions`` the parsed value is ``None``."""
    parsed = parse_command_line(['.'])
    assert parsed.parallel_actions is None


def test_parallel_actions_default_arg() -> None:
    """``--parallel-actions`` without a value yields ``cpu_count()``."""
    parsed = parse_command_line(['.', '--parallel-actions'])
    expected = cpu_count()
    assert parsed.parallel_actions == expected


@pytest.mark.parametrize('parallel', ['1', '2', '4', '8', '16'])
def test_parallel_actions_explicit_arg(parallel) -> None:
    """A value given to ``--parallel-actions`` is parsed to the corresponding integer."""
    expected = int(parallel)
    parsed = parse_command_line(['.', '--parallel-actions', parallel])
    assert parsed.parallel_actions == expected


def test_exclude_dir_unspecified() -> None:
Expand Down

0 comments on commit ce354b3

Please sign in to comment.