-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathduplicate_image_finder.py
48 lines (43 loc) · 1.44 KB
/
duplicate_image_finder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import hashlib
import tkinter as tk
from tkinter import filedialog
def get_hash(file_path):
    """Return the SHA-256 hex digest of the file at *file_path*.

    The file is opened in binary mode and consumed in 8192-byte pieces, so
    the whole file never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(file_path, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it yields b''
        # (end of file).
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)
    return digest.hexdigest()
def scan_directory(directory, extensions=('.png', '.jpg', '.jpeg')):
    """Return the paths of all image files found under *directory*.

    The tree is walked recursively. A file counts as an image when its name
    ends with one of *extensions*; the comparison ignores case, so
    ``photo.JPG`` is matched as well as ``photo.jpg``.

    Args:
        directory: Root folder to search.
        extensions: Filename suffix (or iterable of suffixes) to accept.

    Returns:
        A list of paths, each built by joining the walked folder with the
        file name. Folders and files are visited in sorted name order, so
        the result is the same on every run over an unchanged tree.
    """
    if isinstance(extensions, str):
        # A bare string would otherwise be split into single characters.
        extensions = (extensions,)
    # str.endswith requires a tuple; lowercase once instead of once per file.
    suffixes = tuple(ext.lower() for ext in extensions)

    images = []
    for root, dirs, files in os.walk(directory):
        # Sorting in place fixes the order in which os.walk descends into
        # subfolders (top-down walks honour in-place edits of this list).
        dirs.sort()
        images.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if name.lower().endswith(suffixes)
        )
    return images
def _unique_destination(folder, filename):
    """Return a path inside *folder* for *filename* that is not yet taken."""
    stem, extension = os.path.splitext(filename)
    candidate = os.path.join(folder, filename)
    counter = 1
    # lexists also counts dangling symlinks, which rename would still clobber.
    while os.path.lexists(candidate):
        candidate = os.path.join(folder, f'{stem}_{counter}{extension}')
        counter += 1
    return candidate


def find_duplicates(directory):
    """Move duplicate images under *directory* into its 'duplicates' subfolder.

    Images are located with ``scan_directory`` and compared by the digest
    returned from ``get_hash``. The first file seen with a given digest is
    left in place; every later file with the same digest is moved.

    Files already inside the 'duplicates' subfolder are ignored, so running
    the function again never treats a previously moved copy as the original.
    When a moved file's name is already taken in the subfolder, a numeric
    suffix (``name_1.png``, ``name_2.png``, ...) is added instead of
    overwriting the existing file.

    Args:
        directory: Root folder to search; the 'duplicates' subfolder is
            created directly inside it (even when nothing is moved).

    Returns:
        The number of files moved.
    """
    duplicate_folder = os.path.join(directory, 'duplicates')
    # Normalised prefix used to recognise paths that already live in the
    # quarantine folder, regardless of separator style or letter case rules.
    quarantine_prefix = os.path.normcase(os.path.abspath(duplicate_folder)) + os.sep

    seen_hashes = set()
    duplicates = []
    for image in scan_directory(directory):
        if os.path.normcase(os.path.abspath(image)).startswith(quarantine_prefix):
            continue
        image_hash = get_hash(image)
        if image_hash in seen_hashes:
            duplicates.append(image)
        else:
            seen_hashes.add(image_hash)

    os.makedirs(duplicate_folder, exist_ok=True)
    for duplicate in duplicates:
        # Same-named duplicates from different subfolders must not overwrite
        # each other, so each one gets a destination that does not exist yet.
        destination = _unique_destination(duplicate_folder, os.path.basename(duplicate))
        os.rename(duplicate, destination)
    return len(duplicates)
# Script body: pick a folder through a dialog, then move its duplicate images.
root = tk.Tk()
# Hide the Tk main window so that only the folder-picker dialog is shown.
root.withdraw()

directory = filedialog.askdirectory()
# A falsy result is treated as "nothing selected": keep prompting until a
# folder is chosen.
while not directory:
    print('No directory selected. Please try again')
    directory = filedialog.askdirectory()

num_duplicates = find_duplicates(directory)
print(f'Found {num_duplicates} duplicates. They have been moved to the "duplicates" folder in the selected directory.')