-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathParrotDetector.py
125 lines (88 loc) · 3.14 KB
/
ParrotDetector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python
# coding: utf-8
import subprocess
import sys

# Install or update the third-party packages this script needs, quietly.
# pip is invoked as "<this interpreter> -m pip" with an argument list (no shell),
# so the packages land in the environment that is actually running this script
# and no shell parsing of the command line is involved.
# check=True makes a failed install abort the script with CalledProcessError.
for _package in ("fastai", "duckduckgo_search"):
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-Uqq", _package],
        check=True,
    )
from duckduckgo_search import DDGS
from fastcore.all import *
def search_images(term, max_images=100):
    """Return up to ``max_images`` image URLs that DuckDuckGo finds for ``term``.

    Args:
        term: Search keywords passed to ``DDGS.images``.
        max_images: Upper bound on the number of URLs returned. Fewer URLs
            are returned when the search produces fewer results.

    Returns:
        An ``L`` (the list class provided by the ``fastcore.all`` star import)
        holding the ``"image"`` value of each result.
    """
    from itertools import islice

    print(f"Searching for '{term}'")
    with DDGS() as ddgs:
        # Each result is a dict with the keys:
        # {'title','image','thumbnail','url','height','width','source'}
        search_results = ddgs.images(keywords=term)
        # Grab at most max_images URLs. islice works whether images() hands
        # back a generator or a list, and simply stops early when the search
        # runs out of results (repeated next() calls would raise instead).
        # The results are consumed while the DDGS session is still open.
        limit = max(max_images, 0)
        image_urls = [
            result.get("image") for result in islice(search_results, limit)
        ]
        # Convert to L (extended list class from the fastcore import).
        return L(image_urls)
from pathlib import Path
import hashlib
def get_file_hash(file_path):
    """Return the hexadecimal MD5 digest of the bytes stored at ``file_path``."""
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        # Feed the file to the hash in 4096-byte pieces so the whole file is
        # never held in memory at once; an empty read marks end of file.
        while True:
            block = stream.read(4096)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
def remove_duplicates(image_paths):
    """Return the paths whose file content repeats an earlier path's content.

    Despite its name, this function deletes nothing: it only reports the
    duplicates, and deleting them is left to the caller. Files are compared
    by the digest returned from ``get_file_hash``. The first path seen with a
    given digest is kept; every later path with the same digest is reported.

    Args:
        image_paths: Iterable of file paths to compare, in priority order.

    Returns:
        A list of the duplicate paths, in the order they were encountered.
    """
    seen_hashes = set()
    duplicates = []
    for path in image_paths:
        file_hash = get_file_hash(path)
        if file_hash in seen_hashes:
            duplicates.append(path)
        else:
            seen_hashes.add(file_hash)
    return duplicates
# Sanity check: fetch one sample picture per class before building the dataset.
urls = search_images("parrot photos", max_images=1)
first_parrot_url = urls[0]
print(first_parrot_url)
from fastdownload import download_url

# Save the first parrot hit locally, without a progress bar.
dest = "parrot.jpg"
download_url(first_parrot_url, dest, show_progress=False)
from fastai.vision.all import *

# Open the saved file and build a 256x256 thumbnail; the thumbnail itself is
# not stored anywhere (it is only displayed when run in a notebook cell).
im = Image.open(dest)
im.to_thumb(256, 256)

# Same again for a forest picture, this time showing download progress.
forest_urls = search_images("forest photos", max_images=1)
download_url(forest_urls[0], "forest.jpg", show_progress=True)
Image.open("forest.jpg").to_thumb(256, 256)
searches = "forest", "parrot"
path = Path("parrot_or_not")
from time import sleep

# Build one sub-folder of images per search label under `path`.
for o in searches:
    dest = path / o
    dest.mkdir(exist_ok=True, parents=True)
    # Four query variants per label, each followed by a 10 second pause
    # (presumably to avoid hammering the search service -- confirm).
    label_queries = (
        f"{o} photo",
        f"{o} sun photo",
        f"{o} evening photo",
        f"{o} shade photo",
    )
    for query in label_queries:
        download_images(dest, urls=search_images(query))
        sleep(10)
    # Shrink the label's images in place, capping their size at 400.
    resize_images(path / o, max_size=400, dest=path / o)
# Delete every file reported by verify_images (expected to be the downloads
# that cannot be opened as images -- confirm against the fastai docs), then
# report how many were dropped.
failed = verify_images(get_image_files(path))
for broken in failed:
    Path.unlink(broken)
print(len(failed))
# Delete files whose content duplicates an earlier file, then report the count.
duplicates = remove_duplicates(get_image_files(path))
for dup_path in duplicates:
    Path(dup_path).unlink()
print(len(duplicates))
# Describe the dataset: images as inputs, the parent folder name as the label,
# a seeded random 80/20 train/validation split, and every item squished to 192.
parrot_block = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    get_y=parent_label,
    item_tfms=[Resize(192, method="squish")],
)
dls = parrot_block.dataloaders(path)
dls.show_batch(max_n=6)

# Fine-tune a resnet18 learner for 5 epochs, tracking the error rate.
learn = vision_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(5)
learn.show_results()
# Classify a separate picture with the trained learner.
# NOTE: nothing in this script creates "monkey.jpg"; it must already exist in
# the working directory or PILImage.create will fail.
# `is_bird` holds the predicted label (the name is historical), and `probs`
# holds one probability per class.
is_bird, _, probs = learn.predict(PILImage.create("monkey.jpg"))
print(f"This is a: {is_bird}.")
# Look up the "forest" class position in the learner's vocabulary instead of
# assuming it is index 0, so the printed probability always matches its label.
forest_idx = list(learn.dls.vocab).index("forest")
print(f"Probability it's a forest: {probs[forest_idx]:.4f}")