down-webp.py
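"""Download every PNG/JPEG/WebP image referenced by a web page.

Originals are saved into a '<name>-down' folder; compressed (and, unless
--no_webp is given, WebP-converted) copies go into '<name>-compressed'.
"""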
import os
import sys
import argparse as ap
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from PIL import Image


def download_png_images(url, folder_path, quality, prefix, makewebp, user_agent):
    # Create the output folders if they don't exist
    folder_down_path = folder_path + '-down'
    folder_compressed_path = folder_path + '-compressed'
    os.makedirs(folder_down_path, exist_ok=True)
    os.makedirs(folder_compressed_path, exist_ok=True)

    # Send an HTTP GET request to the URL
    headers = {
        'User-Agent': user_agent,
    }
    response = requests.get(url, headers=headers)

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve the web page. Status code: {response.status_code}")
        return

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all <img> tags whose src ends in one of the relevant extensions
    relevant_extensions = ['png', 'jpeg', 'jpg', 'webp']

    def is_relevant_extension(x):
        return x and any(x.lower().endswith(ext) for ext in relevant_extensions)

    images = soup.find_all('img', src=is_relevant_extension)

    # Download and save each image
    index = 0
    for img in images:
        img_url = img['src']
        if img_url.startswith('//'):
            # Protocol-relative URL: prepend the scheme
            img_url = 'https:' + img_url
        elif not img_url.startswith('http://') and not img_url.startswith('https://'):
            # Relative URL: resolve against the page's origin
            parsed_url = urlparse(url)
            base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
            img_url = base_url + '/' + img_url
        # Reuse the same User-Agent header for the image downloads
        response = requests.get(img_url, headers=headers)
        if response.status_code != 200:
            continue

        # Build the local filename, optionally prefixed with the image's
        # position on the page so the original order stays visible
        if prefix:
            img_filename = os.path.join(folder_down_path, str(index).zfill(3) + '-' + os.path.basename(img_url))
        else:
            img_filename = os.path.join(folder_down_path, os.path.basename(img_url))
        index += 1

        # Save the original image
        with open(img_filename, 'wb') as f:
            f.write(response.content)
        print(f"{img_filename} downloaded", end='')

        # Compress and optionally convert to WebP
        compress_and_convert(img_filename, makewebp, quality)


def compress_and_convert(img_filename, makewebp, quality):
    if makewebp:
        format_param = 'WEBP'
        target_filename = os.path.splitext(img_filename)[0] + '.webp'
    else:
        format_param = None  # let Pillow infer the format from the file extension
        target_filename = img_filename
    target_filename = target_filename.replace('-down', '-compressed')
    try:
        image = Image.open(img_filename)
        image.save(
            target_filename,
            format_param,
            quality=quality,
            method=6,  # WebP encoding effort (0=fast .. 6=best); ignored by other formats
        )
        print(", converted")
    except Exception as e:
        print(f", skipped because of error: {type(e)}: {e}")


class MyParser(ap.ArgumentParser):
    """ArgumentParser that prints the full help text when arguments are invalid."""

    def error(self, message):
        sys.stderr.write('error: %s\n' % message)
        self.print_help()
        sys.exit(2)


if __name__ == '__main__':
    # ArgumentDefaultsHelpFormatter appends each option's default to its help text
    p = MyParser(description="Configuration", formatter_class=ap.ArgumentDefaultsHelpFormatter)
    p.add_argument("--url", help="URL of the page to scrape for images")
    p.add_argument("--quality", type=int, default=75,
                   help="Quality level - 75 is the default and usually sufficient, max is 100")
    p.add_argument("--prefix", action='store_true',
                   help="Add a numeric prefix to the saved files so the order in which they appeared on the original page is preserved")
    p.add_argument("--no_webp", action='store_true',
                   help="Do NOT convert images to WebP after compressing")
    p.add_argument("--user_agent", default='python-requests/2.31.0',
                   help="Specify your own user agent if needed")
    p.add_argument("--file", help="File mode: convert just one file")
    if len(sys.argv) == 1:
        p.print_help()
        sys.exit(1)
    args = p.parse_args()
    print("Command line arguments are:")
    print(sys.argv[1:])
    if args.url is not None:
        # Derive a filesystem-safe folder name from the URL
        url_without_http = args.url.replace('http://', '').replace('https://', '')
        url_cleaned = url_without_http.replace('/', '_').replace(':', '-')
        download_png_images(args.url, url_cleaned + '-images-' + str(args.quality),
                            args.quality, args.prefix, not args.no_webp, args.user_agent)
    elif args.file is not None:
        print(args.file, end='')
        compress_and_convert(args.file, not args.no_webp, args.quality)
    else:
        print('Either url or file has to be specified! Exiting ...')
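
# Example invocations (https://example.com is a placeholder, not a tested target):
#   python down-webp.py --url https://example.com/gallery --quality 80 --prefix
#   python down-webp.py --file photo.png --no_webp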