Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use zip for image download as default #177

Merged
merged 6 commits into from
Jan 1, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 68 additions & 25 deletions imagetagger/imagetagger/images/templates/images/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,33 @@
import getpass
import shutil
import os
import zipfile
try:
import requests
except ImportError:
print("Python3 requests is not installed. Please use e.g. pip3 install requests")
sys.exit()

# Base URL of the ImageTagger instance. The "{{ base_url }}" placeholder is
# presumably substituted when the server renders this script as a template;
# a trailing slash is appended so paths can be concatenated directly.
BaseUrl = "{{ base_url }}" + "/"

# Zip download is the default; "--separate" / "-s" selects per-image download.
_separate_flags = ("--separate", "-s")
separate_download = any(flag in sys.argv for flag in _separate_flags)
if separate_download:
    # Strip every occurrence of the flags in place so that only the script
    # name and the imageset ids remain in sys.argv.
    sys.argv[:] = [arg for arg in sys.argv if arg not in _separate_flags]
    print("The images will be downloaded separately instead of as zip.")

if len(sys.argv) < 2:
    # No imageset ids on the command line: ask for them interactively.
    imageset = input("Imagesets you want to download, separated by a ',' or ' ': ")
elif sys.argv[1] in ('-h', '--help'):
    print("This script will download images from the specified imageset for you.")
    print("The images will be downloaded from: {}".format(BaseUrl))
    print("If errors occur during the download you will be notified at the end of the script execution")
    print("If you want to download the images separately instead of as a zip (this was done in the past),")
    print("call the script with ./imagetagger_dl_script.py --separate imgsetID1, imgsetID2")
    print("Otherwise just execute it with ./imagetagger_dl_script.py")
    sys.exit()
else:
    # Everything after the script name is treated as imageset ids.
    imageset = " ".join(sys.argv[1:])
Expand All @@ -42,32 +54,60 @@
if error:
sys.exit()

error = False

# Fetch the start page first: the CSRF cookie it sets has to be echoed back
# both as a cookie and as a form field in the login POST below.
loginpage = requests.get(BaseUrl)
try:
    csrftoken = loginpage.cookies['csrftoken']
except KeyError:
    # Without this guard a server that sets no CSRF cookie (wrong URL,
    # error page, ...) would end the script with a bare traceback.
    print('Login failed: {} did not send a csrftoken cookie'.format(BaseUrl))
    sys.exit(1)

cookies = {'csrftoken': csrftoken}
data = {'username': user,
        'password': password,
        'csrfmiddlewaretoken': csrftoken}
loggedinpage = requests.post(
    '{}user/login/'.format(BaseUrl),
    data=data,
    cookies=cookies,
    allow_redirects=False,
    headers={'referer': BaseUrl})

# The login response is expected to carry a session cookie; its absence is
# treated as a failed login.
try:
    sessionid = loggedinpage.cookies['sessionid']
except KeyError:
    print('Login failed')
    sys.exit(1)
# From here on only the session cookie is sent with requests.
cookies = {'sessionid': sessionid}


def download_zip(current_imageset):
    """Download one imageset as a zip archive and unpack it.

    The archive is requested from ``images/imageset/<id>/download/`` below
    ``BaseUrl``, streamed to ``<filename>/<id>/<id>.zip``, extracted into
    that same directory and then deleted again.

    Failures (missing imageset, unexpected HTTP status, invalid archive) are
    reported on stdout and recorded by appending the imageset to the
    module-level ``errorlist``; they do not raise.

    :param current_imageset: id of the imageset to download, as a string
    """
    print(f"Now downloading {current_imageset}")
    filepath = os.path.join(filename, current_imageset)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(os.path.join(os.getcwd(), filepath), exist_ok=True)
    full_zipname = os.path.join(filepath, current_imageset + ".zip")
    ziplink = f"{BaseUrl}images/imageset/{current_imageset}/download/"
    # NOTE(review): `data` holds the login form fields; sending it as the body
    # of this GET looks unnecessary -- confirm with the server side before
    # removing it.
    with requests.get(ziplink,
                      data=data,
                      cookies=cookies,
                      allow_redirects=False,
                      headers={'referer': BaseUrl},
                      stream=True) as r:
        # this is intended for the case when an imageset does not exist or the zip does not yet exist
        if r.status_code == 404:
            print("In Imageset {} was an error. The server returned page not found. Try --separate if zip download is disabled.".format(current_imageset))
            errorlist.append(current_imageset)
            return
        # Redirects are not followed, so e.g. a 302 to the login page or a
        # server error must not be saved and treated as a zip file.
        if r.status_code != 200:
            print("In Imageset {} was an error. The server returned status code {} instead of the zip file.".format(current_imageset, r.status_code))
            errorlist.append(current_imageset)
            return
        # Stream to disk in chunks so large archives are never held in memory.
        with open(full_zipname, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    try:
        with zipfile.ZipFile(full_zipname, 'r') as unzip:
            unzip.extractall(filepath)
    except zipfile.BadZipFile:
        print("In Imageset {} was an error. The downloaded file is not a valid zip archive.".format(current_imageset))
        errorlist.append(current_imageset)
        return
    finally:
        # The archive is only a transport container; remove it whether or
        # not the extraction succeeded.
        os.remove(full_zipname)
    print(f"Downloaded imageset {current_imageset}")

# Download images individually. This is slower than downloading the zip, so the zip is used by default.
def download_imageset(current_imageset):
error = False
if not os.path.exists(os.path.join(os.getcwd(), filename, current_imageset)):
os.makedirs(os.path.join(os.getcwd(), filename, current_imageset))
loginpage = requests.get(BaseUrl)
csrftoken = loginpage.cookies['csrftoken']

cookies = {'csrftoken': csrftoken}
csrfmiddlewaretoken = csrftoken
data = {'username': user,
'password': password,
'csrfmiddlewaretoken': csrfmiddlewaretoken}
loggedinpage = requests.post(
'{}user/login/'.format(BaseUrl),
data=data,
cookies=cookies,
allow_redirects=False,
headers={'referer': BaseUrl})

try:
sessionid = loggedinpage.cookies['sessionid']
except KeyError:
print('Login failed')
sys.exit(1)
cookies = {'sessionid': sessionid}
page = requests.get("{}images/imagelist/{}/".format(BaseUrl,
current_imageset),
cookies=cookies)
Expand Down Expand Up @@ -103,8 +143,11 @@ def download_imageset(current_imageset):


# Download every requested imageset, as a zip by default or image by image
# when --separate / -s was given.
for imgset in imagesets:
    # Skip empty or whitespace-only entries (presumably left over from
    # splitting the user input); they cannot name an imageset.
    if not imgset.strip():
        continue
    if separate_download:
        download_imageset(imgset)
    else:
        download_zip(imgset)
if errorlist:
print("There have been errors while downloading the following imagesets: ")
for item in errorlist:
Expand Down