bit-bots · timonegk · Jan 1, 2021 · Dec 12, 2020 · Dec 17, 2020 · Dec 17, 2020
diff --git a/imagetagger/imagetagger/images/templates/images/download.py b/imagetagger/imagetagger/images/templates/images/download.py
@@ -4,21 +4,33 @@
 import getpass
 import shutil
 import os
+import zipfile
 try:
     import requests
 except ImportError:
     print("Python3 requests is not installed. Please use e.g. pip3 install requests")
     sys.exit()
 
 BaseUrl = "{{ base_url }}" + "/"
+# Command line switches that select the per-image download instead of the zip download.
+_separate_flags = ("--separate", "-s")
+separate_download = any(_flag in sys.argv for _flag in _separate_flags)
+if separate_download:
+    # Strip one occurrence of each switch so that only imageset IDs remain in sys.argv.
+    for _flag in _separate_flags:
+        if _flag in sys.argv:
+            sys.argv.remove(_flag)
+    print("The images will be downloaded separately instead of as zip.")
 if len(sys.argv) < 2:
-    imageset = input("Imagesets you want to download, separated by a ',' or ' ':")
+    imageset = input("Imagesets you want to download, separated by a ',' or ' ': ")
 else:
     if sys.argv[1] == '-h':
         print("This script will download images from the specified imageset for you.")
         print("The images will be downloaded from: {}".format(BaseUrl))
         print("If errors occur during the download you will be notified at the end of the script execution")
-        print("Just execute it with ./imagetagger_dl_script.py")
+        print("If you want to download the images separately instead of as a zip (this was done in the past),")
+        print("call the script with ./imagetagger_dl_script.py --separate imgsetID1, imgsetID2")
+        print("Otherwise just execute it with ./imagetagger_dl_script.py")
         sys.exit()
     else:
         imageset = " ".join(sys.argv[1:])
@@ -42,32 +54,60 @@
 if error:
     sys.exit()
 
+error = False
+# Request the base page once up front; its 'csrftoken' cookie is reused for the login POST.
+loginpage = requests.get(BaseUrl)
 
+cookies = {'csrftoken': loginpage.cookies['csrftoken']}
+data = {'username': user,
+        'password': password,
+        'csrfmiddlewaretoken': loginpage.cookies['csrftoken']}
+loggedinpage = requests.post(
+    '{}user/login/'.format(BaseUrl),
+    data=data,
+    cookies=cookies,
+    allow_redirects=False,
+    headers={'referer': BaseUrl})
+# A login response without a 'sessionid' cookie is treated as a failed login (exit code 1).
+try:
+    sessionid = loggedinpage.cookies['sessionid']
+except KeyError:
+    print('Login failed')
+    sys.exit(1)
+cookies = {'sessionid': sessionid}
+
+
+def download_zip(current_imageset):
+    """Download the zip of one imageset, unpack it into filename/<imageset>/ and delete the zip.
+
+    Only the session cookie is sent; any non-200 answer is recorded in errorlist, not unpacked.
+    """
+    print(f"Now downloading {current_imageset}")
+    filepath = os.path.join(filename, current_imageset)
+    os.makedirs(filepath, exist_ok=True)
+    full_zipname = os.path.join(filepath, current_imageset + ".zip")
+    ziplink = f"{BaseUrl}images/imageset/{current_imageset}/download/"
+    with requests.get(ziplink, cookies=cookies, allow_redirects=False,
+                      headers={'referer': BaseUrl}, stream=True) as r:
+        # Redirects are not followed: 404 (imageset or zip missing) and every other non-200 body is not a zip.
+        if r.status_code != 200:
+            reason = "page not found" if r.status_code == 404 else "status {}".format(r.status_code)
+            print("In Imageset {} was an error. The server returned {}. Try --separate if zip download is disabled.".format(current_imageset, reason))
+            errorlist.append(current_imageset)
+            return
+        with open(full_zipname, "wb") as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+        with zipfile.ZipFile(full_zipname, 'r') as unzip:
+            unzip.extractall(filepath)
+        os.remove(full_zipname)
+    print(f"Downloaded imageset {current_imageset}")
+
+# Download images individually. This is slower than downloading the zip, so the zip is used by default.
 def download_imageset(current_imageset):
     error = False
     if not os.path.exists(os.path.join(os.getcwd(), filename, current_imageset)):
         os.makedirs(os.path.join(os.getcwd(), filename, current_imageset))
-    loginpage = requests.get(BaseUrl)
-    csrftoken = loginpage.cookies['csrftoken']
-
-    cookies = {'csrftoken': csrftoken}
-    csrfmiddlewaretoken = csrftoken
-    data = {'username': user,
-            'password': password,
-            'csrfmiddlewaretoken': csrfmiddlewaretoken}
-    loggedinpage = requests.post(
-        '{}user/login/'.format(BaseUrl),
-        data=data,
-        cookies=cookies,
-        allow_redirects=False,
-        headers={'referer': BaseUrl})
-
-    try:
-        sessionid = loggedinpage.cookies['sessionid']
-    except KeyError:
-        print('Login failed')
-        sys.exit(1)
-    cookies = {'sessionid': sessionid}
     page = requests.get("{}images/imagelist/{}/".format(BaseUrl,
                         current_imageset),
                         cookies=cookies)
@@ -103,8 +143,11 @@ def download_imageset(current_imageset):
 
 
 for imgset in imagesets:
-    if imgset is not " ":
-        download_imageset(imgset)
+    if imgset == " ":
+        continue
+    # The zip download is the default; the per-image download runs only when requested.
+    fetch_imageset = download_imageset if separate_download else download_zip
+    fetch_imageset(imgset)
 if errorlist:
     print("There have been errors while downloading the following imagesets: ")
     for item in errorlist: