Merge pull request #43 from camicroscope/develop

For 3.8.4
camicroscope · Jan 25, 2021 · 122437a · 122437a
2 parents 342e622 + 4c687b3
commit 122437a
Show file tree

Hide file tree

Showing 5 changed files with 245 additions and 42 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -24,10 +24,20 @@ RUN pip3 install -r requirements.txt
 
 
 EXPOSE 4000
+EXPOSE 4001
 
 #debug/dev only
 # ENV FLASK_APP SlideServer.py
 # CMD python -m flask run --host=0.0.0.0 --port=4000
 
+# The block below BROKE the ability for users to upload images, so it stays commented out.
+# # non-root user
+# RUN chgrp -R 0 /var && \
+#     chmod -R g+rwX /var && \
+#     chgrp -R 0 /images/uploading && \
+#     chmod -R g+rwX /images/uploading
+#
+# USER 1001
+
 #prod only
 CMD gunicorn -w 4 -b 0.0.0.0:4000 SlideServer:app --timeout 400
diff --git a/OmniLoad.py b/OmniLoad.py
@@ -6,29 +6,27 @@
 import time # for timestamp
 import os # for os/fs systems
 import json # for json in and out
-import pymongo # for mongo in and out
 import requests # for api and pathdb in and out
 
 parser = argparse.ArgumentParser(description='Load slides or results to caMicroscope.')
 # read in collection
 parser.add_argument('-i', type=str, default="slide", choices=['slide', 'heatmap', 'mark', 'user'],
-                    help='Input type (collection)')
+                    help='Input type')
 # read in filepath
 parser.add_argument('-f', type=str, default="manifest.csv",
                     help='Input file')
 # read in dest type
-parser.add_argument('-o', type=str, default="mongo", choices=['mongo', 'jsonfile', 'api', 'pathdb'],
+parser.add_argument('-o', type=str, default="camic", choices=['jsonfile', 'camic', 'pathdb'],
                     help='Output destination type')
+# read in pathdb collection
+parser.add_argument('-pc', type=str, help='Pathdb Collection Name')
 # read in dest uri or equivalent
-parser.add_argument('-d', type=str, default="mongodb://ca-mongo:27017/",
+parser.add_argument('-d', type=str, default="http://localhost:4010/data/Slide/post",
                     help='Output destination')
-# read in mongo database
-parser.add_argument('-db', type=str, default="camic",
-                    help='For mongo, the db to use')
 # read in lookup type
-parser.add_argument('-lt', type=str, help='Slide ID lookup type', default="mongo", choices=['mongo', 'jsonfile', 'api', 'pathdb'])
+parser.add_argument('-lt', type=str, help='Slide ID lookup type', default="camic", choices=['camic', 'pathdb'])
 # read in lookup uri or equivalent
-parser.add_argument('-ld', type=str, default="mongodb://ca-mongo:27017/",
+parser.add_argument('-ld', type=str, default="http://localhost:4010/data/Slide/find",
                     help='Slide ID lookup source')
 
 args = parser.parse_args()
@@ -41,14 +39,17 @@ def openslidedata(manifest):
         slide = openslide.OpenSlide(img['location'])
         slideData = slide.properties
         img['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_X, None)
-        img['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_Y, None)
-        img['mpp'] = img['mpp-x'] or img['mpp-x'] or None
-        img['height'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None)
-        img['width'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, None)
+        img['mpp-y'] = slideData.get(openslide.PROPERTY_NAME_MPP_Y, None)
+        img['height'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None) or slideData.get(
+            "openslide.level[0].height", None)
+        img['width'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, None) or slideData.get(
+            "openslide.level[0].width", None)
         img['vendor'] = slideData.get(openslide.PROPERTY_NAME_VENDOR, None)
         img['level_count'] = int(slideData.get('level_count', 1))
-        img['objective'] = float(slideData.get("aperio.AppMag", None))
-        img['timestamp'] = time.time()
+        img['objective'] = float(slideData.get(openslide.PROPERTY_NAME_OBJECTIVE_POWER, 0) or
+                                      slideData.get("aperio.AppMag", -1.0))
+        img['md5sum'] = file_md5(filepath)
+        img['comment'] = slideData.get(openslide.PROPERTY_NAME_COMMENT, None)
         # required values which are often unused
         img['study'] = img.get('study', "")
         img['specimen'] = img.get('specimen', "")
@@ -72,47 +73,64 @@ def openslidedata(manifest):
 if (args.i == "slide"):
     manifest = openslidedata(manifest)
 else:
-    raise NotImplementedError("Slide id lookup not implemented")
-    if (args.lt == "api"):
+
+    if (args.lt == "camic"):
         for x in manifest:
-            # TODO get slide ref from manifest
-            r = requests.get(args.ld)
-            r.json()
-            # put slide id in manifest
-    if (args.lt == "mongo"):
-        pass
+            # TODO more flexible with manifest fields
+            # Manifest rows are dicts (see the x.get(...) / img['...'] usage elsewhere in
+            # this file), so use key access. Passing the name through `params` lets
+            # requests URL-encode it.
+            r = requests.get(args.ld, params={"name": x["slide"]})
+            res = r.json()
+            if not res:
+                print("[WARN] - no match for slide '" + x["slide"] + "', skipping")
+                # `del x` only unbound the loop variable and made the next statement
+                # raise; move on to the next row instead.
+                # NOTE(review): the unmatched row stays in `manifest` without an "id".
+                continue
+            x["id"] = res[0]["_id"]["$oid"]
     if (args.lt == "pathdb"):
-        pass
-    if (args.lt == "jsonfile"):
-        with open(args.ld, 'r') as f:
-            slide_map = json.load(manifest, f)
-            # TODO use
+        raise NotImplementedError("pathdb lookup is broken now")
+        for x in manifest:
+            # TODO there's an error with the url construction when testing, something's up
+            lookup_url = args.ld + args.pc + "/"
+            lookup_url += x.get("studyid", "") or x.get("study")
+            lookup_url += x.get("clinicaltrialsubjectid", "") or x.get("subject")
+            lookup_url += x.get("imageid", "") or x.get("image", "") or x.get("slide", "")
+            lookup_url += "?_format=json"
+            r = requests.get(lookup_url)
+            res = r.json()
+            if (len(res)) == 0:
+                print("[WARN] - no match for slide '" + str(x) + "', skipping")
+                del x
+            else:
+                x.id = res[0]["PathDBID"]
 
 
-# perform validation (!!)
+# TODO add validation (!!)
 print("[WARNING] -- Validation not Implemented")
 
-
-# take appropriate destination action
-if (args.o == "jsonfile"):
-    with open(args.d, 'w') as f:
-        json.dump(manifest, f)
-elif (args.o == "mongo"):
-    client = pymongo.MongoClient(args.d)
-    db = client[args.db]
-    col = db[args.i]
-    col.insert_many(manifest)
-elif (args.o == "api"):
+# POST the module-level `manifest` as JSON to `args.d`. While the response status is
+# 401, prompt on stdin for a token and retry; an empty answer, "no" or "n" stops the
+# retries. Returns the last response object without raising on error statuses.
+# NOTE(review): the `data` and `url` parameters are never read -- the body uses the
+# globals `manifest` and `args.d`, and the call sites in this file pass (url, payload),
+# i.e. the opposite order to this signature. Confirm the intended contract before
+# wiring the parameters in.
+# NOTE(review): `auth=token` passes a bare string; requests documents `auth` as a
+# tuple or callable, so the retry presumably needs a header or auth object -- TODO confirm.
+def postWithAuth(data, url):
     x = requests.post(args.d, json=manifest)
-    # if we get a 401, ask the user for a token
     retry = True
     while (x.status_code == 401 and retry):
         token = input("API returned 401, try a (different) token? : ")
         if (token and token != "no" and token != "n"):
             x = requests.post(args.d, json=manifest, auth=token)
         else:
             retry = False
-    x.raise_for_status()
+    return x
+
+# take appropriate destination action
+if (args.o == "jsonfile"):
+    with open(args.d, 'w') as f:
+        json.dump(manifest, f)
+elif (args.o == "camic"):
+    if (args.i == "slide"):
+        x = postWithAuth(args.d, manifest)
+        x.raise_for_status()
+    else:
+        with open(x.path) as f:
+            file = json.load(f)
+            for rec in file:
+                rec[slide] = x.id
+            x = postWithAuth(args.d, file)
+            x.raise_for_status()
 elif (args.o == "pathdb"):
     #! TODO
     if (args.i != "slide"):

diff --git a/SlideServer.py b/SlideServer.py
@@ -21,6 +21,8 @@
 import csv 
 import pathlib
 import logging
+from gDriveDownload import start, afterUrlAuth, callApi
+from threading import Thread
 
 try:
     from io import BytesIO
@@ -40,6 +42,7 @@
 app.config['SECRET_KEY'] = os.urandom(24)
 app.config['ROI_FOLDER'] = "/images/roiDownload"
 
+
 ALLOWED_EXTENSIONS = set(['svs', 'tif', 'tiff', 'vms', 'vmu', 'ndpi', 'scn', 'mrxs', 'bif', 'svslide'])    
 
 
@@ -233,6 +236,7 @@ def urlUploadStatus():
         return flask.Response(json.dumps({"uploaded": "False"}), status=200)
 
 
+
 # Workbench Dataset Creation help-routes
 
 # Route to receive base64 encoded zip files.
@@ -528,3 +532,53 @@ def roiextract(file_name):
     return flask.send_from_directory(app.config["ROI_FOLDER"],filename=file_name, as_attachment=True, cache_timeout=0 )
 
 
+# Google Drive API (OAuth and File Download) Routes
+
+# A new Thread to call the Gdrive API after an Auth Response is returned to the user.
+class getFileFromGdrive(Thread):
+    """Worker thread that completes a pending OAuth handshake, then downloads one file.
+
+    params: dict produced by gDriveDownload.start(); the keys read here are
+        "auth_url", "local_server", "flow", "wsgi_app" and "creds".
+    userId: identifier forwarded to afterUrlAuth.
+    fileId: Drive file id forwarded to callApi.
+    token: upload token forwarded to callApi.
+    """
+
+    def __init__(self, params, userId, fileId, token):
+        super().__init__()
+        self.params = params
+        self.userId = userId
+        self.fileId = fileId
+        self.token = token
+
+    def run(self):
+        try:
+            # A non-None auth_url means no stored credentials were returned, so wait
+            # for the user's OAuth redirect before calling the Drive API.
+            if self.params["auth_url"] is not None:
+                self.params["creds"] = afterUrlAuth(
+                    self.params["local_server"],
+                    self.params["flow"],
+                    self.params["wsgi_app"],
+                    self.userId,
+                )
+            call = callApi(self.params["creds"], self.fileId, self.token)
+            app.logger.info(call)
+        except Exception:
+            # Top-level boundary of the worker: record the failure in the app log
+            # instead of letting it vanish with the thread.
+            app.logger.exception("Google Drive download failed for token %s", self.token)
+
+# Route to start the OAuth server (which listens for the user's authentication) and to
+# start the file download once authentication has completed.
+@app.route('/googleDriveUpload/getFile', methods=['POST'])
+def gDriveGetFile():
+    """Kick off a Google Drive download and return its auth URL and tracking token.
+
+    Expects a JSON object with "userId" and "fileId". Responds 400 when the body is
+    missing, a field is absent, or start() raises. Otherwise responds 200 with
+    {"authURL": ..., "token": ...}; the download itself runs in a background thread.
+    """
+    body = flask.request.get_json()
+    if not body or not isinstance(body, dict):
+        return flask.Response(json.dumps({"error": "Missing JSON body"}), status=400)
+
+    # Validate before start() so a malformed request cannot leave an OAuth listener
+    # bound with no worker thread to serve it.
+    missing = [key for key in ("userId", "fileId") if key not in body]
+    if missing:
+        return flask.Response(
+            json.dumps({"error": "Missing field(s): " + ", ".join(missing)}), status=400)
+
+    # Draw random tokens until one does not collide with an existing upload.
+    while True:
+        token = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
+        token = secure_filename(token)
+        tmppath = os.path.join("/images/uploading/", token)
+        if not os.path.isfile(tmppath):
+            break
+
+    try:
+        params = start(body['userId'])
+    except Exception:
+        return flask.Response(json.dumps({'error': str(sys.exc_info()[0])}), status=400)
+    thread_a = getFileFromGdrive(params, body['userId'], body['fileId'], token)
+    thread_a.start()
+    return flask.Response(json.dumps({"authURL": params["auth_url"], "token": token}), status=200)
+
+# To check if a particular file is downloaded from Gdrive
+@app.route('/googleDriveUpload/checkStatus', methods=['POST'])
+def checkDownloadStatus():
+    """Report whether the file for an upload token exists in TEMP_FOLDER.
+
+    Expects a JSON object with a string "token". Responds 400 when the body or the
+    token is missing, otherwise 200 with {"downloadDone": true|false}.
+    """
+    body = flask.request.get_json()
+    if not body:
+        return flask.Response(json.dumps({"error": "Missing JSON body"}), status=400)
+    token = body.get('token') if isinstance(body, dict) else None
+    if not token or not isinstance(token, str):
+        return flask.Response(json.dumps({"error": "Missing token"}), status=400)
+    # secure_filename strips path separators, so a crafted token cannot be used to
+    # probe for files outside TEMP_FOLDER.
+    path = os.path.join(app.config['TEMP_FOLDER'], secure_filename(token))
+    if os.path.isfile(path):
+        return flask.Response(json.dumps({"downloadDone": True}), status=200)
+    return flask.Response(json.dumps({"downloadDone": False}), status=200)
+
diff --git a/gDriveDownload.py b/gDriveDownload.py
@@ -0,0 +1,118 @@
+from __future__ import print_function
+import pickle
+import wsgiref.simple_server
+import wsgiref.util
+from googleapiclient.http import MediaIoBaseDownload
+from googleapiclient.discovery import build
+from google_auth_oauthlib.flow import InstalledAppFlow, _WSGIRequestHandler, _RedirectWSGIApp
+from google.auth.transport.requests import Request
+import sys
+import os
+import io
+import shutil
+
+
+# If modifying these scopes, delete the file "googleDrive<userID>.pickle"
+SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
+
+# Starting a local server on :4001 to listen for authentication response from user
+def run_local_server(
+    self=InstalledAppFlow,
+    host="0.0.0.0",
+    port=4001,
+    authorization_prompt_message=InstalledAppFlow._DEFAULT_AUTH_PROMPT_MESSAGE,
+    success_message=InstalledAppFlow._DEFAULT_WEB_SUCCESS_MESSAGE,
+    userId=None,
+):
+    """Bind the OAuth redirect listener and build the authorization URL.
+
+    self: the flow whose redirect_uri is set and whose authorization_url() is called
+        (start() passes an InstalledAppFlow instance).
+    host, port: address the WSGI listener binds to.
+    authorization_prompt_message: format string printed with the auth URL.
+    success_message: page text handed to the redirect WSGI app.
+    userId: appended to the redirect URI so every user gets a distinct URL.
+
+    Returns (auth_url, local_server, wsgi_app, None); the final None is the slot for
+    credentials, which do not exist until the user has authenticated.
+    Raises ValueError when userId is None.
+    """
+    # Fail before binding the port: concatenating None below would otherwise raise
+    # only after the socket is already held.
+    if userId is None:
+        raise ValueError("userId is required to build the redirect URI")
+
+    wsgi_app = _RedirectWSGIApp(success_message)
+    local_server = wsgiref.simple_server.make_server(host, port, wsgi_app, handler_class=_WSGIRequestHandler)
+
+    try:
+        # Making a unique redirect URL for every user
+        self.redirect_uri = "http://localhost:4010/googleAuth/" + userId
+        auth_url, _ = self.authorization_url()
+    except Exception:
+        # Release the listening socket so the next attempt can bind the port again.
+        local_server.server_close()
+        raise
+
+    print(authorization_prompt_message.format(url=auth_url))
+
+    return auth_url, local_server, wsgi_app, None
+
+
+# Will be called after return of Auth URL
+def afterUrlAuth(local_server, flow, wsgi_app, userId):
+    """Wait for the OAuth redirect, exchange it for credentials and persist them.
+
+    local_server: listener created by run_local_server; it serves exactly one request
+        here and is then closed.
+    flow: the flow used to fetch the token.
+    wsgi_app: redirect app whose last_request_uri holds the authorization response.
+    userId: used to name the pickle file that stores the credentials.
+
+    Returns flow.credentials.
+    """
+    try:
+        local_server.handle_request()
+    finally:
+        # The listener is single-use; close it so the port is free for the next flow.
+        local_server.server_close()
+
+    # Note: using https here because oauthlib is very picky that
+    # OAuth 2.0 should only occur over https.
+    # Rewrite only the scheme: a blanket replace("http", "https") would also mangle
+    # any "http" inside the query string (e.g. the code or scope values).
+    authorization_response = wsgi_app.last_request_uri
+    if authorization_response.startswith("http://"):
+        authorization_response = "https://" + authorization_response[len("http://"):]
+    flow.fetch_token(authorization_response=authorization_response)
+    # Save the credentials for the next run
+    with open(
+        "/cloud-upload-apis/tokens/googleDrive" + userId + ".pickle", "wb"
+    ) as token:
+        pickle.dump(flow.credentials, token)
+    return flow.credentials
+
+# Starting the Auth process and checking for pickle file (Token) [Creating a new token if not exists]
+def start(userId):
+    """Return usable stored credentials, or begin a new OAuth flow for userId.
+
+    Always returns a dict with the keys "auth_url", "local_server", "wsgi_app",
+    "flow" and "creds". With valid (or successfully refreshed) stored credentials
+    only "creds" is set; otherwise "creds" is None and the other four describe the
+    pending authorization. An exception raised by the refresh call propagates.
+    """
+    # The file "googleDrive<userID>.pickle" stores the user's access and refresh tokens, and is
+    # created automatically when the authorization flow completes for the first time.
+    token_path = "/cloud-upload-apis/tokens/googleDrive" + userId + ".pickle"
+
+    creds = None
+    if os.path.exists(token_path):
+        # NOTE(review): pickle.load can execute code from the file; this relies on
+        # the tokens directory being writable only by this service.
+        with open(token_path, "rb") as token:
+            creds = pickle.load(token)
+
+    # Refresh expired credentials instead of handing them back stale, and persist
+    # the refreshed token for the next run.
+    if creds and not creds.valid and creds.expired and creds.refresh_token:
+        creds.refresh(Request())
+        with open(token_path, "wb") as token:
+            pickle.dump(creds, token)
+
+    if creds and creds.valid:
+        return {
+            "auth_url": None,
+            "local_server": None,
+            "wsgi_app": None,
+            "flow": None,
+            "creds": creds,
+        }
+
+    # If there are no (valid) credentials available, let the user log in.
+    flow = InstalledAppFlow.from_client_secrets_file(
+        "/cloud-upload-apis/credentials/google-drive.json", SCOPES
+    )
+    auth_url, local_server, wsgi_app, creds = run_local_server(
+        self=flow, userId=userId
+    )
+    return {
+        "auth_url": auth_url,
+        "local_server": local_server,
+        "wsgi_app": wsgi_app,
+        "flow": flow,
+        "creds": creds,
+    }
+
+
+# Calling the Drive API to download a file
+def callApi(creds, fileId, token):
+    """Download Drive file fileId to "/images/uploading/<token>".
+
+    creds: credentials passed to the Drive v3 client.
+    fileId: id of the Drive file to fetch.
+    token: name of the destination file inside /images/uploading/.
+
+    Returns {"status": True, "fileName": <Drive name>, "token": token} on success and
+    False on any failure; the failure is printed to stderr.
+    """
+    finalPath = "/images/uploading/" + token
+    # Chunks are written to a sibling ".part" file and renamed once complete, so the
+    # final path only ever appears with the full content and the file never has to
+    # fit in memory.
+    partialPath = finalPath + ".part"
+
+    try:
+        service = build("drive", "v3", credentials=creds)
+
+        # Call the Drive v3 API
+        fileName = service.files().get(fileId=fileId).execute()["name"]
+        request = service.files().get_media(fileId=fileId)
+
+        with open(partialPath, "wb") as fh:
+            # Initialise a downloader object to download the file
+            downloader = MediaIoBaseDownload(fh, request)
+            downloadDone = False
+            # Download the data in chunks
+            while not downloadDone:
+                _, downloadDone = downloader.next_chunk()
+
+        os.replace(partialPath, finalPath)
+
+        # Return the result dict if the file downloaded successfully
+        return {"status": True, "fileName": fileName, "token": token}
+    except Exception as exc:
+        print("Google Drive download failed: " + str(exc), file=sys.stderr)
+        try:
+            os.remove(partialPath)
+        except OSError:
+            # Nothing to clean up (the partial file was never created or is already gone).
+            pass
+        return False
diff --git a/requirements.txt b/requirements.txt
@@ -5,3 +5,6 @@ flask-cors
 requests
 numpy
 Pillow
+google-api-python-client
+google-auth-httplib2
+google-auth-oauthlib