diff --git a/Dockerfile b/Dockerfile index c8b43a3..9b80f6b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,10 +24,20 @@ RUN pip3 install -r requirements.txt EXPOSE 4000 +EXPOSE 4001 #debug/dev only # ENV FLASK_APP SlideServer.py # CMD python -m flask run --host=0.0.0.0 --port=4000 +# The Below BROKE the ability for users to upload images. +# # non-root user +# RUN chgrp -R 0 /var && \ +# chmod -R g+rwX /var && \ +# chgrp -R 0 /images/uploading && \ +# chmod -R g+rwX /images/uploading +# +# USER 1001 + #prod only CMD gunicorn -w 4 -b 0.0.0.0:4000 SlideServer:app --timeout 400 diff --git a/OmniLoad.py b/OmniLoad.py index cd2c2ab..1129136 100644 --- a/OmniLoad.py +++ b/OmniLoad.py @@ -6,29 +6,27 @@ import time # for timestamp import os # for os/fs systems import json # for json in and out -import pymongo # for mongo in and out import requests # for api and pathdb in and out parser = argparse.ArgumentParser(description='Load slides or results to caMicroscope.') # read in collection parser.add_argument('-i', type=str, default="slide", choices=['slide', 'heatmap', 'mark', 'user'], - help='Input type (collection)') + help='Input type') # read in filepath parser.add_argument('-f', type=str, default="manifest.csv", help='Input file') # read in dest type -parser.add_argument('-o', type=str, default="mongo", choices=['mongo', 'jsonfile', 'api', 'pathdb'], +parser.add_argument('-o', type=str, default="camic", choices=['jsonfile', 'camic', 'pathdb'], help='Output destination type') +# read in pathdb collection +parser.add_argument('-pc', type=str, help='Pathdb Collection Name') # read in dest uri or equivalent -parser.add_argument('-d', type=str, default="mongodb://ca-mongo:27017/", +parser.add_argument('-d', type=str, default="http://localhost:4010/data/Slide/post", help='Output destination') -# read in mongo database -parser.add_argument('-db', type=str, default="camic", - help='For mongo, the db to use') # read in lookup type -parser.add_argument('-lt', type=str, 
help='Slide ID lookup type', default="mongo", choices=['mongo', 'jsonfile', 'api', 'pathdb']) +parser.add_argument('-lt', type=str, help='Slide ID lookup type', default="camic", choices=['camic', 'pathdb']) # read in lookup uri or equivalent -parser.add_argument('-ld', type=str, default="mongodb://ca-mongo:27017/", +parser.add_argument('-ld', type=str, default="http://localhost:4010/data/Slide/find", help='Slide ID lookup source') args = parser.parse_args() @@ -41,14 +39,17 @@ def openslidedata(manifest): slide = openslide.OpenSlide(img['location']) slideData = slide.properties img['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_X, None) - img['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_Y, None) - img['mpp'] = img['mpp-x'] or img['mpp-x'] or None - img['height'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None) - img['width'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, None) + img['mpp-y'] = slideData.get(openslide.PROPERTY_NAME_MPP_Y, None) + img['height'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None) or slideData.get( + "openslide.level[0].height", None) + img['width'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, None) or slideData.get( + "openslide.level[0].width", None) img['vendor'] = slideData.get(openslide.PROPERTY_NAME_VENDOR, None) img['level_count'] = int(slideData.get('level_count', 1)) - img['objective'] = float(slideData.get("aperio.AppMag", None)) - img['timestamp'] = time.time() + img['objective'] = float(slideData.get(openslide.PROPERTY_NAME_OBJECTIVE_POWER, 0) or + slideData.get("aperio.AppMag", -1.0)) + img['md5sum'] = file_md5(filepath) + img['comment'] = slideData.get(openslide.PROPERTY_NAME_COMMENT, None) # required values which are often unused img['study'] = img.get('study', "") img['specimen'] = img.get('specimen', "") @@ -72,39 +73,40 @@ def openslidedata(manifest): if (args.i == "slide"): manifest = openslidedata(manifest) else: - raise NotImplementedError("Slide id lookup not 
implemented") - if (args.lt == "api"): + + if (args.lt == "camic"): for x in manifest: - # TODO get slide ref from manifest - r = requests.get(args.ld) - r.json() - # put slide id in manifest - if (args.lt == "mongo"): - pass + # TODO more flexible with manifest fields + lookup_url = args.ld + "?name=" + x.slide + r = requests.get(lookup_url) + res = r.json() + if (len(res)) == 0: + print("[WARN] - no match for slide '" + x.slide + "', skipping") + del x + x.id = res[0]["_id"]["$oid"] if (args.lt == "pathdb"): - pass - if (args.lt == "jsonfile"): - with open(args.ld, 'r') as f: - slide_map = json.load(manifest, f) - # TODO use + raise NotImplementedError("pathdb lookup is broken now") + for x in manifest: + # TODO there's an error with the url construction when testing, something's up + lookup_url = args.ld + args.pc + "/" + lookup_url += x.get("studyid", "") or x.get("study") + lookup_url += x.get("clinicaltrialsubjectid", "") or x.get("subject") + lookup_url += x.get("imageid", "") or x.get("image", "") or x.get("slide", "") + lookup_url += "?_format=json" + r = requests.get(lookup_url) + res = r.json() + if (len(res)) == 0: + print("[WARN] - no match for slide '" + str(x) + "', skipping") + del x + else: + x.id = res[0]["PathDBID"] -# perform validation (!!) +# TODO add validation (!!) print("[WARNING] -- Validation not Implemented") - -# take appropriate destination action -if (args.o == "jsonfile"): - with open(args.d, 'w') as f: - json.dump(manifest, f) -elif (args.o == "mongo"): - client = pymongo.MongoClient(args.d) - db = client[args.db] - col = db[args.i] - col.insert_many(manifest) -elif (args.o == "api"): +def postWithAuth(data, url): x = requests.post(args.d, json=manifest) - # if we get a 401, ask the user for a token retry = True while (x.status_code == 401 and retry): token = input("API returned 401, try a (different) token? 
: ") @@ -112,7 +114,23 @@ def openslidedata(manifest): x = requests.post(args.d, json=manifest, auth=token) else: retry = False - x.raise_for_status() + return x + +# take appropriate destination action +if (args.o == "jsonfile"): + with open(args.d, 'w') as f: + json.dump(manifest, f) +elif (args.o == "camic"): + if (args.i == "slide"): + x = postWithAuth(args.d, manifest) + x.raise_for_status() + else: + with open(x.path) as f: + file = json.load(f) + for rec in file: + rec[slide] = x.id + x = postWithAuth(args.d, file) + x.raise_for_status() elif (args.o == "pathdb"): #! TODO if (args.i != "slide"): diff --git a/SlideServer.py b/SlideServer.py index 98e4172..51869ac 100644 --- a/SlideServer.py +++ b/SlideServer.py @@ -21,6 +21,8 @@ import csv import pathlib import logging +from gDriveDownload import start, afterUrlAuth, callApi +from threading import Thread try: from io import BytesIO @@ -40,6 +42,7 @@ app.config['SECRET_KEY'] = os.urandom(24) app.config['ROI_FOLDER'] = "/images/roiDownload" + ALLOWED_EXTENSIONS = set(['svs', 'tif', 'tiff', 'vms', 'vmu', 'ndpi', 'scn', 'mrxs', 'bif', 'svslide']) @@ -233,6 +236,7 @@ def urlUploadStatus(): return flask.Response(json.dumps({"uploaded": "False"}), status=200) + # Workbench Dataset Creation help-routes # Route to receive base64 encoded zip files. @@ -528,3 +532,53 @@ def roiextract(file_name): return flask.send_from_directory(app.config["ROI_FOLDER"],filename=file_name, as_attachment=True, cache_timeout=0 ) +# Google Drive API (OAuth and File Download) Routes + +# A new Thread to call the Gdrive API after an Auth Response is returned to the user. 
class getFileFromGdrive(Thread):
    """Background worker that completes Google OAuth if needed, then downloads one Drive file.

    ``params`` is the dict returned by ``start()``. When its ``"auth_url"`` entry
    is not None the user still has to authorize, so ``run`` first waits for the
    OAuth redirect via ``afterUrlAuth`` and stores the resulting credentials in
    ``params["creds"]`` before calling ``callApi``.
    """

    def __init__(self, params, userId, fileId, token):
        Thread.__init__(self)
        self.params = params
        self.userId = userId
        self.fileId = fileId
        self.token = token

    def run(self):
        """Finish authentication when required, download the file and log the outcome."""
        try:
            if self.params["auth_url"] is not None:
                self.params["creds"] = afterUrlAuth(
                    self.params["local_server"],
                    self.params["flow"],
                    self.params["wsgi_app"],
                    self.userId,
                )
            call = callApi(self.params["creds"], self.fileId, self.token)
            app.logger.info(call)
        except Exception:
            # Nothing joins this thread, so log here or the failure is only
            # visible as a raw traceback on stderr.
            app.logger.exception("Google Drive download failed for token %s", self.token)


def _new_gdrive_token(upload_dir="/images/uploading/"):
    """Return a random 10-character token that does not name an existing file in ``upload_dir``."""
    alphabet = string.ascii_uppercase + string.digits
    while True:
        token = secure_filename("".join(random.choice(alphabet) for _ in range(10)))
        # regenerate if we happen to collide
        if not os.path.isfile(os.path.join(upload_dir, token)):
            return token


# Route to start the OAuth server (to listen for the user's authentication)
# and start the file download after authentication.
@app.route('/googleDriveUpload/getFile', methods=['POST'])
def gDriveGetFile():
    """Start a Google Drive download for the ``userId`` / ``fileId`` given in the JSON body.

    Responds with ``{"authURL": ..., "token": ...}``. ``authURL`` is null when
    ``start()`` found usable cached credentials. ``token`` is the handle to pass
    to ``/googleDriveUpload/checkStatus``. Responds 400 when the body is missing,
    is not a JSON object, lacks a required field, or ``start()`` fails.
    """
    body = flask.request.get_json()
    if not body:
        return flask.Response(json.dumps({"error": "Missing JSON body"}), status=400)
    if not isinstance(body, dict):
        return flask.Response(json.dumps({"error": "JSON body must be an object"}), status=400)

    # Validate before start(): it may bind the OAuth listener socket, which
    # must not happen for a request that is going to be rejected anyway.
    missing = [key for key in ("userId", "fileId") if not body.get(key)]
    if missing:
        return flask.Response(
            json.dumps({"error": "Missing field(s): " + ", ".join(missing)}), status=400
        )

    token = _new_gdrive_token()

    try:
        params = start(body['userId'])
    except Exception as err:
        app.logger.exception("Could not start Google Drive authentication")
        return flask.Response(json.dumps({'error': str(type(err))}), status=400)

    thread_a = getFileFromGdrive(params, body['userId'], body['fileId'], token)
    thread_a.start()
    return flask.Response(json.dumps({"authURL": params["auth_url"], "token": token}), status=200)


# To check if a particular file is downloaded from Gdrive
@app.route('/googleDriveUpload/checkStatus', methods=['POST'])
def checkDownloadStatus():
    """Report whether the download identified by ``token`` exists in ``TEMP_FOLDER``.

    Responds with ``{"downloadDone": true|false}``. Responds 400 when the body
    or token is missing, or when the token could not have been issued by
    ``gDriveGetFile``.
    """
    body = flask.request.get_json()
    if not body:
        return flask.Response(json.dumps({"error": "Missing JSON body"}), status=400)

    token = body.get('token') if isinstance(body, dict) else None
    if not isinstance(token, str) or not token:
        return flask.Response(json.dumps({"error": "Missing token"}), status=400)

    # Issued tokens already went through secure_filename, so a value that it
    # would alter (path separators, "..", ...) is a probe, not a real token.
    if secure_filename(token) != token:
        return flask.Response(json.dumps({"error": "Invalid token"}), status=400)

    path = app.config['TEMP_FOLDER'] + '/' + token
    return flask.Response(json.dumps({"downloadDone": os.path.isfile(path)}), status=200)
from __future__ import print_function
import pickle
import wsgiref.simple_server
import wsgiref.util
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow, _WSGIRequestHandler, _RedirectWSGIApp
from google.auth.transport.requests import Request
import sys
import os
import io
import shutil


# If modifying these scopes, delete the cached per-user token files
# "/cloud-upload-apis/tokens/googleDrive<userId>.pickle".
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]


def _token_file(userId):
    """Return the pickle path that caches ``userId``'s credentials.

    Raises:
        ValueError: if ``userId`` is not a non-empty string or contains a path
            separator or NUL, since the value is spliced into a filesystem path.
    """
    if (
        not isinstance(userId, str)
        or not userId
        or "/" in userId
        or "\\" in userId
        or "\x00" in userId
    ):
        raise ValueError("invalid userId: %r" % (userId,))
    return "/cloud-upload-apis/tokens/googleDrive" + userId + ".pickle"


# Starting a local server on :4001 to listen for authentication response from user
def run_local_server(
    self=InstalledAppFlow,
    host="0.0.0.0",
    port=4001,
    authorization_prompt_message=InstalledAppFlow._DEFAULT_AUTH_PROMPT_MESSAGE,
    success_message=InstalledAppFlow._DEFAULT_WEB_SUCCESS_MESSAGE,
    userId=None,
    redirect_base="http://localhost:4010/googleAuth/",
):
    """Bind the OAuth redirect listener and build the authorization URL.

    Unlike ``InstalledAppFlow.run_local_server`` this does not block: it returns
    the bound server so the redirect can be handled later by ``afterUrlAuth``.

    Args:
        self: the flow to configure. ``authorization_url()`` is called on it, so
            callers need to pass a flow instance.
        host: interface the listener binds to.
        port: port the listener binds to.
        authorization_prompt_message: template printed with ``{url}`` filled in.
        success_message: text served to the browser after the redirect.
        userId: appended to ``redirect_base`` so each user gets a unique redirect URI.
        redirect_base: prefix of the redirect URI registered for this deployment.

    Returns:
        ``(auth_url, local_server, wsgi_app, None)``. The trailing ``None`` is
        the credentials slot, which is filled in only after authorization.

    Raises:
        ValueError: if ``userId`` is missing or not a string.
    """
    # Check before binding the socket: a failure after make_server() would
    # leave the port occupied.
    if not isinstance(userId, str) or not userId:
        raise ValueError("userId is required to build the per-user redirect URI")

    wsgi_app = _RedirectWSGIApp(success_message)
    local_server = wsgiref.simple_server.make_server(
        host, port, wsgi_app, handler_class=_WSGIRequestHandler
    )
    try:
        # Making a unique redirect URL for every user
        self.redirect_uri = redirect_base + userId
        auth_url, _ = self.authorization_url()
    except Exception:
        local_server.server_close()
        raise

    print(authorization_prompt_message.format(url=auth_url))

    return auth_url, local_server, wsgi_app, None


# Will be called after return of Auth URL
def afterUrlAuth(local_server, flow, wsgi_app, userId):
    """Wait for the OAuth redirect, exchange it for credentials and cache them.

    Blocks in ``local_server.handle_request()`` until one request arrives, then
    closes the listener so its port is released.

    Returns:
        ``flow.credentials`` after the token exchange.

    Raises:
        ValueError: if ``userId`` is unsafe to use in the token file name.
        RuntimeError: if no redirect URI was captured by ``wsgi_app``.
    """
    try:
        token_file = _token_file(userId)
        local_server.handle_request()
    finally:
        # The listener is single-use. Leaving it open keeps the port bound.
        local_server.server_close()

    redirect = wsgi_app.last_request_uri
    if not redirect:
        raise RuntimeError("no OAuth redirect was received")

    # Note: using https here because oauthlib is very picky that
    # OAuth 2.0 should only occur over https. Only the scheme is rewritten, so
    # any "http" inside the query string is left untouched.
    if redirect.startswith("http://"):
        redirect = "https://" + redirect[len("http://"):]
    flow.fetch_token(authorization_response=redirect)

    # Save the credentials for the next run
    with open(token_file, "wb") as token:
        pickle.dump(flow.credentials, token)
    return flow.credentials
# Starting the Auth process and checking for pickle file (Token) [Creating a new token if not exists]
def start(userId):
    """Return usable Drive credentials for ``userId``, or start an OAuth flow to get them.

    The per-user file "/cloud-upload-apis/tokens/googleDrive<userId>.pickle"
    stores the access and refresh tokens; it is written when an authorization
    flow completes. Cached credentials are reused when still valid and
    refreshed when they are expired but carry a refresh token. Otherwise a new
    flow is started through ``run_local_server``.

    Returns:
        A dict with keys ``auth_url``, ``local_server``, ``wsgi_app``, ``flow``
        and ``creds``. With usable cached credentials only ``creds`` is set and
        the rest are None. Otherwise ``creds`` is None and the other entries
        describe the pending flow.

    Raises:
        ValueError: if ``userId`` is not a non-empty string or contains a path
            separator or NUL, since the value is spliced into a filesystem path.
    """
    from google.auth.exceptions import RefreshError

    if (
        not isinstance(userId, str)
        or not userId
        or "/" in userId
        or "\\" in userId
        or "\x00" in userId
    ):
        raise ValueError("invalid userId: %r" % (userId,))
    token_path = "/cloud-upload-apis/tokens/googleDrive" + userId + ".pickle"

    creds = None
    try:
        with open(token_path, "rb") as token:
            # NOTE(review): pickle.load executes code from the file. This is
            # acceptable only while these files are written solely by this service.
            creds = pickle.load(token)
    except FileNotFoundError:
        creds = None
    except (pickle.UnpicklingError, EOFError):
        # A truncated or corrupt cache is treated like a missing one.
        creds = None

    if creds is not None and not creds.valid and creds.expired and creds.refresh_token:
        try:
            creds.refresh(Request())
        except RefreshError:
            # Refresh token rejected: fall through to a new authorization.
            creds = None
        else:
            with open(token_path, "wb") as token:
                pickle.dump(creds, token)

    if creds is not None and creds.valid:
        return {
            "auth_url": None,
            "local_server": None,
            "wsgi_app": None,
            "flow": None,
            "creds": creds,
        }

    # If there are no (valid) credentials available, let the user log in.
    flow = InstalledAppFlow.from_client_secrets_file(
        "/cloud-upload-apis/credentials/google-drive.json", SCOPES
    )
    auth_url, local_server, wsgi_app, creds = run_local_server(self=flow, userId=userId)
    return {
        "auth_url": auth_url,
        "local_server": local_server,
        "wsgi_app": wsgi_app,
        "flow": flow,
        "creds": creds,
    }
# Calling the Drive API to download a file
def callApi(creds, fileId, token, upload_dir="/images/uploading/"):
    """Download Drive file ``fileId`` to ``upload_dir`` under the name ``token``.

    Chunks are written straight to "<token>.part" on disk rather than held in
    memory. The file is renamed to ``token`` only once the download is
    complete, so a file named ``token`` never holds partial data.

    Args:
        creds: Google credentials passed to the Drive client.
        fileId: Drive id of the file to fetch.
        token: destination file name inside ``upload_dir``.
        upload_dir: directory that receives the download.

    Returns:
        ``{"status": True, "fileName": <Drive file name>, "token": token}`` on
        success, or ``False`` on any failure. The failure is logged with its
        traceback.
    """
    import logging

    final_path = os.path.join(upload_dir, token)
    partial_path = final_path + ".part"

    try:
        service = build("drive", "v3", credentials=creds)

        # Call the Drive v3 API
        fileName = service.files().get(fileId=fileId).execute()["name"]
        request = service.files().get_media(fileId=fileId)

        with open(partial_path, "wb") as fh:
            # Initialise a downloader object to download the file
            downloader = MediaIoBaseDownload(fh, request)
            downloadDone = False
            # Download the data in chunks
            while not downloadDone:
                _status, downloadDone = downloader.next_chunk()

        os.replace(partial_path, final_path)
        return {"status": True, "fileName": fileName, "token": token}
    except Exception:
        logging.getLogger(__name__).exception(
            "Google Drive download of file %s failed", fileId
        )
        try:
            os.remove(partial_path)
        except OSError:
            # Best-effort cleanup: the partial file may never have been created.
            pass
        return False