Skip to content

Commit

Permalink
Merge pull request #43 from camicroscope/develop
Browse files Browse the repository at this point in the history
For 3.8.4
  • Loading branch information
birm authored Jan 25, 2021
2 parents 342e622 + 4c687b3 commit 122437a
Show file tree
Hide file tree
Showing 5 changed files with 245 additions and 42 deletions.
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,20 @@ RUN pip3 install -r requirements.txt


EXPOSE 4000
EXPOSE 4001

#debug/dev only
# ENV FLASK_APP SlideServer.py
# CMD python -m flask run --host=0.0.0.0 --port=4000

# The non-root user setup below broke the ability for users to upload images, so it is left disabled.
# # non-root user
# RUN chgrp -R 0 /var && \
# chmod -R g+rwX /var && \
# chgrp -R 0 /images/uploading && \
# chmod -R g+rwX /images/uploading
#
# USER 1001

#prod only
CMD gunicorn -w 4 -b 0.0.0.0:4000 SlideServer:app --timeout 400
102 changes: 60 additions & 42 deletions OmniLoad.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,27 @@
import time # for timestamp
import os # for os/fs systems
import json # for json in and out
import pymongo # for mongo in and out
import requests # for api and pathdb in and out

parser = argparse.ArgumentParser(description='Load slides or results to caMicroscope.')
# read in collection
parser.add_argument('-i', type=str, default="slide", choices=['slide', 'heatmap', 'mark', 'user'],
help='Input type (collection)')
help='Input type')
# read in filepath
parser.add_argument('-f', type=str, default="manifest.csv",
help='Input file')
# read in dest type
parser.add_argument('-o', type=str, default="mongo", choices=['mongo', 'jsonfile', 'api', 'pathdb'],
parser.add_argument('-o', type=str, default="camic", choices=['jsonfile', 'camic', 'pathdb'],
help='Output destination type')
# read in pathdb collection
parser.add_argument('-pc', type=str, help='Pathdb Collection Name')
# read in dest uri or equivalent
parser.add_argument('-d', type=str, default="mongodb://ca-mongo:27017/",
parser.add_argument('-d', type=str, default="http://localhost:4010/data/Slide/post",
help='Output destination')
# read in mongo database
parser.add_argument('-db', type=str, default="camic",
help='For mongo, the db to use')
# read in lookup type
parser.add_argument('-lt', type=str, help='Slide ID lookup type', default="mongo", choices=['mongo', 'jsonfile', 'api', 'pathdb'])
parser.add_argument('-lt', type=str, help='Slide ID lookup type', default="camic", choices=['camic', 'pathdb'])
# read in lookup uri or equivalent
parser.add_argument('-ld', type=str, default="mongodb://ca-mongo:27017/",
parser.add_argument('-ld', type=str, default="http://localhost:4010/data/Slide/find",
help='Slide ID lookup source')

args = parser.parse_args()
Expand All @@ -41,14 +39,17 @@ def openslidedata(manifest):
slide = openslide.OpenSlide(img['location'])
slideData = slide.properties
img['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_X, None)
img['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_Y, None)
img['mpp'] = img['mpp-x'] or img['mpp-x'] or None
img['height'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None)
img['width'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, None)
img['mpp-y'] = slideData.get(openslide.PROPERTY_NAME_MPP_Y, None)
img['height'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None) or slideData.get(
"openslide.level[0].height", None)
img['width'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, None) or slideData.get(
"openslide.level[0].width", None)
img['vendor'] = slideData.get(openslide.PROPERTY_NAME_VENDOR, None)
img['level_count'] = int(slideData.get('level_count', 1))
img['objective'] = float(slideData.get("aperio.AppMag", None))
img['timestamp'] = time.time()
img['objective'] = float(slideData.get(openslide.PROPERTY_NAME_OBJECTIVE_POWER, 0) or
slideData.get("aperio.AppMag", -1.0))
img['md5sum'] = file_md5(filepath)
img['comment'] = slideData.get(openslide.PROPERTY_NAME_COMMENT, None)
# required values which are often unused
img['study'] = img.get('study', "")
img['specimen'] = img.get('specimen', "")
Expand All @@ -72,47 +73,64 @@ def openslidedata(manifest):
if (args.i == "slide"):
manifest = openslidedata(manifest)
else:
raise NotImplementedError("Slide id lookup not implemented")
if (args.lt == "api"):

if (args.lt == "camic"):
for x in manifest:
# TODO get slide ref from manifest
r = requests.get(args.ld)
r.json()
# put slide id in manifest
if (args.lt == "mongo"):
pass
# TODO more flexible with manifest fields
lookup_url = args.ld + "?name=" + x.slide
r = requests.get(lookup_url)
res = r.json()
if (len(res)) == 0:
print("[WARN] - no match for slide '" + x.slide + "', skipping")
del x
x.id = res[0]["_id"]["$oid"]
if (args.lt == "pathdb"):
pass
if (args.lt == "jsonfile"):
with open(args.ld, 'r') as f:
slide_map = json.load(manifest, f)
# TODO use
raise NotImplementedError("pathdb lookup is broken now")
for x in manifest:
# TODO there's an error with the url construction when testing, something's up
lookup_url = args.ld + args.pc + "/"
lookup_url += x.get("studyid", "") or x.get("study")
lookup_url += x.get("clinicaltrialsubjectid", "") or x.get("subject")
lookup_url += x.get("imageid", "") or x.get("image", "") or x.get("slide", "")
lookup_url += "?_format=json"
r = requests.get(lookup_url)
res = r.json()
if (len(res)) == 0:
print("[WARN] - no match for slide '" + str(x) + "', skipping")
del x
else:
x.id = res[0]["PathDBID"]


# perform validation (!!)
# TODO add validation (!!)
print("[WARNING] -- Validation not Implemented")


# take appropriate destination action
if (args.o == "jsonfile"):
with open(args.d, 'w') as f:
json.dump(manifest, f)
elif (args.o == "mongo"):
client = pymongo.MongoClient(args.d)
db = client[args.db]
col = db[args.i]
col.insert_many(manifest)
elif (args.o == "api"):
def postWithAuth(data, url):
    """POST the manifest to the destination, prompting for a token on HTTP 401.

    NOTE(review): the ``data`` and ``url`` parameters are never read; the body
    always posts the module-level ``manifest`` to ``args.d``. The call sites
    visible in this file pass ``(args.d, <payload>)``, i.e. URL first, which is
    the reverse of this signature -- confirm the intended order before wiring
    the parameters in.

    Returns the final ``requests`` response; raises via ``raise_for_status()``
    when the last response is an HTTP error.
    """
    x = requests.post(args.d, json=manifest)
    # if we get a 401, ask the user for a token
    retry = True
    # Keep prompting until the server stops answering 401 or the user declines.
    while (x.status_code == 401 and retry):
        token = input("API returned 401, try a (different) token? : ")
        # An empty answer, "no" or "n" means the user gives up.
        if (token and token != "no" and token != "n"):
            # NOTE(review): ``token`` is a plain string handed to ``auth=`` --
            # confirm this is a form of auth that requests accepts.
            x = requests.post(args.d, json=manifest, auth=token)
        else:
            retry = False
    x.raise_for_status()
    return x

# take appropriate destination action
if (args.o == "jsonfile"):
with open(args.d, 'w') as f:
json.dump(manifest, f)
elif (args.o == "camic"):
if (args.i == "slide"):
x = postWithAuth(args.d, manifest)
x.raise_for_status()
else:
with open(x.path) as f:
file = json.load(f)
for rec in file:
rec[slide] = x.id
x = postWithAuth(args.d, file)
x.raise_for_status()
elif (args.o == "pathdb"):
#! TODO
if (args.i != "slide"):
Expand Down
54 changes: 54 additions & 0 deletions SlideServer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import csv
import pathlib
import logging
from gDriveDownload import start, afterUrlAuth, callApi
from threading import Thread

try:
from io import BytesIO
Expand All @@ -40,6 +42,7 @@
app.config['SECRET_KEY'] = os.urandom(24)
app.config['ROI_FOLDER'] = "/images/roiDownload"


ALLOWED_EXTENSIONS = set(['svs', 'tif', 'tiff', 'vms', 'vmu', 'ndpi', 'scn', 'mrxs', 'bif', 'svslide'])


Expand Down Expand Up @@ -233,6 +236,7 @@ def urlUploadStatus():
return flask.Response(json.dumps({"uploaded": "False"}), status=200)



# Workbench Dataset Creation help-routes

# Route to receive base64 encoded zip files.
Expand Down Expand Up @@ -528,3 +532,53 @@ def roiextract(file_name):
return flask.send_from_directory(app.config["ROI_FOLDER"],filename=file_name, as_attachment=True, cache_timeout=0 )


# Google Drive API (OAuth and File Download) Routes

# A new Thread to call the Gdrive API after an Auth Response is returned to the user.
class getFileFromGdrive(Thread):
    """Background thread that completes Google OAuth (if needed) and downloads a file.

    The HTTP route returns the authorization URL to the client immediately;
    this thread then waits for the OAuth redirect and calls the Drive API.

    Args:
        params: dict produced by ``start()`` with the keys ``auth_url``,
            ``local_server``, ``flow``, ``wsgi_app`` and ``creds``.
        userId: identifier of the user the credentials belong to.
        fileId: Google Drive id of the file to download.
        token: name under which the downloaded file is stored.
    """

    def __init__(self, params, userId, fileId, token):
        Thread.__init__(self)
        self.params = params
        self.userId = userId
        self.fileId = fileId
        self.token = token

    def run(self):
        """Finish authentication when required, then download and log the result."""
        try:
            # A non-None auth_url means the user still has to authenticate, so
            # block on the local OAuth server before calling the API.
            if self.params["auth_url"] is not None:
                self.params["creds"] = afterUrlAuth(
                    self.params["local_server"],
                    self.params["flow"],
                    self.params["wsgi_app"],
                    self.userId,
                )
            call = callApi(self.params["creds"], self.fileId, self.token)
        except Exception:
            # Nothing joins this thread, so log the failure with its request
            # context here; otherwise the cause is hard to trace.
            app.logger.exception(
                "Google Drive download failed (fileId=%s, token=%s)",
                self.fileId,
                self.token,
            )
            return
        app.logger.info(call)

# Route to start the OAuth Server(to listen if user is Authenticated) and start the file Download after Authentication
@app.route('/googleDriveUpload/getFile', methods=['POST'])
def gDriveGetFile():
    """Start a Google Drive download and return the OAuth URL plus a tracking token.

    Expects a JSON object with ``userId`` and ``fileId``. Responds with
    ``{"authURL": ..., "token": ...}`` (status 200); ``authURL`` is ``None``
    when stored credentials were found. Responds with status 400 when the body
    is missing, is not a JSON object, lacks a required field, or when starting
    the auth flow fails.
    """
    body = flask.request.get_json()
    if not body:
        return flask.Response(json.dumps({"error": "Missing JSON body"}), status=400)
    if not isinstance(body, dict):
        return flask.Response(json.dumps({"error": "JSON body must be an object"}), status=400)

    # Validate both fields up front: previously a missing fileId raised a
    # KeyError outside the try block and surfaced as an HTTP 500.
    missing = [key for key in ("userId", "fileId") if not body.get(key)]
    if missing:
        return flask.Response(
            json.dumps({"error": "Missing required field(s): " + ", ".join(missing)}),
            status=400,
        )

    # Pick a random file name that is not already in use in the upload folder.
    alphabet = string.ascii_uppercase + string.digits
    while True:
        token = secure_filename("".join(random.choice(alphabet) for _ in range(10)))
        tmppath = os.path.join("/images/uploading/", token)
        # regenerate if we happen to collide
        if not os.path.isfile(tmppath):
            break

    try:
        params = start(body['userId'])
    except Exception as err:
        # Same payload as before (the exception class), but without also
        # swallowing KeyboardInterrupt/SystemExit as the bare except did.
        return flask.Response(json.dumps({'error': str(type(err))}), status=400)

    download_thread = getFileFromGdrive(params, body['userId'], body['fileId'], token)
    download_thread.start()
    return flask.Response(json.dumps({"authURL": params["auth_url"], "token": token}), status=200)

# To check if a particular file is downloaded from Gdrive
@app.route('/googleDriveUpload/checkStatus', methods=['POST'])
def checkDownloadStatus():
    """Report whether the file for a download token exists in the temp folder.

    Expects a JSON object with ``token``. Responds with
    ``{"downloadDone": true|false}`` (status 200), or status 400 when the body
    or token is missing or the token is not a plain file name.
    """
    body = flask.request.get_json()
    if not body:
        return flask.Response(json.dumps({"error": "Missing JSON body"}), status=400)

    token = body.get('token') if isinstance(body, dict) else None
    if not token or not isinstance(token, str):
        return flask.Response(json.dumps({"error": "Missing token"}), status=400)

    # The token comes from the client and is used to build a path. Reject
    # anything that is not already a safe file name so this route cannot be
    # used to probe for arbitrary files (e.g. "../../etc/passwd").
    if secure_filename(token) != token:
        return flask.Response(json.dumps({"error": "Invalid token"}), status=400)

    path = os.path.join(app.config['TEMP_FOLDER'], token)
    if os.path.isfile(path):
        return flask.Response(json.dumps({"downloadDone": True}), status=200)
    return flask.Response(json.dumps({"downloadDone": False}), status=200)

118 changes: 118 additions & 0 deletions gDriveDownload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
from __future__ import print_function
import pickle
import wsgiref.simple_server
import wsgiref.util
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow, _WSGIRequestHandler, _RedirectWSGIApp
from google.auth.transport.requests import Request
import sys
import os
import io
import shutil


# If modifying these scopes, delete the file "googleDrive<userID>.pickle"
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]

# Starting a local server on :4001 to listen for authentication response from user
def run_local_server(
    self=InstalledAppFlow,
    host="0.0.0.0",
    port=4001,
    authorization_prompt_message=InstalledAppFlow._DEFAULT_AUTH_PROMPT_MESSAGE,
    success_message=InstalledAppFlow._DEFAULT_WEB_SUCCESS_MESSAGE,
    userId=None,
):
    """Create the local OAuth redirect listener and build the authorization URL.

    Unlike a blocking run, this only prepares the server; the caller is
    expected to call ``handle_request()`` on it later (see ``afterUrlAuth``).

    Args:
        self: the OAuth flow object whose ``redirect_uri`` is set and whose
            ``authorization_url()`` is called.
        host: interface the listener binds to.
        port: port the listener binds to.
        authorization_prompt_message: template printed with the auth URL.
        success_message: page text shown to the user after the redirect.
        userId: user identifier appended to the redirect URI; must be a string.

    Returns:
        Tuple ``(auth_url, local_server, wsgi_app, None)``; the last item is
        the credentials placeholder, which is not available yet.

    Raises:
        TypeError: if ``userId`` is not a string.
    """
    # Validate before binding the port: the original failed on the string
    # concatenation below *after* the socket was already open, leaking it.
    if not isinstance(userId, str):
        raise TypeError("userId must be a string")

    wsgi_app = _RedirectWSGIApp(success_message)
    local_server = wsgiref.simple_server.make_server(
        host, port, wsgi_app, handler_class=_WSGIRequestHandler
    )

    try:
        # Making a unique redirect URL for every user
        self.redirect_uri = "http://localhost:4010/googleAuth/" + userId
        auth_url, _ = self.authorization_url()
    except Exception:
        # Release the port so the next attempt can bind it again.
        local_server.server_close()
        raise

    print(authorization_prompt_message.format(url=auth_url))

    return auth_url, local_server, wsgi_app, None


# Will be called after return of Auth URL
def afterUrlAuth(local_server, flow, wsgi_app, userId):
    """Wait for the OAuth redirect, exchange it for credentials and store them.

    Args:
        local_server: listener returned by ``run_local_server``; it serves
            exactly one request and is then closed.
        flow: OAuth flow used to fetch the token.
        wsgi_app: redirect app whose ``last_request_uri`` holds the redirect.
        userId: user identifier used in the credentials file name.

    Returns:
        ``flow.credentials`` after the token has been fetched and pickled.
    """
    try:
        local_server.handle_request()
    finally:
        # Always release the port; otherwise the next authentication attempt
        # cannot bind it ("address already in use").
        local_server.server_close()

    # Note: using https here because oauthlib is very picky that
    # OAuth 2.0 should only occur over https.
    # Only the scheme is rewritten; a blanket replace("http", "https") would
    # also corrupt any "http" inside the query string.
    authorization_response = wsgi_app.last_request_uri
    if authorization_response.startswith("http://"):
        authorization_response = "https://" + authorization_response[len("http://"):]
    flow.fetch_token(authorization_response=authorization_response)

    # Save the credentials for the next run
    # NOTE(review): userId is concatenated into the path unsanitised -- confirm
    # callers never pass path separators.
    with open(
        "/cloud-upload-apis/tokens/googleDrive" + userId + ".pickle", "wb"
    ) as token:
        pickle.dump(flow.credentials, token)
    return flow.credentials

# Starting the Auth process and checking for pickle file (Token) [Creating a new token if not exists]
def start(userId):
    """Load stored Drive credentials for a user, or begin a new OAuth flow.

    The file "googleDrive<userID>.pickle" stores the user's access and refresh
    tokens and is created when the authorization flow completes.

    Args:
        userId: user identifier used in the credentials file name and in the
            OAuth redirect URI.

    Returns:
        dict with the keys ``auth_url``, ``local_server``, ``wsgi_app``,
        ``flow`` and ``creds``. When usable stored credentials exist, only
        ``creds`` is set and the rest are ``None``. Otherwise ``creds`` is
        ``None`` and the other keys describe the pending OAuth flow.
    """
    # Imported here so the module's import block stays untouched; google.auth
    # is already a dependency of this file.
    from google.auth.exceptions import RefreshError

    token_path = "/cloud-upload-apis/tokens/googleDrive" + userId + ".pickle"

    creds = None
    if os.path.exists(token_path):
        # NOTE(security): pickle.load can execute arbitrary code, and userId is
        # concatenated into the path unsanitised -- this directory must only
        # ever contain files written by this service.
        with open(token_path, "rb") as token:
            creds = pickle.load(token)

    # Previously stored credentials were returned unconditionally, which made
    # the refresh logic unreachable. Refresh expired credentials here, and fall
    # back to a fresh consent flow when they cannot be refreshed.
    if creds and not creds.valid and creds.expired and creds.refresh_token:
        try:
            creds.refresh(Request())
        except RefreshError:
            creds = None

    if creds and creds.valid:
        return {
            "auth_url": None,
            "local_server": None,
            "wsgi_app": None,
            "flow": None,
            "creds": creds,
        }

    # If there are no (valid) credentials available, let the user log in.
    flow = InstalledAppFlow.from_client_secrets_file(
        "/cloud-upload-apis/credentials/google-drive.json", SCOPES
    )
    auth_url, local_server, wsgi_app, creds = run_local_server(
        self=flow, userId=userId
    )
    return {
        "auth_url": auth_url,
        "local_server": local_server,
        "wsgi_app": wsgi_app,
        "flow": flow,
        "creds": creds,
    }


# Calling the Drive API to download a file
def callApi(creds, fileId, token):
    """Download a Drive file into the upload folder under the name ``token``.

    The data is streamed chunk by chunk to "<token>.part" and renamed to its
    final name only after the download completes. This avoids holding the
    whole file in memory, and a file at the final path always means a finished
    download.

    Args:
        creds: Google credentials used to build the Drive service.
        fileId: Google Drive id of the file to download.
        token: file name to store the download under in /images/uploading/.

    Returns:
        ``{"status": True, "fileName": <Drive name>, "token": token}`` on
        success, ``False`` on any failure (the error is printed to stderr).
    """
    dest_path = "/images/uploading/" + token
    part_path = dest_path + ".part"

    try:
        service = build("drive", "v3", credentials=creds)

        # Call the Drive v3 API
        fileName = service.files().get(fileId=fileId).execute()["name"]
        request = service.files().get_media(fileId=fileId)

        with open(part_path, "wb") as f:
            # Initialise a downloader object that writes straight to the file
            downloader = MediaIoBaseDownload(f, request)
            downloadDone = False
            # Download the data in chunks
            while not downloadDone:
                _status, downloadDone = downloader.next_chunk()

        # Publish the file under its final name in one step.
        os.replace(part_path, dest_path)

        # Return the details if the file downloaded successfully
        return {"status": True, "fileName": fileName, "token": token}
    except Exception as err:
        print("Google Drive download failed: " + str(err), file=sys.stderr)
        # Do not leave a partial download behind.
        try:
            if os.path.exists(part_path):
                os.remove(part_path)
        except OSError:
            pass
        return False
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ flask-cors
requests
numpy
Pillow
google-api-python-client
google-auth-httplib2
google-auth-oauthlib

0 comments on commit 122437a

Please sign in to comment.