diff --git a/OmniLoad.py b/OmniLoad.py index 1129136..fc79b50 100644 --- a/OmniLoad.py +++ b/OmniLoad.py @@ -2,15 +2,19 @@ import openslide # to get required slide metadata import csv # to read csv +import sys # for csv limit +import os # for os and filepath utils import argparse # to read arguments -import time # for timestamp -import os # for os/fs systems import json # for json in and out import requests # for api and pathdb in and out +import hashlib + +# for large csv fields, especially segmentations +csv.field_size_limit(sys.maxsize) parser = argparse.ArgumentParser(description='Load slides or results to caMicroscope.') # read in collection -parser.add_argument('-i', type=str, default="slide", choices=['slide', 'heatmap', 'mark', 'user'], +parser.add_argument('-i', type=str, default="slide", choices=['slide', 'heatmap', 'mark', 'user', 'segmentation'], help='Input type') # read in filepath parser.add_argument('-f', type=str, default="manifest.csv", @@ -21,25 +25,37 @@ # read in pathdb collection parser.add_argument('-pc', type=str, help='Pathdb Collection Name') # read in dest uri or equivalent -parser.add_argument('-d', type=str, default="http://localhost:4010/data/Slide/post", +parser.add_argument('-d', type=str, default="http://ca-back:4010/data/Slide/post", help='Output destination') # read in lookup type parser.add_argument('-lt', type=str, help='Slide ID lookup type', default="camic", choices=['camic', 'pathdb']) # read in lookup uri or equivalent -parser.add_argument('-ld', type=str, default="http://localhost:4010/data/Slide/find", +parser.add_argument('-ld', type=str, default="http://ca-back:4010/data/Slide/find", help='Slide ID lookup source') args = parser.parse_args() print(args) +def file_md5(fileName): + m = hashlib.md5() + blocksize = 2 ** 20 + with open(fileName, "rb") as f: + while True: + buf = f.read(blocksize) + if not buf: + break + m.update(buf) + return m.hexdigest() + # get fields openslide expects def openslidedata(manifest): for img in manifest: - img['location'] = img['location'] or img['filename'] or img['file'] + img['location'] = img.get("path", "") or img.get("location", "") or img.get("filename", "") or img.get("file", "") slide = openslide.OpenSlide(img['location']) slideData = slide.properties img['mpp-x'] = slideData.get(openslide.PROPERTY_NAME_MPP_X, None) img['mpp-y'] = slideData.get(openslide.PROPERTY_NAME_MPP_Y, None) + img['mpp'] = img['mpp-x'] or img['mpp-y'] img['height'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, None) or slideData.get( "openslide.level[0].height", None) img['width'] = slideData.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, None) or slideData.get( @@ -48,13 +64,83 @@ def openslidedata(manifest): img['level_count'] = int(slideData.get('level_count', 1)) img['objective'] = float(slideData.get(openslide.PROPERTY_NAME_OBJECTIVE_POWER, 0) or slideData.get("aperio.AppMag", -1.0)) - img['md5sum'] = file_md5(filepath) + img['md5sum'] = file_md5(img['location']) img['comment'] = slideData.get(openslide.PROPERTY_NAME_COMMENT, None) # required values which are often unused img['study'] = img.get('study', "") img['specimen'] = img.get('specimen', "") return manifest +def getWithAuth(url): + x = requests.get(lookup_url) + retry = True + while (x.status_code == 401 and retry): + token = input("API returned 401, try a (different) token? : ") + if (token and token != "no" and token != "n"): + x = requests.get(lookup_url, auth=token) + else: + retry = False + return x + +def postWithAuth(url, data): + x = requests.post(args.d, json=data) + retry = True + while (x.status_code == 401 and retry): + token = input("API returned 401, try a (different) token? : ") + if (token and token != "no" and token != "n"): + x = requests.post(args.d, json=data, auth=token) + else: + retry = False + return x + +def convertSegmentations(poly, name, area): + # interpret the objectively bad polygon representation + poly = poly.replace("[","") + poly = poly.replace("]","") + poly = poly.split(":") + new_poly = [] + x_max = -1. + x_min = 9e99 + y_max = -1. + y_min = 9e99 + for i in range(0,len(poly),2): + x_max = max(x_max, float(poly[i])) + x_min = min(x_min, float(poly[i])) + y_max = max(y_max, float(poly[i+1])) + y_min = min(y_min, float(poly[i+1])) + new_poly.append([float(poly[i]), float(poly[i+1])]) + # construct result + # complete loop + new_poly.append(new_poly[0]) + provenance = {} + provenance['image'] = {} + # may need better execution id + provenance['analysis'] = {"source":"computer", "coordinate":"image", "execution_id":name, "name":name, "computation":"segmentation"} + properties = {} + properties['annotations'] = {"name": name, 'AreaInPixels':area, "PhysicalSize":area} + geometries = {"type":"FeatureCollection"} + feature = {"type":"Feature"} + geometry = {"type":"Polygon"} + geometry['coordinates'] = [new_poly] + bound = {"type":"Polygon"} + feature['geometry'] = geometry + feature['bound'] = bound + # get bound + bound['coordinates'] = [[[x_min, y_min], [x_min, y_max], [x_max, y_max], [x_max, y_min], [x_min, y_min]]] + geometries['features'] = [feature] + res = {} + res['geometries'] = geometries + res['provenance'] = provenance + res['properties'] = properties + res['footprint'] = area + res['x'] = x_min + res['y'] = y_min + res['object_type'] = "unknown" + res['parent_id'] = "self" + return res + +## START script + manifest = [] # context for file @@ -77,62 +163,63 @@ def openslidedata(manifest): if (args.lt == "camic"): for x in manifest: # TODO more flexible with manifest fields - lookup_url = args.ld + "?name=" + x.slide - r = requests.get(lookup_url) + lookup_url = args.ld + "?name=" + x['slide'] + r = getWithAuth(lookup_url) res = r.json() - if (len(res)) == 0: - print("[WARN] - no match for slide '" + x.slide + "', skipping") + try: + x['id'] = res[0]["_id"]["$oid"] + except: + print("[WARN] - no match for slide '" + str(x) + "', skipping") del x - x.id = res[0]["_id"]["$oid"] if (args.lt == "pathdb"): - raise NotImplementedError("pathdb lookup is broken now") for x in manifest: - # TODO there's an error with the url construction when testing, something's up - lookup_url = args.ld + args.pc + "/" + lookup_url = args.ld + "/" + args.pc + "/" lookup_url += x.get("studyid", "") or x.get("study") lookup_url += x.get("clinicaltrialsubjectid", "") or x.get("subject") lookup_url += x.get("imageid", "") or x.get("image", "") or x.get("slide", "") lookup_url += "?_format=json" - r = requests.get(lookup_url) + r = getWithAuth(lookup_url) res = r.json() - if (len(res)) == 0: + try: + x['id'] = res[0]["nid"][0].value + except: print("[WARN] - no match for slide '" + str(x) + "', skipping") del x - else: - x.id = res[0]["PathDBID"] # TODO add validation (!!) print("[WARNING] -- Validation not Implemented") -def postWithAuth(data, url): - x = requests.post(args.d, json=manifest) - retry = True - while (x.status_code == 401 and retry): - token = input("API returned 401, try a (different) token? : ") - if (token and token != "no" and token != "n"): - x = requests.post(args.d, json=manifest, auth=token) - else: - retry = False - return x - # take appropriate destination action if (args.o == "jsonfile"): with open(args.d, 'w') as f: json.dump(manifest, f) elif (args.o == "camic"): if (args.i == "slide"): - x = postWithAuth(args.d, manifest) - x.raise_for_status() + r = postWithAuth(args.d, manifest) + print(r.json()) + r.raise_for_status() else: - with open(x.path) as f: - file = json.load(f) - for rec in file: - rec[slide] = x.id - x = postWithAuth(args.d, file) - x.raise_for_status() + for x in manifest: + with open(x['path']) as f: + if (args.i == "segmentation"): + reader = csv.DictReader(f) + segs = [row for row in reader] + fil = [] + for rec in segs: + res = convertSegmentations(rec['Polygon'], x['segname'], rec['AreaInPixels']) + res['provenance']['image']['slide'] = x['id'] + fil.append(res) + else: + fil = json.load(f) + for rec in fil: + # TODO safer version of this? + rec['provenance']['image']['slide'] = x['id'] + r = postWithAuth(args.d, fil) + print(r.json()) + r.raise_for_status() elif (args.o == "pathdb"): - #! TODO + #! TODO - need the url and pattern for adding a slide to pathdb if (args.i != "slide"): raise AssertionError("Pathdb only holds slide data.") raise NotImplementedError("Output type: " + args.o + " not yet implemented") diff --git a/SlideServer.py b/SlideServer.py index 51869ac..0e02a3a 100644 --- a/SlideServer.py +++ b/SlideServer.py @@ -43,7 +43,7 @@ app.config['ROI_FOLDER'] = "/images/roiDownload" -ALLOWED_EXTENSIONS = set(['svs', 'tif', 'tiff', 'vms', 'vmu', 'ndpi', 'scn', 'mrxs', 'bif', 'svslide']) +ALLOWED_EXTENSIONS = set(['svs', 'tif', 'tiff', 'vms', 'vmu', 'ndpi', 'scn', 'mrxs', 'bif', 'svslide', 'png', 'jpg']) def allowed_file(filename): @@ -70,8 +70,11 @@ def makePyramid(filename, dest): try: filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) destpath = os.path.join(app.config['UPLOAD_FOLDER'], dest) - pyvips.Image.new_from_file(filepath, access='sequential').tiffsave(destpath, tile=True, compression="lzw", tile_width=256, tile_height=256, pyramid=True, bigtiff=True, xres=0.254, yres=0.254) - return flask.Response(json.dumps({"status": "OK"}), status=200) + savedImg = pyvips.Image.new_from_file(filepath, access='sequential').tiffsave(destpath, tile=True, compression="lzw", tile_width=256, tile_height=256, pyramid=True, bigtiff=True, xres=0.254, yres=0.254) + while not os.path.exists(filepath): + os.sync() + sleep(750) + return flask.Response(json.dumps({"status": "OK", "srcFile":filename, "destFile":dest, "details":savedImg}), status=200) except BaseException as e: return flask.Response(json.dumps({"type": "pyvips", "error": str(e)}), status=500) @@ -177,18 +180,30 @@ def testRoute(): @app.route("/data/one/", methods=['GET']) def singleSlide(filepath): - return json.dumps(dev_utils.getMetadata(filepath, app.config['UPLOAD_FOLDER'])) + res = dev_utils.getMetadata(filepath, app.config['UPLOAD_FOLDER']) + if (hasattr(res, 'error')): + return flask.Response(json.dumps(res), status=500) + else: + return flask.Response(json.dumps(res), status=200) @app.route("/data/thumbnail/", methods=['GET']) def singleThumb(filepath): size = flask.request.args.get('size', default=50, type=int) - return json.dumps(getThumbnail(filepath, size)) + res = getThumbnail(filepath, size) + if (hasattr(res, 'error')): + return flask.Response(json.dumps(res), status=500) + else: + return flask.Response(json.dumps(res), status=200) @app.route("/data/many/", methods=['GET']) def multiSlide(filepathlist): - return json.dumps(dev_utils.getMetadataList(json.loads(filepathlist), app.config['UPLOAD_FOLDER'])) + res = dev_utils.getMetadataList(json.loads(filepathlist), app.config['UPLOAD_FOLDER']) + if (hasattr(res, 'error')): + return flask.Response(json.dumps(res), status=500) + else: + return flask.Response(json.dumps(res), status=200) @app.route("/getSlide/")