From bd1a8f4f19bddd7c953d41c9d69516c45aaca9ea Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Tue, 22 Mar 2022 16:11:36 +1300
Subject: [PATCH 01/13] Create a version of sparc-api that can be used with
 the 'stage' index

---
 app/bfworker.py                  |  69 ++++++++++++++++++++
 app/config.py                    |   2 +-
 app/main.py                      | 105 ++++++++++++++++++++++++-------
 app/scicrunch_process_results.py |   4 ++
 app/scicrunch_requests.py        |   9 +++
 tests/test_api.py                |   8 +++
 6 files changed, 172 insertions(+), 25 deletions(-)
 create mode 100644 app/bfworker.py

diff --git a/app/bfworker.py b/app/bfworker.py
new file mode 100644
index 0000000..8c3119d
--- /dev/null
+++ b/app/bfworker.py
@@ -0,0 +1,69 @@
+from pennsieve import Pennsieve
+import pennsieve
+from app.config import Config
+
+
+class BFWorker(object):
+    def __init__(self, id):
+        self.bf = Pennsieve(api_token=Config.PENNSIEVE_API_TOKEN, api_secret=Config.PENNSIEVE_API_SECRET)
+
+    def getCollectionAndMetaFromPackageId(self, packageId):
+        pkg = self.bf.get(packageId)
+        if type(pkg) is pennsieve.DataPackage:
+            colId = pkg.parent
+            col = self.bf.get(colId)
+            items = col.items
+            for item in items:
+                if packageId == item.id:
+                    return [colId, item.name]
+        return None
+
+    def getURLFromCollectionIdAndFileName(self, collectionId, fileName):
+        col = self.bf.get(collectionId)
+        if type(col) is pennsieve.Collection:
+            items = col.items
+            for item in items:
+                if fileName == item.name:
+                    pkg = item
+                    try:
+                        bfFile = pkg.files[0]
+                        url = bfFile.url
+                        return url
+                    except:
+                        return None
+        return None
+
+    def getUrlfromPackageId(self, packageId):
+        pId = packageId
+        if ('N:' not in packageId):
+            pId = 'N:' + packageId
+        pk = self.bf.get(pId)
+        return pk.files[0].url
+
+    def getImagefromPackageId(self, packageId):
+        pId = packageId
+        if ('N:' not in packageId):
+            pId = 'N:' + packageId
+        pk = self.bf.get(pId)
+        # resp = requests.get(pk.files[0].url)
+        return pk.files[0].url if pk is not None else ''
+
+    def getURLFromDatasetIdAndFilePath(self, datasetId, filePath):
+        fileArray = filePath.split('/')
+        items = self.bf.get_dataset(datasetId).items
+        count = 0
+        while type(items) is list:
+            item = items[count]
+            for fileName in fileArray:
+                if fileName == item.name:
+                    if type(item) is pennsieve.Collection:
+                        items = item.items
+                        count = 0
+                        continue
+                    else:
+                        try:
+                            return item.files[0].url
+                        except:
+                            return None
+            count += 1
+        return None
diff --git a/app/config.py b/app/config.py
index e5b9f6f..df21497 100644
--- a/app/config.py
+++ b/app/config.py
@@ -30,7 +30,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_stage")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
diff --git a/app/main.py b/app/main.py
index b514258..eb7d500 100644
--- a/app/main.py
+++ b/app/main.py
@@ -30,6 +30,7 @@
 from app.utilities import img_to_base64_str
 from app.osparc import run_simulation
 from app.biolucida_process_results import process_results as process_biolucida_results
+from app.bfworker import BFWorker
 
 app = Flask(__name__)
 # set environment variable
@@ -39,6 +40,7 @@
 ma = Marshmallow(app)
 email_sender = EmailSender()
+bfWorker = BFWorker(None)
 ps = None
 
 s3 = boto3.client(
@@ -317,32 +319,63 @@ def presign_resource_url():
 
 # Reverse proxy for objects from S3, a simple get object
 # operation. This is used by scaffoldvuer and its
-# important to keep the relative for accessing
-# other required files.
+# # important to keep the relative for accessing
+# # other required files.
+# @app.route("/s3-resource/<path:path>")
+# def direct_download_url(path):
+#     print(path)
+#     head_response = s3.head_object(
+#         Bucket=Config.S3_BUCKET_NAME,
+#         Key=path,
+#         RequestPayer="requester"
+#     )
+#
+#     content_length = head_response.get('ContentLength', Config.DIRECT_DOWNLOAD_LIMIT)
+#     if content_length and content_length > Config.DIRECT_DOWNLOAD_LIMIT:  # 20 MB
+#         return abort(413, description=f"File too big to download: {content_length}")
+#
+#     response = s3.get_object(
+#         Bucket=Config.S3_BUCKET_NAME,
+#         Key=path,
+#         RequestPayer="requester"
+#     )
+#
+#     encode_base64 = request.args.get("encodeBase64")
+#     resource = response["Body"].read()
+#     if encode_base64 is not None:
+#         return base64.b64encode(resource)
+#
+#     return resource
+
+# This version of s3-resources is used for accessing files on staging. Use it as a replacement for 's3-resource'.
+# No changes are needed on the front end; just use s3-resource as normal.
 @app.route("/s3-resource/<path:path>")
 def direct_download_url(path):
-    head_response = s3.head_object(
-        Bucket=Config.S3_BUCKET_NAME,
-        Key=path,
-        RequestPayer="requester"
-    )
-
-    content_length = head_response.get('ContentLength', Config.DIRECT_DOWNLOAD_LIMIT)
-    if content_length and content_length > Config.DIRECT_DOWNLOAD_LIMIT:  # 20 MB
-        return abort(413, description=f"File too big to download: {content_length}")
-
-    response = s3.get_object(
-        Bucket=Config.S3_BUCKET_NAME,
-        Key=path,
-        RequestPayer="requester"
-    )
-
-    encode_base64 = request.args.get("encodeBase64")
-    resource = response["Body"].read()
-    if encode_base64 is not None:
-        return base64.b64encode(resource)
-
-    return resource
+    print(path)
+    filePath = path.split('files/')[-1]
+    discoverId = path.split('/')[0]
+    dataset_query = {
+        "size": 20,
+        "from": 0,
+        "query": {
+            "query_string": {
+                "fields": [
+                    "*pennsieve.identifier"
+                ],
+                "query": discoverId
+            }
+        },
+        "_source": [
+            "item.identifier"
+        ]
+    }
+    resp = dataset_search(dataset_query)
+    pennsieveId = resp['hits']['hits'][0]['_source']['item']['identifier']
+    url = bfWorker.getURLFromDatasetIdAndFilePath(pennsieveId, filePath)
+    if url != None:
+        resp2 = requests.get(url)
+        return resp2.json()
+    return jsonify({'error': 'error with the provided ID '}, status=502)
 
 
 @app.route("/scicrunch-dataset/<doi1>/<doi2>")
@@ -419,6 +452,30 @@ def get_dataset_info_discoverIds():
 
     return process_results(dataset_search(query))
 
+
+@app.route('/urlFromPennsieveDatasetIdAndFilePath/<discoverId>')
+def getFileUrlFromPennsieve(discoverId):
+    filePath = request.args.get('filePath')
+    dataset_query = {
+        "size": 20,
+        "from": 0,
+        "query": {
+            "query_string": {
+                "fields": [
+                    "*pennsieve.identifier"
+                ],
+                "query": discoverId
+            }
+        },
+        "_source": [
+            "item.identifier"
+        ]
+    }
+    resp = dataset_search(dataset_query)
+    pennsieveId = resp['hits']['hits'][0]['_source']['item']['identifier']
+    url = bfWorker.getURLFromDatasetIdAndFilePath(pennsieveId, filePath)
+    if url != None:
+        return jsonify({'url': url})
+    return jsonify({'error': 'error with the provided ID '}, status=502)
+
 @app.route("/dataset_info/using_title")
 def get_dataset_info_title():
diff --git a/app/scicrunch_process_results.py b/app/scicrunch_process_results.py
index 963e243..070d468 100644
--- a/app/scicrunch_process_results.py
+++ b/app/scicrunch_process_results.py
@@ -27,6 +27,10 @@ def _prepare_results(results):
             for file in hit['_source']['objects']
             if file['additional_mimetype']['name'].find('abi.context-information') is not -1
         ]
+        print([
+            file['additional_mimetype']['name']
+            for file in hit['_source']['objects']
+        ])
         try:
             attr['readme'] = hit['_source']['item']['readme']['description']
         except KeyError:
diff --git a/app/scicrunch_requests.py b/app/scicrunch_requests.py
index 5b0886c..5014406 100644
--- a/app/scicrunch_requests.py
+++ b/app/scicrunch_requests.py
@@ -1,3 +1,4 @@
+import json
 def create_query_string(query_string):
     return {
         "from": 0,
@@ -20,6 +21,14 @@ def create_doi_query(doi):
     }
 
 
 def create_multiple_doi_query(dois, size=10, from_=0):
+    print(json.dumps({
+        "size": 999,
+        "query": {
+            "terms": {
+                "item.curie": dois
+            }
+        }
+    }))
     return {
         "size": 999,
         "query": {
diff --git a/tests/test_api.py b/tests/test_api.py
index fcfc07a..ffd6193 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -22,6 +22,14 @@ def test_direct_download_url_small_file(client):
     assert r.status_code == 200
     assert b"proximal colon" in r.data
 
+def test_pennsieve_file_path_download(client):
+    colon_dataset_id = 76
+    colon_file_path = 'derivative%2Fscaffold_context_info.json'
+    r = client.get(f"/urlFromPennsieveDatasetIdAndFilePath/{colon_dataset_id}?filePath={colon_file_path}")
+    assert r.status_code == 200
+    assert 'url' in r.json
+
+
 def test_direct_download_url_thumbnail(client):
     small_s3_file = '95/1/files/derivative%2FScaffold%2Fthumbnail.png'

From 8089ad99527fb5418db99e630d29cc30afd9e297 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Fri, 25 Mar 2022 00:12:01 +1300
Subject: [PATCH 02/13] Fix issues in logic

- the first file was getting skipped
- we now only return file contents if the file is JSON
---
 app/bfworker.py | 2 +-
 app/main.py     | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/app/bfworker.py b/app/bfworker.py
index 8c3119d..de876b6 100644
--- a/app/bfworker.py
+++ b/app/bfworker.py
@@ -58,7 +58,7 @@ def getURLFromDatasetIdAndFilePath(self, datasetId, filePath):
                 if fileName == item.name:
                     if type(item) is pennsieve.Collection:
                         items = item.items
-                        count = 0
+                        count = -1
                         continue
                     else:
                         try:
diff --git a/app/main.py b/app/main.py
index eb7d500..e9e714d 100644
--- a/app/main.py
+++ b/app/main.py
@@ -373,8 +373,11 @@ def direct_download_url(path):
     pennsieveId = resp['hits']['hits'][0]['_source']['item']['identifier']
     url = bfWorker.getURLFromDatasetIdAndFilePath(pennsieveId, filePath)
     if url != None:
-        resp2 = requests.get(url)
-        return resp2.json()
+        if '.json' in path:
+            resp2 = requests.get(url)
+            return resp2.json()
+        else:
+            return url
     return jsonify({'error': 'error with the provided ID '}, status=502)

From 44bc3565dc5b8c5f75b1390d225030b2362f17c0 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Fri, 25 Mar 2022 13:44:57 +1300
Subject: [PATCH 03/13] Return content for anything but JSON in s3-resource

---
 app/main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/app/main.py b/app/main.py
index e9e714d..1149d0d 100644
--- a/app/main.py
+++ b/app/main.py
@@ -377,7 +377,8 @@ def direct_download_url(path):
             resp2 = requests.get(url)
             return resp2.json()
         else:
-            return url
+            resp2 = requests.get(url)
+            return resp2.content
     return jsonify({'error': 'error with the provided ID '}, status=502)
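Taken together, patches 01-03 swap the direct S3 proxy for a two-step lookup: SciCrunch resolves the discover identifier at the front of the requested path to a Pennsieve dataset identifier, and BFWorker then walks that dataset's folder tree to a presigned file URL whose contents are returned. A minimal client-side sketch of the flow, assuming a locally running sparc-api at a placeholder host:

    import requests

    API_HOST = "http://localhost:5000"  # placeholder; wherever this sparc-api build is running

    # discover dataset 76, version 1, file derivative/scaffold_context_info.json
    path = "76/1/files/derivative%2Fscaffold_context_info.json"
    resp = requests.get(f"{API_HOST}/s3-resource/{path}")
    resp.raise_for_status()
    content = resp.content  # raw bytes fetched via the presigned Pennsieve URL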
From 754316ce582633b1d3c8ef670bc1a94386e04244 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Fri, 1 Jul 2022 14:46:23 +1200
Subject: [PATCH 04/13] A few small fixes

---
 app/config.py                      | 2 +-
 app/main.py                        | 8 ++------
 app/scicrunch_processing_common.py | 1 +
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/app/config.py b/app/config.py
index df21497..1eddca2 100644
--- a/app/config.py
+++ b/app/config.py
@@ -30,7 +30,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_stage")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
diff --git a/app/main.py b/app/main.py
index 1149d0d..d1637ac 100644
--- a/app/main.py
+++ b/app/main.py
@@ -373,12 +373,8 @@ def direct_download_url(path):
     pennsieveId = resp['hits']['hits'][0]['_source']['item']['identifier']
     url = bfWorker.getURLFromDatasetIdAndFilePath(pennsieveId, filePath)
     if url != None:
-        if '.json' in path:
-            resp2 = requests.get(url)
-            return resp2.json()
-        else:
-            resp2 = requests.get(url)
-            return resp2.content
+        resp2 = requests.get(url)
+        return resp2.content
     return jsonify({'error': 'error with the provided ID '}, status=502)
diff --git a/app/scicrunch_processing_common.py b/app/scicrunch_processing_common.py
index d141d87..2e5aad4 100644
--- a/app/scicrunch_processing_common.py
+++ b/app/scicrunch_processing_common.py
@@ -30,6 +30,7 @@
     'application/vnd.mbfbioscience.neurolucida+xml': SEGMENTATION_FILES,
     'inode/vnd.abi.scaffold+directory': SCAFFOLD_DIR,
     'inode/vnd.abi.scaffold+file': SCAFFOLD_FILE,
+    'application/x.vnd.abi.scaffold.meta+json': SCAFFOLD_FILE,
     'inode/vnd.abi.scaffold+thumbnail': SCAFFOLD_THUMBNAIL,
     'inode/vnd.abi.scaffold.thumbnail+file': SCAFFOLD_THUMBNAIL,
     'inode/vnd.abi.scaffold.view+file': SCAFFOLD_VIEW_FILE,
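The new 'application/x.vnd.abi.scaffold.meta+json' entry in patch 04 widens the mimetype-to-category map so scaffold metadata published under the newer media type still lands in the scaffold-file bucket. A sketch of how such a lookup behaves (the category string here is illustrative, not the constant's actual value in scicrunch_processing_common.py):

    SCAFFOLD_FILE = 'abi-scaffold-metadata-file'  # illustrative value only

    MIMETYPE_TO_CATEGORY = {
        'inode/vnd.abi.scaffold+file': SCAFFOLD_FILE,
        'application/x.vnd.abi.scaffold.meta+json': SCAFFOLD_FILE,  # added in patch 04
    }

    def categorise(mimetype):
        # unmapped media types simply yield no category
        return MIMETYPE_TO_CATEGORY.get(mimetype)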
From faad292764362462a7607d8dfbcac1df2a5c74d9 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Mon, 19 Sep 2022 06:02:12 +0530
Subject: [PATCH 05/13] Add check for future scicrunch processing versions

---
 app/scicrunch_process_results.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/scicrunch_process_results.py b/app/scicrunch_process_results.py
index 070d468..36c2302 100644
--- a/app/scicrunch_process_results.py
+++ b/app/scicrunch_process_results.py
@@ -13,6 +13,10 @@ def _prepare_results(results):
         except KeyError:
             continue
 
+        if version >= '1.1.5':
+            print('WARNING! Scicrunch processing is out of date!')
+            version = '1.1.5'
+
         package_version = f'scicrunch_processing_v_{version.replace(".", "_")}'
         m = importlib.import_module(f'app.{package_version}')
         attributes_map = getattr(m, 'ATTRIBUTES_MAP')
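One caveat with patch 05: version >= '1.1.5' compares strings lexicographically. That holds while every component stays single-digit, but a hypothetical future '1.1.10' would sort before '1.1.5' and slip past the check. A tuple comparison is the usual safe form; a sketch assuming purely numeric, dot-separated versions:

    def version_tuple(version):
        # '1.1.5' -> (1, 1, 5)
        return tuple(int(part) for part in version.split('.'))

    if version_tuple(version) >= version_tuple('1.1.5'):
        print('WARNING! Scicrunch processing is out of date!')
        version = '1.1.5'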
From 68c9cbbccf0f8a3cf0df7aa45d0702f7844c79e1 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Wed, 28 Sep 2022 12:41:40 +1300
Subject: [PATCH 06/13] Add support for never-been-published datasets

---
 app/config.py | 2 +-
 app/main.py   | 29 ++++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/app/config.py b/app/config.py
index 1eddca2..df21497 100644
--- a/app/config.py
+++ b/app/config.py
@@ -30,7 +30,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_stage")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
diff --git a/app/main.py b/app/main.py
index d1637ac..8c0e3ab 100644
--- a/app/main.py
+++ b/app/main.py
@@ -349,7 +349,7 @@ def presign_resource_url():
 
 # This version of s3-resources is used for accessing files on staging. Use it as a replacement for 's3-resource'.
 # No changes are needed on the front end; just use s3-resource as normal.
-@app.route("/s3-resource/<path:path>")
+# @app.route("/s3-resource/<path:path>")
 def direct_download_url(path):
     print(path)
     filePath = path.split('files/')[-1]
     discoverId = path.split('/')[0]
@@ -377,6 +377,33 @@ def direct_download_url(path):
         resp2 = requests.get(url)
         return resp2.content
     return jsonify({'error': 'error with the provided ID '}, status=502)
 
+# This version of s3-resources is used for accessing files on staging that have never been published
+@app.route("/s3-resource/<path:path>")
+def direct_download_url2(path):
+    print(path)
+    filePath = path.split('files/')[-1]
+    pennsieveId = path.split('/')[0]
+
+    # If the length is small, we have a Pennsieve discover id.
+    # We will process this one with the normal s3-resource route.
+    if len(pennsieveId) <= 4:
+        return direct_download_url(path)
+
+    if 'N:package:' not in pennsieveId:
+        pennsieveId = 'N:dataset:' + pennsieveId
+
+    url = bfWorker.getURLFromDatasetIdAndFilePath(pennsieveId, filePath)
+    if url != None:
+        resp2 = requests.get(url)
+        return resp2.content
+    return jsonify({'error': 'error with the provided ID '}, status=502)
+
+
+@app.route("/proxy/")
+def proxy():
+    url = request.args.get('url')
+    resp = requests.get(url)
+    return resp.content
+    return jsonify({'error': 'error with the provided ID '}, status=502)
 
 @app.route("/scicrunch-dataset/<doi1>/<doi2>")
 def sci_doi(doi1, doi2):

From 05da2131f9d1d57cf0c1ab807ce6199dc89951e4 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Fri, 9 Dec 2022 13:24:57 +1300
Subject: [PATCH 07/13] Add pennsieve login script to staging

---
 app/main.py          |  8 ++++++++
 scripts/pennsieve.py | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 scripts/pennsieve.py

diff --git a/app/main.py b/app/main.py
index 0a6a4d2..b80b18b 100644
--- a/app/main.py
+++ b/app/main.py
@@ -4,6 +4,7 @@
 import json
 import logging
 import requests
+from flask import make_response
 
 from apscheduler.schedulers.background import BackgroundScheduler
 from botocore.exceptions import ClientError
@@ -32,6 +33,7 @@
 from app.osparc import run_simulation
 from app.biolucida_process_results import process_results as process_biolucida_results
 from app.bfworker import BFWorker
+from scripts.pennsieve import pennsieve_login, get_banner
 
 app = Flask(__name__)
 # set environment variable
@@ -251,6 +253,12 @@ def create_presigned_url(expiration=3600):
 
     return create_s3_presigned_url(key, content_type, expiration)
 
+@app.route("/get_banner/<datasetId>")
+def get_banner_pen(datasetId):
+    p_temp_key = pennsieve_login()
+    ban = get_banner(p_temp_key, datasetId)
+    return ban
+
 
 @app.route("/thumbnail/neurolucida")
 def thumbnail_from_neurolucida_file():
diff --git a/scripts/pennsieve.py b/scripts/pennsieve.py
new file mode 100644
index 0000000..801e01e
--- /dev/null
+++ b/scripts/pennsieve.py
@@ -0,0 +1,41 @@
+import logging
+import boto3
+from app.config import Config
+import requests
+import json
+
+
+# Returns a pennsieve api token valid for 24 hours
+def pennsieve_login():
+    r = requests.get(f"{Config.PENNSIEVE_API_HOST}/authentication/cognito-config")
+    r.raise_for_status()
+
+    cognito_app_client_id = r.json()["tokenPool"]["appClientId"]
+    cognito_region = r.json()["region"]
+
+    cognito_idp_client = boto3.client(
+        "cognito-idp",
+        region_name=cognito_region,
+        aws_access_key_id="",
+        aws_secret_access_key="",
+    )
+
+    login_response = cognito_idp_client.initiate_auth(
+        AuthFlow="USER_PASSWORD_AUTH",
+        AuthParameters={"USERNAME": Config.PENNSIEVE_API_TOKEN, "PASSWORD": Config.PENNSIEVE_API_SECRET},
+        ClientId=cognito_app_client_id,
+    )
+
+    api_key = login_response["AuthenticationResult"]["AccessToken"]
+    return api_key
+
+
+def get_banner(pennsieve_temp_api_key, dataset_id):
+    print(f"{Config.PENNSIEVE_API_HOST}/datasets/N%3Adataset%3A{dataset_id}/banner?api_key={pennsieve_temp_api_key}")
+    r = requests.get(f"{Config.PENNSIEVE_API_HOST}/datasets/N%3Adataset%3A{dataset_id}/banner",
+                     headers={"Authorization": f"Bearer {pennsieve_temp_api_key}"})
+    r.raise_for_status()
+    return r.json()
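Patch 07's helper trades the stored Pennsieve key/secret for a short-lived Cognito access token via the USER_PASSWORD_AUTH flow and sends it on as a bearer token. A usage sketch (the dataset identifier is a placeholder; get_banner expects the bare UUID without the 'N:dataset:' prefix, which it URL-encodes itself):

    from scripts.pennsieve import pennsieve_login, get_banner

    dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUID
    api_key = pennsieve_login()  # Cognito access token, reportedly valid for 24 hours
    banner = get_banner(api_key, dataset_id)
    print(banner)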
From 86a6318450a3c036a604261d4b6dfb54ae7db5c7 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Mon, 15 May 2023 11:37:05 +1200
Subject: [PATCH 08/13] Add pennsieve id endpoint

---
 app/main.py               | 12 +++++++++++-
 app/scicrunch_requests.py | 16 ++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/app/main.py b/app/main.py
index b80b18b..4217627 100644
--- a/app/main.py
+++ b/app/main.py
@@ -19,7 +19,7 @@
 from app.scicrunch_requests import create_doi_query, create_filter_request, create_facet_query, create_doi_aggregate, create_title_query, \
     create_identifier_query, create_pennsieve_identifier_query, create_field_query, create_request_body_for_curies, create_onto_term_query, \
-    create_multiple_doi_query, create_multiple_discoverId_query
+    create_multiple_doi_query, create_multiple_discoverId_query, create_pennsieve_id_query
 from scripts.email_sender import EmailSender, feedback_email, resource_submission_confirmation_email, creation_request_confirmation_email, issue_reporting_email, community_spotlight_submit_form_email, news_and_events_submit_form_email
 from threading import Lock
 from xml.etree import ElementTree
@@ -563,6 +563,16 @@ def getFileUrlFromPennsieve(discoverId):
         return jsonify({'url': url})
     return jsonify({'error': 'error with the provided ID '}, status=502)
 
+
+@app.route("/dataset_info/using_pennsieveId")
+@app.route("/dataset_info/using_pennsieveId/")
+def get_dataset_info_pennsieve_id():
+    ids = request.args.get('ids')
+    query = create_pennsieve_id_query(ids)
+
+    return process_results(dataset_search(query))
+
+
 @app.route("/dataset_info/using_title")
 def get_dataset_info_title():
     title = request.args.get('title')
diff --git a/app/scicrunch_requests.py b/app/scicrunch_requests.py
index 16749a9..fdb790a 100644
--- a/app/scicrunch_requests.py
+++ b/app/scicrunch_requests.py
@@ -161,6 +161,22 @@ def create_doi_request(doi):
 
     return query
 
+
+def create_pennsieve_id_query(pennsieveId):
+    query = {
+        "size": 50,
+        "from": 0,
+        "query": {
+            "term": {
+                "item.identifier.aggregate": {
+                    "value": f"N:dataset:{pennsieveId}"
+                }
+            }
+        }
+    }
+
+    print(query)
+    return query
+
 
 # create_facet_query(type): Generates facet search request data for sci-crunch given a 'type'; where
 # 'type' is either 'species', 'gender', or 'organ' at this stage.
 # Returns a tuple of the type-map and request data ( type_map, data )

From 51ff6c75acab6e11ba8812e30e7b86d8435b57ef Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Fri, 19 May 2023 14:23:48 +1200
Subject: [PATCH 09/13] Pick id for scicrunch based on what is available

---
 app/main.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/app/main.py b/app/main.py
index 4217627..ddb387f 100644
--- a/app/main.py
+++ b/app/main.py
@@ -568,7 +568,11 @@ def getFileUrlFromPennsieve(discoverId):
 @app.route("/dataset_info/using_pennsieveId/")
 def get_dataset_info_pennsieve_id():
     ids = request.args.get('ids')
-    query = create_pennsieve_id_query(ids)
+
+    if len(ids) > 4:
+        query = create_pennsieve_id_query(ids)
+    else:
+        query = create_multiple_discoverId_query([ids])
 
     return process_results(dataset_search(query))

From 84be0e305cb8e1b85ee760ebc4d2fb6b4d1e92b9 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Fri, 9 Jun 2023 13:48:17 +1200
Subject: [PATCH 10/13] Fix pennsieve REST api login not working

---
 app/config.py        | 2 +-
 scripts/pennsieve.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/config.py b/app/config.py
index 0fc706c..0382093 100644
--- a/app/config.py
+++ b/app/config.py
@@ -4,7 +4,7 @@
 
 
 class Config(object):
-    PENNSIEVE_API_HOST = os.environ.get("PENNSIEVE_API_HOST")
+    PENNSIEVE_API_HOST = os.environ.get("PENNSIEVE_API_HOST", "https://api.pennsieve.io")
     PENNSIEVE_API_SECRET = os.environ.get("PENNSIEVE_API_SECRET", "local-secret-key")
     PENNSIEVE_API_TOKEN = os.environ.get("PENNSIEVE_API_TOKEN", "local-api-key")
     PENNSIEVE_EMBARGO_TEAM_ID = os.environ.get("PENNSIEVE_EMBARGO_TEAM_ID")
diff --git a/scripts/pennsieve.py b/scripts/pennsieve.py
index 801e01e..6b1b5fb 100644
--- a/scripts/pennsieve.py
+++ b/scripts/pennsieve.py
@@ -18,8 +18,8 @@ def pennsieve_login():
     cognito_idp_client = boto3.client(
         "cognito-idp",
         region_name=cognito_region,
-        aws_access_key_id="",
-        aws_secret_access_key="",
+        aws_access_key_id=Config.SPARC_PORTAL_AWS_KEY,
+        aws_secret_access_key=Config.SPARC_PORTAL_AWS_SECRET,
     )
 
     login_response = cognito_idp_client.initiate_auth(
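The dispatch added in patch 09 is a length heuristic: Pennsieve dataset UUIDs are long strings, while discover identifiers are small integers of at most four digits, so anything longer than four characters is treated as a Pennsieve id. A quick illustration of the rule (the UUID is a placeholder):

    for ids in ('76', '1234', '00000000-0000-0000-0000-000000000000'):
        kind = 'pennsieve uuid' if len(ids) > 4 else 'discover id'
        print(ids, '->', kind)
    # prints: discover id, discover id, pennsieve uuid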
From 681f2bc9b47aceb1c67bb9cd4f49e4862981938f Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Tue, 22 Aug 2023 13:50:48 +1200
Subject: [PATCH 11/13] Add keywords to pass-through list

---
 app/scicrunch_processing_v_1_1_5.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/app/scicrunch_processing_v_1_1_5.py b/app/scicrunch_processing_v_1_1_5.py
index 342e0e5..5dda5de 100644
--- a/app/scicrunch_processing_v_1_1_5.py
+++ b/app/scicrunch_processing_v_1_1_5.py
@@ -5,7 +5,7 @@
 from app.scicrunch_processing_common import PASS_THROUGH_KEYS as BASE_PASS_THROUGH_KEYS
 from app.manifest_name_to_discover_name import name_map
 
-PASS_THROUGH_KEYS = ["doi", "dataset_identifier", "dataset_version", "dataset_revision", *BASE_PASS_THROUGH_KEYS]
+PASS_THROUGH_KEYS = ["doi", "dataset_identifier", "dataset_version", "dataset_revision", "keywords", *BASE_PASS_THROUGH_KEYS]
 
 # attributes is used to map desired parameters onto the path of keys needed in the sci-crunch response.
@@ -17,6 +17,7 @@
     'sampleSize': ['item', 'statistics', 'samples', 'count'],
     'subjectSize': ['item', 'statistics', 'subjects', 'count'],
     'name': ['item', 'name'],
+    'keywords': ['item', 'keywords'],
     'description': ['item', 'description'],
     'identifier': ['item', 'identifier'],
     'uri': ['distributions', 'current', 'uri'],

From f653a6768152b17f3382f799e5714854f9a2079e Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Wed, 20 Sep 2023 11:46:04 +1200
Subject: [PATCH 12/13] Fix issue in while loop stepping through dataset
 folders

---
 app/bfworker.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/app/bfworker.py b/app/bfworker.py
index de876b6..fc2ee52 100644
--- a/app/bfworker.py
+++ b/app/bfworker.py
@@ -52,18 +52,19 @@ def getURLFromDatasetIdAndFilePath(self, datasetId, filePath):
         fileArray = filePath.split('/')
         items = self.bf.get_dataset(datasetId).items
         count = 0
+        depth = 0
         while type(items) is list:
             item = items[count]
-            for fileName in fileArray:
-                if fileName == item.name:
-                    if type(item) is pennsieve.Collection:
-                        items = item.items
-                        count = -1
-                        continue
-                    else:
-                        try:
-                            return item.files[0].url
-                        except:
-                            return None
+            if fileArray[depth] == item.name:
+                if type(item) is pennsieve.Collection:
+                    items = item.items
+                    count = -1
+                    depth += 1
+                    continue
+                else:
+                    try:
+                        return item.files[0].url
+                    except:
+                        return None
             count += 1
         return None

From 2ffc3ca3926274fce7e164f9f6bb4657fb6dd714 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee
Date: Wed, 20 Sep 2023 12:57:45 +1200
Subject: [PATCH 13/13] Now ignore dataset id and versions when searching
 folders

---
 app/bfworker.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/bfworker.py b/app/bfworker.py
index fc2ee52..aa590f9 100644
--- a/app/bfworker.py
+++ b/app/bfworker.py
@@ -50,6 +50,7 @@ def getImagefromPackageId(self, packageId):
 
     def getURLFromDatasetIdAndFilePath(self, datasetId, filePath):
         fileArray = filePath.split('/')
+        fileArray = list(filter(lambda f: not f.isdigit(), fileArray))
         items = self.bf.get_dataset(datasetId).items
         count = 0
         depth = 0
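The filter added in patch 13 drops purely numeric path segments before the folder walk, so portal paths that still carry a discover id and version number in front map cleanly onto the dataset's folder tree. A quick illustration, assuming a path of that shape:

    file_path = '76/2/derivative/scaffold_context_info.json'  # example portal-style path
    file_array = list(filter(lambda f: not f.isdigit(), file_path.split('/')))
    # file_array == ['derivative', 'scaffold_context_info.json']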