From c4f80957e40fb71940019f6f2d37d849f84b0dfd Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Thu, 18 Apr 2024 16:19:13 +0100 Subject: [PATCH 1/9] WIP, added check for public studies, before attempting to verify ownership --- .../management/lib/create_or_update_study.py | 57 +++++++++++++++++-- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/emgapianns/management/lib/create_or_update_study.py b/emgapianns/management/lib/create_or_update_study.py index fe344f5bf..4efb03922 100644 --- a/emgapianns/management/lib/create_or_update_study.py +++ b/emgapianns/management/lib/create_or_update_study.py @@ -15,7 +15,9 @@ # limitations under the License. import logging - +import os +from base64 import b64encode +import requests from django.db.models import Q from django.utils import timezone from django.core.exceptions import ObjectDoesNotExist @@ -122,7 +124,7 @@ def _lookup_publication_by_project_id(self, project_id): pass def _update_or_create_study_from_db_result( - self, ena_study, study_result_dir, lineage, ena_db, emg_db + self, ena_study, study_result_dir, lineage, ena_db, emg_db ): """Attributes to parse out: - Center name (done) @@ -163,7 +165,22 @@ def _update_or_create_study_from_db_result( ) hold_date = ena_study.hold_date - is_private = bool(hold_date) + try: + study_is_public = self.check_if_study_is_public(secondary_study_accession) + if study_is_public: + is_private = False + else: + study_ownership_verified = self.verify_study_ownership(secondary_study_accession, + ena_study.submission_account_id) + if study_ownership_verified: + is_private = True + else: + logging.error( + f"Study {secondary_study_accession} is not owned by {ena_study.submission_account_id}. Skipping.") + return + except: + logging.error(f"Could not verify if study {secondary_study_accession} is public. Skipping.") + return # Retrieve biome object biome = emg_models.Biome.objects.using(emg_db).get(lineage=lineage) @@ -222,7 +239,7 @@ def _get_ena_project(ena_db, project_id): return project def _update_or_create_study_from_api_result( - self, api_study, study_result_dir, lineage, ena_db, emg_db + self, api_study, study_result_dir, lineage, ena_db, emg_db ): secondary_study_accession = api_study.get("secondary_study_accession") data_origination = ( @@ -261,10 +278,40 @@ def _update_or_create_study_from_api_result( @staticmethod def _update_or_create_study( - emg_db, project_id, secondary_study_accession, defaults + emg_db, project_id, secondary_study_accession, defaults ): return emg_models.Study.objects.using(emg_db).update_or_create( project_id=project_id, secondary_accession=secondary_study_accession, defaults=defaults, ) + + def check_if_study_is_public(self, study_id): + url = (f"https://www.ebi.ac.uk/ena/portal/api/search?result=study&query=study_accession%3D{study_id}%20OR" + f"%20secondary_study_accession%3D{study_id}&limit=10&dataPortal=metagenome&includeMetagenomes=true&format" + "=json") + headers = { + "accept": "*/*" + } + response = requests.get(url, params=None, headers=headers) + if response.json() == []: + is_public = False + else: + is_public = response.json()[0]['secondary_study_accession'] == study_id + + return is_public + + def verify_study_ownership(self, study_id, submission_account_id): + url = f"{'https://www.ebi.ac.uk/ena/submit/report/studies'}/{study_id}" + # TODO: fetch password from env + generic_password = 'Where to store Password ?' + auth_string = f"mg-{submission_account_id}:{generic_password}" + headers = { + "accept": "*/*", + "Authorization": f"Basic {b64encode(auth_string.encode()).decode()}" + } + response = requests.get(url, params=None, headers=headers) + ownership_verified = False + if response.status_code == 200 and study_id in response.text: + ownership_verified = True + return ownership_verified From 19a5d8ab39475d737385e104ae4e6eec7c893508 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Fri, 26 Apr 2024 13:48:00 +0100 Subject: [PATCH 2/9] Made study privacy check more explicit --- config/local-lite.yml | 2 ++ emgapianns/management/lib/create_or_update_study.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/config/local-lite.yml b/config/local-lite.yml index 0162f7cbd..346e4b9a6 100644 --- a/config/local-lite.yml +++ b/config/local-lite.yml @@ -10,6 +10,8 @@ emg: ENGINE: 'django.db.backends.sqlite3' NAME: '/opt/ci/testdbs/ena-testdb.sqlite' ERA_TABLESPACE_PREFIX: '' + ena_api_password: None + admin: True downloads_bypass_nginx: True diff --git a/emgapianns/management/lib/create_or_update_study.py b/emgapianns/management/lib/create_or_update_study.py index 4efb03922..3cbc71e9c 100644 --- a/emgapianns/management/lib/create_or_update_study.py +++ b/emgapianns/management/lib/create_or_update_study.py @@ -29,6 +29,7 @@ update_or_create_publication, ) from emgapianns.management.lib import utils +from emgcli.settings import EMG_CONF from emgena import models as ena_models from emgena.models import RunStudy, AssemblyStudy @@ -165,6 +166,9 @@ def _update_or_create_study_from_db_result( ) hold_date = ena_study.hold_date + ena_api_password = EMG_CONF['emg']['ena_api_password'] + if ena_api_password is None: + logging.warning("ENA API password is missing. Study ownership cannot be verified.") try: study_is_public = self.check_if_study_is_public(secondary_study_accession) if study_is_public: @@ -303,9 +307,11 @@ def check_if_study_is_public(self, study_id): def verify_study_ownership(self, study_id, submission_account_id): url = f"{'https://www.ebi.ac.uk/ena/submit/report/studies'}/{study_id}" - # TODO: fetch password from env - generic_password = 'Where to store Password ?' - auth_string = f"mg-{submission_account_id}:{generic_password}" + ena_api_password = EMG_CONF['emg']['ena_api_password'] + if not ena_api_password: + logging.warning("ENA API password is missing. Study ownership cannot be verified.") + return False + auth_string = f"mg-{submission_account_id}:{ena_api_password}" headers = { "accept": "*/*", "Authorization": f"Basic {b64encode(auth_string.encode()).decode()}" From d6adc270a5b51ba44cbccff59e449d4ca0af7960 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Tue, 30 Apr 2024 10:26:26 +0100 Subject: [PATCH 3/9] Added ena_api_key for tests --- config/local-tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/local-tests.yml b/config/local-tests.yml index db4297888..8aff58fc0 100644 --- a/config/local-tests.yml +++ b/config/local-tests.yml @@ -16,4 +16,5 @@ emg: celery_backend: 'redis://localhost:6379/1' results_production_dir: '/dummy/path/results' # metagenomics exchange - me_api: 'https://wwwdev.ebi.ac.uk/ena/registry/metagenome/api' \ No newline at end of file + me_api: 'https://wwwdev.ebi.ac.uk/ena/registry/metagenome/api' + ena_api_password: None \ No newline at end of file From e0350dc9e584ef1b1f9399c7a1831d896e5efd22 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Wed, 1 May 2024 15:28:08 +0100 Subject: [PATCH 4/9] Added ena api key to CI config file --- ci/configuration.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/configuration.yaml b/ci/configuration.yaml index 1102dc73e..8f88e50f0 100644 --- a/ci/configuration.yaml +++ b/ci/configuration.yaml @@ -19,4 +19,5 @@ emg: celery_backend: 'redis://localhost:6379/1' results_production_dir: '/dummy/path/results' # metagenomics exchange - me_api: 'https://wwwdev.ebi.ac.uk/ena/registry/metagenome/api' \ No newline at end of file + me_api: 'https://wwwdev.ebi.ac.uk/ena/registry/metagenome/api' + ena_api_password: None \ No newline at end of file From 838f33306bd3b20db076575b031808aa6150f98b Mon Sep 17 00:00:00 2001 From: Mahfouz Shehu Date: Thu, 2 May 2024 09:19:23 +0100 Subject: [PATCH 5/9] Update emgapianns/management/lib/create_or_update_study.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Martín Beracochea --- emgapianns/management/lib/create_or_update_study.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emgapianns/management/lib/create_or_update_study.py b/emgapianns/management/lib/create_or_update_study.py index 3cbc71e9c..eaf194f4c 100644 --- a/emgapianns/management/lib/create_or_update_study.py +++ b/emgapianns/management/lib/create_or_update_study.py @@ -168,7 +168,7 @@ def _update_or_create_study_from_db_result( hold_date = ena_study.hold_date ena_api_password = EMG_CONF['emg']['ena_api_password'] if ena_api_password is None: - logging.warning("ENA API password is missing. Study ownership cannot be verified.") + logging.error(f"ENA API password is missing. Study {secondary_study_accession} ownership cannot be verified.") try: study_is_public = self.check_if_study_is_public(secondary_study_accession) if study_is_public: From 105b848448969077a348e706a303d8f9833070f2 Mon Sep 17 00:00:00 2001 From: Mahfouz Shehu Date: Thu, 2 May 2024 09:19:42 +0100 Subject: [PATCH 6/9] Update emgapianns/management/lib/create_or_update_study.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Martín Beracochea --- emgapianns/management/lib/create_or_update_study.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emgapianns/management/lib/create_or_update_study.py b/emgapianns/management/lib/create_or_update_study.py index eaf194f4c..7522f1beb 100644 --- a/emgapianns/management/lib/create_or_update_study.py +++ b/emgapianns/management/lib/create_or_update_study.py @@ -306,7 +306,7 @@ def check_if_study_is_public(self, study_id): return is_public def verify_study_ownership(self, study_id, submission_account_id): - url = f"{'https://www.ebi.ac.uk/ena/submit/report/studies'}/{study_id}" + url = f"https://www.ebi.ac.uk/ena/submit/report/studies/{study_id}" ena_api_password = EMG_CONF['emg']['ena_api_password'] if not ena_api_password: logging.warning("ENA API password is missing. Study ownership cannot be verified.") From 2fe394ce0e376163473c3b78bdaaa2654dee5273 Mon Sep 17 00:00:00 2001 From: Mahfouz Shehu Date: Thu, 2 May 2024 09:21:27 +0100 Subject: [PATCH 7/9] Update emgapianns/management/lib/create_or_update_study.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Martín Beracochea --- emgapianns/management/lib/create_or_update_study.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emgapianns/management/lib/create_or_update_study.py b/emgapianns/management/lib/create_or_update_study.py index 7522f1beb..df0603028 100644 --- a/emgapianns/management/lib/create_or_update_study.py +++ b/emgapianns/management/lib/create_or_update_study.py @@ -316,7 +316,7 @@ def verify_study_ownership(self, study_id, submission_account_id): "accept": "*/*", "Authorization": f"Basic {b64encode(auth_string.encode()).decode()}" } - response = requests.get(url, params=None, headers=headers) + response = requests.get(url, headers=headers) ownership_verified = False if response.status_code == 200 and study_id in response.text: ownership_verified = True From fcf92ff3615d68f74db5a5cc804ff5957ebca31c Mon Sep 17 00:00:00 2001 From: Mahfouz Shehu Date: Thu, 2 May 2024 09:22:38 +0100 Subject: [PATCH 8/9] Update emgapianns/management/lib/create_or_update_study.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Martín Beracochea --- emgapianns/management/lib/create_or_update_study.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/emgapianns/management/lib/create_or_update_study.py b/emgapianns/management/lib/create_or_update_study.py index df0603028..a355e9424 100644 --- a/emgapianns/management/lib/create_or_update_study.py +++ b/emgapianns/management/lib/create_or_update_study.py @@ -318,6 +318,4 @@ def verify_study_ownership(self, study_id, submission_account_id): } response = requests.get(url, headers=headers) ownership_verified = False - if response.status_code == 200 and study_id in response.text: - ownership_verified = True - return ownership_verified + return response.status_code == 200 and study_id in response.text From b1078a14b48cc57745c8cb6a46d12aa58f51fc6b Mon Sep 17 00:00:00 2001 From: mgs-sails Date: Tue, 20 Aug 2024 10:28:50 +0100 Subject: [PATCH 9/9] Responded to PR suggestions Added try catch and exlplict check for 200 responses from ENA API --- ci/configuration.yaml | 2 +- .../management/lib/create_or_update_study.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ci/configuration.yaml b/ci/configuration.yaml index 8f88e50f0..7458e6162 100644 --- a/ci/configuration.yaml +++ b/ci/configuration.yaml @@ -20,4 +20,4 @@ emg: results_production_dir: '/dummy/path/results' # metagenomics exchange me_api: 'https://wwwdev.ebi.ac.uk/ena/registry/metagenome/api' - ena_api_password: None \ No newline at end of file + ena_api_password: null \ No newline at end of file diff --git a/emgapianns/management/lib/create_or_update_study.py b/emgapianns/management/lib/create_or_update_study.py index a355e9424..6d0b3c9ff 100644 --- a/emgapianns/management/lib/create_or_update_study.py +++ b/emgapianns/management/lib/create_or_update_study.py @@ -297,11 +297,16 @@ def check_if_study_is_public(self, study_id): headers = { "accept": "*/*" } - response = requests.get(url, params=None, headers=headers) - if response.json() == []: + try: + response = requests.get(url, params=None, headers=headers) + response.raise_for_status() # Raise an HTTPError for bad responses + if response.status_code != 200 or response.json() == []: + is_public = False + else: + is_public = response.json()[0]['secondary_study_accession'] == study_id + except requests.RequestException as e: + logging.error(f"Error checking if study is public: {e}") is_public = False - else: - is_public = response.json()[0]['secondary_study_accession'] == study_id return is_public @@ -309,7 +314,7 @@ def verify_study_ownership(self, study_id, submission_account_id): url = f"https://www.ebi.ac.uk/ena/submit/report/studies/{study_id}" ena_api_password = EMG_CONF['emg']['ena_api_password'] if not ena_api_password: - logging.warning("ENA API password is missing. Study ownership cannot be verified.") + logging.error("ENA API password is missing. Study ownership cannot be verified.") return False auth_string = f"mg-{submission_account_id}:{ena_api_password}" headers = { @@ -317,5 +322,4 @@ def verify_study_ownership(self, study_id, submission_account_id): "Authorization": f"Basic {b64encode(auth_string.encode()).decode()}" } response = requests.get(url, headers=headers) - ownership_verified = False return response.status_code == 200 and study_id in response.text