From da8351344525cc8069edcef20b2e2c0b32c65df4 Mon Sep 17 00:00:00 2001 From: tcezard Date: Mon, 29 Jan 2024 09:11:23 +0000 Subject: [PATCH] Extract logic from the executable to put it in the library reorganise orchestration to ensure the validation only has to be run when it was not successfully run before --- bin/eva-sub-cli.py | 72 ++++++++------------------------- eva_sub_cli/auth.py | 2 +- eva_sub_cli/main.py | 31 ++++----------- eva_sub_cli/submit.py | 36 ++++++++--------- tests/test_main.py | 93 +++++++++++++++++++++++++++++++++++++++++++ tests/test_submit.py | 10 ++--- 6 files changed, 141 insertions(+), 103 deletions(-) create mode 100644 tests/test_main.py diff --git a/bin/eva-sub-cli.py b/bin/eva-sub-cli.py index 6f20326..26fba4b 100755 --- a/bin/eva-sub-cli.py +++ b/bin/eva-sub-cli.py @@ -1,36 +1,19 @@ #!/usr/bin/env python -import csv -import os + from argparse import ArgumentParser -from ebi_eva_common_pyutils.config import WritableConfig from ebi_eva_common_pyutils.logger import logging_config -from eva_sub_cli import SUB_CLI_CONFIG_FILE, __version__ -from eva_sub_cli.docker_validator import DockerValidator, docker_path, container_image -from eva_sub_cli.reporter import READY_FOR_SUBMISSION_TO_EVA -from eva_sub_cli.submit import StudySubmitter - -VALIDATE = 'validate' -SUBMIT = 'submit' -RESUME_SUBMISSION = 'resume_submission' - -logging_config.add_stdout_handler() - - -def get_vcf_files(mapping_file): - vcf_files = [] - with open(mapping_file) as open_file: - reader = csv.DictReader(open_file, delimiter=',') - for row in reader: - vcf_files.append(row['vcf']) - return vcf_files +from eva_sub_cli import main +from eva_sub_cli.main import VALIDATE, SUBMIT if __name__ == "__main__": argparser = ArgumentParser(description='EVA Submission CLI - validate and submit data to EVA') - argparser.add_argument('--task', required=True, choices=[VALIDATE, SUBMIT, RESUME_SUBMISSION], - help='Select a task to perform') + argparser.add_argument('--tasks', nargs='*', 
choices=[VALIDATE, SUBMIT], default=[SUBMIT], + help='Select a task to perform. Stating VALIDATE runs the validation regardless of the ' + 'previous runs; stating SUBMIT runs the validation only if it was not performed ' + 'successfully before and then runs the submission.') argparser.add_argument('--submission_dir', required=True, type=str, help='Full path to the directory where all processing will be done ' 'and submission info is/will be stored') @@ -41,38 +24,17 @@ def get_vcf_files(mapping_file): help="Json file that describe the project, analysis, samples and files") group.add_argument("--metadata_xlsx", help="Excel spreadsheet that describe the project, analysis, samples and files") - group.add_argument("--username", - help="Username used for connecting to the ENA webin account") - group.add_argument("--password", - help="Password used for connecting to the ENA webin account") + argparser.add_argument("--username", + help="Username used for connecting to the ENA webin account") + argparser.add_argument("--password", + help="Password used for connecting to the ENA webin account") + argparser.add_argument("--resume", default=False, action='store_true', + help="Resume the process execution from where it left off. 
This is only supported for a " + "subset of the tasks") args = argparser.parse_args() - # load config - config_file_path = os.path.join(args.submission_dir, SUB_CLI_CONFIG_FILE) - sub_config = WritableConfig(config_file_path, version=__version__) - - vcf_files = get_vcf_files(args.vcf_files_mapping) - metadata_file = args.metadata_json or args.metadata_xlsx - - if args.task == RESUME_SUBMISSION: - # if validation is not passed, process task submit (validate and submit) - if READY_FOR_SUBMISSION_TO_EVA not in sub_config or not sub_config[READY_FOR_SUBMISSION_TO_EVA]: - args.task = SUBMIT - else: - # if validation is passed, upload files without validating again - with StudySubmitter(args.submission_dir, vcf_files, metadata_file, submission_config=sub_config, - username=args.username, password=args.password) as submitter: - submitter.upload_submission() - - if args.task == VALIDATE or args.task == SUBMIT: - with DockerValidator(args.vcf_files_mapping, args.submission_dir, args.metadata_json, args.metadata_xlsx, - submission_config=sub_config) as validator: - validator.validate() - validator.create_reports() - validator.update_config_with_validation_result() + logging_config.add_stdout_handler() - if args.task == SUBMIT: - with StudySubmitter(args.submission_dir, vcf_files, metadata_file, submission_config=sub_config, - username=args.username, password=args.password) as submitter: - submitter.submit() + main.orchestrate_process(args.submission_dir, args.vcf_files_mapping, args.metadata_json, args.metadata_xlsx, + args.tasks, args.resume) diff --git a/eva_sub_cli/auth.py b/eva_sub_cli/auth.py index ee0f35c..3f8eb9d 100644 --- a/eva_sub_cli/auth.py +++ b/eva_sub_cli/auth.py @@ -102,5 +102,5 @@ def get_auth(username=None, password=None): global auth if auth: return auth - auth = WebinAuth(username, password) + auth = WebinAuth(username=username, password=password) return auth diff --git a/eva_sub_cli/main.py b/eva_sub_cli/main.py index d0f3755..9ff26a3 100755 --- 
a/eva_sub_cli/main.py +++ b/eva_sub_cli/main.py @@ -1,12 +1,11 @@ #!/usr/bin/env python import csv import os -first attempt from ebi_eva_common_pyutils.config import WritableConfig from ebi_eva_common_pyutils.logger import logging_config from eva_sub_cli import SUB_CLI_CONFIG_FILE, __version__ -from eva_sub_cli.docker_validator import DockerValidator, docker_path, container_image +from eva_sub_cli.docker_validator import DockerValidator from eva_sub_cli.reporter import READY_FOR_SUBMISSION_TO_EVA from eva_sub_cli.submit import StudySubmitter @@ -26,7 +25,7 @@ def get_vcf_files(mapping_file): return vcf_files -def orchestrate_process(submission_dir, vcf_files_mapping, metadata_json, metadata_xlsx, task): +def orchestrate_process(submission_dir, vcf_files_mapping, metadata_json, metadata_xlsx, tasks, resume): # load config config_file_path = os.path.join(submission_dir, SUB_CLI_CONFIG_FILE) sub_config = WritableConfig(config_file_path, version=__version__) @@ -34,31 +33,17 @@ def orchestrate_process(submission_dir, vcf_files_mapping, metadata_json, metada metadata_file = metadata_json or metadata_xlsx vcf_files = get_vcf_files(vcf_files_mapping) + # Validation is mandatory so if submit is requested then VALIDATE must have run before or be requested as well + if SUBMIT in tasks and not sub_config.get(READY_FOR_SUBMISSION_TO_EVA, False): + if VALIDATE not in tasks: + tasks.append(VALIDATE) - - # Only run validate if it's been requested if VALIDATE in tasks: with DockerValidator(vcf_files_mapping, submission_dir, metadata_json, metadata_xlsx, submission_config=sub_config) as validator: validator.validate() validator.create_reports() validator.update_config_with_validation_result() - - with StudySubmitter(submission_dir, vcf_files, metadata_file, submission_config=sub_config) as submitter: - submitter.upload_submission() - # if validation is not passed, process task submit (validate and submit) - if READY_FOR_SUBMISSION_TO_EVA in sub_config and 
sub_config[READY_FOR_SUBMISSION_TO_EVA]: - tasks = SUBMIT - else: - # if validation is passed, upload files without validating again - - if task == VALIDATE or task == SUBMIT: - with DockerValidator(vcf_files_mapping, submission_dir, metadata_json, metadata_xlsx, - submission_config=sub_config) as validator: - validator.validate() - validator.create_reports() - validator.update_config_with_validation_result() - - if task == SUBMIT: + if SUBMIT in tasks: with StudySubmitter(submission_dir, vcf_files, metadata_file, submission_config=sub_config) as submitter: - submitter.submit() + submitter.submit(resume=resume) diff --git a/eva_sub_cli/submit.py b/eva_sub_cli/submit.py index e652253..7a9b8ad 100644 --- a/eva_sub_cli/submit.py +++ b/eva_sub_cli/submit.py @@ -41,20 +41,19 @@ def update_config_with_submission_id_and_upload_url(self, submission_id, upload_ self.sub_config.set(SUB_CLI_CONFIG_KEY_SUBMISSION_ID, value=submission_id) self.sub_config.set(SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL, value=upload_url) - def upload_submission(self, submission_upload_url=None): + def _upload_submission(self): if READY_FOR_SUBMISSION_TO_EVA not in self.sub_config or not self.sub_config[READY_FOR_SUBMISSION_TO_EVA]: raise Exception(f'There are still validation errors that needs to be addressed. 
' f'Please review, address and re-validate before uploading.') - if not submission_upload_url: - submission_upload_url = self.sub_config[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL] + submission_upload_url = self.sub_config[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL] for f in self.vcf_files: - self.upload_file(submission_upload_url, f) - self.upload_file(submission_upload_url, self.metadata_file) + self._upload_file(submission_upload_url, f) + self._upload_file(submission_upload_url, self.metadata_file) @retry(tries=5, delay=10, backoff=5) - def upload_file(self, submission_upload_url, input_file): + def _upload_file(self, submission_upload_url, input_file): base_name = os.path.basename(input_file) self.info(f'Transfer {base_name} to EVA FTP') r = requests.put(urljoin(submission_upload_url, base_name), data=open(input_file, 'rb')) @@ -67,21 +66,20 @@ def verify_submission_dir(self, submission_dir): if not os.access(submission_dir, os.W_OK): raise Exception(f"The directory '{submission_dir}' does not have write permissions.") - def submit(self): + def submit(self, resume=False): if READY_FOR_SUBMISSION_TO_EVA not in self.sub_config or not self.sub_config[READY_FOR_SUBMISSION_TO_EVA]: raise Exception(f'There are still validation errors that need to be addressed. 
' f'Please review, address and re-validate before submitting.') - - self.verify_submission_dir(self.submission_dir) - response = requests.post(self.submission_initiate_url, - headers={'Accept': 'application/hal+json', - 'Authorization': 'Bearer ' + self.auth.token}) - response.raise_for_status() - response_json = response.json() - self.info("Submission ID {} received!!".format(response_json["submissionId"])) - - # update config with submission id and upload url - self.update_config_with_submission_id_and_upload_url(response_json["submissionId"], response_json["uploadUrl"]) + if not (resume or self.sub_config.get(SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL)): + self.verify_submission_dir(self.submission_dir) + response = requests.post(self.submission_initiate_url, + headers={'Accept': 'application/hal+json', + 'Authorization': 'Bearer ' + self.auth.token}) + response.raise_for_status() + response_json = response.json() + self.info("Submission ID {} received!!".format(response_json["submissionId"])) + # update config with submission id and upload url + self.update_config_with_submission_id_and_upload_url(response_json["submissionId"], response_json["uploadUrl"]) # upload submission - self.upload_submission(response_json["uploadUrl"]) + self._upload_submission() diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..2fb4fab --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,93 @@ +import json +import os +import shutil +import unittest +from unittest.mock import MagicMock, patch, Mock + +import yaml +from ebi_eva_common_pyutils.config import WritableConfig + +from eva_sub_cli import SUB_CLI_CONFIG_FILE +from eva_sub_cli.main import orchestrate_process, VALIDATE, SUBMIT +from eva_sub_cli.reporter import READY_FOR_SUBMISSION_TO_EVA +from eva_sub_cli.submit import StudySubmitter, SUB_CLI_CONFIG_KEY_SUBMISSION_ID, SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL + + +class TestMain(unittest.TestCase): + resource_dir = 
os.path.join(os.path.dirname(__file__), 'resources') + test_sub_dir = os.path.join(resource_dir, 'test_sub_dir') + config_file = os.path.join(test_sub_dir, SUB_CLI_CONFIG_FILE) + + mapping_file = os.path.join(test_sub_dir, 'vcf_files_metadata.csv') + metadata_json = os.path.join(test_sub_dir, 'sub_metadata.json') + metadata_xlsx = os.path.join(test_sub_dir, 'sub_metadata.xlsx') + + def test_orchestrate_validate(self): + with patch('eva_sub_cli.main.get_vcf_files') as m_get_vcf, \ + patch('eva_sub_cli.main.WritableConfig') as m_config, \ + patch('eva_sub_cli.main.DockerValidator') as m_docker_validator: + orchestrate_process( + self.test_sub_dir, self.mapping_file, self.metadata_json, self.metadata_xlsx, tasks=[VALIDATE], + resume=False + ) + m_get_vcf.assert_called_once_with(self.mapping_file) + m_docker_validator.assert_any_call( + self.mapping_file, self.test_sub_dir, self.metadata_json, self.metadata_xlsx, + submission_config=m_config.return_value + ) + with m_docker_validator() as validator: + validator.validate.assert_called_once_with() + validator.create_reports.assert_called_once_with() + validator.update_config_with_validation_result.assert_called_once_with() + + + def test_orchestrate_validate_submit(self): + with patch('eva_sub_cli.main.get_vcf_files') as m_get_vcf, \ + patch('eva_sub_cli.main.WritableConfig') as m_config, \ + patch('eva_sub_cli.main.DockerValidator') as m_docker_validator, \ + patch('eva_sub_cli.main.StudySubmitter') as m_submitter: + # Empty config + m_config.return_value = {} + + orchestrate_process( + self.test_sub_dir, self.mapping_file, self.metadata_json, self.metadata_xlsx, tasks=[SUBMIT], + resume=False + ) + m_get_vcf.assert_called_once_with(self.mapping_file) + # Validate was run because the config show it was not run successfully before + m_docker_validator.assert_any_call( + self.mapping_file, self.test_sub_dir, self.metadata_json, self.metadata_xlsx, + submission_config=m_config.return_value + ) + with m_docker_validator() 
as validator: + validator.validate.assert_called_once_with() + validator.create_reports.assert_called_once_with() + validator.update_config_with_validation_result.assert_called_once_with() + + # Submit was created + m_submitter.assert_any_call(self.test_sub_dir, m_get_vcf.return_value, self.metadata_json, + submission_config=m_config.return_value) + with m_submitter() as submitter: + submitter.submit.assert_called_once_with(resume=False) + + def test_orchestrate_validate_no_submit(self): + with patch('eva_sub_cli.main.get_vcf_files') as m_get_vcf, \ + patch('eva_sub_cli.main.WritableConfig') as m_config, \ + patch('eva_sub_cli.main.DockerValidator') as m_docker_validator, \ + patch('eva_sub_cli.main.StudySubmitter') as m_submitter: + # Empty config + m_config.return_value = {READY_FOR_SUBMISSION_TO_EVA: True} + + orchestrate_process( + self.test_sub_dir, self.mapping_file, self.metadata_json, self.metadata_xlsx, tasks=[SUBMIT], + resume=False + ) + m_get_vcf.assert_called_once_with(self.mapping_file) + # Validate was not run because the config showed it was run successfully before + assert m_docker_validator.call_count == 0 + + # Submit was created + m_submitter.assert_any_call(self.test_sub_dir, m_get_vcf.return_value, self.metadata_json, + submission_config=m_config.return_value) + with m_submitter() as submitter: + submitter.submit.assert_called_once_with(resume=False) diff --git a/tests/test_submit.py b/tests/test_submit.py index 84970ed..c2c1bc5 100644 --- a/tests/test_submit.py +++ b/tests/test_submit.py @@ -40,7 +40,7 @@ def test_submit(self): # Set the side_effect attribute to return different responses with patch('eva_sub_cli.submit.requests.post', return_value=mock_submit_response) as mock_post, \ - patch.object(StudySubmitter, 'upload_submission'), \ + patch.object(StudySubmitter, '_upload_submission'), \ patch.object(StudySubmitter, 'verify_submission_dir'), \ patch.object(StudySubmitter, 'update_config_with_submission_id_and_upload_url'), \ 
patch.object(self.submitter, 'sub_config', {READY_FOR_SUBMISSION_TO_EVA: True}), \ @@ -63,7 +63,7 @@ def test_submit_with_config(self): sub_config.write() with patch('eva_sub_cli.submit.requests.post', return_value=mock_submit_response) as mock_post, \ - patch.object(StudySubmitter, 'upload_submission'): + patch.object(StudySubmitter, '_upload_submission'): with self.submitter as submitter: submitter.submit() @@ -104,8 +104,8 @@ def test_upload_submission(self): test_url = 'http://example.com/' with patch.object(StudySubmitter, 'upload_file') as mock_upload_file, \ patch.object(self.submitter, 'sub_config', {READY_FOR_SUBMISSION_TO_EVA: True}): - - self.submitter.upload_submission(submission_upload_url=test_url) + self.submitter.sub_config[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL] = test_url + self.submitter._upload_submission() for vcf_file in self.submitter.vcf_files: mock_upload_file.assert_any_call(test_url, vcf_file) mock_upload_file.assert_called_with(test_url, self.submitter.metadata_file) @@ -114,6 +114,6 @@ def test_upload_file(self): test_url = 'http://example.com/' with patch('eva_sub_cli.submit.requests.put') as mock_put: file_to_upload = os.path.join(self.resource_dir, 'EVA_Submission_template.V1.1.4.xlsx') - self.submitter.upload_file(submission_upload_url=test_url, input_file=file_to_upload) + self.submitter._upload_file(submission_upload_url=test_url, input_file=file_to_upload) assert mock_put.mock_calls[0][1][0] == test_url + os.path.basename(file_to_upload) # Cannot test the content of the upload as opening the same file twice give different object