Skip to content

Commit

Permalink
Extract logic from the executable to put it in the library
Browse files Browse the repository at this point in the history
reorganise orchestration to ensure the validation only has to be run when the was not successfully run before
  • Loading branch information
tcezard committed Jan 29, 2024
1 parent 43db475 commit da83513
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 103 deletions.
72 changes: 17 additions & 55 deletions bin/eva-sub-cli.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,19 @@
#!/usr/bin/env python
import csv
import os

from argparse import ArgumentParser

from ebi_eva_common_pyutils.config import WritableConfig
from ebi_eva_common_pyutils.logger import logging_config

from eva_sub_cli import SUB_CLI_CONFIG_FILE, __version__
from eva_sub_cli.docker_validator import DockerValidator, docker_path, container_image
from eva_sub_cli.reporter import READY_FOR_SUBMISSION_TO_EVA
from eva_sub_cli.submit import StudySubmitter

VALIDATE = 'validate'
SUBMIT = 'submit'
RESUME_SUBMISSION = 'resume_submission'

logging_config.add_stdout_handler()


def get_vcf_files(mapping_file):
vcf_files = []
with open(mapping_file) as open_file:
reader = csv.DictReader(open_file, delimiter=',')
for row in reader:
vcf_files.append(row['vcf'])
return vcf_files
from eva_sub_cli import main
from eva_sub_cli.main import VALIDATE, SUBMIT


if __name__ == "__main__":
argparser = ArgumentParser(description='EVA Submission CLI - validate and submit data to EVA')
argparser.add_argument('--task', required=True, choices=[VALIDATE, SUBMIT, RESUME_SUBMISSION],
help='Select a task to perform')
argparser.add_argument('--tasks', nargs='*', choices=[VALIDATE, SUBMIT], default=[SUBMIT],
help='Select a task to perform. Stating VALIDATE run the validation regardless of the '
'previous runs, Stating SUBMIT run validate only if the validation was not performed '
'successfully before and run the submission.')
argparser.add_argument('--submission_dir', required=True, type=str,
help='Full path to the directory where all processing will be done '
'and submission info is/will be stored')
Expand All @@ -41,38 +24,17 @@ def get_vcf_files(mapping_file):
help="Json file that describe the project, analysis, samples and files")
group.add_argument("--metadata_xlsx",
help="Excel spreadsheet that describe the project, analysis, samples and files")
group.add_argument("--username",
help="Username used for connecting to the ENA webin account")
group.add_argument("--password",
help="Password used for connecting to the ENA webin account")
argparser.add_argument("--username",
help="Username used for connecting to the ENA webin account")
argparser.add_argument("--password",
help="Password used for connecting to the ENA webin account")
argparser.add_argument("--resume", default=False, action='store_true',
help="Resume the process execution from where it left of. This is only supported for a "
"subset of the tasks")

args = argparser.parse_args()

# load config
config_file_path = os.path.join(args.submission_dir, SUB_CLI_CONFIG_FILE)
sub_config = WritableConfig(config_file_path, version=__version__)

vcf_files = get_vcf_files(args.vcf_files_mapping)
metadata_file = args.metadata_json or args.metadata_xlsx

if args.task == RESUME_SUBMISSION:
# if validation is not passed, process task submit (validate and submit)
if READY_FOR_SUBMISSION_TO_EVA not in sub_config or not sub_config[READY_FOR_SUBMISSION_TO_EVA]:
args.task = SUBMIT
else:
# if validation is passed, upload files without validating again
with StudySubmitter(args.submission_dir, vcf_files, metadata_file, submission_config=sub_config,
username=args.username, password=args.password) as submitter:
submitter.upload_submission()

if args.task == VALIDATE or args.task == SUBMIT:
with DockerValidator(args.vcf_files_mapping, args.submission_dir, args.metadata_json, args.metadata_xlsx,
submission_config=sub_config) as validator:
validator.validate()
validator.create_reports()
validator.update_config_with_validation_result()
logging_config.add_stdout_handler()

if args.task == SUBMIT:
with StudySubmitter(args.submission_dir, vcf_files, metadata_file, submission_config=sub_config,
username=args.username, password=args.password) as submitter:
submitter.submit()
main.orchestrate_process(args.submission_dir, args.vcf_files_mapping, args.metadata_json, args.metadata_xlsx,
args.tasks, args.resume)
2 changes: 1 addition & 1 deletion eva_sub_cli/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,5 @@ def get_auth(username=None, password=None):
global auth
if auth:
return auth
auth = WebinAuth(username, password)
auth = WebinAuth(username=username, password=password)
return auth
31 changes: 8 additions & 23 deletions eva_sub_cli/main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#!/usr/bin/env python
import csv
import os
first attempt
from ebi_eva_common_pyutils.config import WritableConfig
from ebi_eva_common_pyutils.logger import logging_config

from eva_sub_cli import SUB_CLI_CONFIG_FILE, __version__
from eva_sub_cli.docker_validator import DockerValidator, docker_path, container_image
from eva_sub_cli.docker_validator import DockerValidator
from eva_sub_cli.reporter import READY_FOR_SUBMISSION_TO_EVA
from eva_sub_cli.submit import StudySubmitter

Expand All @@ -26,39 +25,25 @@ def get_vcf_files(mapping_file):
return vcf_files


def orchestrate_process(submission_dir, vcf_files_mapping, metadata_json, metadata_xlsx, task):
def orchestrate_process(submission_dir, vcf_files_mapping, metadata_json, metadata_xlsx, tasks, resume):
# load config
config_file_path = os.path.join(submission_dir, SUB_CLI_CONFIG_FILE)
sub_config = WritableConfig(config_file_path, version=__version__)

metadata_file = metadata_json or metadata_xlsx
vcf_files = get_vcf_files(vcf_files_mapping)

# Validation is mandatory so if submit is requested then VALIDATE must have run before or be requested as well
if SUBMIT in tasks and not sub_config.get(READY_FOR_SUBMISSION_TO_EVA, False):
if VALIDATE not in tasks:
tasks.append(VALIDATE)


# Only run validate if it's been requested
if VALIDATE in tasks:
with DockerValidator(vcf_files_mapping, submission_dir, metadata_json, metadata_xlsx,
submission_config=sub_config) as validator:
validator.validate()
validator.create_reports()
validator.update_config_with_validation_result()

with StudySubmitter(submission_dir, vcf_files, metadata_file, submission_config=sub_config) as submitter:
submitter.upload_submission()
# if validation is not passed, process task submit (validate and submit)
if READY_FOR_SUBMISSION_TO_EVA in sub_config and sub_config[READY_FOR_SUBMISSION_TO_EVA]:
tasks = SUBMIT
else:
# if validation is passed, upload files without validating again

if task == VALIDATE or task == SUBMIT:
with DockerValidator(vcf_files_mapping, submission_dir, metadata_json, metadata_xlsx,
submission_config=sub_config) as validator:
validator.validate()
validator.create_reports()
validator.update_config_with_validation_result()

if task == SUBMIT:
if SUBMIT in tasks:
with StudySubmitter(submission_dir, vcf_files, metadata_file, submission_config=sub_config) as submitter:
submitter.submit()
submitter.submit(resume=resume)
36 changes: 17 additions & 19 deletions eva_sub_cli/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,19 @@ def update_config_with_submission_id_and_upload_url(self, submission_id, upload_
self.sub_config.set(SUB_CLI_CONFIG_KEY_SUBMISSION_ID, value=submission_id)
self.sub_config.set(SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL, value=upload_url)

def upload_submission(self, submission_upload_url=None):
def _upload_submission(self):
if READY_FOR_SUBMISSION_TO_EVA not in self.sub_config or not self.sub_config[READY_FOR_SUBMISSION_TO_EVA]:
raise Exception(f'There are still validation errors that needs to be addressed. '
f'Please review, address and re-validate before uploading.')

if not submission_upload_url:
submission_upload_url = self.sub_config[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL]
submission_upload_url = self.sub_config[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL]

for f in self.vcf_files:
self.upload_file(submission_upload_url, f)
self.upload_file(submission_upload_url, self.metadata_file)
self._upload_file(submission_upload_url, f)
self._upload_file(submission_upload_url, self.metadata_file)

@retry(tries=5, delay=10, backoff=5)
def upload_file(self, submission_upload_url, input_file):
def _upload_file(self, submission_upload_url, input_file):
base_name = os.path.basename(input_file)
self.info(f'Transfer {base_name} to EVA FTP')
r = requests.put(urljoin(submission_upload_url, base_name), data=open(input_file, 'rb'))
Expand All @@ -67,21 +66,20 @@ def verify_submission_dir(self, submission_dir):
if not os.access(submission_dir, os.W_OK):
raise Exception(f"The directory '{submission_dir}' does not have write permissions.")

def submit(self):
def submit(self, resume=False):
if READY_FOR_SUBMISSION_TO_EVA not in self.sub_config or not self.sub_config[READY_FOR_SUBMISSION_TO_EVA]:
raise Exception(f'There are still validation errors that need to be addressed. '
f'Please review, address and re-validate before submitting.')

self.verify_submission_dir(self.submission_dir)
response = requests.post(self.submission_initiate_url,
headers={'Accept': 'application/hal+json',
'Authorization': 'Bearer ' + self.auth.token})
response.raise_for_status()
response_json = response.json()
self.info("Submission ID {} received!!".format(response_json["submissionId"]))

# update config with submission id and upload url
self.update_config_with_submission_id_and_upload_url(response_json["submissionId"], response_json["uploadUrl"])
if not (resume or self.sub_config.get(SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL)):
self.verify_submission_dir(self.submission_dir)
response = requests.post(self.submission_initiate_url,
headers={'Accept': 'application/hal+json',
'Authorization': 'Bearer ' + self.auth.token})
response.raise_for_status()
response_json = response.json()
self.info("Submission ID {} received!!".format(response_json["submissionId"]))
# update config with submission id and upload url
self.update_config_with_submission_id_and_upload_url(response_json["submissionId"], response_json["uploadUrl"])

# upload submission
self.upload_submission(response_json["uploadUrl"])
self._upload_submission()
93 changes: 93 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import json
import os
import shutil
import unittest
from unittest.mock import MagicMock, patch, Mock

import yaml
from ebi_eva_common_pyutils.config import WritableConfig

from eva_sub_cli import SUB_CLI_CONFIG_FILE
from eva_sub_cli.main import orchestrate_process, VALIDATE, SUBMIT
from eva_sub_cli.reporter import READY_FOR_SUBMISSION_TO_EVA
from eva_sub_cli.submit import StudySubmitter, SUB_CLI_CONFIG_KEY_SUBMISSION_ID, SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL


class TestMain(unittest.TestCase):
resource_dir = os.path.join(os.path.dirname(__file__), 'resources')
test_sub_dir = os.path.join(resource_dir, 'test_sub_dir')
config_file = os.path.join(test_sub_dir, SUB_CLI_CONFIG_FILE)

mapping_file = os.path.join(test_sub_dir, 'vcf_files_metadata.csv')
metadata_json = os.path.join(test_sub_dir, 'sub_metadata.json')
metadata_xlsx = os.path.join(test_sub_dir, 'sub_metadata.xlsx')

def test_orchestrate_validate(self):
with patch('eva_sub_cli.main.get_vcf_files') as m_get_vcf, \
patch('eva_sub_cli.main.WritableConfig') as m_config, \
patch('eva_sub_cli.main.DockerValidator') as m_docker_validator:
orchestrate_process(
self.test_sub_dir, self.mapping_file, self.metadata_json, self.metadata_xlsx, tasks=[VALIDATE],
resume=False
)
m_get_vcf.assert_called_once_with(self.mapping_file)
m_docker_validator.assert_any_call(
self.mapping_file, self.test_sub_dir, self.metadata_json, self.metadata_xlsx,
submission_config=m_config.return_value
)
with m_docker_validator() as validator:
validator.validate.assert_called_once_with()
validator.create_reports.assert_called_once_with()
validator.update_config_with_validation_result.assert_called_once_with()


def test_orchestrate_validate_submit(self):
with patch('eva_sub_cli.main.get_vcf_files') as m_get_vcf, \
patch('eva_sub_cli.main.WritableConfig') as m_config, \
patch('eva_sub_cli.main.DockerValidator') as m_docker_validator, \
patch('eva_sub_cli.main.StudySubmitter') as m_submitter:
# Empty config
m_config.return_value = {}

orchestrate_process(
self.test_sub_dir, self.mapping_file, self.metadata_json, self.metadata_xlsx, tasks=[SUBMIT],
resume=False
)
m_get_vcf.assert_called_once_with(self.mapping_file)
# Validate was run because the config show it was not run successfully before
m_docker_validator.assert_any_call(
self.mapping_file, self.test_sub_dir, self.metadata_json, self.metadata_xlsx,
submission_config=m_config.return_value
)
with m_docker_validator() as validator:
validator.validate.assert_called_once_with()
validator.create_reports.assert_called_once_with()
validator.update_config_with_validation_result.assert_called_once_with()

# Submit was created
m_submitter.assert_any_call(self.test_sub_dir, m_get_vcf.return_value, self.metadata_json,
submission_config=m_config.return_value)
with m_submitter() as submitter:
submitter.submit.assert_called_once_with(resume=False)

def test_orchestrate_validate_no_submit(self):
with patch('eva_sub_cli.main.get_vcf_files') as m_get_vcf, \
patch('eva_sub_cli.main.WritableConfig') as m_config, \
patch('eva_sub_cli.main.DockerValidator') as m_docker_validator, \
patch('eva_sub_cli.main.StudySubmitter') as m_submitter:
# Empty config
m_config.return_value = {READY_FOR_SUBMISSION_TO_EVA: True}

orchestrate_process(
self.test_sub_dir, self.mapping_file, self.metadata_json, self.metadata_xlsx, tasks=[SUBMIT],
resume=False
)
m_get_vcf.assert_called_once_with(self.mapping_file)
# Validate was not run because the config showed it was run successfully before
assert m_docker_validator.call_count == 0

# Submit was created
m_submitter.assert_any_call(self.test_sub_dir, m_get_vcf.return_value, self.metadata_json,
submission_config=m_config.return_value)
with m_submitter() as submitter:
submitter.submit.assert_called_once_with(resume=False)
10 changes: 5 additions & 5 deletions tests/test_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def test_submit(self):

# Set the side_effect attribute to return different responses
with patch('eva_sub_cli.submit.requests.post', return_value=mock_submit_response) as mock_post, \
patch.object(StudySubmitter, 'upload_submission'), \
patch.object(StudySubmitter, '_upload_submission'), \
patch.object(StudySubmitter, 'verify_submission_dir'), \
patch.object(StudySubmitter, 'update_config_with_submission_id_and_upload_url'), \
patch.object(self.submitter, 'sub_config', {READY_FOR_SUBMISSION_TO_EVA: True}), \
Expand All @@ -63,7 +63,7 @@ def test_submit_with_config(self):
sub_config.write()

with patch('eva_sub_cli.submit.requests.post', return_value=mock_submit_response) as mock_post, \
patch.object(StudySubmitter, 'upload_submission'):
patch.object(StudySubmitter, '_upload_submission'):
with self.submitter as submitter:
submitter.submit()

Expand Down Expand Up @@ -104,8 +104,8 @@ def test_upload_submission(self):
test_url = 'http://example.com/'
with patch.object(StudySubmitter, 'upload_file') as mock_upload_file, \
patch.object(self.submitter, 'sub_config', {READY_FOR_SUBMISSION_TO_EVA: True}):

self.submitter.upload_submission(submission_upload_url=test_url)
self.submitter.sub_config[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL] = test_url
self.submitter._upload_submission()
for vcf_file in self.submitter.vcf_files:
mock_upload_file.assert_any_call(test_url, vcf_file)
mock_upload_file.assert_called_with(test_url, self.submitter.metadata_file)
Expand All @@ -114,6 +114,6 @@ def test_upload_file(self):
test_url = 'http://example.com/'
with patch('eva_sub_cli.submit.requests.put') as mock_put:
file_to_upload = os.path.join(self.resource_dir, 'EVA_Submission_template.V1.1.4.xlsx')
self.submitter.upload_file(submission_upload_url=test_url, input_file=file_to_upload)
self.submitter._upload_file(submission_upload_url=test_url, input_file=file_to_upload)
assert mock_put.mock_calls[0][1][0] == test_url + os.path.basename(file_to_upload)
# Cannot test the content of the upload as opening the same file twice give different object

0 comments on commit da83513

Please sign in to comment.