Skip to content

Commit

Permalink
Improve error handling to add errors to DB, fix tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
anna-parker committed Jul 19, 2024
1 parent 2de3d4d commit f59cdd4
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 165 deletions.
120 changes: 52 additions & 68 deletions ena-submission/scripts/create_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import requests
import yaml
from call_loculus import get_group_info
from ena_submission_helper import create_ena_project
from ena_submission_helper import create_ena_project, CreationResults
from ena_types import (
OrganismType,
ProjectLink,
Expand Down Expand Up @@ -124,10 +124,6 @@ def create_project(log_level, config_file):
conditions=seq_key,
update_values=update_values,
)
if number_rows_updated > 1:
raise psycopg2.DatabaseError(
"found multiple rows in submission_table with same primary key"
)
else:
update_values = {"status_all": StatusAll.SUBMITTING_PROJECT.name}
number_rows_updated = update_db_where_conditions(
Expand All @@ -136,14 +132,6 @@ def create_project(log_level, config_file):
conditions=seq_key,
update_values=update_values,
)
if number_rows_updated > 1:
raise psycopg2.DatabaseError(
"found multiple rows in submission_table with same primary key"
)
elif len(corresponding_project) > 1:
raise psycopg2.DatabaseError(
"found multiple rows in project_table with same primary key"
)
else:
# If not: create project_entry, change status to SUBMITTING_PROJECT
entry = {
Expand All @@ -159,10 +147,6 @@ def create_project(log_level, config_file):
conditions=seq_key,
update_values=update_values,
)
if number_rows_updated > 1:
raise psycopg2.DatabaseError(
"found multiple rows in submission table with same primary key"
)
conditions = {"status_all": StatusAll.SUBMITTING_PROJECT.name}
submitting_project = find_conditions_in_db(
db_config, table_name="submission_table", conditions=conditions
Expand All @@ -175,22 +159,16 @@ def create_project(log_level, config_file):
corresponding_project = find_conditions_in_db(
db_config, table_name="project_table", conditions=group_key
)
if len(corresponding_project) == 1:
if corresponding_project[0]["status"] == Status.SUBMITTED.name:
update_values = {"status_all": StatusAll.SUBMITTED_PROJECT.name}
number_rows_updated = update_db_where_conditions(
db_config,
table_name="submission_table",
conditions=seq_key,
update_values=update_values,
)
if number_rows_updated > 1:
raise psycopg2.DatabaseError(
"found multiple rows in submission_table with same primary key"
)
elif len(corresponding_project) > 1:
raise psycopg2.DatabaseError(
"found multiple rows in project_table with same primary key"
if (
len(corresponding_project) == 1
and corresponding_project[0]["status"] == Status.SUBMITTED.name
):
update_values = {"status_all": StatusAll.SUBMITTED_PROJECT.name}
number_rows_updated = update_db_where_conditions(
db_config,
table_name="submission_table",
conditions=seq_key,
update_values=update_values,
)
# Check project_table for newly added sequences
conditions = {"status": Status.READY.name}
Expand All @@ -204,43 +182,49 @@ def create_project(log_level, config_file):
group_info = get_group_info(config, row["group_id"])[0]["group"]

project_set = construct_project_set_object(group_info, config, metadata_dict, row)
try:
update_values = {"status": Status.SUBMITTING.name}
number_rows_updated = update_db_where_conditions(
db_config,
table_name="project_table",
conditions=group_key,
update_values=update_values,
)
if number_rows_updated > 1:
raise psycopg2.DatabaseError(
"found multiple rows in project table with same primary key"
)
results = create_ena_project(config, project_set)
update_values = {"status": Status.SUBMITTED.name, "result": json.dumps(results)}
number_rows_updated = update_db_where_conditions(
db_config,
table_name="project_table",
conditions=group_key,
update_values=update_values,
)
if number_rows_updated > 1:
raise psycopg2.DatabaseError(
"found multiple rows in project table with same primary key"
update_values = {"status": Status.SUBMITTING.name}
number_rows_updated = update_db_where_conditions(
db_config,
table_name="project_table",
conditions=group_key,
update_values=update_values,
)
if number_rows_updated != 1:
# state not correctly updated - do not start submission
continue
project_creation_results: CreationResults = create_ena_project(config, project_set)
if project_creation_results.results:
update_values = {
"status": Status.SUBMITTED.name,
"result": json.dumps(project_creation_results.results),
}
number_rows_updated = 0
tries = 0
while number_rows_updated != 1 and tries < 3:
# If the state was not updated correctly, try again (at most 3 attempts in total)
number_rows_updated = update_db_where_conditions(
db_config,
table_name="project_table",
conditions=group_key,
update_values=update_values,
)
except requests.exceptions.RequestException as e:
# set to has_errors, add errors
update_values = {"status": Status.HAS_ERRORS.name, "errors": json.dumps(e.response)}
number_rows_updated = update_db_where_conditions(
db_config,
table_name="project_table",
conditions=group_key,
update_values=update_values,
)
if number_rows_updated > 1:
raise psycopg2.DatabaseError(
"found multiple rows in project table with same primary key"
tries += 1
else:
update_values = {
"status": Status.HAS_ERRORS.name,
"errors": project_creation_results.errors,
}
number_rows_updated = 0
tries = 0
while number_rows_updated != 1 and tries < 3:
# If the state was not updated correctly, try again (at most 3 attempts in total)
number_rows_updated = update_db_where_conditions(
db_config,
table_name="project_table",
conditions=group_key,
update_values=update_values,
)
tries += 1
# Check project_table for sequences with errors or in submitting status for too long
entries_with_errors = find_conditions_in_db(
db_config, table_name="project_table", conditions={"status": Status.HAS_ERRORS.name}
Expand Down
93 changes: 57 additions & 36 deletions ena-submission/scripts/ena_submission_helper.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
import random
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Dict, List, Optional

import requests
import xmltodict
from ena_types import ProjectSet, XmlAttribute
from requests.auth import HTTPBasicAuth


@dataclass
class SubmissionConfig:
    """Settings read by ``post_webin``: target URL and HTTP basic auth credentials.

    The password is left out of the generated ``repr`` so that printing or
    logging a config object does not expose the secret. Construction and
    equality comparison are unaffected.
    """

    ena_submission_username: str
    # repr=False keeps the secret out of repr()/str() output.
    ena_submission_password: str = field(repr=False)
    ena_submission_url: str


@dataclass
class CreationResults:
    """Outcome of a creation request: collected messages plus optional results.

    ``results`` defaults to ``None`` when it is not supplied.
    """

    # Error messages collected while handling the request.
    errors: List[str]
    # Warning messages collected while handling the request.
    warnings: List[str]
    # String-to-string mapping of results; None unless explicitly provided.
    results: Optional[Dict[str, str]] = None


def recursive_defaultdict():
    """Return a defaultdict whose missing keys are filled by this same factory.

    Arbitrarily deep keys can therefore be assigned without creating the
    intermediate levels first.
    """
    nested = defaultdict(recursive_defaultdict)
    return nested

Expand Down Expand Up @@ -44,51 +60,56 @@ def get_submission_dict():
return submission


def create_ena_project(config: SubmissionConfig, project_set: ProjectSet) -> CreationResults:
    """Post a project set via ``post_webin`` and report the outcome.

    The project set is merged with the submission dictionary into a ``WEBIN``
    XML document and posted. Failures are reported through the ``errors``
    list of the returned object instead of being raised.

    Args:
        config: Settings passed through to ``post_webin``.
        project_set: Project definition, converted with ``dataclass_to_dict``.

    Returns:
        A ``CreationResults``. On success ``results`` holds the keys
        ``ena_project_accession`` and ``ena_submission_accession``. Otherwise
        ``results`` is ``None`` and ``errors`` contains one message
        describing what went wrong.
    """
    errors = []
    warnings = []

    def get_project_xml(project_set):
        submission_set = get_submission_dict()
        project_set = {"project_set": dataclass_to_dict(project_set)}
        webin = {"WEBIN": {**submission_set, **project_set}}
        return xmltodict.unparse(webin, pretty=True)

    xml = get_project_xml(project_set)

    # Pre-bind so the handler below can tell "no response at all" (timeout,
    # connection error) apart from "response with an error status".
    response = None
    try:
        response = post_webin(xml, config)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        if response is None:
            error_message = f"Request failed without a response: {e!r}"
        else:
            error_message = (
                f"Request failed with status:{response.status_code}. "
                f"Request: {response.request}, Response: {response.text}"
            )
        errors.append(error_message)
        return CreationResults(results=None, errors=errors, warnings=warnings)

    try:
        receipt = xmltodict.parse(response.text)["RECEIPT"]
        project_accession = receipt["PROJECT"]["@accession"]
        submission_accession = receipt["SUBMISSION"]["@accession"]
        valid = receipt["@success"] == "true" and project_accession and submission_accession
    except Exception:
        # Malformed XML or a receipt missing the expected elements. Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        valid = False

    if not valid:
        error_message = (
            f"Response is in unexpected format. "
            f"Request: {response.request}, Response: {response.text}"
        )
        errors.append(error_message)
        return CreationResults(results=None, errors=errors, warnings=warnings)

    project_results = {
        "ena_project_accession": project_accession,
        "ena_submission_accession": submission_accession,
    }
    return CreationResults(results=project_results, errors=errors, warnings=warnings)


def post_webin(xml, config: SubmissionConfig):
    """POST an XML document to the URL given in ``config``.

    Args:
        xml: Request body, sent as ``application/xml``.
        config: Supplies ``ena_submission_url`` and the username/password
            used for HTTP basic authentication.

    Returns:
        The object returned by ``requests.post``. The HTTP status is not
        checked here; callers decide how to handle error statuses.

    Raises:
        Nothing is caught here, so any exception from ``requests.post``
        propagates to the caller.
    """
    headers = {"Accept": "application/xml", "Content-Type": "application/xml"}
    return requests.post(
        config.ena_submission_url,
        auth=HTTPBasicAuth(config.ena_submission_username, config.ena_submission_password),
        data=xml,
        headers=headers,
        timeout=10,  # give up if the server has not responded within 10 seconds
    )
10 changes: 8 additions & 2 deletions ena-submission/scripts/ena_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,11 @@ def __str__(self):

@dataclass
class ProjectType:
center_name: XmlAttribute
alias: XmlAttribute
name: str
title: str
description: str
center_name: Optional[XmlAttribute] = None
alias: Optional[XmlAttribute] = None
collaborators: Optional[ProjectTypeCollaborators] = None
submission_project: Optional[SubmissionProject] = None
umbrella_project: Optional[UmbrellaProject] = None
Expand All @@ -94,6 +94,12 @@ class ProjectType:
project_attributes: Optional[Dict[str, str]] = None


def default_project_type():
    """Build a ProjectType filled with placeholder name, title and description."""
    placeholder_fields = {
        "name": "default_name",
        "title": "default_title",
        "description": "default_description",
    }
    return ProjectType(**placeholder_fields)


@dataclass
class ProjectSet:
project: List[ProjectType]
61 changes: 61 additions & 0 deletions ena-submission/scripts/test_ena_submission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import unittest
from unittest import mock

from ena_submission_helper import SubmissionConfig, create_ena_project
from ena_types import default_project_type
from requests import exceptions

# Mock submission settings shared by the tests below. The tests patch
# requests.post, so the URL is never actually contacted.
test_config = SubmissionConfig(
    ena_submission_username="test_user",
    ena_submission_password="test_password",
    ena_submission_url="https://test.url",
)

# Example XML responses
# Receipt marked success="true" that carries both a PROJECT and a SUBMISSION
# accession, i.e. the shape create_ena_project treats as valid.
test_project_xml_response = """
<RECEIPT receiptDate="2017-05-09T16:58:08.634+01:00" submissionFile="submission.xml" success="true">
<PROJECT accession="PRJEB20767" alias="cheddar_cheese" status="PRIVATE" />
<SUBMISSION accession="ERA912529" alias="cheese" />
<MESSAGES>
<INFO>This submission is a TEST submission and will be discarded within 24 hours</INFO>
</MESSAGES>
<ACTIONS>ADD</ACTIONS>
</RECEIPT>
"""


def mock_requests_post(status_code, text):
    """Build a Mock standing in for a response with the given status code and body text."""
    return mock.Mock(status_code=status_code, text=text)


class ProjectCreationTests(unittest.TestCase):
    """Tests for create_ena_project with ``requests.post`` patched out."""

    @mock.patch("requests.post")
    def test_create_project_success(self, mock_post):
        """A 200 response with a valid receipt yields both accessions and no errors."""
        mock_post.return_value = mock_requests_post(200, test_project_xml_response)
        # NOTE(review): create_ena_project is annotated as taking a ProjectSet,
        # but a ProjectType is passed here - confirm this is intended.
        project_set = default_project_type()
        response = create_ena_project(test_config, project_set)
        desired_response = {
            "ena_project_accession": "PRJEB20767",
            "ena_submission_accession": "ERA912529",
        }
        # unittest assertions are not stripped under -O and report a diff on failure.
        self.assertEqual(response.results, desired_response)
        self.assertEqual(response.errors, [])

    @mock.patch("requests.post")
    def test_create_project_failure(self, mock_post):
        """A failing raise_for_status is reported in ``errors`` with no results."""
        mock_post.return_value = mock_requests_post(500, "Internal Server Error")
        mock_post.return_value.raise_for_status.side_effect = exceptions.RequestException()
        project_set = default_project_type()
        response = create_ena_project(test_config, project_set)
        error_message_part = "Request failed with status:500."
        self.assertIsNone(response.results)
        self.assertIn(error_message_part, response.errors[0])


if __name__ == "__main__":
    unittest.main()
Loading

0 comments on commit f59cdd4

Please sign in to comment.