Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
corneliusroemer committed Jul 16, 2024
1 parent 503a67e commit 658c4ec
Show file tree
Hide file tree
Showing 10 changed files with 298 additions and 0 deletions.
19 changes: 19 additions & 0 deletions submission/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
__pycache__/
*.py[cod]
*.pyo
*.pyd
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
6 changes: 6 additions & 0 deletions submission/.mambarc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
channels:
- conda-forge
- bioconda
repodata_use_zst: true
channel_priority: strict
download_threads: 20
9 changes: 9 additions & 0 deletions submission/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
name: loculus-submission
channels:
- conda-forge
- bioconda
dependencies:
- python=3.12
- pip=24
- requests=2
- xmltodict
15 changes: 15 additions & 0 deletions submission/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Basic package config to make it installable
[project]
name = "loculus_submission"
version = "0.1.0"
requires-python = ">=3.12"

# [project.scripts]
# prepro = "loculus_preprocessing.__main__:cli_entry"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src/submission"]
1 change: 1 addition & 0 deletions submission/requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pytest
37 changes: 37 additions & 0 deletions submission/ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
target-version = "py312"
line-length = 100

[lint]
select = [
"E",
"W",
"F",
"B",
"UP",
"SIM",
"I",
"C90",
"N",
"S",
"C4",
"DTZ",
"EM",
"ISC",
"ICN",
"PIE",
"Q",
"RSE",
"RET",
"PL",
"FURB",
"LOG",
"RUF",
]
ignore = ["ISC001"]
preview = true

# ignore-init-module-imports = true
# select = ["E", "F", "B"]

# Disable the assert (S101) and magic-value (PLR2004) rules for files under `tests/`.
[per-file-ignores]
"tests/**" = ["S101", "PLR2004"]
1 change: 1 addition & 0 deletions submission/src/submission/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .ena_submission import *
80 changes: 80 additions & 0 deletions submission/src/submission/ena_submission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import random
from collections import defaultdict

import requests
import xmltodict
from requests.auth import HTTPBasicAuth


def recursive_defaultdict():
    """Return a ``defaultdict`` that auto-creates nested ``defaultdict`` levels.

    Accessing a missing key at any depth yields another dictionary of the
    same kind, so deep paths can be assigned without creating the
    intermediate levels first.
    """
    nested = defaultdict(recursive_defaultdict)
    return nested

def get_submission_dict():
    """Build the ``SUBMISSION_SET`` skeleton containing a single empty ``ADD`` action.

    Returns:
        A recursive defaultdict in which
        ``["SUBMISSION_SET"]["SUBMISSION"]["ACTIONS"]["ACTION"]["ADD"]`` is ``None``.
    """
    root = recursive_defaultdict()
    actions = root["SUBMISSION_SET"]["SUBMISSION"]["ACTIONS"]
    actions["ACTION"]["ADD"] = None
    return root

def create_project(config):
    """Build a placeholder project XML document and post it via ``post_webin``.

    Args:
        config: Passed through unchanged to ``post_webin``.

    Returns:
        Whatever ``post_webin`` returns for the generated document.
    """

    def build_project_xml(alias, title, description, center_name):
        # NOTE: center_name is accepted but not written into the document.
        project_entry = {
            # A random 4-digit suffix is appended to the alias on every call.
            "@alias": f"{alias}{random.randint(1000, 9999)}",
            "TITLE": title,
            "DESCRIPTION": description,
            "SUBMISSION_PROJECT": {"SEQUENCING_PROJECT": None},
        }
        document = {
            "WEBIN": {
                **get_submission_dict(),
                "PROJECT_SET": {"PROJECT": project_entry},
            }
        }
        return xmltodict.unparse(document, pretty=True)

    payload = build_project_xml("aliasTBD", "titleTBD", "descriptionTBD", "centerTBD")
    return post_webin(payload, config)

def create_sample(config):
    """Build a placeholder sample XML document and post it via ``post_webin``.

    Args:
        config: Passed through unchanged to ``post_webin``.

    Returns:
        Whatever ``post_webin`` returns for the generated document.
    """

    def build_sample_xml(alias, taxon_id, scientific_name, attributes):
        # Each attribute becomes one TAG/VALUE pair, in the mapping's order.
        attribute_entries = [
            {"TAG": tag, "VALUE": content} for tag, content in attributes.items()
        ]
        sample_entry = {
            # A random 4-digit suffix is appended to the alias on every call.
            "@alias": f"{alias}{random.randint(1000, 9999)}",
            "TITLE": "titleTBD",
            "SAMPLE_NAME": {
                "TAXON_ID": taxon_id,
                "SCIENTIFIC_NAME": scientific_name,
                "COMMON_NAME": None,
            },
            "SAMPLE_ATTRIBUTES": {"SAMPLE_ATTRIBUTE": attribute_entries},
        }
        document = {
            "WEBIN": {
                **get_submission_dict(),
                "SAMPLE_SET": {"SAMPLE": sample_entry},
            }
        }
        return xmltodict.unparse(document, pretty=True)

    placeholder_attributes = {
        "collection date": "not collected",
        "geographic location (country and/or sea)": "not collected",
        "ENA-CHECKLIST": "ERC000011",
    }
    payload = build_sample_xml("aliasTBD", 1284369, "nameTBD", placeholder_attributes)
    return post_webin(payload, config)

def create_assembly(config):
    """Placeholder for assembly creation; not implemented yet.

    Args:
        config: Currently unused.

    Returns:
        None.
    """
    # Your code for create_assembly would go here
    return None

def post_webin(xml, config):
    """POST an XML document to the configured endpoint and return the parsed reply.

    Args:
        xml: XML payload, sent as the request body.
        config: Object providing ``url``, ``username`` and ``password``
            attributes. An optional ``timeout`` attribute (seconds) overrides
            the default of 60 seconds.

    Returns:
        The response body parsed with ``xmltodict.parse``.

    Raises:
        Exception: If the response status code is not 200. The exception args
            are ``("Error:", status_code, response_text)``.
        requests.exceptions.RequestException: Propagated from ``requests``,
            e.g. on connection failure or when the timeout expires.
    """
    headers = {"Accept": "application/xml", "Content-Type": "application/xml"}
    response = requests.post(
        config.url,
        auth=HTTPBasicAuth(config.username, config.password),
        data=xml,
        headers=headers,
        # requests waits forever unless a timeout is given; bound the wait so
        # an unresponsive server cannot hang the caller.
        timeout=getattr(config, "timeout", 60),
    )
    if response.status_code != 200:
        raise Exception("Error:", response.status_code, response.text)
    return xmltodict.parse(response.text)
66 changes: 66 additions & 0 deletions submission/tasks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Tasks

## Create bioprojects per group/organism

Unique (namespaced) id per group (per organism: maybe not?) per loculus instance

- com.pathoplexus.ebola-zaire/1/private
- com.pathoplexus.ebola-zaire/1/public

Embedded in the project metadata (on ENA) to allow parsing out from ENA

When you start up, you query ENA for all the projects and build a dict that maps `id -> project_accession`

### Testing

Can test with dev instance

Per group:

To create group:

- Make id
- Create group
- Verify that group exists

Usually, can get the accession back from ENA

## Look up existing projects from ENA

Go from ENA -> existing groups

## Feeding back ENA data to the backend (POSTing)

Have a table in the backend that maps `id -> project_accession`; potentially just add a column to the groups table if there's one project per group across all organisms (rather than one per group per organism)

### Sequences

Create a sample for the metadata

Config: mapping from loculus column names to the structured sample identifier

In the sample, mention crossreference to Loculus to avoid duplicates

### Testing

Can test with dev server

## How to store state in the backend

### Assemblies

Programmatic submission via CLI

Querying for pending/accepted/rejected assemblies and their final accession

### Moving from private to public

Edit the sample/accession to belong to a different project to move from private to public

### Testing

Can test for samples how this works on the dev instance

For assemblies, we (probably) need to operate on the live instance and then ask ENA to delete them afterwards

Write high-level functions that abstract the complexity away
64 changes: 64 additions & 0 deletions submission/tests/test_ena_submission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from unittest.mock import Mock, patch

import pytest
from submission import Config, create_assembly, create_project, create_sample

# Shared configuration passed to every create_* call in these tests.
# The credentials and URL are dummies; requests.post is patched by the fixture below.
test_config = Config(
username="test_user",
password="test_password",
url="https://test.url"
)

# Example XML responses (simplified).
# The project receipt is the body returned by the mocked requests.post.
test_project_xml_response = """
<RECEIPT>
<PROJECT accession="PRJ123456" alias="alias123" status="received"/>
</RECEIPT>
"""

# Sample receipt counterpart; not referenced by the tests visible in this file.
test_sample_xml_response = """
<RECEIPT>
<SAMPLE accession="SMP123456" alias="alias123" status="received"/>
</RECEIPT>
"""

@pytest.fixture
def mock_requests_post_success():
    """Patch ``submission.requests.post`` to return a 200 reply with the project receipt.

    Yields the patched ``post`` mock so tests can inspect calls or adjust the
    canned response.
    """
    canned_response = Mock(status_code=200, text=test_project_xml_response)
    with patch('submission.requests.post', return_value=canned_response) as patched_post:
        yield patched_post

def test_create_project_success(mock_requests_post_success):
    """A 200 reply yields the parsed receipt, and exactly one POST is made."""
    receipt = create_project(test_config)
    project = receipt['RECEIPT']['PROJECT']
    assert project['@accession'] == 'PRJ123456'
    mock_requests_post_success.assert_called_once()

def test_create_sample_success(mock_requests_post_success):
    """Sample creation returns the (mocked) parsed receipt and posts exactly once."""
    # The fixture's response body is a project receipt, so the parser is
    # replaced to hand back a sample-shaped result instead.
    parsed_receipt = {'RECEIPT': {'SAMPLE': {'@accession': 'SMP123456'}}}
    with patch('submission.xmltodict.parse', return_value=parsed_receipt):
        result = create_sample(test_config)
    assert result['RECEIPT']['SAMPLE']['@accession'] == 'SMP123456'
    mock_requests_post_success.assert_called_once()

def test_create_project_failure(mock_requests_post_success):
    """A non-200 reply makes project creation raise an error mentioning "Error:"."""
    failing_response = mock_requests_post_success.return_value
    failing_response.status_code = 500
    failing_response.text = "Internal Server Error"
    with pytest.raises(Exception) as raised:
        create_project(test_config)
    assert "Error:" in str(raised.value)
    mock_requests_post_success.assert_called_once()

# Additional tests for create_assembly can be designed similarly once its implementation details are known.

0 comments on commit 658c4ec

Please sign in to comment.