Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat(ena-submission): Trigger ena submission #2412

Merged
merged 3 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ena-submission/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.snakemake/
results/
results/
__pycache__
31 changes: 26 additions & 5 deletions ena-submission/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,34 @@ rule get_ena_submission_list:
--log-level {params.log_level} \
"""

rule get_ena_submission_list_and_sleep:
rule trigger_submission_to_ena:
input:
file="results/ena_submission_list.json"
script="scripts/trigger_submission_to_ena.py",
config="results/config.yaml",
output:
submitted=touch("results/triggered"),
params:
log_level=LOG_LEVEL,
shell:
"""
python {input.script} \
--config-file {input.config} \
--log-level {params.log_level} \
"""

rule trigger_submission_to_ena_from_file: # for testing
input:
script="scripts/trigger_submission_to_ena.py",
input_file="results/approved_ena_submission_list.json",
config="results/config.yaml",
output:
file="results/sleep.txt"
submitted=touch("results/triggered_from_file"),
params:
log_level=LOG_LEVEL,
shell:
"""
sleep 360
touch {output.file}
python {input.script} \
--config-file {input.config} \
--input-file {input.input_file} \
--log-level {params.log_level} \
"""
3 changes: 3 additions & 0 deletions ena-submission/config/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@ username: external_metadata_updater
password: external_metadata_updater
keycloak_client_id: backend-client
ingest_pipeline_submitter: insdc_ingest_user
github_username: fake_username
github_pat: fake_pat
github_url: https://api.github.com/repos/pathoplexus/ena-submission/contents/test/approved_ena_submission_list.json?ref=main
14 changes: 8 additions & 6 deletions ena-submission/flyway/sql/V1__Initial_Schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,28 @@ CREATE TABLE submission_table (
accession text not null,
version bigint not null,
organism text not null,
groupId bigint not null,
group_id bigint not null,
errors jsonb,
warnings jsonb,
status_all text not null,
started_at timestamp not null,
finished_at timestamp,
metadata jsonb,
unaligned_nucleotide_sequences jsonb,
external_metadata jsonb,
primary key (accession, version)
);

CREATE TABLE project_table (
groupId bigint not null,
group_id bigint not null,
organism text not null,
errors jsonb,
warnings jsonb,
status text not null,
started_at timestamp not null,
finished_at timestamp,
project_metadata jsonb,
primary key (groupId, organism)
result jsonb,
primary key (group_id, organism)
);

CREATE TABLE sample_table (
Expand All @@ -32,7 +34,7 @@ CREATE TABLE sample_table (
status text not null,
started_at timestamp not null,
finished_at timestamp,
sample_metadata jsonb,
result jsonb,
primary key (accession, version)
);

Expand All @@ -44,6 +46,6 @@ CREATE TABLE assembly_table (
status text not null,
started_at timestamp not null,
finished_at timestamp,
assembly_metadata jsonb,
result jsonb,
primary key (accession, version)
);
anna-parker marked this conversation as resolved.
Show resolved Hide resolved
Binary file not shown.
Binary file not shown.
21 changes: 11 additions & 10 deletions ena-submission/scripts/get_ena_submission_list.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import json
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List
from typing import Any

import click
import yaml
from call_loculus import get_released_data
from notifications import get_slack_config, notify, upload_file_with_comment
from submission_db import get_db_config, in_submission_table
from submission_db_helper import get_db_config, in_submission_table

logger = logging.getLogger(__name__)
logging.basicConfig(
Expand All @@ -22,14 +21,14 @@

@dataclass
class Config:
organisms: List[Dict[str, str]]
organisms: list[dict[str, str]]
organism: str
backend_url: str
keycloak_token_url: str
keycloak_client_id: str
username: str
password: str
ena_specific_metadata: List[str]
ena_specific_metadata: list[str]
ingest_pipeline_submitter: str
db_username: str
db_password: str
Expand All @@ -39,7 +38,7 @@ class Config:
slack_channel_id: str


def get_data_for_submission(config, entries, db_config):
def get_data_for_submission(config, entries, db_config, organism):
"""
Filter data in state APPROVED_FOR_RELEASE:
- data must be state "OPEN" for use
Expand All @@ -66,6 +65,7 @@ def get_data_for_submission(config, entries, db_config):
f"or {config.ingest_pipeline_submitter}. Potential user error: discarding sequence.",
)
continue
item["organism"] = organism
data_dict[key] = item
return data_dict

Expand Down Expand Up @@ -115,7 +115,7 @@ def get_ena_submission_list(log_level, config_file, output_file):
logger.setLevel(log_level)
logging.getLogger("requests").setLevel(logging.WARNING)

with open(config_file) as file:
with open(config_file, encoding="utf-8") as file:
full_config = yaml.safe_load(file)
relevant_config = {key: full_config.get(key, []) for key in Config.__annotations__}
config = Config(**relevant_config)
Expand All @@ -135,14 +135,15 @@ def get_ena_submission_list(log_level, config_file, output_file):
logging.info(f"Getting released sequences for organism: {organism}")

all_entries = get_released_data(config, organism)
entries_to_submit.update(get_data_for_submission(config, all_entries, db_config))
data = get_data_for_submission(config, all_entries, db_config, organism)
entries_to_submit.update(data)

if entries_to_submit:
Path(output_file).write_text(json.dumps(entries_to_submit))
Path(output_file).write_text(json.dumps(entries_to_submit), encoding="utf-8")
send_slack_notification(config, output_file)
else:
logging.info("No sequences found to submit to ENA")
Path(output_file).write_text("")
Path(output_file).write_text("", encoding="utf-8")


if __name__ == "__main__":
Expand Down
84 changes: 0 additions & 84 deletions ena-submission/scripts/submission_db.py

This file was deleted.

Loading
Loading