Skip to content

Commit

Permalink
change get_ena_submission_list to a cronjob and add trigger_submissio…
Browse files Browse the repository at this point in the history
…n_to_ena which gets approved sequences from a github repo
  • Loading branch information
anna-parker committed Aug 12, 2024
1 parent 6f27094 commit 6cce7e5
Show file tree
Hide file tree
Showing 13 changed files with 403 additions and 98 deletions.
3 changes: 2 additions & 1 deletion ena-submission/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.snakemake/
results/
results/
__pycache__
31 changes: 26 additions & 5 deletions ena-submission/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,34 @@ rule get_ena_submission_list:
--log-level {params.log_level} \
"""

rule get_ena_submission_list_and_sleep:
rule trigger_submission_to_ena:
input:
file="results/ena_submission_list.json"
script="scripts/trigger_submission_to_ena.py",
config="results/config.yaml",
output:
submitted=touch("results/triggered"),
params:
log_level=LOG_LEVEL,
shell:
"""
python {input.script} \
--config-file {input.config} \
--log-level {params.log_level} \
"""

rule trigger_submission_to_ena_from_file: # for testing
input:
script="scripts/trigger_submission_to_ena.py",
input_file="results/approved_ena_submission_list.json",
config="results/config.yaml",
output:
file="results/sleep.txt"
submitted=touch("results/triggered_from_file"),
params:
log_level=LOG_LEVEL,
shell:
"""
sleep 360
touch {output.file}
python {input.script} \
--config-file {input.config} \
--input-file {input.input_file} \
--log-level {params.log_level} \
"""
3 changes: 3 additions & 0 deletions ena-submission/config/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@ username: external_metadata_updater
password: external_metadata_updater
keycloak_client_id: backend-client
ingest_pipeline_submitter: insdc_ingest_user
github_username: fake_username
github_pat: fake_pat
github_url: https://api.github.com/repos/pathoplexus/ena-submission/contents/test/approved_ena_submission_list.json?ref=main
14 changes: 8 additions & 6 deletions ena-submission/flyway/sql/V1__Initial_Schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,28 @@ CREATE TABLE submission_table (
accession text not null,
version bigint not null,
organism text not null,
groupId bigint not null,
group_id bigint not null,
errors jsonb,
warnings jsonb,
status_all text not null,
started_at timestamp not null,
finished_at timestamp,
metadata jsonb,
aligned_nucleotide_sequences jsonb,
external_metadata jsonb,
primary key (accession, version)
);

CREATE TABLE project_table (
groupId bigint not null,
group_id bigint not null,
organism text not null,
errors jsonb,
warnings jsonb,
status text not null,
started_at timestamp not null,
finished_at timestamp,
project_metadata jsonb,
primary key (groupId, organism)
result jsonb,
primary key (group_id, organism)
);

CREATE TABLE sample_table (
Expand All @@ -32,7 +34,7 @@ CREATE TABLE sample_table (
status text not null,
started_at timestamp not null,
finished_at timestamp,
sample_metadata jsonb,
result jsonb,
primary key (accession, version)
);

Expand All @@ -44,6 +46,6 @@ CREATE TABLE assembly_table (
status text not null,
started_at timestamp not null,
finished_at timestamp,
assembly_metadata jsonb,
result jsonb,
primary key (accession, version)
);
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion ena-submission/scripts/get_ena_submission_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import yaml
from call_loculus import get_released_data
from notifications import get_slack_config, notify, upload_file_with_comment
from submission_db import get_db_config, in_submission_table
from submission_db_helper import get_db_config, in_submission_table

logger = logging.getLogger(__name__)
logging.basicConfig(
Expand Down
84 changes: 0 additions & 84 deletions ena-submission/scripts/submission_db.py

This file was deleted.

130 changes: 130 additions & 0 deletions ena-submission/scripts/submission_db_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import os
from dataclasses import dataclass
from datetime import datetime
from enum import Enum

import psycopg2
import pytz


@dataclass
class DBConfig:
    """Credentials and host used to open a connection to the database.

    Instances are built by ``get_db_config`` and consumed by
    ``connect_to_db``.
    """

    # Login role passed to psycopg2 as ``user``.
    username: str
    # Password for ``username``.
    password: str
    # Host name or address of the database server.
    host: str


def get_db_config(
    db_password_default: str, db_username_default: str, db_host_default: str
) -> "DBConfig":
    """Build a ``DBConfig`` from the environment, falling back to defaults.

    Each setting is read from its environment variable (``DB_USERNAME``,
    ``DB_PASSWORD``, ``DB_HOST``). A variable that is unset *or empty* is
    replaced by the corresponding ``*_default`` argument.

    Args:
        db_password_default: Password used when ``DB_PASSWORD`` is not set.
        db_username_default: Username used when ``DB_USERNAME`` is not set.
        db_host_default: Host used when ``DB_HOST`` is not set.

    Returns:
        The resolved database configuration.
    """
    # ``or`` treats both a missing variable (None) and an empty string as
    # "not provided", matching a plain truthiness check on the value.
    return DBConfig(
        username=os.getenv("DB_USERNAME") or db_username_default,
        password=os.getenv("DB_PASSWORD") or db_password_default,
        host=os.getenv("DB_HOST") or db_host_default,
    )


class StatusAll(Enum):
    """Overall state of an entry in ``submission_table``.

    The member *name* (not the integer value) is what gets stored in the
    ``status_all`` text column, because ``str()`` of a member yields its name.
    """

    READY_TO_SUBMIT = 0
    SUBMITTING_PROJECT = 1
    SUBMITTING_SAMPLE = 2
    SUBMITTING_ASSEMBLY = 3
    SUBMITTED_ALL = 4
    SENT_TO_LOCULUS = 5
    HAS_ERRORS_PROJECT = 6
    HAS_ERRORS_ASSEMBLY = 7
    HAS_ERRORS_SAMPLE = 8

    def __str__(self) -> str:
        """Return the bare member name, e.g. ``"READY_TO_SUBMIT"``."""
        return self.name


class Status(Enum):
    """Four-state status enum whose ``str()`` is the member name.

    NOTE(review): presumably backs the ``status`` text column of the
    project/sample/assembly tables -- confirm against the callers.
    """

    READY = 0
    SUBMITTING = 1
    SUBMITTED = 2
    HAS_ERRORS = 3

    def __str__(self) -> str:
        """Return the bare member name, e.g. ``"READY"``."""
        return self.name


@dataclass
class SubmissionTableEntry:
    """One row of ``submission_table``, in the table's column order.

    ``add_to_submission_table`` inserts the fields positionally, so the
    field order here must stay aligned with the table definition.
    """

    # --- required identification fields ---
    accession: str
    # NOTE(review): the schema column is bigint and in_submission_table takes
    # an int -- confirm whether ``str`` is the intended annotation here.
    version: str
    organism: str
    group_id: int

    # --- optional fields; None is passed through to the insert as-is ---
    errors: str | None = None
    warnings: str | None = None
    # Stored via str(), i.e. as the enum member name.
    status_all: StatusAll = StatusAll.READY_TO_SUBMIT
    # Overwritten with the current UTC time by add_to_submission_table.
    started_at: datetime | None = None
    finished_at: datetime | None = None
    # The next three map to jsonb columns; presumably JSON-encoded strings.
    metadata: str | None = None
    aligned_nucleotide_sequences: str | None = None
    external_metadata: str | None = None


def connect_to_db(db_config: DBConfig):
    """Open a psycopg2 connection to the ``loculus`` database.

    The session is started with ``search_path`` set to ``ena-submission`` so
    unqualified table names resolve inside that schema. Neither the database
    nor the schema is created here; both must already exist.

    Args:
        db_config: Username, password and host to connect with.

    Returns:
        An open psycopg2 connection. The caller is responsible for closing it.

    Raises:
        ConnectionError: If psycopg2 cannot establish the connection.
    """
    try:
        return psycopg2.connect(
            dbname="loculus",
            user=db_config.username,
            host=db_config.host,
            password=db_config.password,
            options="-c search_path=ena-submission",
        )
    # psycopg2 reports connection failures as OperationalError, which is not
    # a subclass of the builtin ConnectionError; catching the builtin here
    # would never match and the wrapping below would be dead code.
    except psycopg2.OperationalError as e:
        raise ConnectionError("Could not connect to loculus DB") from e


def in_submission_table(accession: str, version: int, db_config: DBConfig) -> bool:
    """Return whether ``submission_table`` has a row for this accession/version.

    A fresh connection is opened for the lookup and is always closed before
    returning, including when the query raises.

    Args:
        accession: Accession to look up.
        version: Version of the accession to look up.
        db_config: Connection settings passed to ``connect_to_db``.

    Returns:
        True if at least one matching row exists, False otherwise.
    """
    con = connect_to_db(db_config)
    try:
        # The cursor context manager closes the cursor on exit.
        with con.cursor() as cur:
            cur.execute(
                "select * from submission_table where accession=%s and version=%s",
                (f"{accession}", f"{version}"),
            )
            return bool(cur.rowcount)
    finally:
        # ``with con`` would only end the transaction, not close the
        # connection, so close explicitly to avoid leaking it.
        con.close()


def add_to_submission_table(db_config: DBConfig, submission_table_entry: SubmissionTableEntry):
    """Insert ``submission_table_entry`` as a new row of ``submission_table``.

    Side effect: ``submission_table_entry.started_at`` is overwritten with the
    current UTC time before the insert, so the caller's object is mutated.

    The values are inserted positionally (no column list), so the order below
    must match the column order of ``submission_table``.

    Args:
        db_config: Connection settings passed to ``connect_to_db``.
        submission_table_entry: The row to insert.

    Raises:
        Whatever psycopg2 raises for a failed insert or commit; the
        connection is closed in either case.
    """
    con = connect_to_db(db_config)
    try:
        submission_table_entry.started_at = datetime.now(tz=pytz.utc)
        with con.cursor() as cur:
            cur.execute(
                "insert into submission_table values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                (
                    submission_table_entry.accession,
                    submission_table_entry.version,
                    submission_table_entry.organism,
                    submission_table_entry.group_id,
                    submission_table_entry.errors,
                    submission_table_entry.warnings,
                    # Stored as the enum member name (StatusAll.__str__).
                    str(submission_table_entry.status_all),
                    submission_table_entry.started_at,
                    submission_table_entry.finished_at,
                    submission_table_entry.metadata,
                    submission_table_entry.aligned_nucleotide_sequences,
                    submission_table_entry.external_metadata,
                ),
            )
        con.commit()
    finally:
        # Close even when execute/commit raises; closing an uncommitted
        # connection discards the pending transaction.
        con.close()
Loading

0 comments on commit 6cce7e5

Please sign in to comment.