From 268b4cfe7144eaf667d7ebb9a24b7519486f23ff Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Mon, 12 Aug 2024 15:03:21 +0200 Subject: [PATCH 1/3] change get_ena_submission_list to a cronjob and add trigger_submission_to_ena which gets approved sequences from a github repo --- ena-submission/.gitignore | 3 +- ena-submission/Snakefile | 31 ++++- ena-submission/config/defaults.yaml | 3 + .../flyway/sql/V1__Initial_Schema.sql | 14 +- .../__pycache__/call_loculus.cpython-312.pyc | Bin 10763 -> 0 bytes .../__pycache__/submission_db.cpython-312.pyc | Bin 2516 -> 0 bytes .../scripts/get_ena_submission_list.py | 2 +- ena-submission/scripts/submission_db.py | 84 ----------- .../scripts/submission_db_helper.py | 130 +++++++++++++++++ .../scripts/trigger_submission_to_ena.py | 131 ++++++++++++++++++ .../templates/ena-submission-deployment.yaml | 92 +++++++++++- kubernetes/loculus/values.yaml | 5 + kubernetes/loculus/values_preview_server.yaml | 6 + 13 files changed, 403 insertions(+), 98 deletions(-) delete mode 100644 ena-submission/scripts/__pycache__/call_loculus.cpython-312.pyc delete mode 100644 ena-submission/scripts/__pycache__/submission_db.cpython-312.pyc delete mode 100644 ena-submission/scripts/submission_db.py create mode 100644 ena-submission/scripts/submission_db_helper.py create mode 100644 ena-submission/scripts/trigger_submission_to_ena.py diff --git a/ena-submission/.gitignore b/ena-submission/.gitignore index 9a6ebe65b..ecb73e18f 100644 --- a/ena-submission/.gitignore +++ b/ena-submission/.gitignore @@ -1,2 +1,3 @@ .snakemake/ -results/ \ No newline at end of file +results/ +__pycache__ \ No newline at end of file diff --git a/ena-submission/Snakefile b/ena-submission/Snakefile index 510050f54..88371dbc6 100644 --- a/ena-submission/Snakefile +++ b/ena-submission/Snakefile @@ -63,13 +63,34 @@ rule get_ena_submission_list: --log-level {params.log_level} \ """ -rule get_ena_submission_list_and_sleep: +rule trigger_submission_to_ena: input: - file="results/ena_submission_list.json" + script="scripts/trigger_submission_to_ena.py", + config="results/config.yaml", + output: + submitted=touch("results/triggered"), + params: + log_level=LOG_LEVEL, + shell: + """ + python {input.script} \ + --config-file {input.config} \ + --log-level {params.log_level} \ + """ + +rule trigger_submission_to_ena_from_file: # for testing + input: + script="scripts/trigger_submission_to_ena.py", + input_file="results/approved_ena_submission_list.json", + config="results/config.yaml", output: - file="results/sleep.txt" + submitted=touch("results/triggered_from_file"), + params: + log_level=LOG_LEVEL, shell: """ - sleep 360 - touch {output.file} + python {input.script} \ + --config-file {input.config} \ + --input-file {input.input_file} \ + --log-level {params.log_level} \ """ \ No newline at end of file diff --git a/ena-submission/config/defaults.yaml b/ena-submission/config/defaults.yaml index 81b6f4cb7..6058b5d50 100644 --- a/ena-submission/config/defaults.yaml +++ b/ena-submission/config/defaults.yaml @@ -2,3 +2,6 @@ username: external_metadata_updater password: external_metadata_updater keycloak_client_id: backend-client ingest_pipeline_submitter: insdc_ingest_user +github_username: fake_username +github_pat: fake_pat +github_url: https://api.github.com/repos/pathoplexus/ena-submission/contents/test/approved_ena_submission_list.json?ref=main diff --git a/ena-submission/flyway/sql/V1__Initial_Schema.sql b/ena-submission/flyway/sql/V1__Initial_Schema.sql index 9c77e8c62..31b4f6fc1 100644 --- a/ena-submission/flyway/sql/V1__Initial_Schema.sql +++ b/ena-submission/flyway/sql/V1__Initial_Schema.sql @@ -2,26 +2,28 @@ CREATE TABLE submission_table ( accession text not null, version bigint not null, organism text not null, - groupId bigint not null, + group_id bigint not null, errors jsonb, warnings jsonb, status_all text not null, started_at timestamp not null, finished_at timestamp, + metadata jsonb, + aligned_nucleotide_sequences jsonb, external_metadata jsonb, primary key (accession, version) ); CREATE TABLE project_table ( - groupId bigint not null, + group_id bigint not null, organism text not null, errors jsonb, warnings jsonb, status text not null, started_at timestamp not null, finished_at timestamp, - project_metadata jsonb, - primary key (groupId, organism) + result jsonb, + primary key (group_id, organism) ); CREATE TABLE sample_table ( @@ -32,7 +34,7 @@ CREATE TABLE sample_table ( status text not null, started_at timestamp not null, finished_at timestamp, - sample_metadata jsonb, + result jsonb, primary key (accession, version) ); @@ -44,6 +46,6 @@ CREATE TABLE assembly_table ( status text not null, started_at timestamp not null, finished_at timestamp, - assembly_metadata jsonb, + result jsonb, primary key (accession, version) ); \ No newline at end of file diff --git a/ena-submission/scripts/__pycache__/call_loculus.cpython-312.pyc b/ena-submission/scripts/__pycache__/call_loculus.cpython-312.pyc deleted file mode 100644 index 4bf18a0da4b6c6c30d98801db0fdb7106b0db215..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10763 zcmb_iYit|WmA*sHki$1cilm+vWl6Run_~P(V#~1|S&}6?j+DyJs+(|W&Pbw6k?hRK zvba>sv?)S0ZcVpaSVdY?yC`;L#6bLKfdKo*eW3vsSdgZ=VkRn9K>cI$kFnjR@h-M| z?i~(A$x4&$?iF<3bMCo!?m6c>XZSA`ixENkr{`~uzqkXT-{FRuoaM@+{WLE@wv{7bSH>M+XI;x*$$5>KlqK0YXm~q-PW+H7WY92Ea z8kR8&DXn8xQrgCB8pKmmUX_uJlCOD(Qd(mUGH2Jum|Ej>@NB2r+9)|a$KEl#1AD=V zLOiWOgNQS}fjHAU8m0Fgr9fjY&dj?w3-2DMIqMtrn1{4&yqomtI6L&!Rn4h^cD(_q zf82h~!Rt6D%*S+U`3Bw%v^4cVpQDm`$X%q*xT(**O`m6*zB-_*P_gjVsprZ}R~6$9 zuKo>XtV!h>cF_PFHmhx}k>3G3!CaL&7-zXAn9%~KYMy$rid4C(o(vg3@$@*?%X#N+pI=B|z{RP(C#qB2iY_sMB;yVAH zs;uezT>iVt{QuuiV$Wxvsro*?z~1(9ey)YyY6d6}0c;~cc;S3=UHPa|u4 zV{E5>2fH1g^zK;86he=uJCi=XgqW!D8-?3OJ6t|;bT%T!pZE6$P4%= zWHtduJR27{**GD@XA(iu>w4+!-nSdA;A_B=bN; z^h5S7v|dwNplgeDjRm@~oLg(Y`ErqDcIP)oij6Hx+8-WXq1%N<^?6wrR*qFC@Y0MB z3zuyNj3b0!F8+Vi1KBNs2boZy5Mbe7Wp*E?$OA^q~wKjiX^x zdd4foy#hZSzsh?hAryhzIpGyS0490y2^H{Z1S?RKbpmdjldtJd9l>+WT3!MZnR@#pAnvSpc#3lpJOM4S$ntrJ3+j$s(TT`aXsL?B8;WsWRdc*f;DGgfw1D|0b*yzlpp&Gz1TPyZ3hId&w3`G9 z!G8`0lC7acA{q%3RT8-7pPilcV*dUWxf&c0ZFG`|JC*qw(!I3<-x}Xb=30--9fT9ve~D-goP7*XIsgEchE;!>W!>3$>*&p+-#?x{Q#87>U&|YJrTdB|Yx=aJ zNpxT&z&y)ZEVg*hvJP~6Xj+sRY<5N2fKM-YInK?Ia|u_A2{sKx;rzoEAx%|)+dP}n z3IpfNqCQTr6px<*MA!QM`I3G`^7kc8JHX*+>UQ7LVTzN4)On;{md zqPvaw61qbvM%a_mPE`b1T?14Lt*XvVYi}p$cx?t&XH@VtZ-{kO@%vfN=s-p2K|x@Z z2MM0C8IV*_5u84S@Y$(+e1l3Erm#G2NunM4U0XV#sVWW%r3^$}aAkE7x=7dNV#zO*@4xd)(IEp*4(9r; znM5KkNId7oxYDcK+#avbL_|%%Dqq$P4xAJAks8P$V9t@V=VWano)GrqPUD48bcP=g zgt!2j<%)0sI(rhksPmccZ=(6+f#J%&mFI`+|ymXU&(Ab7Mi?yC5&+^qYP$<*! z1Rs+P0$6+O$B9Ba>79y4Vnkwy6bpUODPZAI1k@~BR8E7~Ds5m;?B;O^cvw=zm*@+; zs7&LDGIdQhCImjHFskrGOoXMf^Y%m$KYT56=Z2tvLja@J@mi= zz;>y#VA+#1?_KU)o?ITw84soV*4fsjXG>a*RhK?lG9qVPv99}LMr+fh2LT2ct(ob9 zp{ax@=ExFV^!eZ2aeGI>clh62JxjgW(5;D^6F-RLTs;{hWS?kZ*1E}h^u?B{? z298^jsw7mA2jTl1m_C8#bzr9}P+h9n-YNGM;?M+Y7_)ClTUVXJ4(*n-bq+WH8sU5d z6j@5wk6t|kiq8nH36`TR3Dl#htR4Y-fEqG#Cd@>N}Vzy$0(uSm3b*-r;wYE{VBjFWreV8c-VsB0JtkkOZrC60ujv;%n zrdVul8W6`OVG^_qoUQ@kIz0Dh?+j;D@T*D@uwa|Ij{1I5`fq4B-Ms#5`dMlgDXO`w zyiK#J8n@0OpFP=$eKipSuNe3`;H8DYbrUe)3UPD9aTE9`9}@YbBLFDIUv5Eg4_qap zW0Ur3MNE`1MPSQE9Pl;#5e7R+o$)K(-LD!W|N5st{VC}upvTMcN>t2y`s~QCcU*{1 zd!JsS$1Ag{!u<&BIufuEPhRSeh~bccgKA#z@q{^VNc2M7MG8e?qIY1p*RO`2Hf&HR zs%(k?#mV?gl=DJx$$J@ZR0lj$hL6cvYs*cMqgN}NjB7{UIX(nsUE%qS3$mO#_XTJT1N0ZboXJP3o$ zOv_9{0HflwZI~M2BA!7&BC%@NjL3R?4iRvZw7?Abp1k5I27$zMNSIT?p+PPZ1}F*( z1KsfmALSqzi3@lPyh&Lr@L>__q-ui%bp=({V1H7*0YP#DM3|d}fAQDwasXPIk+osN z4(0Y9%NdV@5wbPsTK46v`_qG9F@ALVC&ND+Uhcc=zUNu7odPdW!FX$|^@e%DoV|4G z)tj$=Ke)nnY;qslcclkQ8Uxe5=5YV*+^T1H-m`m|UF|%Q?>tiIJX-J^TlMtiJ$(hw zK*4b`qyM-Tv3rW`yE2AiZR0~^GVIFq1IpadzS?{^-+cHFjMi;kudOd>k-K5BEAQ%9 z;_tim-lXof=IsYcS{TioEFmZ!v&inuKKIt&EPm6!MT0-JMe-I~dov$YgV-lYTgO}p0Z z-X-VK&YZnt*_gNQ&pkVEFMN;7J^NDL{t|4=(hM-%V8f@H^YrHi&gc3sVpCubC@Lvz>V5grxtOFjz4tSwIJJ8eLgnmIgPil0(WbG&Q^e>y71IM+$ z4D^}?dbGdlVW5#T`YXWg-cMM@kNNGkYGqe0Ewb;WG1PDnQ;iKoC4ZU@I~6J zN-!lgZkIIq{UBI=C0I_M8a_GVV`Qd(;Kcbs;Tk+I>o4|>4xbtx1mPJS9T}C`zR^?X zPWAPkfra%PKOUNiO2So`nY8%*N;uY!K^ShvXQaf8q%`aNzy*$%`Xl3h02Ys`2?Xf z(Fnb%dD99g1-g{9U05`wt%w$Xg04ewNz*^ z>YJFVAR%=OqFF|Dl<;gKJYV~q-Bz$*HR{0}r#6DxmXt;4{-Rt+Sk;jW4 zR7?2CNLS&{$~f@z7wmhEo}Q#{HA$3tN$F3gE|Mc^`>R=|Q)nWzpY+c$l2djUSZg`(oI z?#;9Q2sFfY$o`l{AJjB0a=9A+(s;hcpV1ZT>KEG=-Px&(?z*XHb7d?f1T>svA zF?+nkwj`UB*aL|JKBbxk{JxXMzW8(^3W0piDwz4Pfcc5RfQfbT2!V_9!{Tlgi1-PW)VaVpFFu zjHlCcq3Nh>5JTfUe5?#{%4bUK-v}qjcq9g&7cSs7K7QFaD*(0&Vk{|OXhtBZ%y2W) z2~mhZ2f)a$;QpOBBk#}vtOgZRYf?-l32eZ-C5BRIxHtRXDk2vaFqyNE$)@oc_@Jr~ z5Wb3gZ1~Ks;!iW>UJ}s6SXIbCR)(2>4Od1@FP4Ls)i*_W8|MBD&;KbP%ufMgUZxOZ zdUNFUk?g4zddC`VTBTii+LfIu(5{|wTv4gA`E=6XvxzpOV$-S@W5cnTqzhDN{C_hEOKk^ zhFh_lv88hbcUQ*r;MpUq0JNTc34mRrvDm#2fL-IQmYXer$SxOZe0j4EkX@rOV*_N@ zXapqYsLR)MEe0UV=sqz4$;Vc_m6u+BDciaD++t_W-UgqN*Y&2H&39k#TVoaE<~p## z9$a(oSsr@t!uyje&WjngJd{l?wJ*7IuFe$}W}7o#DX{gcY01W?MoMyFXf(px!@emupgM5Yi4`4v0!dqHFxICop);AZMfZVcc|by z`Wy4H2j-fbBd}tAX7i+F$8XGgOE$!|efqEwcJ~Pb+absFM8w|k&!6iY(rACNqhau< z?iYtFQ2+H&2J-hc%#cZse*-WLu39kYV}^0^Q73)vivM&So)!X-$tLV!kRNbBa1KKy zKv9yEP@?Pl1pI^pw4QSBb$~HJ+yor+9+hv-|f3hSETsP-FT;qo+Db2(m|7XqHH`Rzn3Ab#O_kz|w(7xF{JZs%Oy+ z(K)JT`2Z>I!U(iVY#n9Gf?ov1qWB0GrDJGUSFydb*tWa4dr#5R_;F1KRSV0Ov{0@a z%q0d&@VqR}k{;JsWOZcQvSPM#;k89BZ`oNg;BF(b)MTC6aJFt?c=24`+*&f>ZZm@0 zsm-3vvJ1x+`|~Dm$%4DB$X=W6TXbfJ7QVJb=WXpJ8}7CvlQl~(bd@jDx zpnx>CwCm00*PFlDQlu10H+li_dnxlxCHR@pJxW7fK6rTODz5b6A5|Tior9V_gK!B))s!)+1H%&+!6ekDv(3Rtv5L0_MomrC~ zh04%W1yw5a6eOgox2j4Hm8#x*M5-fEWdg4X7=sO zd*6Jszeb}W1oHFwuNPK)2>mHK4bkex?pI)JB8;$9K!#}Lf>e|ZSrWEVP>QOdN{Fa3 z8bes!K-jk>*Ph*~hcJBJm>eU#D)mEDPYHu1Kz)w> zh$TLoay=D_Zj95m#+(D`lE5ou_YA20bYaO*uxzMUF??7x{Mcs%upcTEFoIY!LO5uI zS3}7#4`yI@B{ozjPyvA*5pTz{L?L!mp#W!SN33ioAe4YvIvJq?h%_uz1Qd_Yq=rVX znX}VoW^Qc^8fW& z^(Fn>U#o>z2zm(S(@vqfK&6v`)_nWCeDlzL`k>)(syQL51ppKNHbm z!;xq?ET9%Uj zwV99(l44GFzK~~&dd{{ik`w1tcWfQbo#`Di$01Z79X_e&7ogFxO_=qTqmz|9bMzb~ zS%>I(=d>4$2kOUmt?yJ$XCzB=i{?_+S-f=c7z2a&@w`Vas-PJfge_2UDtN$N@~C3% z*oa*!U>&%l@s`#lrIQM96I;#MOV`iwKz;o@4g^-;NQzRMCVf;S+zao$h$}FaON5DB zi5%!uz?ett5lqJhQlSFJI{evHP-Rrng1!mqd93SMtp7=@-|aZ+CVJdNr`y@*9vZBM z{qfLUrP7Wf@$$q!RUe8R`3p(j6MftLLr-JFkELyG{NIWqoG+O;J$HKh(Ko>h%6E}L zDy~*^fh4p&!pqtdZLbgJ{pDR&j zQyw5IBv*0>SLbcJz%^hOh?FQ7azdTY2p>XW}iF+r$JXs!b)xZ~5ZeMY= z2{+Pt_q~T>58vI^j#qt(&tKJ0r0s#S8TmT$G~BnX_IV`Z9R;|YuT#R{>b4l{ijuY9 z*>mSlpMl@mWR#wPrErzNw*wW3;o-f{2g0a8MeP#x*xX|{aRR6)7gzF@LobQ$WuXr8 z|4Z#+?GxaA`T>jz)?ow`KwXl4NAVY^^94Fq@gq6>x2#C93IesOK(`{g)ksk4eV|qm rG}UfZ>bN^wMbPZVVp6>FT3G7;cCdn=`LV-me(Cm_$G9q--tPYcv{_Qo diff --git a/ena-submission/scripts/get_ena_submission_list.py b/ena-submission/scripts/get_ena_submission_list.py index 3421bdd8f..f2b3b9ea7 100644 --- a/ena-submission/scripts/get_ena_submission_list.py +++ b/ena-submission/scripts/get_ena_submission_list.py @@ -9,7 +9,7 @@ import yaml from call_loculus import get_released_data from notifications import get_slack_config, notify, upload_file_with_comment -from submission_db import get_db_config, in_submission_table +from submission_db_helper import get_db_config, in_submission_table logger = logging.getLogger(__name__) logging.basicConfig( diff --git a/ena-submission/scripts/submission_db.py b/ena-submission/scripts/submission_db.py deleted file mode 100644 index e01e1e910..000000000 --- a/ena-submission/scripts/submission_db.py +++ /dev/null @@ -1,84 +0,0 @@ -import os -from dataclasses import dataclass -from enum import Enum - -import psycopg2 - - -@dataclass -class DBConfig: - username: str - password: str - host: str - - -def get_db_config(db_password_default: str, db_username_default: str, db_host_default: str): - db_password = os.getenv("DB_PASSWORD") - if not db_password: - db_password = db_password_default - - db_username = os.getenv("DB_USERNAME") - if not db_username: - db_username = db_username_default - - db_host = os.getenv("DB_HOST") - if not db_host: - db_host = db_host_default - - db_params = { - "username": db_username, - "password": db_password, - "host": db_host, - } - - return DBConfig(**db_params) - - -class StatusAll(Enum): - READY_TO_SUBMIT = 0 - SUBMITTING_PROJECT = 1 - SUBMITTING_SAMPLE = 2 - SUBMITTING_ASSEMBLY = 3 - SUBMITTED_ALL = 4 - SENT_TO_LOCULUS = 5 - HAS_ERRORS_PROJECT = 6 - HAS_ERRORS_ASSEMBLY = 7 - HAS_ERRORS_SAMPLE = 8 - - -class Status(Enum): - READY = 0 - SUBMITTING = 1 - SUBMITTED = 2 - HAS_ERRORS = 3 - - -def connect_to_db(username="postgres", password="unsecure", host="127.0.0.1"): - """ - Establish connection to ena_submitter DB, if DB doesn't exist create it. - """ - try: - con = psycopg2.connect( - dbname="loculus", - user=username, - host=host, - password=password, - options="-c search_path=ena-submission", - ) - except ConnectionError as e: - raise ConnectionError("Could not create ena_submitter DB") from e - return con - - -def in_submission_table(accession: str, version: int, db_config: DBConfig) -> bool: - con = connect_to_db( - db_config.username, - db_config.password, - db_config.host, - ) - cur = con.cursor() - cur.execute( - "select * from submission_table where accession=%s and version=%s", - (f"{accession}", f"{version}"), - ) - return bool(cur.rowcount) diff --git a/ena-submission/scripts/submission_db_helper.py b/ena-submission/scripts/submission_db_helper.py new file mode 100644 index 000000000..1007242cc --- /dev/null +++ b/ena-submission/scripts/submission_db_helper.py @@ -0,0 +1,130 @@ +import os +from dataclasses import dataclass +from datetime import datetime +from enum import Enum + +import psycopg2 +import pytz + + +@dataclass +class DBConfig: + username: str + password: str + host: str + + +def get_db_config(db_password_default: str, db_username_default: str, db_host_default: str): + db_password = os.getenv("DB_PASSWORD") + if not db_password: + db_password = db_password_default + + db_username = os.getenv("DB_USERNAME") + if not db_username: + db_username = db_username_default + + db_host = os.getenv("DB_HOST") + if not db_host: + db_host = db_host_default + + db_params = { + "username": db_username, + "password": db_password, + "host": db_host, + } + + return DBConfig(**db_params) + + +class StatusAll(Enum): + READY_TO_SUBMIT = 0 + SUBMITTING_PROJECT = 1 + SUBMITTING_SAMPLE = 2 + SUBMITTING_ASSEMBLY = 3 + SUBMITTED_ALL = 4 + SENT_TO_LOCULUS = 5 + HAS_ERRORS_PROJECT = 6 + HAS_ERRORS_ASSEMBLY = 7 + HAS_ERRORS_SAMPLE = 8 + + def __str__(self): + return self.name + + +class Status(Enum): + READY = 0 + SUBMITTING = 1 + SUBMITTED = 2 + HAS_ERRORS = 3 + + def __str__(self): + return self.name + + +@dataclass +class SubmissionTableEntry: + accession: str + version: str + organism: str + group_id: int + errors: str | None = None + warnings: str | None = None + status_all: StatusAll = StatusAll.READY_TO_SUBMIT + started_at: datetime | None = None + finished_at: datetime | None = None + metadata: str | None = None + aligned_nucleotide_sequences: str | None = None + external_metadata: str | None = None + + +def connect_to_db(db_config: DBConfig): + """ + Establish connection to ena_submitter DB, if DB doesn't exist create it. + """ + try: + con = psycopg2.connect( + dbname="loculus", + user=db_config.username, + host=db_config.host, + password=db_config.password, + options="-c search_path=ena-submission", + ) + except ConnectionError as e: + raise ConnectionError("Could not connect to loculus DB") from e + return con + + +def in_submission_table(accession: str, version: int, db_config: DBConfig) -> bool: + con = connect_to_db(db_config) + cur = con.cursor() + cur.execute( + "select * from submission_table where accession=%s and version=%s", + (f"{accession}", f"{version}"), + ) + return bool(cur.rowcount) + + +def add_to_submission_table(db_config: DBConfig, submission_table_entry: SubmissionTableEntry): + con = connect_to_db(db_config) + cur = con.cursor() + submission_table_entry.started_at = datetime.now(tz=pytz.utc) + + cur.execute( + "insert into submission_table values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", + ( + submission_table_entry.accession, + submission_table_entry.version, + submission_table_entry.organism, + submission_table_entry.group_id, + submission_table_entry.errors, + submission_table_entry.warnings, + str(submission_table_entry.status_all), + submission_table_entry.started_at, + submission_table_entry.finished_at, + submission_table_entry.metadata, + submission_table_entry.aligned_nucleotide_sequences, + submission_table_entry.external_metadata, + ), + ) + con.commit() + con.close() diff --git a/ena-submission/scripts/trigger_submission_to_ena.py b/ena-submission/scripts/trigger_submission_to_ena.py new file mode 100644 index 000000000..583324e4c --- /dev/null +++ b/ena-submission/scripts/trigger_submission_to_ena.py @@ -0,0 +1,131 @@ +# This script adds all approved sequences to the submission_table +# - this should trigger the submission process. + +import base64 +import json +import logging +import os +import time +from dataclasses import dataclass + +import click +import requests +import yaml +from requests.auth import HTTPBasicAuth +from submission_db_helper import ( + SubmissionTableEntry, + add_to_submission_table, + get_db_config, + in_submission_table, +) + +logger = logging.getLogger(__name__) +logging.basicConfig( + encoding="utf-8", + level=logging.INFO, + format="%(asctime)s %(levelname)8s (%(filename)20s:%(lineno)4d) - %(message)s ", + datefmt="%H:%M:%S", +) + + +@dataclass +class Config: + organisms: list[dict[str, str]] + organism: str + db_username: str + db_password: str + db_host: str + github_username: str + github_pat: str + github_url: str + + +@click.command() +@click.option( + "--log-level", + default="INFO", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), +) +@click.option( + "--config-file", + required=True, + type=click.Path(exists=True), +) +@click.option( + "--input-file", + required=False, + type=click.Path(), +) +def trigger_submission_to_ena(log_level, config_file, input_file=None): + logger.setLevel(log_level) + logging.getLogger("requests").setLevel(logging.INFO) + + with open(config_file) as file: + full_config = yaml.safe_load(file) + relevant_config = {key: full_config.get(key, []) for key in Config.__annotations__} + config = Config(**relevant_config) + logger.info(f"Config: {config}") + + db_config = get_db_config(config.db_password, config.db_username, config.db_host) + + if input_file: + sequences_to_upload: dict = json.load(open(input_file, encoding="utf-8")) + for full_accession, data in sequences_to_upload.items(): + accession, version = full_accession.split(".") + if in_submission_table(accession, version, db_config): + continue + entry = { + "accession": accession, + "version": version, + "group_id": data["metadata"]["groupId"], + "organism": data["organism"], + "metadata": json.dumps(data["metadata"]), + "aligned_nucleotide_sequences": json.dumps(data["alignedNucleotideSequences"]), + } + submission_table_entry = SubmissionTableEntry(**entry) + add_to_submission_table(db_config, submission_table_entry) + logger.info(f"Uploaded {full_accession} to submission_table") + return + + while True: + # In a loop get approved sequences uploaded to Github and upload to submission_table + github_username = os.getenv("GITHUB_USERNAME") + if not github_username: + github_username = config.github_username + + github_pat = os.getenv("GITHUB_PAT") + if not github_pat: + github_pat = config.github_pat + + response = requests.get( + config.github_url, + auth=HTTPBasicAuth(github_username, github_pat), + timeout=10, + ) + + if response.status_code == 200: + file_info = response.json() + sequences_to_upload = json.loads(base64.b64decode(file_info["content"]).decode("utf-8")) + else: + error_msg = f"Failed to retrieve file: {response.status_code}" + raise Exception(error_msg) + for full_accession, data in sequences_to_upload.items(): + accession, version = full_accession.split(".") + if in_submission_table(accession, version, db_config): + continue + entry = { + "accession": accession, + "version": version, + "group_id": data["metadata"]["groupId"], + "organism": data["organism"], + "metadata": json.dumps(data["metadata"]), + "aligned_nucleotide_sequences": json.dumps(data["alignedNucleotideSequences"]), + } + submission_table_entry = SubmissionTableEntry(**entry) + add_to_submission_table(db_config, submission_table_entry) + logger.info(f"Uploaded {full_accession} to submission_table") + time.sleep(30) # Sleep for 30seconds to not overwhelm github + + +if __name__ == "__main__": + trigger_submission_to_ena() diff --git a/kubernetes/loculus/templates/ena-submission-deployment.yaml b/kubernetes/loculus/templates/ena-submission-deployment.yaml index 999766b0d..9a64cf9c3 100644 --- a/kubernetes/loculus/templates/ena-submission-deployment.yaml +++ b/kubernetes/loculus/templates/ena-submission-deployment.yaml @@ -93,9 +93,19 @@ spec: secretKeyRef: name: slack-notifications key: slack-channel-id + - name: GITHUB_USERNAME + valueFrom: + secretKeyRef: + name: github-approval-repo + key: github-username + - name: GITHUB_PAT + valueFrom: + secretKeyRef: + name: github-approval-repo + key: github-pat args: - snakemake - - get_ena_submission_list_and_sleep # Do not start submission process yet until on pods until better tested. + - results/triggered volumeMounts: - name: loculus-ena-submission-config-volume mountPath: /package/config/config.yaml @@ -104,4 +114,84 @@ spec: - name: loculus-ena-submission-config-volume configMap: name: loculus-ena-submission-config +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: loculus-get-ena-submission-list-cronjob +spec: + schedule: "0 0 * * *" # get submission list daily at midnight + startingDeadlineSeconds: 60 + concurrencyPolicy: Forbid + jobTemplate: + spec: + activeDeadlineSeconds: {{ $.Values.getSubmissionListLimitSeconds }} + template: + metadata: + labels: + app: loculus + component: loculus-get-ena-submission-list-cronjob + annotations: + argocd.argoproj.io/sync-options: Replace=true + reloader.stakater.com/auto: "true" + spec: + restartPolicy: Never + containers: + - name: ena-submission + image: "ghcr.io/loculus-project/ena-submission:{{ $dockerTag }}" + imagePullPolicy: Always + resources: + requests: + memory: "80Mi" + cpu: "10m" + limits: + memory: "10Gi" + env: + - name: EXTERNAL_METADATA_UPDATER_PASSWORD + valueFrom: + secretKeyRef: + name: service-accounts + key: dummyExternalMetadataUpdaterPassword + - name: DB_HOST + valueFrom: + secretKeyRef: + name: database + key: host + - name: DB_USERNAME + valueFrom: + secretKeyRef: + name: database + key: username + - name: DB_PASSWORD + valueFrom: + secretKeyRef: + name: database + key: password + - name: SLACK_HOOK + valueFrom: + secretKeyRef: + name: slack-notifications + key: slack-hook + - name: SLACK_TOKEN + valueFrom: + secretKeyRef: + name: slack-notifications + key: slack-token + - name: SLACK_CHANNEL_ID + valueFrom: + secretKeyRef: + name: slack-notifications + key: slack-channel-id + args: + - snakemake + - get_ena_submission_list + - --all-temp # Reduce disk usage by not keeping files around + volumeMounts: + - name: loculus-ena-submission-config-volume + mountPath: /package/config/config.yaml + subPath: config.yaml + volumes: + - name: loculus-ena-submission-config-volume + configMap: + name: loculus-ena-submission-config {{- end }} diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml index 82c9233e8..e352546e4 100644 --- a/kubernetes/loculus/values.yaml +++ b/kubernetes/loculus/values.yaml @@ -1505,6 +1505,11 @@ secrets: slack-hook: "dummy" slack-token: "dummy" slack-channel-id: "dummy" + github-approval-repo: + type: raw + data: + github-username: "dummy" + github-pat: "dummy" enableCrossRefCredentials: true runDevelopmentKeycloakDatabase: true runDevelopmentMainDatabase: true diff --git a/kubernetes/loculus/values_preview_server.yaml b/kubernetes/loculus/values_preview_server.yaml index 6bac7aac8..199ba7e27 100644 --- a/kubernetes/loculus/values_preview_server.yaml +++ b/kubernetes/loculus/values_preview_server.yaml @@ -27,4 +27,10 @@ secrets: slack-hook: AgCLEhTwqKL278AbNwpqdRqeg6naNrQJWx3q8Zp+ecXjMaaLLBi1C3uQlt0WKioy+pUAhfe7MowXKLM55hLyh/InZ9o3yLi9T/5cVRXcEXCvODWmbhr94XhcYI3KnVngZLcNl9Gr4LR+bz8A0sl/rCijNYrqYeDCLI6XUmB8mlKnHPqrF6CXC8Y5xyDbNYJONx6DAugq+gQcZYJ101vUOtu2LTD8awCsdF5FOzdcZ344Vxn/xwDlbbvUEKEQp5A5aMfx95zpa+rV/sQYHeCb7Dy1oWqpOrZP/rPJ4K9VGRx5QA9o1Qi0Pl3alRUqiPUR6pbMxbX8u0kCN6drFKxXDAMd+SadsppDGbNQNeQP5cphNJwYxL/0MIgXxJTrQpcynJK1FULX9W+1GtXg+tX4hRCtZL5hnCxPw12QcNOL2N8SJLGEe8gK8QtALpu/DH/trVJ3rMDRkZhhWCvtb9Zt9EuvUhxs07sE9DZ7rEAqzx51v4vr9CzmxkHEiAhrC3Se3CxnSspBP1/X9SvZ+GXn+ZuXzN+KivWCnim0RwhRD75Y7ZP8ct/iu3ilb6b7Pl+KOgOkA3In7c4yVAZwXMTuF6aP2/8inPx5Kk6p8ks7c5XeSIDFOH7C6EJuD7E69Fz6ijaF5bJN8NWBVxkE88xq4un5e7dcuqjIqaQ8kLDX1g8aXiklr6qD29q9H/m+gtd6lxcMb53bMl0EI+GHYTBZn3w+T1PxlY1uoBfNzt1efjXJD7AWlTDxze+5PIYgiFVAOdfv3ey5HJMMw1w5MTLMW44hkpt4MCaHvREBTXq5sxBJe10= slack-token: AgDFAP+F7ze+TY+yK71JPOSkKwIpnBXh7WWweyZZwYm1/CwmKS1iQ4O1p54sTrHrMC737Si27MfTVvrEZRj9aAwq0fQJZt5yldpfMaTccZJHj3rQ5kczyuCMYVcFzmyywAr4DXsUCscjrOLgwTiiK/d8jSFkKyXupC7bB2EcZZUFGpFAj7PiSeJuEIKQptkSjeaasCQXkdkuoKczM38GCu502pIxaJ9kIXVrereyKUpsU/uFDgj0IcKqfiT0M2FGs8Ujl3CXpMxcOLSuxVyCnje27GHpsYrd/uEKX3yl1rB1rV6Z+gMzlO9DDPW/XJl6TY5snOxdaCv7uNzAGwgb/rlaZ5fnrNqsOoucJvh35yxMcKDsx/hY72H7PRnzNpLeqZ/2zAub+fQP/o1edjxaYHaSltS0lwzCivIPOHv66/dDOD9v0LncWkCWGXXOp8Fsz9OrF+NcAZjIY/hDzwy+JRDA7Wtn7jlkA07WFpobkyyKfN+bNT1664wS2IMDRYA6+MbkA99v+ScVsEVlxJqn+PiaDtexQAfQcyN2NPbQe+9xMIQavvrcnwxdMwAP8DBME3vhdrD8yDRJ9GN+ygtZ3dB4FC4iW20ETyzlAqJ/H9M2/ed1O3VyraFDCV3PmSBdq7Rinj1Zg6D+IEp54HtwWiu5s7iNeKW37cSSloRUaojWQ1BFPB6msfP/O5yqREdGrWVhmChWvSDMw2LxmnZbCw3mVdMr9B1XeK76GHa4kOhVOcEqzl0X61NILYDecgLP6HVJZhB+NpHJfOY= slack-channel-id: AgDDxu0CGC/AFqOHMeHjV3KUGPoY0QAmGoqtiuxPPaP+GWOuz4xZbBP6Fymh0XbHFHMB9PtWowrMbvFOkfTncRiKqyK6HIU7GU2GtCla2WTZQNRAW82gcJnxtbtm4KymN3LyTj27qBHkQHNZ90qyBGdsJUYwmBW8XB6wNoz10KDyQSvYrYwQEe0onCgnislxslATPB6CQFWHMghKYoaHHAECpf6sN1kS9rvNq/1e7gG8s++lgF7qSZgjQP77Q6kMoiMS5krX03pPKZXsc69mI8GLIvhalsUfg2BO7swa3FgCbjecp32lW6KuRCfzeMmj2NWpWTLcJSPYPJN0sOkhRFOWUlrylztG82l7dGl5JofJWQVopF+qLTAR6LxHaujFHQ+Y2x4/5tBmurwOT6xXknQjXqYs2qbG1OriivJrjRwhRoZWE2vR5YlE+Zz8S9/vYw0JnKibnB1YvbdBBnpllyXYjTJa4818W05DvJ70qLeILMYcEkY4/jv5xqNdGuwp34gZLcW6+qztHfQVXRf0uXM1B7BPNH0aNMBNN7D0m1vWkTNgKC+V2PiEH9nTVhwSF+MlG/rmR2+v84kWhMP3qdX8/28GnBnvS9rryzuc3e2mHBIiIGHwW+SQjlmdq0jDTtuvFtU8I7ncB5PUe+sYZ0zFrn57blraBG5ntqtZfb+aS3modE+ElmCgBzi8gSQoVxXzmOIfMZRRwAaH0w== + github-approval-repo: + type: sealedsecret + clusterWide: "true" + data: + github-username: AgBtyXeimJmI7e1di9ubkzNODtAAlm0LiXEJJGfzzR/KExSrVMYsWcIIQOCfS/bbLvvvSyddKPGeFYhozG6CK943oTJ2rZHeMBjqtXX0AEw655KfJNgHWoWWk1xdmOzITtzKNlHi4cPcxswQte84NhJqRZPJ1sNhQfOm/AwL0NplQXD6xyl7UGAUKOW8rSLgH+gTUsTzmz2prgwiF3SmwwHCSClL/q9H4/nqkMycMQ5NcQK+5cpUUeWidzL3LjANCJS815S8oMdUWbDFwB3NPPfUhYdRuXM19MAHxq2hhAkn6rKGNI8tQhahi8dfmH2QDMyS1KVRof6taBXJIVMxeejL8nOJbNhPBdtyM+3hVasm/frLycUJxzvQpPPv1ictprM5K/r2sJDIqyXVOZSspyXQ09gFz2D13QsOSfaERbKUMtn9L0bfsG3N/zwo614N8YNCXf+dvIjVCoFwMD5RQ5IkqHLPEQiJ77++feSO+4fbfItjMI8qP4mM6YRI7VqweVKufwdSSropRpxhytnjqoWfvHFyDgzsw4ZuJwlyZs01yuJelog4yqG8WszWCv6Ae3fRM3gaquCj2UlFfxTBxN+aZId/lV3LCG1l06g82J6+YTL5wdCnYuYcpZmp21651J9jWwWzCumRWaQgbY6Ou0VjHKZ9if1uDdYA3S7SJJx/ZDZK4cixoS9VXgsPWFgmHbZNHqOFzOd2Dl9kRA== + github-pat: AgBa5wjYBw3GWXihWdoedx3oEbR5z808WoeIGyk5aAZPQ1MTMbWm4ZMdvJUZVPlkRacQXSt+A2pniYMjzl0bqHkc5guVQOoHu6jZgtyfaOyqT4Ergz1+L9VUzyn6E1NwqV2BtdKQTN7A1VvtLw0BLVG0E01SUcHeoU9MWlF+9BWFtXWWYxWL/sTLKoz0tnj8yu6LLHMMYeCnABAoZwZbJ3kf1UVp3FwcJbuSqsrUTPaeS6z5g90jL/zwJ150VUh9kInHs+kwV1imZp06ZUm3gpdY1f/EQZnxHTPUoaYLOo91cEhG7bSD9KQz1B6m07VzjIlnrwduPUCFQs1gk1hU7gO+6MYlMqCIb8NUQbuX5d5cHiENJm9DXAJdd8oNW02JQ371gulVYvXagAdrYgYv75Mr3YjejHZeinZrJ/ZxeZI0+fL4SLkh+77RyuN1R6ab1vGBoUG+LgHsUEBT7mw/wXtrzRmNBwxNBXSfTrrKM+EzM9KUsrSe490sK3Rf4TExjynL/9biS9ZFw601tdBh8luO3xFhCr8Bn4q7sWnKNmvSu+gMZklyhR65Hf0LHTxUdDcK8WZKFyds6foHTAv9kX4+NK7upVfAdjx9Tpf0ONad3nSboi/+1vYoGtAZcL6BZoNL+1Xu2WAlVa+WollJSV2k4KeTEswE1vmPAhoapyxpry9Mz+8CGkohvSPjHGdfXSNSW7K9zKCCubqzHV2+EsnaDWJ9+cwghAQFOpKdbJuHLwalMpAkTztRjr5wHfD6KSo1apCHZl84S0B2u8floEzU1XRiW8zq10m8cb3/xmdteNL3FNMlZlEieD2nsiU= reduceResourceRequest: true From 2716f7e07db27e1dae807c4243397f5e93220f82 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Tue, 13 Aug 2024 13:18:00 +0200 Subject: [PATCH 2/3] Update with suggestions --- .../scripts/get_ena_submission_list.py | 19 +++---- .../scripts/trigger_submission_to_ena.py | 51 ++++++++----------- kubernetes/loculus/values.yaml | 1 + 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/ena-submission/scripts/get_ena_submission_list.py b/ena-submission/scripts/get_ena_submission_list.py index f2b3b9ea7..aed9d78a3 100644 --- a/ena-submission/scripts/get_ena_submission_list.py +++ b/ena-submission/scripts/get_ena_submission_list.py @@ -1,9 +1,8 @@ import json import logging -import os from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List +from typing import Any import click import yaml @@ -22,14 +21,14 @@ @dataclass class Config: - organisms: List[Dict[str, str]] + organisms: list[dict[str, str]] organism: str backend_url: str keycloak_token_url: str keycloak_client_id: str username: str password: str - ena_specific_metadata: List[str] + ena_specific_metadata: list[str] ingest_pipeline_submitter: str db_username: str db_password: str @@ -39,7 +38,7 @@ class Config: slack_channel_id: str -def get_data_for_submission(config, entries, db_config): +def get_data_for_submission(config, entries, db_config, organism): """ Filter data in state APPROVED_FOR_RELEASE: - data must be state "OPEN" for use @@ -66,6 +65,7 @@ def get_data_for_submission(config, entries, db_config): f"or {config.ingest_pipeline_submitter}. Potential user error: discarding sequence.", ) continue + item["organism"] = organism data_dict[key] = item return data_dict @@ -115,7 +115,7 @@ def get_ena_submission_list(log_level, config_file, output_file): logger.setLevel(log_level) logging.getLogger("requests").setLevel(logging.WARNING) - with open(config_file) as file: + with open(config_file, encoding="utf-8") as file: full_config = yaml.safe_load(file) relevant_config = {key: full_config.get(key, []) for key in Config.__annotations__} config = Config(**relevant_config) @@ -135,14 +135,15 @@ def get_ena_submission_list(log_level, config_file, output_file): logging.info(f"Getting released sequences for organism: {organism}") all_entries = get_released_data(config, organism) - entries_to_submit.update(get_data_for_submission(config, all_entries, db_config)) + data = get_data_for_submission(config, all_entries, db_config, organism) + entries_to_submit.update(data) if entries_to_submit: - Path(output_file).write_text(json.dumps(entries_to_submit)) + Path(output_file).write_text(json.dumps(entries_to_submit), encoding="utf-8") send_slack_notification(config, output_file) else: logging.info("No sequences found to submit to ENA") - Path(output_file).write_text("") + Path(output_file).write_text("", encoding="utf-8") if __name__ == "__main__": diff --git a/ena-submission/scripts/trigger_submission_to_ena.py b/ena-submission/scripts/trigger_submission_to_ena.py index 583324e4c..4dcb6e1c6 100644 --- a/ena-submission/scripts/trigger_submission_to_ena.py +++ b/ena-submission/scripts/trigger_submission_to_ena.py @@ -40,6 +40,24 @@ class Config: github_url: str +def upload_sequences(db_config, sequences_to_upload): + for full_accession, data in sequences_to_upload.items(): + accession, version = full_accession.split(".") + if in_submission_table(accession, version, db_config): + continue + entry = { + "accession": accession, + "version": version, + "group_id": data["metadata"]["groupId"], + "organism": data["organism"], + "metadata": json.dumps(data["metadata"]), + "aligned_nucleotide_sequences": json.dumps(data["alignedNucleotideSequences"]), + } + submission_table_entry = SubmissionTableEntry(**entry) + add_to_submission_table(db_config, submission_table_entry) + logger.info(f"Uploaded {full_accession} to submission_table") + + @click.command() @click.option( "--log-level", @@ -69,22 +87,9 @@ def trigger_submission_to_ena(log_level, config_file, input_file=None): db_config = get_db_config(config.db_password, config.db_username, config.db_host) if input_file: + # Get sequences to upload from a file sequences_to_upload: dict = json.load(open(input_file, encoding="utf-8")) - for full_accession, data in sequences_to_upload.items(): - accession, version = full_accession.split(".") - if in_submission_table(accession, version, db_config): - continue - entry = { - "accession": accession, - "version": version, - "group_id": data["metadata"]["groupId"], - "organism": data["organism"], - "metadata": json.dumps(data["metadata"]), - "aligned_nucleotide_sequences": json.dumps(data["alignedNucleotideSequences"]), - } - submission_table_entry = SubmissionTableEntry(**entry) - add_to_submission_table(db_config, submission_table_entry) - logger.info(f"Uploaded {full_accession} to submission_table") + upload_sequences(db_config, sequences_to_upload) return while True: @@ -109,21 +114,7 @@ def trigger_submission_to_ena(log_level, config_file, input_file=None): else: error_msg = f"Failed to retrieve file: {response.status_code}" raise Exception(error_msg) - for full_accession, data in sequences_to_upload.items(): - accession, version = full_accession.split(".") - if in_submission_table(accession, version, db_config): - continue - entry = { - "accession": accession, - "version": version, - "group_id": data["metadata"]["groupId"], - "organism": data["organism"], - "metadata": json.dumps(data["metadata"]), - "aligned_nucleotide_sequences": json.dumps(data["alignedNucleotideSequences"]), - } - submission_table_entry = SubmissionTableEntry(**entry) - add_to_submission_table(db_config, submission_table_entry) - logger.info(f"Uploaded {full_accession} to submission_table") + upload_sequences(db_config, sequences_to_upload) time.sleep(30) # Sleep for 30seconds to not overwhelm github diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml index e352546e4..820be60f7 100644 --- a/kubernetes/loculus/values.yaml +++ b/kubernetes/loculus/values.yaml @@ -10,6 +10,7 @@ disableIngest: false disableEnaSubmission: false siloImportLimitSeconds: 3600 ingestLimitSeconds: 1800 +getSubmissionListLimitSeconds: 600 preprocessingTimeout: 600 accessionPrefix: "LOC_" dataUseTermsUrls: From bf10b301248b61ac7016799ab858856681ecd27d Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Tue, 13 Aug 2024 13:23:45 +0200 Subject: [PATCH 3/3] Change from uploading aligned to unaligned sequences to the submission_table (assembly requires unaligned sequences) --- ena-submission/flyway/sql/V1__Initial_Schema.sql | 2 +- ena-submission/scripts/submission_db_helper.py | 4 ++-- ena-submission/scripts/trigger_submission_to_ena.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ena-submission/flyway/sql/V1__Initial_Schema.sql b/ena-submission/flyway/sql/V1__Initial_Schema.sql index 31b4f6fc1..ff0259bb8 100644 --- a/ena-submission/flyway/sql/V1__Initial_Schema.sql +++ b/ena-submission/flyway/sql/V1__Initial_Schema.sql @@ -9,7 +9,7 @@ CREATE TABLE submission_table ( started_at timestamp not null, finished_at timestamp, metadata jsonb, - aligned_nucleotide_sequences jsonb, + unaligned_nucleotide_sequences jsonb, external_metadata jsonb, primary key (accession, version) ); diff --git a/ena-submission/scripts/submission_db_helper.py b/ena-submission/scripts/submission_db_helper.py index 1007242cc..ba450fa88 100644 --- a/ena-submission/scripts/submission_db_helper.py +++ b/ena-submission/scripts/submission_db_helper.py @@ -73,7 +73,7 @@ class SubmissionTableEntry: started_at: datetime | None = None finished_at: datetime | None = None metadata: str | None = None - aligned_nucleotide_sequences: str | None = None + unaligned_nucleotide_sequences: str | None = None external_metadata: str | None = None @@ -122,7 +122,7 @@ def add_to_submission_table(db_config: DBConfig, submission_table_entry: Submiss submission_table_entry.started_at, submission_table_entry.finished_at, submission_table_entry.metadata, - submission_table_entry.aligned_nucleotide_sequences, + submission_table_entry.unaligned_nucleotide_sequences, submission_table_entry.external_metadata, ), ) diff --git a/ena-submission/scripts/trigger_submission_to_ena.py b/ena-submission/scripts/trigger_submission_to_ena.py index 4dcb6e1c6..4a53811cf 100644 --- a/ena-submission/scripts/trigger_submission_to_ena.py +++ b/ena-submission/scripts/trigger_submission_to_ena.py @@ -51,7 +51,7 @@ def upload_sequences(db_config, sequences_to_upload): "group_id": data["metadata"]["groupId"], "organism": data["organism"], "metadata": json.dumps(data["metadata"]), - "aligned_nucleotide_sequences": json.dumps(data["alignedNucleotideSequences"]), + "unaligned_nucleotide_sequences": json.dumps(data["unalignedNucleotideSequences"]), } submission_table_entry = SubmissionTableEntry(**entry) add_to_submission_table(db_config, submission_table_entry)