From 541922fc4c64e694c5bbb5a5021820db62dc1086 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Mon, 4 Mar 2024 16:58:35 +0000 Subject: [PATCH 01/15] remove unused class --- .../ensembl/io/genomio/genome_metadata/extend.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index 22c102508..4da296a76 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -15,7 +15,6 @@ """Add more metadata to the genome metadata file, including added seq_regions (e.g. MT chromosome).""" __all__ = [ - "MissingDataError", "get_additions", "get_gbff_regions", "get_report_regions_names", @@ -38,16 +37,6 @@ _VERSION_END = re.compile(r"\.\d+$") -class MissingDataError(Exception): - """Used if some data is missing from the report file.""" - - def __init__(self, report_path: PathLike, accession: str, msg: str): - report_msg = f"Can't get data for {accession} in report {report_path}" - if msg: - report_msg = f"{report_msg}: {msg}" - self.msg = report_msg - - def get_additions(report_path: Path, gbff_path: Optional[Path]) -> List[str]: """Returns all `seq_regions` that are mentioned in the report but that are not in the data. From c376136bfd440c57ff1c998cac35aee0aeeab067 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Mon, 4 Mar 2024 17:01:07 +0000 Subject: [PATCH 02/15] update get_gbff_regions method and add unit test --- .../io/genomio/genome_metadata/extend.py | 18 +++---- .../tests/genome_metadata/test_extend.py | 50 ++++++++++++++++++ .../genome_metadata/test_extend/input.gbff.gz | Bin 0 -> 25564 bytes 3 files changed, 58 insertions(+), 10 deletions(-) create mode 100644 src/python/tests/genome_metadata/test_extend.py create mode 100644 src/python/tests/genome_metadata/test_extend/input.gbff.gz diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index 4da296a76..e8fdce165 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -59,20 +59,18 @@ def get_additions(report_path: Path, gbff_path: Optional[Path]) -> List[str]: return additions -def get_gbff_regions(gbff_path: Optional[Path]) -> List[str]: - """Returns the `seq_region` data from the GBFF file. +def get_gbff_regions(gbff_path: Optional[PathLike]) -> List[str]: + """Returns the `seq_region` data from a GBFF file. Args: - gbff_path: Gbff file path to use. + gbff_path: GBFF file path to use. """ - if not gbff_path: - return [] - seq_regions = [] - with open_gz_file(gbff_path) as gbff_file: - for record in SeqIO.parse(gbff_file, "genbank"): - record_id = re.sub(_VERSION_END, "", record.id) - seq_regions.append(record_id) + if gbff_path: + with open_gz_file(gbff_path) as gbff_file: + for record in SeqIO.parse(gbff_file, "genbank"): + record_id = re.sub(_VERSION_END, "", record.id) + seq_regions.append(record_id) return seq_regions diff --git a/src/python/tests/genome_metadata/test_extend.py b/src/python/tests/genome_metadata/test_extend.py new file mode 100644 index 000000000..63c30d400 --- /dev/null +++ b/src/python/tests/genome_metadata/test_extend.py @@ -0,0 +1,50 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit testing of `ensembl.io.genomio.genome_metadata.extend` module. + +Typical usage example:: + $ pytest test_extend.py + +""" + +from pathlib import Path +from typing import List + +import pytest + +from ensembl.io.genomio.genome_metadata import extend + + +@pytest.mark.parametrize( + "gbff_file, output", + [ + pytest.param("", [], id="No GBFF file"), + ("input.gbff.gz", ["LR605957", "LR605956"]), + ], +) +def test_get_gbff_regions(data_dir: Path, gbff_file: str, output: List[str]) -> None: + """Tests the `extend.get_gbff_regions` class. + + Args: + data_dir: Module's test data directory fixture. + gbff_path: GBFF file name. + output: Expected list of sequence region IDs. + """ + if gbff_file: + gbff_path = data_dir / gbff_file + else: + gbff_path = None + result = extend.get_gbff_regions(gbff_path) + assert result == output diff --git a/src/python/tests/genome_metadata/test_extend/input.gbff.gz b/src/python/tests/genome_metadata/test_extend/input.gbff.gz new file mode 100644 index 0000000000000000000000000000000000000000..67cbd2b1265aad406917d0280c5560c75b78334a GIT binary patch literal 25564 zcmXteV{|6W?{;n5w%e_@w%e_3+qP|UYumPM+cxjw-RJi|@7J7}NQBY7^ zLmnnTpbFLoPBwPN7A`h=7Pj^-&J1RTrlvqw+BSBZBMsj-dV)tvuthZzf9&WTfmKn; zt5u`v8qnUT*Q}R#dsbTG+uMBBO9%wv4G{-VrLTt{rA4T5H57~5$psj< zlT?~Jh*f)h%(~+@3A3TJ-Rv!t-a;lIQqWU@XuLKqVoiK-2rH>~Qw! z-f!vVwpyiEEM?;h$HeVt!}=EOJJ+XYS&Q|G@l(`}u%$I`iQxMiXL<=@;L)RVa5s1V zy>b9HOD}Kj?#baPNcm^uYH6_gA6Vm1u*cIN}IZ#GcUfDd24$|wI`2zI$ zY;J2GEL4B$Ggal~pch>}qK(Svid!=!N-y1gH-mfDSy)B5n z&VX%WH0~F5#oHYL$@am%`02&Lj@8v|TcCa0O2SvdDwaJ@1XUj(tg6)5!HsG>TyMxK zM!C1HO<&KYm+h?WYw;(cZBh4jez%D&I|90IlviQ7;?>olH#+IWI<++a?vd&uRC}v< zZM{*+TRNoiu<-E%)Hv)cnYhDW?;fw0^RcE{_KiHoin^snO8xnk*pMiXU zM8xSE)Oj>50(D+VIftaaLY`|j8m7aT42*uwFb==>9(~d zf3?dClP=5Vc%gy6d9WEO8f>~t>*_nK?dda7qABvC;Cn{In$%I}oU=3@N~?>iGrA*5 zD!);Wi)=SKQayg#UVzwSu*Fc3eF7;ks0zu5{w}Zd?3fYNI1VY>@b8ULpSc*1DGRY) zZS2a3G^OwFz?-YIknnjmr0Qzpz{*Vt(3{^)1Mzhj@QC3?tlp~ z`|YWygivGd7wiiEVHP9cpl+=PhvoW2>-dY4{w3hop+Tu%Dzkxx5A$AS*JaocD-hyb zwy|?6mjU#DqEFcKQuj4*a@@Fs#z+e2sG}%gy+d7&qirhio6C~Z>kx7W1zK&mw_)fX z%nu^>2+XJcJrSR}^yFu$6ONR!Mhe;sQ+fr=qq(iLMzN!UgyV|pqO!^RN%v!G-ASRA zYVAP}7RHK}vEnKmVzp=G7IX;+^WOF8CTpqnRM>=7LkJiHKq{ zJj`WBnnu_5z!Q2J^GgotgyWdtN@bT++5No(ldcZ+)drnF2H@#NftUs3=^|tjNRK&X z(Igc^VD{AZvP)Cuq)KaD346)lCo!dEtMG6lVzT`?6Vl59bFxoWSpn~4B9VvjhM7h~ zkdao7Um3NJpC(;~E!phVK(b9!4+Y47wMA6YuH99HpeC{`fw?XIM2oJB6(mk~WK3iH zb&2%X&0{-SN!^v1;9)!|)|NF$$W-`TSq2$cB(jB%>P$aw z7@IV$*RCFNTwtKgpo?NV8ppb;^aSn?PC6ogsO%$Q~U*O~=(B5Fa;d(hDe0hRc!b%lQp`HAxOoB*Ls5Xz? zsips%IO?E00yiJGw*mW7Wv1ObVtK3DbZ7Soq&5S&{sL{t7BWG*bm+0fY^-9?zp((S z4(*53r88bvwee@sg1&0OPyr@OWQ=dJ+8ZgbIdat5qN{M#F?In(LqJFL2KuD*hl?zL zPxe*<_K0|gsX$wU3#SevA%w-6H{Hw39>-oXX-@{1QO?MYGTC83){+BpccQvOp;9|h z+)`GU74jc$JT96NSxk1>T$;QPW1T$5dFQih49Q-dW?>SVNz(wH+TA0!^B*XhD&e3_ zm$4$Yec%%LJEn??rDjm2a~kCU;jdUDifnGc;pp)zpsrsVt{chz(HpLR=!L~fthur#=CQ(u zx!MN6E3sNTuO#ONm+nUa55|UKa>9FXNc?Rr9(=q3q1Z=$GwfS+$J~eq{}pOJ<deYhDc7+N{BN^bI|omI zx;SZ<-6GdxkMnDhmrz-h! z|2f{GQuY^xU(cSW3=5LCx9$H~tX=YwkmGNRPBXPHF`?`pK6SiPYsdGTJ^nQbVZN?! zue&QN@98vqyl2oVZ+VgbJ3`(DZ_mrXYjDTIZcakpD`<9j4-;Y!Z2ocHc@!#%UA z6YHN|`-7lRzh0D(J;W#x$iJM56Xvk|BOZfeD@kdlVA$NCrh~~ z$7+naJdwP#&7vHZkT;_*(f(i<@&I8ZFzN1*;uxL7*h@;kcy@juc?~icsusW=A$^4F zyTfzB!VCYl7p0@*Ip%FYG};YYBV{*19L@!SL_CjFmd{q6Addln@-;w<6G9?+u@ay{ z)rx-S8-z8roWV2lWsbE+X9&$V$gemsiO{J9*{2?B4-Gd?0Lm-`V-5~Z$th9c@o#W?nKeOQu`Vzl}YnubZvm4;&&eHK&rgo4ssa$HkNAWk&9u+4{vxw8+c z>dy>=l$`cg`0QwM92E%INnHnYEonWMW4`=rcv@0p;*}74i5Wb)*%9WGzM9+EdkA^8 z^N@xyCv7*pHKu4XTq%pVDyjS1wn6dKYg3OqhMN@o2ur&b3{|PnP{dV3?8VYsSDe=v z7arD>N@24!keY|OxH0`lG5kD!RCE?gD}(Yq2*QOfn=yqNt{hw2GH(6ldFXR zgBX;DKTf)$Pf)noCpD6O?V3n1ut8Z}56&@Wo^;{73=I^k;|!2}&xbTogQ6e^#fUDl z6MlwNBU0Sd=g`9IHQs_rxpOjGO9RxT@s!z9#Ds9w{rPTW%nk^QDCh7k0jUAcP{&{y z6Phz9Zn(Ex3@xKVGyOnZFccf4E<=}Nc(UtN)V#TO-6s=w*-2z_r*$@4`D~JGB`)G7 zD^a{|?3Qe|g{yj!EM&a>3|OoB5l8n8TdQTPFr^Pwo;xd%-dYpgZax% z3YnVZqIorYsS$-^{g3JvVTYob;dq7NMmtV{I2X>Lsj(nZQt}Q?`PO`f#HUOi7Paf; zv7{&_$s{#y@ja!q^$wnwq$lomG;or!*W^6I_FZ$XQovB*D1+-h6Yy}C7sJ=Av5;_a}UaW$|5$a5h4dF zFtwvN#Jfr(b(zj-1q}K4{K#KKsG2mur}jfH<-+hTzms2=6xo_h232OPW$<&I2F`^@ zh|^k<`A+5*JMfRZEM?R4i^e0~IMvpfR2Av=IHAkh6>dEbj4)3%H4!P3besy?Q$HR4 zu!e!Lk+~zX86sY;#c;hi(_gFRF`NzvT3dbY8IXu;Z0(fkBW89*Inl3TE>esN0ql|5 z#d%(_R*td>#A8Db@i1nZje#_9HOw#_VqLfhs8>PFTez26q%(yHFBY^fA%F@s%&b-% zmni8uScV5XT|Ra$p>@UMLY(NQ8yN6HE2GIc7715K$^TYDtVC56Zl_<>hvrgxPqKvN zKwX;>Y9eb|ijv6-H#mXhm6>CQTeX0x!S4q9$qcT zWoYIywzKA4S?VJvNG#LM-2+XgoUhjJo@ykAlwuxyNU)B$pf$w*hGD~OjdkuC#8J4) z(JcD5@PH&XRPnI>WLR(h6*?j5VD_wz<%KCsBc$e~?Xhtj?C4Un&L9;$=81oBjZ*?? zGthpej%t!;GA(Pw1B#2%gr=EVqioEl%ejiPFu$=3PMLUuFpWxg3su)9SklZR?qk_72`5pKISH$qK)={|C9+>&0ufN)FAX_e7Lb;LlaeQO^v1lswM8UCE1 zH@Y<2%rhr8;ocCP>{?W;9=;eVHDvqfOUzh%pGh54zWT90X+cxsG>TsXm14DTi^A%Q zU*FAHHFVtV=&CV&JuhavQ{W--JM*-w)fR(@-gQe_I;m&#@E0pc(i$1TYxM(?$szj~ zp5TJ8?DFyTLA$Ebqy1p0mZOo+>t1Yu2At!)=82c(aF|O`yXrmDbR)=ZJTq=*c~a|A zm7OtJU2>`K^&_n;^^0w9$O&<(<aV0CVDpHX-iDHlJZH6i%sK1P!&1+mX!J5`5JB886auLf5u`Y2!4TSRP z^+_I$c3urPX74fg5~d@RJ0J%%#&tD(ro0+f9(D>_X;0R0DQd=KotIqcTUlD&M^t!h z+J9SF)E?B`ZVtAqK0**aGJNgJ9O%jexU%!L;gz>GVv0Y?ZoQx%dF>3XdY9!9T!voV;>U<`3`ll%LXZjEO)g`dcaaya3VtW>)m3SHn;?vV>!RzkS3bmeH2@RAaCYH>GO*e71cY^eP>l#*~Zy^nGv zm>Da!!t=#3|AMCKu{;$!|uV~vTfB!|wjyFvfDf)Iu;D~stNubqy-z{ndlXzf} zSuh14NF6Z|s?){BdnNGtch+8M5S6_P5qzA>D?Eg=T3Ye*tT^nHMDZ=XB)njfkw+Fj z$8ck+WVihPJ{!!D%3MRmRvyP!@~m3w@{lPlaCzf~5Z&bxo1neRaA#D^Iad7fvRCI^ zb(C@dcyWItXXt!=KBjy=SV02j9|^JaJ#%V(uJtM3Em-jVT5IvAQV)h}@vp=i@cL)# zUKd^wUw$6Hf_~n=`z!AO!-kqG&KrQRGq-1?;hfe#3zFOkU!&K28+PCCZ60%3Py_z1 z;+@l{zl(tfC|6y)eco5G6sN&n_Vj$-Tj9C$8Dy3Unh*1G)=W1hZ!Q18tcJ59ltsO>YMBJ^5oPIT3~| zMKljSMDGRJu`J8w7Q>x^^=c|<;HQlif3i~(E@YNjX`sAT5ITlpAlmyjRWQB`)o(@z zwNs z^u^QN6bT*b8|ywhnjzRP@t+qnBOF9rZMS{{!tR@q?2(iM-E^_XO179)x45>O<5S(I z4lO4fwMf9%HgCfOwy#eH@_I(ATXokjCy&t^nabslgBcW>yPdB7O7L9xH#bpEo(x%0 zxi7v|H_TU{d0PH8rFPXlSS9LS--eO~U87&X%nV9{t1rqm=x+nc4@q1p)1Je${JL8n zVzr;<^2eG1j4~!akK&g>xBN&mXYl(<_J?*Fn0GJYRu@_}QjI?9Mf`eftUOsC%^-T| zo~$ufmVivPoofu%5lyj-PzVZT ziBKI@HdP5&8CaVvv_2c_K#MDI+mnX}43jXg@_7{5IEz|Zl0^E+AMdAzJRq~2BLiFG zCjbQ(e;bVsi@qC_hERo)F-PAR)=;uXT3cysIv2>6ArBL$J{Pik%)z~nGy!0nY)Y99fH}k&LMjC(p36StU`Vr_p5xxsrxgmERbm=c~~U4JA%8PU2K-0`cctH4?{a+WQ+jZ-F((bBb{Y44a1 zvNi8v2eXw>c&Z7(A@rW*2wr0ZqC&jBrk{5j8e`;HL4N0aXYucTD;$eFlYZh!$#vL{ zCE$4Y@FXy=2tjt_Yl!bKKoUAN)wT)z!_>>;5?-akOG&yG;~)6Y`e=*VDFhH8SyGU|U2L z8w$dKB_g=Mz+1~gHVV^2jhj>zXYEsJSCvv}6|Wl}7DFi|(|k*Z@_g>iwT&rkWiLGy zVTS{Ekd)r(ga*LX6kAD0GaUHllk?%|90s;sQBp!5M%HoJmjTgnnFiK~P^N1FrIWn5 z20Ouxnz#6E)DWz_nV9vgoq9aG+gxlegUmu~&Gml0Fl3*Ced->nrdeteXAd<1=NTq+ zbC!ihVV{Vk6;JPo>aR1qicxVaItW(Rz1S14RfCctb)jy zbXveZnikw@3MFuvzO4ZFxmFo8;ElPydKPcTFx>nUR=${5_Ba1$bs->~I?ZfJ--qkD z|6&=NAaCFC)5sR29@OaU35S{ef>?;nbGn$?Wv*x`&`=+0#MmFzy)c8=y%QZxUar

L(9g?zq5@fiu;ZbXCiAx5d1-U2PZ&hEd zY*6a0%de7m{>PR5Y9>B=p2o-Zu=H$~u2kcV;)iaFBH3BiIN@KkKaXB;a9Gl8d3D4< z;OpWS>9L?>NGPE&g|`>P$e<%5`zdz8s^%Zz&x_kSe$+f^hg5H~1;yb=x4S>i2mZM_hY?EQ?!F5ET$33z-P5$z}Nhk?((X z_zhb6^4?X$>uGd6bagR#YfM8+>sHoYUJ>J>I0p!yHoGYcD6UZ(IQDU_{i?AlfTXr8>!O&1$(L6$c}tmt3WxkXyf7Oku3i+@879iy*o@Ej zKeVDB5xt+!(|WtSUrLn!itZ%f$MKWwf zv;1Da3p|_q8i6^U)UwDCE)U`>#*%-%x+Mieqh|oo8O}!H^L*%zuxQHr4MS)lc3!ib z+KR*cufU{=*!6o+w!o5vr0n6l2cT<_OVKbV#~&N#w>Q@zzxDRd;_h-D^bIMi0?!$3 zmCgl09V+289`tIHsFo?6VGD;9vt~9Vm6-p*Nm8_-;(}v)hrqy%E!E7d+nk5sW2j8k z*i0Dt@7ioDrl6CsxnhWAbDU3SMk5|%T#E;{f~4JqBl0Iez!YPVz#alJ6`lZaj>EnF z7F7@$34LXQ>v`-$Fj@_w6Y_suq_e@}@i2f&Y7IBfiaNEDFFJz$fXcO~<%fQFt>sAT zRToQ}jQ!N1|1U{b{RD^1+#L4gQManno%Lf0aFMJaT>konV3e^-P=GnBvUtpz|A_|G z;~x~7Tv}`k4yY}w+%6-)4wp_km-&+vCj^Z1%NA$^m}TQVHg6DL&_fKH?td0ZQrtaX_rG>s>UF=arxEk`y**W?2GY!)_8!IvuWhsa zs{ru@jQy`pn_kr9sSTr2hQNnsMlDl7cTTp#S+By70{k8n`BLY?YQMwdd)8m`Z6J0} zQ)|E@PhU;Iz_+xF4IwgNs}aKt=$bDh>P*0%(F)j*bj~-{ny$Bo=TL^-oD4N5y7*TU zVFnS=5u;K}J4%r!mQ*E>hz3c&$I0k+2^JnmgfoT?CT(2qJP3GGs@2D|82 zGbB2YSsRo3@i}PTDF$Hu853IvJKTI+ZO07Lq*H8k&QaI!W6PrFv~WHV=R`C1Zw3R$ zV4n4_#Ap)@G={gg0VafcjFr2aYx7(G>#7#hXKoJMYs~BY7nBUvAuol~5zDbX35dw&7By}h+{cNLcMd3Ms>l?>#0 z?dh$Ke?1wOQ|->Z@PgT3fJbn+PVYXW=%q*dz3!yLF?5)SzHD1Im$Ca9rG4xQ97Qd@ zFK5qKR>wI$w?PHlT_yhDhI<=uA1{hKahFb9bpn=$8E+P>Hv0o{!+CJ+CqGv%v?57e zVHfc04Sb1N6zH9s;Vzi>i|1{sL&mWC?mirlE4J2{qyMv1ch&swqs05$ z)A=A^_w%UoWP5ZF1iX!BxnEhd3LZ&;3fvcfbIMHR@N8Hx7cdQq%eD2oIlsaoL= zRz>I--eI|FB-*hu(;%EC$-9Y2cIg-#_*g6$)DQ4Gi~9lullkbBbP?Kjf5(1ey0oN0 zpuo9MZjw&O@V^_UVO#}UeC2Hzd2z+uB(C%NGh0*jhPx+F)(V&lHYooWjb6Zd?A|gx zZP*4=YLaVXNm)mMSKz&uOb@70RJ*L{m%Vg@0`2bS$e5dn1CcGw6cQHxoCnq!iNl_3v5gLw_MRj3!8 zcdUq`afmdq6t>tyuK#~>W+@3Hc?~P?7(l5B4nI2OC+q*zpu)8#36H?bu z=y;p49zxBHK2Vq9>rXV;AZj{kmtsZ{Kk$pQEI~yZ4KiD73IAc4XezYI<`HdVN>C`( zK=m7#w5Q>y2qv_+LGU_8tDXV}FH?kks%Rv^(*wGD1>&NQFIJHZcT3hb)jK3j+Z{{6 zP2HsA0xp_~>Za3Z77^P1E)YVs%eS{WIN;Cz{rXGMe?=3E6xi(k`6=tU!6_Q|_k#;) zf*I&HPo|5*4C=)8D2$F-j#Y{xxv;zHN-| zer0?3B|W-%<2qfCgP*(ejb_3u?>6i#u&sN8Yje9^YQomW&<_O3ECk)cc%JXq+y0}x zeBSOikUwBh{+7@CgYwG9^YL)=)$cTl>ihob!l6-sjd8~1BRjOuEueF)&z9gU#o}CK zu)42ELH@67hwtQ-Wq+^Hkjmc_%^K0(>;=qi2+QSgUZu5!2SuKeb>~WzM=r@YK47oD z>0Ue`_4;j+Syo$&htF!A0~e$F|E7Skll5Pg9$&I#X9rj1u*1!gtLT&cmV_fOcl3r- zI0U1G`gf7T?zbLm;wz4KQMot~(LkUQ3i((LqY}B!pijgrp|gB5*zp}jg#r7fWknd$ z!KCt)2x*R`POuB-_qYQ}SD{0g_@~}9@-ew) z`8~;bzbX}?kb9!@XL-gAi=TQ|%CAjF?fVBk`=fkSO0NQ&YKMMS!&-0sI!A84eF6AA z7v+B4d#$?n)_rQVueyFe{(7c3ehEFTcOd+hZn>p|&n7&C{9S-&LixO%&(B&+w6Ev$ z$E@DZ?w0;%}0%R0gawg6D#Y7vBENkmSH-9}A0Q64OR_qhn0nwE-#A z-#{~@gEDnC!#LckHO{PCvd=sk=z%>DU00h|wCNK=hr}VRvqs$I32?cFf<9xCkns@e zNO!d1v8Z`VWz~xIw>@X&gak^oU|TZiPJD~TZ%`Ci0%NB*xIV^A#Ki_y8Go!43kd;x zr}0j$i0!zJ*oaQqk(YUCt+RVE3j@aX8ZL2_>xMZ%u$uc*>Tyy=K(A109)HmXX>gWO zAQ^veNXQ^JY2wtdj1$csE=ZhnLFKk)fP$W>vpCuTL`)GU$6ybrRoDm1d2f!;#2-qH zhA#BXu*?XyhGc~r@0c(3;nr!i*7~#v)yJX$QcsWG4|0gRcS~`~G_8wa_;Rn-`u-Kf z!BQ;fY#IKroSI*993y|6K<|TrD;kVfKE*D-dqmcf{h3Khkkbf)(-EK^7XbWwphnBe zjN8zcarBn($STZmjD$+KFl$fg&5lKdU3SKTrCWw3b4va2anBz!_W(3>t_$iLTO9>x*#&itAe@Gy& zCi)IHR=&9AM|U0+rm>p$5+NWfH?3TZIRq?tUNlW5V{$w4?{SjQx%Y<)Uf2@n2>PEu zi3Nck;~dg`9!9yBIrZYN0sscsOBc_FgF;j$nDaa8pX>hoo1@!P!`ph=wS$>TVwBh} z3n7HY!58=%Mfo)}V7t@IaH$je)--$wSQmv>-23g_!YM|Ucd*>r&q($sT>WC&gF%`& zNNh5Et?87~2GinKhsbhch<8zds(ZnY3S!p-HgqeZ!WO zA^M_?SYOGW!QGjOIFzgP;U19})0;{`5C^I+@&2u&){(f~E`Dt!ga%J6)AVtoCtS%~ z{S{?DLsI=!rZWN%rQ-Z-!Ukew`uYt!fhC+`x$yhd#q;%JE*psCFQ_t4P%^A{k_!X_ zX7D54K1vPJfR*MRe8f4Hy%Khe z16#9w21dzB;0wZoDhb@4

w}XnYP4m+?782h7RN)oGF@OU=rtoadE?|#c`a5O${wnF?F;C1br#mc$AIMt+jix zj5(sGd5G|;))+m8(Jf_T-Ne#Unk?2#W(nI~37#?Klx!i^4K#?(gk8t>m92^dBi?YN z53o?gXngXic99So(&3`>;NwYX8k!vhYu+F|;p~9m4dgjioUKk+3u8}=@tnrFV54#5NGZ0q2Yi!b4^xwSv2&ieA9{@7;m>u2RFjgW*q z^P4r>jUSE;5)ks|qnbcLDKa#HMYeD{<_#rV2r8t(KGq%}d5{z8PNqkV=FR2TkZxGn z1aIS%=9lS*DAtgr=gi!zJ@CE<273sb(?H|A(8A|OO~JN?!LYGaAk2-%)sjlLUJJBD z4_oz0eG2G6abn*|r&BC%rdSVXPw!k+5QuTcw%sh{8N@L1&O$1v-odn{M9KhMfds@1 z?Id1oGWZ&Iv1Wbi%tdCh6J>xfkzdXQ*qAAC^42g*tT0kuWb{s4(&Ki1RvcjMI%n7H zB+4(R2q1OL80P}GtwwLFck09tMTLt4r@B^bw1j_HZ7dw$~J)q0h&ajO46 zKn3JZ^6$%tx4!T1kBgI=UtlaNXza?qZCW?l7Q(I>8oCLn3f6 zViY7*U}C(SHhlC)YA|wS2-nDfZW{=KI=kk-uW0+m3!>fODrjJX@C%tisolUcAcig2 z;bbqx{<4jQt%y=H+LeO00S1#lR@d9E_`HsE`v4D^Zb9ihxjcp#hTog^Dzck$ZF|yz z=JpI16`7CUg?pM&$Wn3K-2qpdTpy&5}>G%1v9oNh20I=i#I)0T;=>&X6i~QUJ81r%s9!h)tJP*!$dx1Uj_`*jijc}4zTrKRuhOnH)r{MU(;sucHooW zH*bIL4^fGHzRy?puf@~G+d_C^kJIGYaIX3N%Fa*ye!d=Gf4(nK`#&wma(2&^YWDQcJII5aw#|YzV2>&V^6a4b|A2K`LVyh zB=SG+F3lf34O97zYG>nE%F+(StyFH!p#6*<{N92XHO+ozJ7WV~@Y4=m;G(*=840yV zudLrzf9j^~1iru6e=L3?v6W{v?-{?E27a0znFI9cYK_OQ8T~_zB;(sRCdjV=)>nD#|qY|0dzC3GSu9sL|c?N`PKlZ?19Uy?b+hd_QXae8-Aoyr6?R;|{Ndl`v{<9VY(a@(pv*=l_1(hHp?o zBsl%E#zT19Iy(BaH*1jeKQj1z<0ltpM!#6at_WVgUh<2#ztPy=F^+a`KWTA1`1s_8 zUq-+4aeZ1}-H-k`v*Eh4F^Sp2H*)m%@!b7Kz2N$VAnyM#=RNq&f?uuH$1XkxEj$vG z{-4xq>Y*Vv)6JdFW^;6E@S4~6edMUOVft0#@x6NWer4Mo`9yg4i=g-aC;=$p>Ty5) zICJ@4b?zFa+6g?+Zh8=Q^Y%93EgnDB|DJ7qO+Wsh$8ll%vo6t1eeVr6gdkY|Up$y0 z|GwKBjU4m#`dx{-^HftBegDm)8$Mz;@)LJ!eJK8^TsyX+HGhaGayg=Qe!uhVZj6~X ztkpGAdUlWgJi)yqKTx~F!>Wj@bM zSkG|!*!Cyx@22}b7A+mceTEBuQ0~I8BW3k{h10{(+gr=C_%9cCcj_(sIW9e{+FsJ? zN!Nw-v%4X$1rp5n02dwTnO82|$k)$P-(P&sREdD$ zhck53UxK%jtA0-ztukA&u+_dPd`dzl-rBh0p{ph2uSQUR#j2-SSm@V__Zd-CWlqLJoIp-oKhWeu!=j ztPx5^_mdKv%D93ptr2TwOGRWqJ-4L7F1P>(f{KWU?l$p_S2yHS&{LLXeli#zvKixTJ(OI2DngjI8bE1PqQkxlvfbv} z4&9o(N>vwr*I!TSU8XGY3L5|WlDLBZqFb{h!z5I)Tc;x}sUt_qz#*gwxL*>>y|f&J zbJ7wzMnOypi3!5cKUn3v;_V0FU?($V>iVV|nA}|TTZ%*Fo9`-4S}_{yPWuYNcCR6w z!0{TOI=F*sIddhaHf}Uesf}rw*H>kQ@CTLwE{nkYMC!BjiQT++Oa#S%8UNlGacu9< z@!+nhxF2%m2EhzmL_wr!8s8B&_S2BV6OOgep?uKQvr1AWluViygWy8!cy`!ipP*4G zI$yDz9E)>7rV7YkwCjCqvMJm;1cf9{BK}*A0GY{{M2@5RBCfGLGwekPUodVeE=<6k zmJN3*SnJh=M_&18T@tg>p>IG<@L8+M>ojkfrPPrJV(&4xD{-i{KMLcyR^ zW(+Q)3a{%#$77CRP%sPt>~+6(L_ZvJhhhYCTi;@tzG2rW^tg8J8uwE9krTe7z%ZqU zgXBA3&h#0lEc=Z4*y{|FfuK8<2!b7C1gRta*^jRN+{o)9oTKvR=vrDcXu{(IuPQvG zto~pQ=9RdduUC2J2~)8pWrR-MA~0TrdJbDgtA4EauCBuIj=~)(T-L^=wZhfUAXz$J zoDtu{yPL1CaXetsWPKn~4&%661k%?^ikCKT1^=C0!&6-t!^9GiTPgWdID z8KItLW3MVYAsngN(P3_vFb$H)0$5);4>m&6T?`pW?F!gIyVWl92}Lhub{0A3-kS_s zl7mt*8I9X(_(mVE$$jYx@(F*T;oPITaDLif);OPOJ5*n_rza0%PQnOIgt)cg4WOPPuR=^Wyr^ICv&LV@fVGb;P2|std0n-s zURu6FlGmb3^xOv+Qr6=kg_%>ADy%gnaX!r{51`!^D1it-)PLkZhFfhb`ll(*b>^o& ztVSi=E;D?nJ5+;-^2xggqH%*JZjpijO}M>qscKi|TxL(>kSv5?mj9)09OgJ`(mRFp zwy`28@UB2jCxlDW3bbh7yk@6=$o)>*#Wv^;fgsZj#YT=b2DwKy*NF|U&c@2u$5Sa% zMR77h*n#}xC0uW<36NBz#)q8%^Sx}CCoZK5DIVZ}Xv=1x)OgXhKMbTyUZeF4+Yp?v zJDsSV4AsU_1Q$b{rG^(hwgC2g3AhV@!9mxWp{AAIm1AdWTGEbenV>H~nptyH75ppb8AomWlA@tP^X3sUc$&e8)!XSH6POqY% znj)01QD7*txeR-tsd3UilnOAv!{g$$A2VAPleB`o1<$3n$iFh9YBvt<$55DZWr)gI zqa3N~b-XXU_WAe1Pz2m|L1<1aoi15pnC`kOtCfqK>wv9e{7Y7ztZAYo%nYMNpHkL za@$gqBZm~fif%ffh7#Ap#8rSn_~C%Cc5}r8HR)F3%;<8{GCc%B_l;OHx*BYc=$vbK zrGzcDTfP7;C&pYwo0dFR5`Yj`WkdXU)NJ26^GV-D=wNdOI~2?PwMUsET(b6}##Ee< z^qRI=4$E8P?38D;py<)aaI|U$E!}hIx$R$wYay#IDYKN|!kPpEo0c`#QS;leApR=| z%L>+!Xp1f;LlmwgN%~y!(pAHfJHp63iTOvWBuBE^ql32hW~x|rc-+FbX*(o!&lIpB zN`bVxaBh;7r=pqyS$EEKAwim;Xs5B=x+qf@(VU>fQ?&VF=!>+9lEq$~G5X z#{rvM_w_FuWyQ@LNh!P$4LGnZz?Iie+ulYuCD|bxtmnn$9wnb-CAlSj3Ag84IN4Gj z;89Hp_8A%2Xcc@zz%;6ozjxRq1GFo2Y>*#!)QF!hnC}|PZjy|-(aj9AcF|6)Mm`F=id@v1)y}*8l%>u(;0`a zOi|wz!j);U>G+Ev4=s&u2vae-@uS~r2()4|Fj6O{?lndDCW?NONNeIcS-?Fda`ilo z=)n$TvE>XG(lw>>m`Xf$q2QSy3W`_N!(W5;lHCN9UMV zbA?!_m3!o02?DO;Q%z1=v?v%Rc31a%ij0g^ucH`~_Dz_6y{bFd)Z?hy#`L+6c~DFYD_9NsI=Q#)fC3x^Bs9H?>E(Qcc9 ztpjmPD{Lu#C51DrXC*;18Jp>zfO6a$!HXm6Xt9p<_IJ&&o}gBYu|u>Rq~-vDaetkB z24jnTZQBZL9(;Z1czJEi#|%~P*Jnay%8qinsDLC~otif8jr}%cN?Ju#1gZf8yrOp> zN4*{buhsyN*KjjcFe2V{t+?(=PR2(DUMahR7Uk{hHQBVVpTT^ff=VFcvhRd#+_~#y zTZ(1)itoi9zY1=yf04Ocn>M9TJzJ{8s72z+;Q|XX?GuwL>ik7>QShG1b@o1sIWJ8O z_+ZB2;e;lk&N5WsQVS{*?+(|I_-ONU7*%z-SIK8lJq$|4Ihj&um7e(I5U9`CayzC1 z@{1jQB6~GWBk`E9Yx)MYbh~J!S7GGjoc{%mld%ePo)ql5OZXd{OX`Q#f*~ObopSXg zY@llB5%qO`WX?l6jC(ZjNfjyoQkZqw@42&3mmjcbi|Wcqa1h4uZHJQrm6x7Q-9k^f zut2#=h%+3t6a8)6YeaJ=l7EAj;sZ5~5&>Vax3x@DRJ6b*GJkV@{ z>QuiBW-j=9)lq(>Y+Zz08xn;9xLwvIwEErFF$ccXDWRo8TDnip?olu>dBu38xp-Gi zBWNBWunTERoL~m>GS@K|1{cJ4sVcT8A126e7IUkHMw|Ci)cj=Ry0BAMA5ru)nYIH$ zbFRJWarr%PwI&riCEK;?=%q6;yz+xu{d@yDTQ3WP6mXzPWBt`;Y~N^dP>{L*(CjSP zVX}yho_DsfRH~LNd0D{+UN;y5fx`t=GS_Y%AU#~;;itk7)6Dfr#FmGn=fBv&1YAf4 zOjeD&I)NcCVkE`&9MPssL<@*-n19xo-$?t8_^7cL&4ERW`hNjTBeUEtxRqy`OSJAK zI~_+{tHS_g^qUvtQ`z%~u9DGuWYSnL+9r69ms zyqy!B6@tZ{Pg#9zrxq0Uh)c~SbfLgEyQZ_)Xq)Hw=v>-Zdt0j4ekN{^a3IOs?4$x{ zosP)o@S>LBdysXmY_KnCP76kTTxgw+EfdXJkaeZij9?Ek;ihxcA6e77wGPe9U~#6> z9Km(zQbt#Ll6RyQ-k&zv97$46-peK&xW3hXd1E>Ct~_a}Pr;abOiH%OJ8V;@aqnYD z0@|Fm@HFCDr9zmi{le!+7y)#I?+f^J`tUkiSEAld{AR z#G7>mmPreJ$O~Lh5BDLYkn;wB%URcjKe!3+J#I(k(pJ~dT~P3wf=_j%q01r*6GRI+ zg$hurX_aL>sK^MU!g1@il0N7|C|zghJwiCrIw_|x{?yk{GBbJ0iIP+V@b|?JbhnOg z5v`pvQBaz+s+Gc$IaQ)I;KGXG92X>!zG&>bp5>-XS_g6i zjH&v|L5cW;ZWB_cn!p7TvT@;dX`3_T4)xrWsk6>)H=Zz=969!BbG9q*y48K8*Q{(v zMOMH;FacqfFp<`6@wHt`H<=&&{y;BfN`_$J`ee)MZL#;Vh2V^9Ey(cl12pHQuz~}k zb?4m&EL_FVDly=`pwl>X*P&V`8Ft3h$2;U6siQdPzzE=U2_~ry-R<#m45GDPP7@=v z*_3(Wb{C^WFO?I^S?}>NZH{CaEf&KA(KDH9{EMHEFZk$sKJ8p+F# z$?Y<+QtR%#$?44dwZ?1EmW?OO)sxK7Xc|is{-VZ<)~>rEX6?6S@9_`3Mo&24eVeWx8d8WyeX*!at)kxyYc6Teb>Ou`A~_^k8fPg*zK`>hqA zhaL7laN*`vx2ZSdoN(uM{1a)7xYivV_>R~dJ2mf7uT_MgxYxeu z+t$Lot46lqKGC``j|r*j(pt84=@Y@1HNQo6+ST)Q-tp95&97Yz;Frh1hLn|G*V}1P z(w99d5@^(x#c*4>__D6sCjTgXZI7eXF8pGE0Anc~WzOnJYiFOR0bWISFJpPRa4{R_ z$lB)WAJ2E<93-EWR5e9+_NL=I^uHf+V)O{6)X+4&RaZQDCome?gzZ$Qk&=CYdbLL7|z4F9NBY z!y1Y3fSioVOyL)$*Ja08`jg1og}U5L&newGo_nVrgIktFoONgu+12_G5O`X z&vms)p(ixCH%S+}W9Frx^u^{}ZkpjDTPTx&EDtr0-EM9iYvFWMTq0>Pu8qbMMU+(~ zYe16lnkBaN1sKc%0SnQ(G47ywk{Wl|nu@nkNn}e5TqUJZ>)OHlRTN=DJ6x76T(p!) zc{(@69haT<9y|p<{iXfEf)Q#~R~Rd=$v&7>hZNVYejP`ief_X4v~*rX|g%wvdnFr{=D(5m6wtgZ)jJqBd$e&oYdmB_tv)1 z=EAjaIRu-!ePtgqB|&MVMRn!9mUArmnqMZndfhHFzbgO9uQloUO9q5B5VaaEI&?iB z7h2ZvN?1-@q)}Ufudb)fJ)W0*4*x3GBbQGzF+p2kr8*~Ox?Z-PspPzO4#I7&?PcfM zRtFPPhpv}>)+lU#t+6-Cb&izhHRhB5?R8x*^M2Yad(Ms7v@X8-p>?&Qb?dusK6y`b z&U50;Js$Mpj7^6lS~tuH(zl%k;z=r+PiK=0&10*=sW1W+9~o@H#NWQ{gwA?z2}Iq)3ocz zsXh9#aM>sRPqc1*$G~244hPuZp7RiMkMgCc-L*{Ij_L_y>PL4^7tQl&`uiNU@KsiC z$DSkN5+9SHk;{;GvPE%(-Z5T=louc8ESLaxY zFllwYw)HWU#Nj7>CWIg~1gY(!6?fdOT{EowS6lwh9^|Y*jl*HDtr&`)P|~_}&5(;R z?;ER9-2?mR6}TYr$||nwj?|OZ-gV@_V6(11_ngT}LprLhdU8-*N1>IOxzW%{iN^^o^Xx}mCC<3{*)mqT9tL^J)SQ! zC;IBLR&Q=nmx-CsPzO&(T9;$@oZ564j@gma`9sP?s_iFDeJ(!z(EXyhusA&4utQZz zlHYqQv}hI~GNb&8FY~(YDPSSF-?t0rS>g0qsZ2vvDU)MxWqJjw zY^LZYzu1Q?`O9T7vQ)Mjk~X$~#P|ETVk>#uY2^LTji@utO`|j?g5+u^cHXZ|PDhpV z1B@|Q<2F^V3l|CyEzFJLY8pPJ3OTq`D_IptmH*?fN$V<%X^kf!2{cdWOlmyP@nQ9M zIeX$s>vq&EG52{F$z4TS$rl{7P(S3y%vO89-j zMavY2$rACReu~%_g_LZ-7(Zj=TQ&v|G5Vo~DNrrZBJX*jhRN5Q>Xu6{hH%cj zGX@U~S`W=b#^#YHfTaosK~1acL&=sa0Z$Vx-tm;Bio}Rn;T&C-qgbqoC>dDE*W+|q zdTQ`jkY-tzx>8}?KcONm>UNjzfiNjL^5FJsHs{mf7OIw#U=uffLW5l)2 zd$fu=mB-W#TMmMtC2Cu!15MF7t5&|uwHB?yn!0(QJ<1R)b%my-yAN&T&6MdXb%!8C zNS#b}ZAn#-)#iB>#?za5@aFxE`FrE=5N0Rb&qddA{l@mE0y zu5W<+(0xw1$2??+aN&)TYuw18B=yvt>^wkvBPs9t_T#agbg@{7hW;@rJizxUpw#&3 zP}dvF>50~~IY>z@#sa4avoKIRK zu62#!>hez;dCEj8Ey22!CV8>qFIsCa?1EiTFadOE%oH3t&S15pXwurYX$xWXc(blM zabN%)+6Bd7F-n+9>q5OY$b>^zBK?5jsPY}aTt0%YgrfExsOHyMHm4mKZOUx9nhth$ zXsk}JY1P~)Ch+v{4pEd$5z9I6*Q)%-c&OhCc+#T1hXp&dc{8ocIt2_=rn6CwrEBHm zk^5Os^OX4L+M9HUO`5~z)YUxIMa#B()9QH&h%XHX;VlTE>;Pvp5;(emWB0gi$L>QW zYmHV>GZ5r~LxX=((#|C;vU*pa0wEEn&1MC9A)rJSDIdL2XghHpa(`_wAw4VqIRzo3#deP7Ykr-Mz?cyI zhA}Dt07%RjCMU*R;Q(o!x+iejU7mh@ZBKWWg*17hB|k4(edk9Ol%D2`ALnggS*=Q*vzP=hz?gs>wKJk2q|KU^O}B_yXZYaXxe;;)_nKDx|N42 zlHEVSKVLF`)KYveXmg$c=5t?6QUE3Hg{Wqj_ zojoG$AHmwXp(&e#HW)U_q>jOWEJO=^$kS!X-7B2okCMPIya0J4TI;(H+G9H(?5&`_ z8GBdglya$XlGc3Z=k=J{1S{um?JCK}hMUc=`~G5A$@n%;nmR3WE3_#2d6BREqCP9M zKdTVKXN@FDSW4u}Ikt$C)}!Zux9)?+p^FV2E68%2&^5%b*TuO}JUznEZd13`A=;{g zbm66*0b1K}qx}LqbmhV)7*rmh8LEY&=!uUr;XhPf%Uhwj=h z?SQgBq=kAdlT)*|KqEW}2ih1aDIuE!{~%hjIU3{O9nuwxE0}eHf!Erc?#nc-8{e8S zGTiiK<>_2c{^;t|$l!Xlxpk3jp|!VsiAl~a7$!a#vW)3$xMQt-;26#|5Wnu4(I^u< z{v*&1$AJ#mu6i*L;~FslZYYt~&O80M8Y;!XVVrEi( zmMf}b3YSu$PDn3+)_K13QY3tTDA}|slo0|PGc;lzU^`d67B60f>U`GFqM)xU zD<&nZ4iA(oP%sd!!k8+IO?{>2Ij4rP?9;%}Rq2V&DLhS1`C6z5TN*rjUCGQ}-8{!- zCS7_BCYk3sp0}P0%YNWCfN0-nJ-F+}-2IW1}#Du#@Odg=C z6}yLtt!@a?s&{@6j88lf*2n?De?zTqvE{v!5+ALr?$^#S-?D#Q<$XohY)U2i;axcC z(D}N_Zzo5`$iy1AUgHO$FKS*lOl>k<$`Gx+eFy5MGzc?stSmJ@U^0i z(D17M_o_o`rftkUX<;6+Q!%8v!C5@uc_ncGUk`1q8xFKjJKy_#t6EpIpX$K4GvfZ@ zhi0j$Wp!WheZIgL*|f3~p=-MWDYH5=_qeMs&D7rt@oAW>b#dVfNFb%orJWy|RVRE# z6w=ridAVu=Kqy%j-if2jA#jfu)@$9!T6AS6FU%)zCe*Jqs4{Kd`^9HOOEY3gdVwDzM_t*xCA~>mMVH!K|~R-KvLfOZN$22t36g9vQrT1(T?$FS#>L~ zx`pFraX8>pc@fN@s+Gn(;Z2@YGr@ zqzYfW)&gO7CZtOMgZ8V|6qDAy<(|DQL~SbBn5ipF@53}s?rZkiBjQ@GM66cmV(U= z`%cy^UO@77J?pNUeA*)H_+&Tm)5r%Cx1Htbb3E@e<^wIYl$r*Rp*qM8S%hh*dQ+Wx zj~BizR*%|RT`o*v%n8p8x=&juh{)NIeU4Z3rD2nrNR#M{LjeMNMCaVFYEJjDA zzT>&v3gI!N+VDJF1v$18vD?;s;r(r$O`5V3-cOd%{FDw^=M+Md2yMYR*W0{Tp23;+ zLjt^9slW{`rJ+@aR*NWOm5ICV{0Jtu-_|D)X7TGf=ZW3|Y*(~;9x`5+rTxo%Nhjk5 zJ4YBKF-K$DNsBpT2|_brmdmWuF^nK^(yF00&bdDNt$6rHU>g}``5^)&l&5KQn8O=? zCR&^e>(2Y6EY=SjU44_tL=4mGJ(DB>Qw)M8%yc`lR*tEaY~?Y4F7^?>^wi`kKl~5w z@pd|DK2xyF!s&f=&0e%RESLmTVi^A?TC-m3u_R~%JvBUr>B#EeT|S<^jpPx?xIAZaDZ5jm9jIv!(k;=to%)zM_96k( z5(gmTy#U7`dE}%p!B?2+R&i~OxOL-Qk_si*freX0$@b}?4WYKwgQ1&y>f=tEBYz-C zpzm;kRB;rjUv?8s0=>t_de5-qTIvvDek{=f$y`$#9A<6$v|snC*P=_XjA`A{u2@t( zz+K5!GK4J0RMy3Q-8Byx?JcN2^Oz!ee;ScBY_uBZRJ@CY)(P)hab62AT00;jHp^6N zvUDrR1mEC`4&5uRWshzr$LVIAXbpGD>U`?AHC-280Ap22Yr6>hmO{f*J1kWXa21Yf z0d0NDMThRF*VY3|_;*HOp_Hai5aF{@ZA`Vof`}ISkcrzcN&qUL4$6Ymi}Di$C}0}@ zNm}Yd9?Wq1+JHVHURFhryzIft_o@Cii;3Rw6!)DatG(T0rEi6XPWNKuR;%B_vwA=IBx; zpe`7@>9$Uhfv1C#<&2U{Hs{W{QF0g|bKUt>YKxFT#j@dit9$%U@3C%o)oWex_BgR{ z z7~x9j@SKfUYYl8iy*4QCB_3tVLeZ`(BMA@@Ok@>i!YAd2cFseNnOf{|?Z{2lG)BaE zywqq+RkWap)~0ux+s)X5+PsvS!!W?bit-chQLvNHMQhBBBJN|~W5H}B5{9R&$$DsJ ztsuH`t>tSmN3eU`cEz<`S+uJ8Fqc;DL?fLS`i3Wn#;6R^cn^ zUZ^Vy_rUY)XndqT?tJHmo~EIzmzM~DW@R_+wJS`Rsn>OFoP+StE5#A&s0wFJ0Sk{q zEwJ*FNNda<-6yTv><94KDweW(6B$4s#R8r=9#|kP#kGtjs;t^=#9Z$IH!=_pAKIvJ=+j((Y!nZE>m-6r)bj zWpj4KwN5n}Sg+FiVNB2>+T~1vrLhfJYW7)*)`IUR#xw=9@U{4z0%PH5Vc3?5N6%=a z)iXZkKGTpUuUbDUKie}0C+{D-;%#sy>0ntrP7+{$4zC-yX3JofF!%WA`?bbOBNGty zQP%c5DPK28l<`>#NTh|hHY%r3z>>;Kz#9Z`NdPtJ9e{wJz0bAv7I&qg*lI4T{H(LU2^$xAW?I<=3PI`v z<#%z&N;gPmWTt*_)OyPZ!8qb@KXD= z$NRB!R@r#~2x!Ut05!P-AbinM?F6YZPMWAElH5b7FkE2*E;O|2xMpN` zp6P-6wO&vurZF8cUnKj8yHO^)f!f;IHt)~JBksB}z0P|9!BQn`9d428@ry#PsPmPd z@|d(}?+a$@1xn!wh#Dfnf=b;J=qhVYpZ1LSwv!sU62WXmpe`J(+bUh>Gg|4Ha_V)h zbEB48`jT*5T7S*gyd3e0avkU6#-6~6=_-y3rUUD55TW89gBP+~b0|y6L$k!r$20Gp zpmfD_GH*l4_BVi2F;v)BNL{pmb0S}Hj)_T;u!R8YY7JmVOF0T}exY8=5P81Y$oCAf zg>Kd$AStiJIKPfewD#`zRj*Gl1TIM^hSRI3<~>5b071J}em-8iAG3RXSZ?(e@ik+l zz@?i8O}6lSyvf&gy)7$>C$J1lnSR(Dc^NZ4_uLrZN$a-QiN-EfqNjs}1kcNI04b#c zPG3q|clAcW?s_7oL&h@k_D$h0mVqmZ=eioZiq^zA)_ommdYUd-O|73p7iOwCZTuF_ z!%2(v+G)dQ_CS;MoMrP)B)IEp9|f1RFgHreQNAZNG^YX~4%#(Ff?2>bn0}ARd%W}G zlm$o zEKRe5S#xRYQpVJJ{0)5*anAX4n(w-mtg7TUN37#&vZGdmRvqLPL+FV%=T!Atbc7~s zQlflI$b;XSNw?*Sl?06Tk55`Xn=}?TLu2a8pe}6l5X$ANJRPmur?cLQFM($Tv`XEH z-qTv*@RXRa?~DT(m{gplJz^d*@0|zm1kANHu4{$vl)3fJZQ6-b->;>^&=#A30rE`j zOS7nWXUgiS>b2%QcnS{5V`lRhg-@G9)tTO0{Cb&(+*{y?48E(?S&c>cJ7;BcUFbv3 zRFf|z45Q%=wG(y{8c~_%w>pQa`s5ey*H)ld_1Z2Qm`M`YXJv9^bD9A*E?U)xOc7rr z*j%j#f3m*A!Ui!+g=$*M`?WbyZYa!WhHY%tDhcou(n}IwwwO)gHI*zbD?H z64R%Fb+#OI3c|@5w}0Hu{l*ey2Kkoou8VSta>*#7U8a+bUPAVMW4)FjfN9-?BvA*M z{lsFjSKg2&w8x8ThCmL%hnZX@h!KMx0+Vtuyn$b*9P-2f8|$?bL?kMWZGsc6h7`Oh zKg@VnBrRI=el5#*U8d5_(`9mqKC3=KTIfT@&~ql{MnTEX%GyA?aWY zQbg;nK83QZ%D%bbn}UjZ1_nGg%Ks5c0tAEjq`Z z8^s+Cx@2RXc2B-IKhpPAE@4#)Cn*Hvx^KhD(w;VTBrOR@aS`~ zF4*tJiATxuP728!)YdU+%{TQD$bnw3P%dsZ%@i=>ZPZR=c4fcLx?gMLL2?f26n2>( z7tp5BV(NKffb&H*2bVGH?}HwvEJeL~i_D)Ra(2mmDRIAPd_G@^a|Wqcv7Y>p#`2H= zU?a!Y)koVFo^L%D)*0i}cB?mv*)1fYk;135j$Hh5O1^GYw*p3uuJa|v2_Z>sn81$= z2%3>R&|ARAy+~`swSj^iZDBA$>j2Ne(Az7EhKQ>?3`Z;ElTf3EGC;*9JF2CsmBurW-&Fm5Z^aTY|i#8o>bq#Wg2V@m5fVX^IP_GSk idmPXm&+)x%8eMW}v!wha@NfV2zy2TV_cqLSZvg Date: Mon, 4 Mar 2024 17:59:07 +0000 Subject: [PATCH 03/15] update code and add unit test for _report_to_csv --- .../io/genomio/genome_metadata/extend.py | 27 +++++++------- .../tests/genome_metadata/test_extend.py | 35 ++++++++++++++++++- .../test_extend/assembly_report.txt | 6 ++++ .../test_extend/no_metadata_report.txt | 1 + 4 files changed, 54 insertions(+), 15 deletions(-) create mode 100644 src/python/tests/genome_metadata/test_extend/assembly_report.txt create mode 100644 src/python/tests/genome_metadata/test_extend/no_metadata_report.txt diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index e8fdce165..f0961281e 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -25,7 +25,7 @@ from os import PathLike from pathlib import Path import re -from typing import List, Tuple, Optional +from typing import Dict, List, Tuple, Optional from Bio import SeqIO @@ -74,30 +74,29 @@ def get_gbff_regions(gbff_path: Optional[PathLike]) -> List[str]: return seq_regions -def _report_to_csv(report_path: Path) -> Tuple[str, dict]: - """Returns an assembly report as a CSV string, and the head metadata as a dict. +def _report_to_csv(report_path: Path) -> Tuple[str, Dict]: + """Returns the assembly report as a CSV string, and its metadata as a dictionary. Args: - report_path: Path to a `seq_region` file from INSDC/RefSeq. - + report_path: Path to the assembly report file from INSDC/RefSeq. """ data = "" metadata = {} with report_path.open("r") as report: - last_head = "" + prev_line = "" for line in report: - # Ignore header if line.startswith("#"): # Get metadata values if possible match = re.search("# (.+?): (.+?)$", line) if match: - metadata[match.group(1)] = match.group(2) - last_head = line - continue - if last_head: - data += last_head[2:].strip() + "\n" - last_head = "" - data += line + metadata[match.group(1)] = match.group(2).strip() + prev_line = line + else: + if prev_line: + # Add previous line as header of CSV string, removing the initial "# " + data += prev_line[2:].strip() + "\n" + prev_line = "" + data += line return data, metadata diff --git a/src/python/tests/genome_metadata/test_extend.py b/src/python/tests/genome_metadata/test_extend.py index 63c30d400..3493f8263 100644 --- a/src/python/tests/genome_metadata/test_extend.py +++ b/src/python/tests/genome_metadata/test_extend.py @@ -20,8 +20,9 @@ """ from pathlib import Path -from typing import List +from typing import Dict, List, Tuple +from deepdiff import DeepDiff import pytest from ensembl.io.genomio.genome_metadata import extend @@ -48,3 +49,35 @@ def test_get_gbff_regions(data_dir: Path, gbff_file: str, output: List[str]) -> gbff_path = None result = extend.get_gbff_regions(gbff_path) assert result == output + + +@pytest.mark.parametrize( + "report_file, output", + [ + pytest.param( + "no_metadata_report.txt", + ("1\t1\tChromosome\tCP089274.1\t5935961", {}), + id="no_metadata_report.txt", + ), + pytest.param( + "assembly_report.txt", + ( + "Name\tMolecule\tLocation\tGenBank-Accn\tLength\n1\t1\tChromosome\tCP089274.1\t5935961", + {"Assembly name": "ASM2392016v1", "Organism name": "Curvularia clavata", "Taxid": "95742"}, + ), + id="assembly_report.txt" + ), + ], +) +def test_report_to_csv(data_dir: Path, report_file: str, output: Tuple[str, Dict]) -> None: + """Tests the `extend._report_to_csv` class. + + Args: + data_dir: Module's test data directory fixture. + report_file: TODO + output: TODO + """ + report_path = data_dir / report_file + result = extend._report_to_csv(report_path) + assert result[0] == output[0] + assert not DeepDiff(result[1], output[1]) diff --git a/src/python/tests/genome_metadata/test_extend/assembly_report.txt b/src/python/tests/genome_metadata/test_extend/assembly_report.txt new file mode 100644 index 000000000..0be772d18 --- /dev/null +++ b/src/python/tests/genome_metadata/test_extend/assembly_report.txt @@ -0,0 +1,6 @@ +# Assembly name: ASM2392016v1 +# Organism name: Curvularia clavata +# Taxid: 95742 +# +# Name Molecule Location GenBank-Accn Length +1 1 Chromosome CP089274.1 5935961 \ No newline at end of file diff --git a/src/python/tests/genome_metadata/test_extend/no_metadata_report.txt b/src/python/tests/genome_metadata/test_extend/no_metadata_report.txt new file mode 100644 index 000000000..fbe891174 --- /dev/null +++ b/src/python/tests/genome_metadata/test_extend/no_metadata_report.txt @@ -0,0 +1 @@ +1 1 Chromosome CP089274.1 5935961 \ No newline at end of file From 66144aab52cde45b4a62957115d3abbe77a24308 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Mon, 4 Mar 2024 18:18:19 +0000 Subject: [PATCH 04/15] update get_report_regions_names and add unit test --- .../io/genomio/genome_metadata/extend.py | 9 ++--- .../tests/genome_metadata/test_extend.py | 39 ++++++++++++++++--- .../test_extend/assembly_report.txt | 6 ++- .../test_extend/no_metadata_report.txt | 2 +- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index f0961281e..1defa51da 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -101,23 +101,20 @@ def _report_to_csv(report_path: Path) -> Tuple[str, Dict]: def get_report_regions_names(report_path: Path) -> List[Tuple[str, str]]: - """Returns a list of `seq_region` names from the report file. + """Returns a list of GenBank-RefSeq `seq_region` names from the assembly report file. Args: - report_path: Path to the seq_regions report from INSDC/RefSeq. + report_path: Path to the assembly report file from INSDC/RefSeq. """ # Get the report in a CSV format, easier to manipulate report_csv, _ = _report_to_csv(report_path) - - # Feed the csv string to the CSV reader + # Feed the CSV string to the CSV reader reader = csv.DictReader(report_csv.splitlines(), delimiter="\t", quoting=csv.QUOTE_NONE) - # Create the seq_regions seq_regions = [] for row in reader: refseq_name = row["RefSeq-Accn"] genbank_name = row["GenBank-Accn"] - if refseq_name == "na": refseq_name = "" if genbank_name == "na": diff --git a/src/python/tests/genome_metadata/test_extend.py b/src/python/tests/genome_metadata/test_extend.py index 3493f8263..33ae917ab 100644 --- a/src/python/tests/genome_metadata/test_extend.py +++ b/src/python/tests/genome_metadata/test_extend.py @@ -51,21 +51,26 @@ def test_get_gbff_regions(data_dir: Path, gbff_file: str, output: List[str]) -> assert result == output +@pytest.mark.dependency(name="test_report_to_csv") @pytest.mark.parametrize( "report_file, output", [ pytest.param( "no_metadata_report.txt", - ("1\t1\tChromosome\tCP089274.1\t5935961", {}), + ("1\t1\tChromosome\tCP089274.1\tRefChr0001.1\t5935961", {}), id="no_metadata_report.txt", ), pytest.param( "assembly_report.txt", ( - "Name\tMolecule\tLocation\tGenBank-Accn\tLength\n1\t1\tChromosome\tCP089274.1\t5935961", + ( + "Name\tMolecule\tLocation\tGenBank-Accn\tRefSeq-Accn\tLength\n" + "1\t1\tChromosome\tCP089274.1\tRefChr0001.1\t5935961\n" + "2\t2\tChromosome\tCP089275.1\tna\t5880203\n3\t3\tChromosome\tna\tRefChr0002.1\t5901247" + ), {"Assembly name": "ASM2392016v1", "Organism name": "Curvularia clavata", "Taxid": "95742"}, ), - id="assembly_report.txt" + id="assembly_report.txt", ), ], ) @@ -74,10 +79,34 @@ def test_report_to_csv(data_dir: Path, report_file: str, output: Tuple[str, Dict Args: data_dir: Module's test data directory fixture. - report_file: TODO - output: TODO + report_file: Assembly report file name. + output: Expected returned value for the given assembly report file. """ report_path = data_dir / report_file result = extend._report_to_csv(report_path) assert result[0] == output[0] assert not DeepDiff(result[1], output[1]) + + +@pytest.mark.dependency(depends=["test_report_to_csv"]) +@pytest.mark.parametrize( + "report_file, output", + [ + pytest.param( + "assembly_report.txt", + [("CP089274", "RefChr0001"), ("CP089275", ""), ("", "RefChr0002")], + id="assembly_report.txt", + ), + ], +) +def test_get_report_regions_names(data_dir: Path, report_file: str, output: List[Tuple[str, str]]) -> None: + """Tests the `extend.get_report_regions_names` class. + + Args: + data_dir: Module's test data directory fixture. + report_file: Assembly report file name. + output: Expected returned value for the given assembly report file. + """ + report_path = data_dir / report_file + result = extend.get_report_regions_names(report_path) + assert result == output diff --git a/src/python/tests/genome_metadata/test_extend/assembly_report.txt b/src/python/tests/genome_metadata/test_extend/assembly_report.txt index 0be772d18..2db35831e 100644 --- a/src/python/tests/genome_metadata/test_extend/assembly_report.txt +++ b/src/python/tests/genome_metadata/test_extend/assembly_report.txt @@ -2,5 +2,7 @@ # Organism name: Curvularia clavata # Taxid: 95742 # -# Name Molecule Location GenBank-Accn Length -1 1 Chromosome CP089274.1 5935961 \ No newline at end of file +# Name Molecule Location GenBank-Accn RefSeq-Accn Length +1 1 Chromosome CP089274.1 RefChr0001.1 5935961 +2 2 Chromosome CP089275.1 na 5880203 +3 3 Chromosome na RefChr0002.1 5901247 \ No newline at end of file diff --git a/src/python/tests/genome_metadata/test_extend/no_metadata_report.txt b/src/python/tests/genome_metadata/test_extend/no_metadata_report.txt index fbe891174..9ad47af49 100644 --- a/src/python/tests/genome_metadata/test_extend/no_metadata_report.txt +++ b/src/python/tests/genome_metadata/test_extend/no_metadata_report.txt @@ -1 +1 @@ -1 1 Chromosome CP089274.1 5935961 \ No newline at end of file +1 1 Chromosome CP089274.1 RefChr0001.1 5935961 \ No newline at end of file From 8a32bce0b0977e189d7216eb9cfe230b2afeea62 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Tue, 5 Mar 2024 12:43:44 +0000 Subject: [PATCH 05/15] bugfix: tuple order is gb,rf + code update --- .../ensembl/io/genomio/genome_metadata/extend.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index 1defa51da..b9c0d0083 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -46,15 +46,14 @@ def get_additions(report_path: Path, gbff_path: Optional[Path]) -> List[str]: """ gbff_regions = set(get_gbff_regions(gbff_path)) report_regions = get_report_regions_names(report_path) - additions = [] - for rep_seq in report_regions: - (rs_seq, gb_seq) = rep_seq - if rs_seq not in gbff_regions and gb_seq not in gbff_regions: - if rs_seq: - additions.append(rs_seq) + for seq_region_name in report_regions: + (genbank_seq_name, refseq_seq_name) = seq_region_name + if genbank_seq_name not in gbff_regions and refseq_seq_name not in gbff_regions: + if refseq_seq_name: + additions.append(refseq_seq_name) else: - additions.append(gb_seq) + additions.append(genbank_seq_name) additions = sorted(additions) return additions From 0f4cc33bb86c85f14ccb76dcd0fa69d80e8fdfa9 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Tue, 5 Mar 2024 12:45:01 +0000 Subject: [PATCH 06/15] update test data files --- .../tests/genome_metadata/test_extend.py | 4 +- .../test_extend/assembly_report.txt | 6 +- .../genome_metadata/test_extend/input.gbff.gz | Bin 25564 -> 0 bytes .../test_extend/sequences.gbff | 87 ++++++++++++++++++ 4 files changed, 92 insertions(+), 5 deletions(-) delete mode 100644 src/python/tests/genome_metadata/test_extend/input.gbff.gz create mode 100644 src/python/tests/genome_metadata/test_extend/sequences.gbff diff --git a/src/python/tests/genome_metadata/test_extend.py b/src/python/tests/genome_metadata/test_extend.py index 33ae917ab..723a83f59 100644 --- a/src/python/tests/genome_metadata/test_extend.py +++ b/src/python/tests/genome_metadata/test_extend.py @@ -32,7 +32,7 @@ "gbff_file, output", [ pytest.param("", [], id="No GBFF file"), - ("input.gbff.gz", ["LR605957", "LR605956"]), + ("sequences.gbff", ["CP089274", "CP089275"]), ], ) def test_get_gbff_regions(data_dir: Path, gbff_file: str, output: List[str]) -> None: @@ -68,7 +68,7 @@ def test_get_gbff_regions(data_dir: Path, gbff_file: str, output: List[str]) -> "1\t1\tChromosome\tCP089274.1\tRefChr0001.1\t5935961\n" "2\t2\tChromosome\tCP089275.1\tna\t5880203\n3\t3\tChromosome\tna\tRefChr0002.1\t5901247" ), - {"Assembly name": "ASM2392016v1", "Organism name": "Curvularia clavata", "Taxid": "95742"}, + {"Assembly name": "GCA000002765", "Organism name": "Plasmodium falciparum", "Taxid": "36329"}, ), id="assembly_report.txt", ), diff --git a/src/python/tests/genome_metadata/test_extend/assembly_report.txt b/src/python/tests/genome_metadata/test_extend/assembly_report.txt index 2db35831e..ed0234104 100644 --- a/src/python/tests/genome_metadata/test_extend/assembly_report.txt +++ b/src/python/tests/genome_metadata/test_extend/assembly_report.txt @@ -1,6 +1,6 @@ -# Assembly name: ASM2392016v1 -# Organism name: Curvularia clavata -# Taxid: 95742 +# Assembly name: GCA000002765 +# Organism name: Plasmodium falciparum +# Taxid: 36329 # # Name Molecule Location GenBank-Accn RefSeq-Accn Length 1 1 Chromosome CP089274.1 RefChr0001.1 5935961 diff --git a/src/python/tests/genome_metadata/test_extend/input.gbff.gz b/src/python/tests/genome_metadata/test_extend/input.gbff.gz deleted file mode 100644 index 67cbd2b1265aad406917d0280c5560c75b78334a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 25564 zcmXteV{|6W?{;n5w%e_@w%e_3+qP|UYumPM+cxjw-RJi|@7J7}NQBY7^ zLmnnTpbFLoPBwPN7A`h=7Pj^-&J1RTrlvqw+BSBZBMsj-dV)tvuthZzf9&WTfmKn; zt5u`v8qnUT*Q}R#dsbTG+uMBBO9%wv4G{-VrLTt{rA4T5H57~5$psj< zlT?~Jh*f)h%(~+@3A3TJ-Rv!t-a;lIQqWU@XuLKqVoiK-2rH>~Qw! z-f!vVwpyiEEM?;h$HeVt!}=EOJJ+XYS&Q|G@l(`}u%$I`iQxMiXL<=@;L)RVa5s1V zy>b9HOD}Kj?#baPNcm^uYH6_gA6Vm1u*cIN}IZ#GcUfDd24$|wI`2zI$ zY;J2GEL4B$Ggal~pch>}qK(Svid!=!N-y1gH-mfDSy)B5n z&VX%WH0~F5#oHYL$@am%`02&Lj@8v|TcCa0O2SvdDwaJ@1XUj(tg6)5!HsG>TyMxK zM!C1HO<&KYm+h?WYw;(cZBh4jez%D&I|90IlviQ7;?>olH#+IWI<++a?vd&uRC}v< zZM{*+TRNoiu<-E%)Hv)cnYhDW?;fw0^RcE{_KiHoin^snO8xnk*pMiXU zM8xSE)Oj>50(D+VIftaaLY`|j8m7aT42*uwFb==>9(~d zf3?dClP=5Vc%gy6d9WEO8f>~t>*_nK?dda7qABvC;Cn{In$%I}oU=3@N~?>iGrA*5 zD!);Wi)=SKQayg#UVzwSu*Fc3eF7;ks0zu5{w}Zd?3fYNI1VY>@b8ULpSc*1DGRY) zZS2a3G^OwFz?-YIknnjmr0Qzpz{*Vt(3{^)1Mzhj@QC3?tlp~ z`|YWygivGd7wiiEVHP9cpl+=PhvoW2>-dY4{w3hop+Tu%Dzkxx5A$AS*JaocD-hyb zwy|?6mjU#DqEFcKQuj4*a@@Fs#z+e2sG}%gy+d7&qirhio6C~Z>kx7W1zK&mw_)fX z%nu^>2+XJcJrSR}^yFu$6ONR!Mhe;sQ+fr=qq(iLMzN!UgyV|pqO!^RN%v!G-ASRA zYVAP}7RHK}vEnKmVzp=G7IX;+^WOF8CTpqnRM>=7LkJiHKq{ zJj`WBnnu_5z!Q2J^GgotgyWdtN@bT++5No(ldcZ+)drnF2H@#NftUs3=^|tjNRK&X z(Igc^VD{AZvP)Cuq)KaD346)lCo!dEtMG6lVzT`?6Vl59bFxoWSpn~4B9VvjhM7h~ zkdao7Um3NJpC(;~E!phVK(b9!4+Y47wMA6YuH99HpeC{`fw?XIM2oJB6(mk~WK3iH zb&2%X&0{-SN!^v1;9)!|)|NF$$W-`TSq2$cB(jB%>P$aw z7@IV$*RCFNTwtKgpo?NV8ppb;^aSn?PC6ogsO%$Q~U*O~=(B5Fa;d(hDe0hRc!b%lQp`HAxOoB*Ls5Xz? zsips%IO?E00yiJGw*mW7Wv1ObVtK3DbZ7Soq&5S&{sL{t7BWG*bm+0fY^-9?zp((S z4(*53r88bvwee@sg1&0OPyr@OWQ=dJ+8ZgbIdat5qN{M#F?In(LqJFL2KuD*hl?zL zPxe*<_K0|gsX$wU3#SevA%w-6H{Hw39>-oXX-@{1QO?MYGTC83){+BpccQvOp;9|h z+)`GU74jc$JT96NSxk1>T$;QPW1T$5dFQih49Q-dW?>SVNz(wH+TA0!^B*XhD&e3_ zm$4$Yec%%LJEn??rDjm2a~kCU;jdUDifnGc;pp)zpsrsVt{chz(HpLR=!L~fthur#=CQ(u zx!MN6E3sNTuO#ONm+nUa55|UKa>9FXNc?Rr9(=q3q1Z=$GwfS+$J~eq{}pOJ<deYhDc7+N{BN^bI|omI zx;SZ<-6GdxkMnDhmrz-h! z|2f{GQuY^xU(cSW3=5LCx9$H~tX=YwkmGNRPBXPHF`?`pK6SiPYsdGTJ^nQbVZN?! zue&QN@98vqyl2oVZ+VgbJ3`(DZ_mrXYjDTIZcakpD`<9j4-;Y!Z2ocHc@!#%UA z6YHN|`-7lRzh0D(J;W#x$iJM56Xvk|BOZfeD@kdlVA$NCrh~~ z$7+naJdwP#&7vHZkT;_*(f(i<@&I8ZFzN1*;uxL7*h@;kcy@juc?~icsusW=A$^4F zyTfzB!VCYl7p0@*Ip%FYG};YYBV{*19L@!SL_CjFmd{q6Addln@-;w<6G9?+u@ay{ z)rx-S8-z8roWV2lWsbE+X9&$V$gemsiO{J9*{2?B4-Gd?0Lm-`V-5~Z$th9c@o#W?nKeOQu`Vzl}YnubZvm4;&&eHK&rgo4ssa$HkNAWk&9u+4{vxw8+c z>dy>=l$`cg`0QwM92E%INnHnYEonWMW4`=rcv@0p;*}74i5Wb)*%9WGzM9+EdkA^8 z^N@xyCv7*pHKu4XTq%pVDyjS1wn6dKYg3OqhMN@o2ur&b3{|PnP{dV3?8VYsSDe=v z7arD>N@24!keY|OxH0`lG5kD!RCE?gD}(Yq2*QOfn=yqNt{hw2GH(6ldFXR zgBX;DKTf)$Pf)noCpD6O?V3n1ut8Z}56&@Wo^;{73=I^k;|!2}&xbTogQ6e^#fUDl z6MlwNBU0Sd=g`9IHQs_rxpOjGO9RxT@s!z9#Ds9w{rPTW%nk^QDCh7k0jUAcP{&{y z6Phz9Zn(Ex3@xKVGyOnZFccf4E<=}Nc(UtN)V#TO-6s=w*-2z_r*$@4`D~JGB`)G7 zD^a{|?3Qe|g{yj!EM&a>3|OoB5l8n8TdQTPFr^Pwo;xd%-dYpgZax% z3YnVZqIorYsS$-^{g3JvVTYob;dq7NMmtV{I2X>Lsj(nZQt}Q?`PO`f#HUOi7Paf; zv7{&_$s{#y@ja!q^$wnwq$lomG;or!*W^6I_FZ$XQovB*D1+-h6Yy}C7sJ=Av5;_a}UaW$|5$a5h4dF zFtwvN#Jfr(b(zj-1q}K4{K#KKsG2mur}jfH<-+hTzms2=6xo_h232OPW$<&I2F`^@ zh|^k<`A+5*JMfRZEM?R4i^e0~IMvpfR2Av=IHAkh6>dEbj4)3%H4!P3besy?Q$HR4 zu!e!Lk+~zX86sY;#c;hi(_gFRF`NzvT3dbY8IXu;Z0(fkBW89*Inl3TE>esN0ql|5 z#d%(_R*td>#A8Db@i1nZje#_9HOw#_VqLfhs8>PFTez26q%(yHFBY^fA%F@s%&b-% zmni8uScV5XT|Ra$p>@UMLY(NQ8yN6HE2GIc7715K$^TYDtVC56Zl_<>hvrgxPqKvN zKwX;>Y9eb|ijv6-H#mXhm6>CQTeX0x!S4q9$qcT zWoYIywzKA4S?VJvNG#LM-2+XgoUhjJo@ykAlwuxyNU)B$pf$w*hGD~OjdkuC#8J4) z(JcD5@PH&XRPnI>WLR(h6*?j5VD_wz<%KCsBc$e~?Xhtj?C4Un&L9;$=81oBjZ*?? zGthpej%t!;GA(Pw1B#2%gr=EVqioEl%ejiPFu$=3PMLUuFpWxg3su)9SklZR?qk_72`5pKISH$qK)={|C9+>&0ufN)FAX_e7Lb;LlaeQO^v1lswM8UCE1 zH@Y<2%rhr8;ocCP>{?W;9=;eVHDvqfOUzh%pGh54zWT90X+cxsG>TsXm14DTi^A%Q zU*FAHHFVtV=&CV&JuhavQ{W--JM*-w)fR(@-gQe_I;m&#@E0pc(i$1TYxM(?$szj~ zp5TJ8?DFyTLA$Ebqy1p0mZOo+>t1Yu2At!)=82c(aF|O`yXrmDbR)=ZJTq=*c~a|A zm7OtJU2>`K^&_n;^^0w9$O&<(<aV0CVDpHX-iDHlJZH6i%sK1P!&1+mX!J5`5JB886auLf5u`Y2!4TSRP z^+_I$c3urPX74fg5~d@RJ0J%%#&tD(ro0+f9(D>_X;0R0DQd=KotIqcTUlD&M^t!h z+J9SF)E?B`ZVtAqK0**aGJNgJ9O%jexU%!L;gz>GVv0Y?ZoQx%dF>3XdY9!9T!voV;>U<`3`ll%LXZjEO)g`dcaaya3VtW>)m3SHn;?vV>!RzkS3bmeH2@RAaCYH>GO*e71cY^eP>l#*~Zy^nGv zm>Da!!t=#3|AMCKu{;$!|uV~vTfB!|wjyFvfDf)Iu;D~stNubqy-z{ndlXzf} zSuh14NF6Z|s?){BdnNGtch+8M5S6_P5qzA>D?Eg=T3Ye*tT^nHMDZ=XB)njfkw+Fj z$8ck+WVihPJ{!!D%3MRmRvyP!@~m3w@{lPlaCzf~5Z&bxo1neRaA#D^Iad7fvRCI^ zb(C@dcyWItXXt!=KBjy=SV02j9|^JaJ#%V(uJtM3Em-jVT5IvAQV)h}@vp=i@cL)# zUKd^wUw$6Hf_~n=`z!AO!-kqG&KrQRGq-1?;hfe#3zFOkU!&K28+PCCZ60%3Py_z1 z;+@l{zl(tfC|6y)eco5G6sN&n_Vj$-Tj9C$8Dy3Unh*1G)=W1hZ!Q18tcJ59ltsO>YMBJ^5oPIT3~| zMKljSMDGRJu`J8w7Q>x^^=c|<;HQlif3i~(E@YNjX`sAT5ITlpAlmyjRWQB`)o(@z zwNs z^u^QN6bT*b8|ywhnjzRP@t+qnBOF9rZMS{{!tR@q?2(iM-E^_XO179)x45>O<5S(I z4lO4fwMf9%HgCfOwy#eH@_I(ATXokjCy&t^nabslgBcW>yPdB7O7L9xH#bpEo(x%0 zxi7v|H_TU{d0PH8rFPXlSS9LS--eO~U87&X%nV9{t1rqm=x+nc4@q1p)1Je${JL8n zVzr;<^2eG1j4~!akK&g>xBN&mXYl(<_J?*Fn0GJYRu@_}QjI?9Mf`eftUOsC%^-T| zo~$ufmVivPoofu%5lyj-PzVZT ziBKI@HdP5&8CaVvv_2c_K#MDI+mnX}43jXg@_7{5IEz|Zl0^E+AMdAzJRq~2BLiFG zCjbQ(e;bVsi@qC_hERo)F-PAR)=;uXT3cysIv2>6ArBL$J{Pik%)z~nGy!0nY)Y99fH}k&LMjC(p36StU`Vr_p5xxsrxgmERbm=c~~U4JA%8PU2K-0`cctH4?{a+WQ+jZ-F((bBb{Y44a1 zvNi8v2eXw>c&Z7(A@rW*2wr0ZqC&jBrk{5j8e`;HL4N0aXYucTD;$eFlYZh!$#vL{ zCE$4Y@FXy=2tjt_Yl!bKKoUAN)wT)z!_>>;5?-akOG&yG;~)6Y`e=*VDFhH8SyGU|U2L z8w$dKB_g=Mz+1~gHVV^2jhj>zXYEsJSCvv}6|Wl}7DFi|(|k*Z@_g>iwT&rkWiLGy zVTS{Ekd)r(ga*LX6kAD0GaUHllk?%|90s;sQBp!5M%HoJmjTgnnFiK~P^N1FrIWn5 z20Ouxnz#6E)DWz_nV9vgoq9aG+gxlegUmu~&Gml0Fl3*Ced->nrdeteXAd<1=NTq+ zbC!ihVV{Vk6;JPo>aR1qicxVaItW(Rz1S14RfCctb)jy zbXveZnikw@3MFuvzO4ZFxmFo8;ElPydKPcTFx>nUR=${5_Ba1$bs->~I?ZfJ--qkD z|6&=NAaCFC)5sR29@OaU35S{ef>?;nbGn$?Wv*x`&`=+0#MmFzy)c8=y%QZxUar

L(9g?zq5@fiu;ZbXCiAx5d1-U2PZ&hEd zY*6a0%de7m{>PR5Y9>B=p2o-Zu=H$~u2kcV;)iaFBH3BiIN@KkKaXB;a9Gl8d3D4< z;OpWS>9L?>NGPE&g|`>P$e<%5`zdz8s^%Zz&x_kSe$+f^hg5H~1;yb=x4S>i2mZM_hY?EQ?!F5ET$33z-P5$z}Nhk?((X z_zhb6^4?X$>uGd6bagR#YfM8+>sHoYUJ>J>I0p!yHoGYcD6UZ(IQDU_{i?AlfTXr8>!O&1$(L6$c}tmt3WxkXyf7Oku3i+@879iy*o@Ej zKeVDB5xt+!(|WtSUrLn!itZ%f$MKWwf zv;1Da3p|_q8i6^U)UwDCE)U`>#*%-%x+Mieqh|oo8O}!H^L*%zuxQHr4MS)lc3!ib z+KR*cufU{=*!6o+w!o5vr0n6l2cT<_OVKbV#~&N#w>Q@zzxDRd;_h-D^bIMi0?!$3 zmCgl09V+289`tIHsFo?6VGD;9vt~9Vm6-p*Nm8_-;(}v)hrqy%E!E7d+nk5sW2j8k z*i0Dt@7ioDrl6CsxnhWAbDU3SMk5|%T#E;{f~4JqBl0Iez!YPVz#alJ6`lZaj>EnF z7F7@$34LXQ>v`-$Fj@_w6Y_suq_e@}@i2f&Y7IBfiaNEDFFJz$fXcO~<%fQFt>sAT zRToQ}jQ!N1|1U{b{RD^1+#L4gQManno%Lf0aFMJaT>konV3e^-P=GnBvUtpz|A_|G z;~x~7Tv}`k4yY}w+%6-)4wp_km-&+vCj^Z1%NA$^m}TQVHg6DL&_fKH?td0ZQrtaX_rG>s>UF=arxEk`y**W?2GY!)_8!IvuWhsa zs{ru@jQy`pn_kr9sSTr2hQNnsMlDl7cTTp#S+By70{k8n`BLY?YQMwdd)8m`Z6J0} zQ)|E@PhU;Iz_+xF4IwgNs}aKt=$bDh>P*0%(F)j*bj~-{ny$Bo=TL^-oD4N5y7*TU zVFnS=5u;K}J4%r!mQ*E>hz3c&$I0k+2^JnmgfoT?CT(2qJP3GGs@2D|82 zGbB2YSsRo3@i}PTDF$Hu853IvJKTI+ZO07Lq*H8k&QaI!W6PrFv~WHV=R`C1Zw3R$ zV4n4_#Ap)@G={gg0VafcjFr2aYx7(G>#7#hXKoJMYs~BY7nBUvAuol~5zDbX35dw&7By}h+{cNLcMd3Ms>l?>#0 z?dh$Ke?1wOQ|->Z@PgT3fJbn+PVYXW=%q*dz3!yLF?5)SzHD1Im$Ca9rG4xQ97Qd@ zFK5qKR>wI$w?PHlT_yhDhI<=uA1{hKahFb9bpn=$8E+P>Hv0o{!+CJ+CqGv%v?57e zVHfc04Sb1N6zH9s;Vzi>i|1{sL&mWC?mirlE4J2{qyMv1ch&swqs05$ z)A=A^_w%UoWP5ZF1iX!BxnEhd3LZ&;3fvcfbIMHR@N8Hx7cdQq%eD2oIlsaoL= zRz>I--eI|FB-*hu(;%EC$-9Y2cIg-#_*g6$)DQ4Gi~9lullkbBbP?Kjf5(1ey0oN0 zpuo9MZjw&O@V^_UVO#}UeC2Hzd2z+uB(C%NGh0*jhPx+F)(V&lHYooWjb6Zd?A|gx zZP*4=YLaVXNm)mMSKz&uOb@70RJ*L{m%Vg@0`2bS$e5dn1CcGw6cQHxoCnq!iNl_3v5gLw_MRj3!8 zcdUq`afmdq6t>tyuK#~>W+@3Hc?~P?7(l5B4nI2OC+q*zpu)8#36H?bu z=y;p49zxBHK2Vq9>rXV;AZj{kmtsZ{Kk$pQEI~yZ4KiD73IAc4XezYI<`HdVN>C`( zK=m7#w5Q>y2qv_+LGU_8tDXV}FH?kks%Rv^(*wGD1>&NQFIJHZcT3hb)jK3j+Z{{6 zP2HsA0xp_~>Za3Z77^P1E)YVs%eS{WIN;Cz{rXGMe?=3E6xi(k`6=tU!6_Q|_k#;) zf*I&HPo|5*4C=)8D2$F-j#Y{xxv;zHN-| zer0?3B|W-%<2qfCgP*(ejb_3u?>6i#u&sN8Yje9^YQomW&<_O3ECk)cc%JXq+y0}x zeBSOikUwBh{+7@CgYwG9^YL)=)$cTl>ihob!l6-sjd8~1BRjOuEueF)&z9gU#o}CK zu)42ELH@67hwtQ-Wq+^Hkjmc_%^K0(>;=qi2+QSgUZu5!2SuKeb>~WzM=r@YK47oD z>0Ue`_4;j+Syo$&htF!A0~e$F|E7Skll5Pg9$&I#X9rj1u*1!gtLT&cmV_fOcl3r- zI0U1G`gf7T?zbLm;wz4KQMot~(LkUQ3i((LqY}B!pijgrp|gB5*zp}jg#r7fWknd$ z!KCt)2x*R`POuB-_qYQ}SD{0g_@~}9@-ew) z`8~;bzbX}?kb9!@XL-gAi=TQ|%CAjF?fVBk`=fkSO0NQ&YKMMS!&-0sI!A84eF6AA z7v+B4d#$?n)_rQVueyFe{(7c3ehEFTcOd+hZn>p|&n7&C{9S-&LixO%&(B&+w6Ev$ z$E@DZ?w0;%}0%R0gawg6D#Y7vBENkmSH-9}A0Q64OR_qhn0nwE-#A z-#{~@gEDnC!#LckHO{PCvd=sk=z%>DU00h|wCNK=hr}VRvqs$I32?cFf<9xCkns@e zNO!d1v8Z`VWz~xIw>@X&gak^oU|TZiPJD~TZ%`Ci0%NB*xIV^A#Ki_y8Go!43kd;x zr}0j$i0!zJ*oaQqk(YUCt+RVE3j@aX8ZL2_>xMZ%u$uc*>Tyy=K(A109)HmXX>gWO zAQ^veNXQ^JY2wtdj1$csE=ZhnLFKk)fP$W>vpCuTL`)GU$6ybrRoDm1d2f!;#2-qH zhA#BXu*?XyhGc~r@0c(3;nr!i*7~#v)yJX$QcsWG4|0gRcS~`~G_8wa_;Rn-`u-Kf z!BQ;fY#IKroSI*993y|6K<|TrD;kVfKE*D-dqmcf{h3Khkkbf)(-EK^7XbWwphnBe zjN8zcarBn($STZmjD$+KFl$fg&5lKdU3SKTrCWw3b4va2anBz!_W(3>t_$iLTO9>x*#&itAe@Gy& zCi)IHR=&9AM|U0+rm>p$5+NWfH?3TZIRq?tUNlW5V{$w4?{SjQx%Y<)Uf2@n2>PEu zi3Nck;~dg`9!9yBIrZYN0sscsOBc_FgF;j$nDaa8pX>hoo1@!P!`ph=wS$>TVwBh} z3n7HY!58=%Mfo)}V7t@IaH$je)--$wSQmv>-23g_!YM|Ucd*>r&q($sT>WC&gF%`& zNNh5Et?87~2GinKhsbhch<8zds(ZnY3S!p-HgqeZ!WO zA^M_?SYOGW!QGjOIFzgP;U19})0;{`5C^I+@&2u&){(f~E`Dt!ga%J6)AVtoCtS%~ z{S{?DLsI=!rZWN%rQ-Z-!Ukew`uYt!fhC+`x$yhd#q;%JE*psCFQ_t4P%^A{k_!X_ zX7D54K1vPJfR*MRe8f4Hy%Khe z16#9w21dzB;0wZoDhb@4

w}XnYP4m+?782h7RN)oGF@OU=rtoadE?|#c`a5O${wnF?F;C1br#mc$AIMt+jix zj5(sGd5G|;))+m8(Jf_T-Ne#Unk?2#W(nI~37#?Klx!i^4K#?(gk8t>m92^dBi?YN z53o?gXngXic99So(&3`>;NwYX8k!vhYu+F|;p~9m4dgjioUKk+3u8}=@tnrFV54#5NGZ0q2Yi!b4^xwSv2&ieA9{@7;m>u2RFjgW*q z^P4r>jUSE;5)ks|qnbcLDKa#HMYeD{<_#rV2r8t(KGq%}d5{z8PNqkV=FR2TkZxGn z1aIS%=9lS*DAtgr=gi!zJ@CE<273sb(?H|A(8A|OO~JN?!LYGaAk2-%)sjlLUJJBD z4_oz0eG2G6abn*|r&BC%rdSVXPw!k+5QuTcw%sh{8N@L1&O$1v-odn{M9KhMfds@1 z?Id1oGWZ&Iv1Wbi%tdCh6J>xfkzdXQ*qAAC^42g*tT0kuWb{s4(&Ki1RvcjMI%n7H zB+4(R2q1OL80P}GtwwLFck09tMTLt4r@B^bw1j_HZ7dw$~J)q0h&ajO46 zKn3JZ^6$%tx4!T1kBgI=UtlaNXza?qZCW?l7Q(I>8oCLn3f6 zViY7*U}C(SHhlC)YA|wS2-nDfZW{=KI=kk-uW0+m3!>fODrjJX@C%tisolUcAcig2 z;bbqx{<4jQt%y=H+LeO00S1#lR@d9E_`HsE`v4D^Zb9ihxjcp#hTog^Dzck$ZF|yz z=JpI16`7CUg?pM&$Wn3K-2qpdTpy&5}>G%1v9oNh20I=i#I)0T;=>&X6i~QUJ81r%s9!h)tJP*!$dx1Uj_`*jijc}4zTrKRuhOnH)r{MU(;sucHooW zH*bIL4^fGHzRy?puf@~G+d_C^kJIGYaIX3N%Fa*ye!d=Gf4(nK`#&wma(2&^YWDQcJII5aw#|YzV2>&V^6a4b|A2K`LVyh zB=SG+F3lf34O97zYG>nE%F+(StyFH!p#6*<{N92XHO+ozJ7WV~@Y4=m;G(*=840yV zudLrzf9j^~1iru6e=L3?v6W{v?-{?E27a0znFI9cYK_OQ8T~_zB;(sRCdjV=)>nD#|qY|0dzC3GSu9sL|c?N`PKlZ?19Uy?b+hd_QXae8-Aoyr6?R;|{Ndl`v{<9VY(a@(pv*=l_1(hHp?o zBsl%E#zT19Iy(BaH*1jeKQj1z<0ltpM!#6at_WVgUh<2#ztPy=F^+a`KWTA1`1s_8 zUq-+4aeZ1}-H-k`v*Eh4F^Sp2H*)m%@!b7Kz2N$VAnyM#=RNq&f?uuH$1XkxEj$vG z{-4xq>Y*Vv)6JdFW^;6E@S4~6edMUOVft0#@x6NWer4Mo`9yg4i=g-aC;=$p>Ty5) zICJ@4b?zFa+6g?+Zh8=Q^Y%93EgnDB|DJ7qO+Wsh$8ll%vo6t1eeVr6gdkY|Up$y0 z|GwKBjU4m#`dx{-^HftBegDm)8$Mz;@)LJ!eJK8^TsyX+HGhaGayg=Qe!uhVZj6~X ztkpGAdUlWgJi)yqKTx~F!>Wj@bM zSkG|!*!Cyx@22}b7A+mceTEBuQ0~I8BW3k{h10{(+gr=C_%9cCcj_(sIW9e{+FsJ? zN!Nw-v%4X$1rp5n02dwTnO82|$k)$P-(P&sREdD$ zhck53UxK%jtA0-ztukA&u+_dPd`dzl-rBh0p{ph2uSQUR#j2-SSm@V__Zd-CWlqLJoIp-oKhWeu!=j ztPx5^_mdKv%D93ptr2TwOGRWqJ-4L7F1P>(f{KWU?l$p_S2yHS&{LLXeli#zvKixTJ(OI2DngjI8bE1PqQkxlvfbv} z4&9o(N>vwr*I!TSU8XGY3L5|WlDLBZqFb{h!z5I)Tc;x}sUt_qz#*gwxL*>>y|f&J zbJ7wzMnOypi3!5cKUn3v;_V0FU?($V>iVV|nA}|TTZ%*Fo9`-4S}_{yPWuYNcCR6w z!0{TOI=F*sIddhaHf}Uesf}rw*H>kQ@CTLwE{nkYMC!BjiQT++Oa#S%8UNlGacu9< z@!+nhxF2%m2EhzmL_wr!8s8B&_S2BV6OOgep?uKQvr1AWluViygWy8!cy`!ipP*4G zI$yDz9E)>7rV7YkwCjCqvMJm;1cf9{BK}*A0GY{{M2@5RBCfGLGwekPUodVeE=<6k zmJN3*SnJh=M_&18T@tg>p>IG<@L8+M>ojkfrPPrJV(&4xD{-i{KMLcyR^ zW(+Q)3a{%#$77CRP%sPt>~+6(L_ZvJhhhYCTi;@tzG2rW^tg8J8uwE9krTe7z%ZqU zgXBA3&h#0lEc=Z4*y{|FfuK8<2!b7C1gRta*^jRN+{o)9oTKvR=vrDcXu{(IuPQvG zto~pQ=9RdduUC2J2~)8pWrR-MA~0TrdJbDgtA4EauCBuIj=~)(T-L^=wZhfUAXz$J zoDtu{yPL1CaXetsWPKn~4&%661k%?^ikCKT1^=C0!&6-t!^9GiTPgWdID z8KItLW3MVYAsngN(P3_vFb$H)0$5);4>m&6T?`pW?F!gIyVWl92}Lhub{0A3-kS_s zl7mt*8I9X(_(mVE$$jYx@(F*T;oPITaDLif);OPOJ5*n_rza0%PQnOIgt)cg4WOPPuR=^Wyr^ICv&LV@fVGb;P2|std0n-s zURu6FlGmb3^xOv+Qr6=kg_%>ADy%gnaX!r{51`!^D1it-)PLkZhFfhb`ll(*b>^o& ztVSi=E;D?nJ5+;-^2xggqH%*JZjpijO}M>qscKi|TxL(>kSv5?mj9)09OgJ`(mRFp zwy`28@UB2jCxlDW3bbh7yk@6=$o)>*#Wv^;fgsZj#YT=b2DwKy*NF|U&c@2u$5Sa% zMR77h*n#}xC0uW<36NBz#)q8%^Sx}CCoZK5DIVZ}Xv=1x)OgXhKMbTyUZeF4+Yp?v zJDsSV4AsU_1Q$b{rG^(hwgC2g3AhV@!9mxWp{AAIm1AdWTGEbenV>H~nptyH75ppb8AomWlA@tP^X3sUc$&e8)!XSH6POqY% znj)01QD7*txeR-tsd3UilnOAv!{g$$A2VAPleB`o1<$3n$iFh9YBvt<$55DZWr)gI zqa3N~b-XXU_WAe1Pz2m|L1<1aoi15pnC`kOtCfqK>wv9e{7Y7ztZAYo%nYMNpHkL za@$gqBZm~fif%ffh7#Ap#8rSn_~C%Cc5}r8HR)F3%;<8{GCc%B_l;OHx*BYc=$vbK zrGzcDTfP7;C&pYwo0dFR5`Yj`WkdXU)NJ26^GV-D=wNdOI~2?PwMUsET(b6}##Ee< z^qRI=4$E8P?38D;py<)aaI|U$E!}hIx$R$wYay#IDYKN|!kPpEo0c`#QS;leApR=| z%L>+!Xp1f;LlmwgN%~y!(pAHfJHp63iTOvWBuBE^ql32hW~x|rc-+FbX*(o!&lIpB zN`bVxaBh;7r=pqyS$EEKAwim;Xs5B=x+qf@(VU>fQ?&VF=!>+9lEq$~G5X z#{rvM_w_FuWyQ@LNh!P$4LGnZz?Iie+ulYuCD|bxtmnn$9wnb-CAlSj3Ag84IN4Gj z;89Hp_8A%2Xcc@zz%;6ozjxRq1GFo2Y>*#!)QF!hnC}|PZjy|-(aj9AcF|6)Mm`F=id@v1)y}*8l%>u(;0`a zOi|wz!j);U>G+Ev4=s&u2vae-@uS~r2()4|Fj6O{?lndDCW?NONNeIcS-?Fda`ilo z=)n$TvE>XG(lw>>m`Xf$q2QSy3W`_N!(W5;lHCN9UMV zbA?!_m3!o02?DO;Q%z1=v?v%Rc31a%ij0g^ucH`~_Dz_6y{bFd)Z?hy#`L+6c~DFYD_9NsI=Q#)fC3x^Bs9H?>E(Qcc9 ztpjmPD{Lu#C51DrXC*;18Jp>zfO6a$!HXm6Xt9p<_IJ&&o}gBYu|u>Rq~-vDaetkB z24jnTZQBZL9(;Z1czJEi#|%~P*Jnay%8qinsDLC~otif8jr}%cN?Ju#1gZf8yrOp> zN4*{buhsyN*KjjcFe2V{t+?(=PR2(DUMahR7Uk{hHQBVVpTT^ff=VFcvhRd#+_~#y zTZ(1)itoi9zY1=yf04Ocn>M9TJzJ{8s72z+;Q|XX?GuwL>ik7>QShG1b@o1sIWJ8O z_+ZB2;e;lk&N5WsQVS{*?+(|I_-ONU7*%z-SIK8lJq$|4Ihj&um7e(I5U9`CayzC1 z@{1jQB6~GWBk`E9Yx)MYbh~J!S7GGjoc{%mld%ePo)ql5OZXd{OX`Q#f*~ObopSXg zY@llB5%qO`WX?l6jC(ZjNfjyoQkZqw@42&3mmjcbi|Wcqa1h4uZHJQrm6x7Q-9k^f zut2#=h%+3t6a8)6YeaJ=l7EAj;sZ5~5&>Vax3x@DRJ6b*GJkV@{ z>QuiBW-j=9)lq(>Y+Zz08xn;9xLwvIwEErFF$ccXDWRo8TDnip?olu>dBu38xp-Gi zBWNBWunTERoL~m>GS@K|1{cJ4sVcT8A126e7IUkHMw|Ci)cj=Ry0BAMA5ru)nYIH$ zbFRJWarr%PwI&riCEK;?=%q6;yz+xu{d@yDTQ3WP6mXzPWBt`;Y~N^dP>{L*(CjSP zVX}yho_DsfRH~LNd0D{+UN;y5fx`t=GS_Y%AU#~;;itk7)6Dfr#FmGn=fBv&1YAf4 zOjeD&I)NcCVkE`&9MPssL<@*-n19xo-$?t8_^7cL&4ERW`hNjTBeUEtxRqy`OSJAK zI~_+{tHS_g^qUvtQ`z%~u9DGuWYSnL+9r69ms zyqy!B6@tZ{Pg#9zrxq0Uh)c~SbfLgEyQZ_)Xq)Hw=v>-Zdt0j4ekN{^a3IOs?4$x{ zosP)o@S>LBdysXmY_KnCP76kTTxgw+EfdXJkaeZij9?Ek;ihxcA6e77wGPe9U~#6> z9Km(zQbt#Ll6RyQ-k&zv97$46-peK&xW3hXd1E>Ct~_a}Pr;abOiH%OJ8V;@aqnYD z0@|Fm@HFCDr9zmi{le!+7y)#I?+f^J`tUkiSEAld{AR z#G7>mmPreJ$O~Lh5BDLYkn;wB%URcjKe!3+J#I(k(pJ~dT~P3wf=_j%q01r*6GRI+ zg$hurX_aL>sK^MU!g1@il0N7|C|zghJwiCrIw_|x{?yk{GBbJ0iIP+V@b|?JbhnOg z5v`pvQBaz+s+Gc$IaQ)I;KGXG92X>!zG&>bp5>-XS_g6i zjH&v|L5cW;ZWB_cn!p7TvT@;dX`3_T4)xrWsk6>)H=Zz=969!BbG9q*y48K8*Q{(v zMOMH;FacqfFp<`6@wHt`H<=&&{y;BfN`_$J`ee)MZL#;Vh2V^9Ey(cl12pHQuz~}k zb?4m&EL_FVDly=`pwl>X*P&V`8Ft3h$2;U6siQdPzzE=U2_~ry-R<#m45GDPP7@=v z*_3(Wb{C^WFO?I^S?}>NZH{CaEf&KA(KDH9{EMHEFZk$sKJ8p+F# z$?Y<+QtR%#$?44dwZ?1EmW?OO)sxK7Xc|is{-VZ<)~>rEX6?6S@9_`3Mo&24eVeWx8d8WyeX*!at)kxyYc6Teb>Ou`A~_^k8fPg*zK`>hqA zhaL7laN*`vx2ZSdoN(uM{1a)7xYivV_>R~dJ2mf7uT_MgxYxeu z+t$Lot46lqKGC``j|r*j(pt84=@Y@1HNQo6+ST)Q-tp95&97Yz;Frh1hLn|G*V}1P z(w99d5@^(x#c*4>__D6sCjTgXZI7eXF8pGE0Anc~WzOnJYiFOR0bWISFJpPRa4{R_ z$lB)WAJ2E<93-EWR5e9+_NL=I^uHf+V)O{6)X+4&RaZQDCome?gzZ$Qk&=CYdbLL7|z4F9NBY z!y1Y3fSioVOyL)$*Ja08`jg1og}U5L&newGo_nVrgIktFoONgu+12_G5O`X z&vms)p(ixCH%S+}W9Frx^u^{}ZkpjDTPTx&EDtr0-EM9iYvFWMTq0>Pu8qbMMU+(~ zYe16lnkBaN1sKc%0SnQ(G47ywk{Wl|nu@nkNn}e5TqUJZ>)OHlRTN=DJ6x76T(p!) zc{(@69haT<9y|p<{iXfEf)Q#~R~Rd=$v&7>hZNVYejP`ief_X4v~*rX|g%wvdnFr{=D(5m6wtgZ)jJqBd$e&oYdmB_tv)1 z=EAjaIRu-!ePtgqB|&MVMRn!9mUArmnqMZndfhHFzbgO9uQloUO9q5B5VaaEI&?iB z7h2ZvN?1-@q)}Ufudb)fJ)W0*4*x3GBbQGzF+p2kr8*~Ox?Z-PspPzO4#I7&?PcfM zRtFPPhpv}>)+lU#t+6-Cb&izhHRhB5?R8x*^M2Yad(Ms7v@X8-p>?&Qb?dusK6y`b z&U50;Js$Mpj7^6lS~tuH(zl%k;z=r+PiK=0&10*=sW1W+9~o@H#NWQ{gwA?z2}Iq)3ocz zsXh9#aM>sRPqc1*$G~244hPuZp7RiMkMgCc-L*{Ij_L_y>PL4^7tQl&`uiNU@KsiC z$DSkN5+9SHk;{;GvPE%(-Z5T=louc8ESLaxY zFllwYw)HWU#Nj7>CWIg~1gY(!6?fdOT{EowS6lwh9^|Y*jl*HDtr&`)P|~_}&5(;R z?;ER9-2?mR6}TYr$||nwj?|OZ-gV@_V6(11_ngT}LprLhdU8-*N1>IOxzW%{iN^^o^Xx}mCC<3{*)mqT9tL^J)SQ! zC;IBLR&Q=nmx-CsPzO&(T9;$@oZ564j@gma`9sP?s_iFDeJ(!z(EXyhusA&4utQZz zlHYqQv}hI~GNb&8FY~(YDPSSF-?t0rS>g0qsZ2vvDU)MxWqJjw zY^LZYzu1Q?`O9T7vQ)Mjk~X$~#P|ETVk>#uY2^LTji@utO`|j?g5+u^cHXZ|PDhpV z1B@|Q<2F^V3l|CyEzFJLY8pPJ3OTq`D_IptmH*?fN$V<%X^kf!2{cdWOlmyP@nQ9M zIeX$s>vq&EG52{F$z4TS$rl{7P(S3y%vO89-j zMavY2$rACReu~%_g_LZ-7(Zj=TQ&v|G5Vo~DNrrZBJX*jhRN5Q>Xu6{hH%cj zGX@U~S`W=b#^#YHfTaosK~1acL&=sa0Z$Vx-tm;Bio}Rn;T&C-qgbqoC>dDE*W+|q zdTQ`jkY-tzx>8}?KcONm>UNjzfiNjL^5FJsHs{mf7OIw#U=uffLW5l)2 zd$fu=mB-W#TMmMtC2Cu!15MF7t5&|uwHB?yn!0(QJ<1R)b%my-yAN&T&6MdXb%!8C zNS#b}ZAn#-)#iB>#?za5@aFxE`FrE=5N0Rb&qddA{l@mE0y zu5W<+(0xw1$2??+aN&)TYuw18B=yvt>^wkvBPs9t_T#agbg@{7hW;@rJizxUpw#&3 zP}dvF>50~~IY>z@#sa4avoKIRK zu62#!>hez;dCEj8Ey22!CV8>qFIsCa?1EiTFadOE%oH3t&S15pXwurYX$xWXc(blM zabN%)+6Bd7F-n+9>q5OY$b>^zBK?5jsPY}aTt0%YgrfExsOHyMHm4mKZOUx9nhth$ zXsk}JY1P~)Ch+v{4pEd$5z9I6*Q)%-c&OhCc+#T1hXp&dc{8ocIt2_=rn6CwrEBHm zk^5Os^OX4L+M9HUO`5~z)YUxIMa#B()9QH&h%XHX;VlTE>;Pvp5;(emWB0gi$L>QW zYmHV>GZ5r~LxX=((#|C;vU*pa0wEEn&1MC9A)rJSDIdL2XghHpa(`_wAw4VqIRzo3#deP7Ykr-Mz?cyI zhA}Dt07%RjCMU*R;Q(o!x+iejU7mh@ZBKWWg*17hB|k4(edk9Ol%D2`ALnggS*=Q*vzP=hz?gs>wKJk2q|KU^O}B_yXZYaXxe;;)_nKDx|N42 zlHEVSKVLF`)KYveXmg$c=5t?6QUE3Hg{Wqj_ zojoG$AHmwXp(&e#HW)U_q>jOWEJO=^$kS!X-7B2okCMPIya0J4TI;(H+G9H(?5&`_ z8GBdglya$XlGc3Z=k=J{1S{um?JCK}hMUc=`~G5A$@n%;nmR3WE3_#2d6BREqCP9M zKdTVKXN@FDSW4u}Ikt$C)}!Zux9)?+p^FV2E68%2&^5%b*TuO}JUznEZd13`A=;{g zbm66*0b1K}qx}LqbmhV)7*rmh8LEY&=!uUr;XhPf%Uhwj=h z?SQgBq=kAdlT)*|KqEW}2ih1aDIuE!{~%hjIU3{O9nuwxE0}eHf!Erc?#nc-8{e8S zGTiiK<>_2c{^;t|$l!Xlxpk3jp|!VsiAl~a7$!a#vW)3$xMQt-;26#|5Wnu4(I^u< z{v*&1$AJ#mu6i*L;~FslZYYt~&O80M8Y;!XVVrEi( zmMf}b3YSu$PDn3+)_K13QY3tTDA}|slo0|PGc;lzU^`d67B60f>U`GFqM)xU zD<&nZ4iA(oP%sd!!k8+IO?{>2Ij4rP?9;%}Rq2V&DLhS1`C6z5TN*rjUCGQ}-8{!- zCS7_BCYk3sp0}P0%YNWCfN0-nJ-F+}-2IW1}#Du#@Odg=C z6}yLtt!@a?s&{@6j88lf*2n?De?zTqvE{v!5+ALr?$^#S-?D#Q<$XohY)U2i;axcC z(D}N_Zzo5`$iy1AUgHO$FKS*lOl>k<$`Gx+eFy5MGzc?stSmJ@U^0i z(D17M_o_o`rftkUX<;6+Q!%8v!C5@uc_ncGUk`1q8xFKjJKy_#t6EpIpX$K4GvfZ@ zhi0j$Wp!WheZIgL*|f3~p=-MWDYH5=_qeMs&D7rt@oAW>b#dVfNFb%orJWy|RVRE# z6w=ridAVu=Kqy%j-if2jA#jfu)@$9!T6AS6FU%)zCe*Jqs4{Kd`^9HOOEY3gdVwDzM_t*xCA~>mMVH!K|~R-KvLfOZN$22t36g9vQrT1(T?$FS#>L~ zx`pFraX8>pc@fN@s+Gn(;Z2@YGr@ zqzYfW)&gO7CZtOMgZ8V|6qDAy<(|DQL~SbBn5ipF@53}s?rZkiBjQ@GM66cmV(U= z`%cy^UO@77J?pNUeA*)H_+&Tm)5r%Cx1Htbb3E@e<^wIYl$r*Rp*qM8S%hh*dQ+Wx zj~BizR*%|RT`o*v%n8p8x=&juh{)NIeU4Z3rD2nrNR#M{LjeMNMCaVFYEJjDA zzT>&v3gI!N+VDJF1v$18vD?;s;r(r$O`5V3-cOd%{FDw^=M+Md2yMYR*W0{Tp23;+ zLjt^9slW{`rJ+@aR*NWOm5ICV{0Jtu-_|D)X7TGf=ZW3|Y*(~;9x`5+rTxo%Nhjk5 zJ4YBKF-K$DNsBpT2|_brmdmWuF^nK^(yF00&bdDNt$6rHU>g}``5^)&l&5KQn8O=? zCR&^e>(2Y6EY=SjU44_tL=4mGJ(DB>Qw)M8%yc`lR*tEaY~?Y4F7^?>^wi`kKl~5w z@pd|DK2xyF!s&f=&0e%RESLmTVi^A?TC-m3u_R~%JvBUr>B#EeT|S<^jpPx?xIAZaDZ5jm9jIv!(k;=to%)zM_96k( z5(gmTy#U7`dE}%p!B?2+R&i~OxOL-Qk_si*freX0$@b}?4WYKwgQ1&y>f=tEBYz-C zpzm;kRB;rjUv?8s0=>t_de5-qTIvvDek{=f$y`$#9A<6$v|snC*P=_XjA`A{u2@t( zz+K5!GK4J0RMy3Q-8Byx?JcN2^Oz!ee;ScBY_uBZRJ@CY)(P)hab62AT00;jHp^6N zvUDrR1mEC`4&5uRWshzr$LVIAXbpGD>U`?AHC-280Ap22Yr6>hmO{f*J1kWXa21Yf z0d0NDMThRF*VY3|_;*HOp_Hai5aF{@ZA`Vof`}ISkcrzcN&qUL4$6Ymi}Di$C}0}@ zNm}Yd9?Wq1+JHVHURFhryzIft_o@Cii;3Rw6!)DatG(T0rEi6XPWNKuR;%B_vwA=IBx; zpe`7@>9$Uhfv1C#<&2U{Hs{W{QF0g|bKUt>YKxFT#j@dit9$%U@3C%o)oWex_BgR{ z z7~x9j@SKfUYYl8iy*4QCB_3tVLeZ`(BMA@@Ok@>i!YAd2cFseNnOf{|?Z{2lG)BaE zywqq+RkWap)~0ux+s)X5+PsvS!!W?bit-chQLvNHMQhBBBJN|~W5H}B5{9R&$$DsJ ztsuH`t>tSmN3eU`cEz<`S+uJ8Fqc;DL?fLS`i3Wn#;6R^cn^ zUZ^Vy_rUY)XndqT?tJHmo~EIzmzM~DW@R_+wJS`Rsn>OFoP+StE5#A&s0wFJ0Sk{q zEwJ*FNNda<-6yTv><94KDweW(6B$4s#R8r=9#|kP#kGtjs;t^=#9Z$IH!=_pAKIvJ=+j((Y!nZE>m-6r)bj zWpj4KwN5n}Sg+FiVNB2>+T~1vrLhfJYW7)*)`IUR#xw=9@U{4z0%PH5Vc3?5N6%=a z)iXZkKGTpUuUbDUKie}0C+{D-;%#sy>0ntrP7+{$4zC-yX3JofF!%WA`?bbOBNGty zQP%c5DPK28l<`>#NTh|hHY%r3z>>;Kz#9Z`NdPtJ9e{wJz0bAv7I&qg*lI4T{H(LU2^$xAW?I<=3PI`v z<#%z&N;gPmWTt*_)OyPZ!8qb@KXD= z$NRB!R@r#~2x!Ut05!P-AbinM?F6YZPMWAElH5b7FkE2*E;O|2xMpN` zp6P-6wO&vurZF8cUnKj8yHO^)f!f;IHt)~JBksB}z0P|9!BQn`9d428@ry#PsPmPd z@|d(}?+a$@1xn!wh#Dfnf=b;J=qhVYpZ1LSwv!sU62WXmpe`J(+bUh>Gg|4Ha_V)h zbEB48`jT*5T7S*gyd3e0avkU6#-6~6=_-y3rUUD55TW89gBP+~b0|y6L$k!r$20Gp zpmfD_GH*l4_BVi2F;v)BNL{pmb0S}Hj)_T;u!R8YY7JmVOF0T}exY8=5P81Y$oCAf zg>Kd$AStiJIKPfewD#`zRj*Gl1TIM^hSRI3<~>5b071J}em-8iAG3RXSZ?(e@ik+l zz@?i8O}6lSyvf&gy)7$>C$J1lnSR(Dc^NZ4_uLrZN$a-QiN-EfqNjs}1kcNI04b#c zPG3q|clAcW?s_7oL&h@k_D$h0mVqmZ=eioZiq^zA)_ommdYUd-O|73p7iOwCZTuF_ z!%2(v+G)dQ_CS;MoMrP)B)IEp9|f1RFgHreQNAZNG^YX~4%#(Ff?2>bn0}ARd%W}G zlm$o zEKRe5S#xRYQpVJJ{0)5*anAX4n(w-mtg7TUN37#&vZGdmRvqLPL+FV%=T!Atbc7~s zQlflI$b;XSNw?*Sl?06Tk55`Xn=}?TLu2a8pe}6l5X$ANJRPmur?cLQFM($Tv`XEH z-qTv*@RXRa?~DT(m{gplJz^d*@0|zm1kANHu4{$vl)3fJZQ6-b->;>^&=#A30rE`j zOS7nWXUgiS>b2%QcnS{5V`lRhg-@G9)tTO0{Cb&(+*{y?48E(?S&c>cJ7;BcUFbv3 zRFf|z45Q%=wG(y{8c~_%w>pQa`s5ey*H)ld_1Z2Qm`M`YXJv9^bD9A*E?U)xOc7rr z*j%j#f3m*A!Ui!+g=$*M`?WbyZYa!WhHY%tDhcou(n}IwwwO)gHI*zbD?H z64R%Fb+#OI3c|@5w}0Hu{l*ey2Kkoou8VSta>*#7U8a+bUPAVMW4)FjfN9-?BvA*M z{lsFjSKg2&w8x8ThCmL%hnZX@h!KMx0+Vtuyn$b*9P-2f8|$?bL?kMWZGsc6h7`Oh zKg@VnBrRI=el5#*U8d5_(`9mqKC3=KTIfT@&~ql{MnTEX%GyA?aWY zQbg;nK83QZ%D%bbn}UjZ1_nGg%Ks5c0tAEjq`Z z8^s+Cx@2RXc2B-IKhpPAE@4#)Cn*Hvx^KhD(w;VTBrOR@aS`~ zF4*tJiATxuP728!)YdU+%{TQD$bnw3P%dsZ%@i=>ZPZR=c4fcLx?gMLL2?f26n2>( z7tp5BV(NKffb&H*2bVGH?}HwvEJeL~i_D)Ra(2mmDRIAPd_G@^a|Wqcv7Y>p#`2H= zU?a!Y)koVFo^L%D)*0i}cB?mv*)1fYk;135j$Hh5O1^GYw*p3uuJa|v2_Z>sn81$= z2%3>R&|ARAy+~`swSj^iZDBA$>j2Ne(Az7EhKQ>?3`Z;ElTf3EGC;*9JF2CsmBurW-&Fm5Z^aTY|i#8o>bq#Wg2V@m5fVX^IP_GSk idmPXm&+)x%8eMW}v!wha@NfV2zy2TV_cqLSZvg Date: Tue, 5 Mar 2024 12:46:21 +0000 Subject: [PATCH 07/15] add extend.get_additions() unit test --- .../tests/genome_metadata/test_extend.py | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/python/tests/genome_metadata/test_extend.py b/src/python/tests/genome_metadata/test_extend.py index 723a83f59..07d544de1 100644 --- a/src/python/tests/genome_metadata/test_extend.py +++ b/src/python/tests/genome_metadata/test_extend.py @@ -28,6 +28,7 @@ from ensembl.io.genomio.genome_metadata import extend +@pytest.mark.dependency(name="test_get_gbff_regions") @pytest.mark.parametrize( "gbff_file, output", [ @@ -88,7 +89,7 @@ def test_report_to_csv(data_dir: Path, report_file: str, output: Tuple[str, Dict assert not DeepDiff(result[1], output[1]) -@pytest.mark.dependency(depends=["test_report_to_csv"]) +@pytest.mark.dependency(name="test_get_report_regions_names", depends=["test_report_to_csv"]) @pytest.mark.parametrize( "report_file, output", [ @@ -110,3 +111,31 @@ def test_get_report_regions_names(data_dir: Path, report_file: str, output: List report_path = data_dir / report_file result = extend.get_report_regions_names(report_path) assert result == output + + +@pytest.mark.dependency( + name="test_get_additions", depends=["test_get_gbff_regions", "test_get_report_regions_names"] +) +@pytest.mark.parametrize( + "report_file, gbff_file, output", + [ + ("assembly_report.txt", None, ["CP089275", "RefChr0001", "RefChr0002"]), + ("assembly_report.txt", "sequences.gbff", ["RefChr0002"]), + ], +) +def test_get_additions(data_dir: Path, report_file: str, gbff_file: str, output: List[str]) -> None: + """Tests the `extend.get_additions` class. + + Args: + data_dir: Module's test data directory fixture. + report_file: Assembly report file name. + gbff_path: GBFF file name. + output: Expected sequence regions names that need to be added. + """ + report_path = data_dir / report_file + if gbff_file: + gbff_path = data_dir / gbff_file + else: + gbff_path = gbff_file + result = extend.get_additions(report_path, gbff_path) + assert result == output \ No newline at end of file From 7c721b0533db7b5ac143494521880936bed85140 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Tue, 5 Mar 2024 16:15:33 +0000 Subject: [PATCH 08/15] add PathLike support removed by mistake --- src/python/ensembl/io/genomio/genome_metadata/extend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index b9c0d0083..9f8fb758e 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -37,7 +37,7 @@ _VERSION_END = re.compile(r"\.\d+$") -def get_additions(report_path: Path, gbff_path: Optional[Path]) -> List[str]: +def get_additions(report_path: PathLike, gbff_path: Optional[PathLike]) -> List[str]: """Returns all `seq_regions` that are mentioned in the report but that are not in the data. Args: @@ -73,7 +73,7 @@ def get_gbff_regions(gbff_path: Optional[PathLike]) -> List[str]: return seq_regions -def _report_to_csv(report_path: Path) -> Tuple[str, Dict]: +def _report_to_csv(report_path: PathLike) -> Tuple[str, Dict]: """Returns the assembly report as a CSV string, and its metadata as a dictionary. Args: @@ -81,7 +81,7 @@ def _report_to_csv(report_path: Path) -> Tuple[str, Dict]: """ data = "" metadata = {} - with report_path.open("r") as report: + with Path(report_path).open("r") as report: prev_line = "" for line in report: if line.startswith("#"): @@ -99,7 +99,7 @@ def _report_to_csv(report_path: Path) -> Tuple[str, Dict]: return data, metadata -def get_report_regions_names(report_path: Path) -> List[Tuple[str, str]]: +def get_report_regions_names(report_path: PathLike) -> List[Tuple[str, str]]: """Returns a list of GenBank-RefSeq `seq_region` names from the assembly report file. Args: From 0bd16d5dade754646322c8fbb7d6933bd367874d Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Tue, 5 Mar 2024 16:16:47 +0000 Subject: [PATCH 09/15] rename argument genbank_infile to genbank_file --- .../modules/genome_metadata/amend_genome_data.nf | 2 +- .../ensembl/io/genomio/genome_metadata/extend.py | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pipelines/nextflow/modules/genome_metadata/amend_genome_data.nf b/pipelines/nextflow/modules/genome_metadata/amend_genome_data.nf index 15c416f42..4e0df9cc9 100644 --- a/pipelines/nextflow/modules/genome_metadata/amend_genome_data.nf +++ b/pipelines/nextflow/modules/genome_metadata/amend_genome_data.nf @@ -29,7 +29,7 @@ process AMEND_GENOME_DATA { ''' genome_metadata_extend --genome_infile !{genome_json} \ --report_file !{asm_report} \ - --genbank_infile !{genbank_gbff} \ + --genbank_file !{genbank_gbff} \ --genome_outfile !{output} schemas_json_validate --json_file !{output} --json_schema genome diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index 9f8fb758e..3ec22b9f0 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -128,24 +128,22 @@ def amend_genomic_metadata( genome_infile: PathLike, genome_outfile: PathLike, report_file: Optional[PathLike] = None, - genbank_infile: Optional[PathLike] = None, + genbank_file: Optional[PathLike] = None, ) -> None: """ Args: - genome_infile: Genome data following the src/python/ensembl/io/genomio/data/schemas/genome.json. + genome_infile: Genome data following the `src/python/ensembl/io/genomio/data/schemas/genome.json`. genome_outfile: Amended genome data file. report_file: INSDC/RefSeq sequences report file. - genbank_infile: INSDC/RefSeq GBFF file. + genbank_file: INSDC/RefSeq GBFF file. """ genome_metadata = get_json(genome_infile) - # Get additional sequences in the assembly but not in the data if report_file: - gbff_path = Path(genbank_infile) if genbank_infile else None - additions = get_additions(Path(report_file), gbff_path) + genbank_path = Path(genbank_file) if genbank_file else None + additions = get_additions(report_file, genbank_path) if additions: genome_metadata["added_seq"] = {"region_name": additions} - # Print out the file genome_outfile = Path(genome_outfile) print_json(genome_outfile, genome_metadata) @@ -165,7 +163,7 @@ def main() -> None: "--genome_outfile", required=True, help="Path to the new amended genome metadata file" ) parser.add_argument_src_path("--report_file", help="INSDC/RefSeq sequences report file") - parser.add_argument_src_path("--genbank_infile", help="INSDC/RefSeq GBFF file") + parser.add_argument_src_path("--genbank_file", help="INSDC/RefSeq GBFF file") parser.add_log_arguments() args = parser.parse_args() init_logging_with_args(args) @@ -174,5 +172,5 @@ def main() -> None: genome_infile=args.genome_infile, genome_outfile=args.genome_outfile, report_file=args.report_file, - genbank_infile=args.genbank_infile, + genbank_file=args.genbank_file, ) From a133121a8956eade0a8f225d041a1d625e169465 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Tue, 5 Mar 2024 17:12:28 +0000 Subject: [PATCH 10/15] add unit test for extend.amend_genomic_metadata() updating test files (and affected previous tests) as needed --- .../tests/genome_metadata/test_extend.py | 58 ++++++++++++++++--- .../genome_metadata/test_extend/genome.json | 20 +++++++ .../test_extend/sequences.gbff | 31 +++++++++- .../test_extend/updated_genome.json | 27 +++++++++ 4 files changed, 126 insertions(+), 10 deletions(-) create mode 100644 src/python/tests/genome_metadata/test_extend/genome.json create mode 100644 src/python/tests/genome_metadata/test_extend/updated_genome.json diff --git a/src/python/tests/genome_metadata/test_extend.py b/src/python/tests/genome_metadata/test_extend.py index 07d544de1..dadffea99 100644 --- a/src/python/tests/genome_metadata/test_extend.py +++ b/src/python/tests/genome_metadata/test_extend.py @@ -20,7 +20,7 @@ """ from pathlib import Path -from typing import Dict, List, Tuple +from typing import Callable, Dict, List, Tuple from deepdiff import DeepDiff import pytest @@ -33,7 +33,7 @@ "gbff_file, output", [ pytest.param("", [], id="No GBFF file"), - ("sequences.gbff", ["CP089274", "CP089275"]), + pytest.param("sequences.gbff", ["CP089274", "CP089275", "RefChr0002"], id="sequences.gbff"), ], ) def test_get_gbff_regions(data_dir: Path, gbff_file: str, output: List[str]) -> None: @@ -119,8 +119,10 @@ def test_get_report_regions_names(data_dir: Path, report_file: str, output: List @pytest.mark.parametrize( "report_file, gbff_file, output", [ - ("assembly_report.txt", None, ["CP089275", "RefChr0001", "RefChr0002"]), - ("assembly_report.txt", "sequences.gbff", ["RefChr0002"]), + pytest.param( + "assembly_report.txt", "", ["CP089275", "RefChr0001", "RefChr0002"], id="Additional regions found" + ), + pytest.param("assembly_report.txt", "sequences.gbff", [], id="No additional regions"), ], ) def test_get_additions(data_dir: Path, report_file: str, gbff_file: str, output: List[str]) -> None: @@ -133,9 +135,47 @@ def test_get_additions(data_dir: Path, report_file: str, gbff_file: str, output: output: Expected sequence regions names that need to be added. """ report_path = data_dir / report_file - if gbff_file: - gbff_path = data_dir / gbff_file - else: - gbff_path = gbff_file + gbff_path = data_dir / gbff_file if gbff_file else None result = extend.get_additions(report_path, gbff_path) - assert result == output \ No newline at end of file + assert result == output + + +@pytest.mark.dependency(depends=["test_get_additions"]) +@pytest.mark.parametrize( + "genome_infile, report_file, genbank_file, output_file", + [ + pytest.param("genome.json", "", "", "genome.json", id="No report file"), + pytest.param( + "genome.json", "assembly_report.txt", "", "updated_genome.json", id="Additional seq regions" + ), + pytest.param( + "genome.json", "assembly_report.txt", "sequences.gbff", "genome.json", id="No additional regions" + ), + ], +) +def test_amend_genomic_metadata( + tmp_path: Path, + data_dir: Path, + assert_files: Callable[[Path, Path], None], + genome_infile: str, + report_file: str, + genbank_file: str, + output_file: str, +) -> None: + """Tests the `extend.amend_genomic_metadata` class. + + Args: + tmp_path: Test's unique temporary directory fixture. + data_dir: Module's test data directory fixture. + assert_files: File diff assertion fixture. + genome_infile: Input genome data file. + report_file: INSDC/RefSeq sequences report file. + genbank_file: INSDC/RefSeq GBFF file. + output_file: Expected amended genome data file. + """ + genome_inpath = data_dir / genome_infile + report_path = data_dir / report_file if report_file else None + genbank_path = data_dir / genbank_file if genbank_file else None + genome_outpath = tmp_path / "genome.out" + extend.amend_genomic_metadata(genome_inpath, genome_outpath, report_path, genbank_path) + assert_files(genome_outpath, data_dir / output_file) diff --git a/src/python/tests/genome_metadata/test_extend/genome.json b/src/python/tests/genome_metadata/test_extend/genome.json new file mode 100644 index 000000000..77d2f895d --- /dev/null +++ b/src/python/tests/genome_metadata/test_extend/genome.json @@ -0,0 +1,20 @@ +{ + "BRC4": { + "component": "PlasmoDB", + "organism_abbrev": "pfal3D7" + }, + "assembly": { + "accession": "GCA_000002765.1", + "provider_name": "RefSeq", + "provider_url": "https://www.ncbi.nlm.nih.gov/refseq", + "version": 1 + }, + "genebuild": { + "start_date": "2023-10-17", + "version": "2023-10-17" + }, + "species": { + "scientific_name": "Plasmodium falciparum", + "taxonomy_id": 36329 + } +} \ No newline at end of file diff --git a/src/python/tests/genome_metadata/test_extend/sequences.gbff b/src/python/tests/genome_metadata/test_extend/sequences.gbff index 637cf553c..411fb75d4 100644 --- a/src/python/tests/genome_metadata/test_extend/sequences.gbff +++ b/src/python/tests/genome_metadata/test_extend/sequences.gbff @@ -57,7 +57,6 @@ REFERENCE 1 TITLE Direct Submission JOURNAL Submitted (28-JUN-2019) WTSI, Pathogen Informatics, Wellcome Trust Sanger Institute, CB10 1SA, United Kingdom -COMMENT On Jan 10, 2020 this sequence version replaced LN999985.1. FEATURES Location/Qualifiers source 1..1070 /organism="Plasmodium falciparum 3D7" @@ -85,3 +84,33 @@ ORIGIN 961 aaagtctaaa tttattaaga aaatccatta acattattgt tgtaagggtt caaatccctt 1021 tatctctaac tataacattt atagctaagt ggtcgaaagc aatggactca // +LOCUS RefChr0002 192 bp DNA linear INV 11-NOV-2019 +DEFINITION Plasmodium falciparum 3D7 genome assembly, chromosome: API. +ACCESSION RefChr0002 +VERSION RefChr0002.1 +DBLINK BioProject: PRJNA13173 + BioSample: SAMN00102897 +KEYWORDS . +SOURCE Plasmodium falciparum 3D7 + ORGANISM Plasmodium falciparum 3D7 + Eukaryota; Sar; Alveolata; Apicomplexa; Aconoidasida; Haemosporida; + Plasmodiidae; Plasmodium; Plasmodium (Laverania). +REFERENCE 1 + CONSRTM Pathogen Informatics + TITLE Direct Submission + JOURNAL Submitted (28-JUN-2019) WTSI, Pathogen Informatics, Wellcome Trust + Sanger Institute, CB10 1SA, United Kingdom +COMMENT On Jan 10, 2020 this sequence version replaced LN999985.1. +FEATURES Location/Qualifiers + source 1..192 + /organism="Plasmodium falciparum 3D7" + /mol_type="genomic DNA" + /isolate="3D7" + /db_xref="taxon:36329" + /chromosome="NUC" +ORIGIN + 1 tcagtttgaa tctgaatatc atttaaagag agatatggtg aaatttggta tacacaatgg + 61 atttataatg atatatatta tatataatta atagttttta tgataaatat aatctaatgg + 121 tattttgatt taaaattaaa atttattaga tatatatgtt ataattattg tataacatat + 181 aaaaaatatt ta +// \ No newline at end of file diff --git a/src/python/tests/genome_metadata/test_extend/updated_genome.json b/src/python/tests/genome_metadata/test_extend/updated_genome.json new file mode 100644 index 000000000..abd08f0a4 --- /dev/null +++ b/src/python/tests/genome_metadata/test_extend/updated_genome.json @@ -0,0 +1,27 @@ +{ + "BRC4": { + "component": "PlasmoDB", + "organism_abbrev": "pfal3D7" + }, + "added_seq": { + "region_name": [ + "CP089275", + "RefChr0001", + "RefChr0002" + ] + }, + "assembly": { + "accession": "GCA_000002765.1", + "provider_name": "RefSeq", + "provider_url": "https://www.ncbi.nlm.nih.gov/refseq", + "version": 1 + }, + "genebuild": { + "start_date": "2023-10-17", + "version": "2023-10-17" + }, + "species": { + "scientific_name": "Plasmodium falciparum", + "taxonomy_id": 36329 + } +} \ No newline at end of file From 864c45699dd172c8adc1894d891f527cc074063c Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Tue, 5 Mar 2024 17:15:10 +0000 Subject: [PATCH 11/15] update docstring and parser's description --- src/python/ensembl/io/genomio/genome_metadata/extend.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index 3ec22b9f0..e9c65de0d 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Add more metadata to the genome metadata file, including added seq_regions (e.g. MT chromosome).""" +"""Updates a genome metadata file to include additional sequence regions (e.g. MT chromosome).""" __all__ = [ "get_additions", @@ -151,9 +151,7 @@ def amend_genomic_metadata( def main() -> None: """Module's entry-point.""" - parser = ArgumentParser( - description="Update genome metadata file to include additional sequence regions (e.g. MT chromosome)." - ) + parser = ArgumentParser(description=__doc__) parser.add_argument_src_path( "--genome_infile", required=True, From 77472c31e64f4e1f88a805677634e9b073836608 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Tue, 5 Mar 2024 17:30:45 +0000 Subject: [PATCH 12/15] rename function and update docstrings --- .../ensembl/io/genomio/genome_metadata/extend.py | 12 ++++++------ src/python/tests/genome_metadata/test_extend.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index e9c65de0d..49d145229 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -18,7 +18,7 @@ "get_additions", "get_gbff_regions", "get_report_regions_names", - "amend_genomic_metadata", + "amend_genome_metadata", ] import csv @@ -124,7 +124,7 @@ def get_report_regions_names(report_path: PathLike) -> List[Tuple[str, str]]: return seq_regions -def amend_genomic_metadata( +def amend_genome_metadata( genome_infile: PathLike, genome_outfile: PathLike, report_file: Optional[PathLike] = None, @@ -132,8 +132,8 @@ def amend_genomic_metadata( ) -> None: """ Args: - genome_infile: Genome data following the `src/python/ensembl/io/genomio/data/schemas/genome.json`. - genome_outfile: Amended genome data file. + genome_infile: Genome metadata following the `src/python/ensembl/io/genomio/data/schemas/genome.json`. + genome_outfile: Amended genome metadata file. report_file: INSDC/RefSeq sequences report file. genbank_file: INSDC/RefSeq GBFF file. """ @@ -155,7 +155,7 @@ def main() -> None: parser.add_argument_src_path( "--genome_infile", required=True, - help="Input genome file (following the src/python/ensembl/io/genomio/data/schemas/genome.json)", + help="Input genome metadata file (following src/python/ensembl/io/genomio/data/schemas/genome.json)", ) parser.add_argument_dst_path( "--genome_outfile", required=True, help="Path to the new amended genome metadata file" @@ -166,7 +166,7 @@ def main() -> None: args = parser.parse_args() init_logging_with_args(args) - amend_genomic_metadata( + amend_genome_metadata( genome_infile=args.genome_infile, genome_outfile=args.genome_outfile, report_file=args.report_file, diff --git a/src/python/tests/genome_metadata/test_extend.py b/src/python/tests/genome_metadata/test_extend.py index dadffea99..c50440478 100644 --- a/src/python/tests/genome_metadata/test_extend.py +++ b/src/python/tests/genome_metadata/test_extend.py @@ -153,7 +153,7 @@ def test_get_additions(data_dir: Path, report_file: str, gbff_file: str, output: ), ], ) -def test_amend_genomic_metadata( +def test_amend_genome_metadata( tmp_path: Path, data_dir: Path, assert_files: Callable[[Path, Path], None], @@ -162,20 +162,20 @@ def test_amend_genomic_metadata( genbank_file: str, output_file: str, ) -> None: - """Tests the `extend.amend_genomic_metadata` class. + """Tests the `extend.amend_genome_metadata` class. Args: tmp_path: Test's unique temporary directory fixture. data_dir: Module's test data directory fixture. assert_files: File diff assertion fixture. - genome_infile: Input genome data file. + genome_infile: Input genome metadata file. report_file: INSDC/RefSeq sequences report file. genbank_file: INSDC/RefSeq GBFF file. - output_file: Expected amended genome data file. + output_file: Expected amended genome metadata file. """ genome_inpath = data_dir / genome_infile report_path = data_dir / report_file if report_file else None genbank_path = data_dir / genbank_file if genbank_file else None genome_outpath = tmp_path / "genome.out" - extend.amend_genomic_metadata(genome_inpath, genome_outpath, report_path, genbank_path) + extend.amend_genome_metadata(genome_inpath, genome_outpath, report_path, genbank_path) assert_files(genome_outpath, data_dir / output_file) From fea61a96937331942054770773d678309f5d80c8 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Wed, 6 Mar 2024 11:11:42 +0000 Subject: [PATCH 13/15] update docstrings --- .../ensembl/io/genomio/genome_metadata/extend.py | 2 +- src/python/tests/genome_metadata/test_extend.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index 49d145229..571062e2e 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Updates a genome metadata file to include additional sequence regions (e.g. MT chromosome).""" +"""Update a genome metadata file to include additional sequence regions (e.g. MT chromosome).""" __all__ = [ "get_additions", diff --git a/src/python/tests/genome_metadata/test_extend.py b/src/python/tests/genome_metadata/test_extend.py index c50440478..5556664ab 100644 --- a/src/python/tests/genome_metadata/test_extend.py +++ b/src/python/tests/genome_metadata/test_extend.py @@ -37,7 +37,7 @@ ], ) def test_get_gbff_regions(data_dir: Path, gbff_file: str, output: List[str]) -> None: - """Tests the `extend.get_gbff_regions` class. + """Tests the `extend.get_gbff_regions()` method. Args: data_dir: Module's test data directory fixture. @@ -76,7 +76,7 @@ def test_get_gbff_regions(data_dir: Path, gbff_file: str, output: List[str]) -> ], ) def test_report_to_csv(data_dir: Path, report_file: str, output: Tuple[str, Dict]) -> None: - """Tests the `extend._report_to_csv` class. + """Tests the `extend._report_to_csv()` method. Args: data_dir: Module's test data directory fixture. @@ -101,7 +101,7 @@ def test_report_to_csv(data_dir: Path, report_file: str, output: Tuple[str, Dict ], ) def test_get_report_regions_names(data_dir: Path, report_file: str, output: List[Tuple[str, str]]) -> None: - """Tests the `extend.get_report_regions_names` class. + """Tests the `extend.get_report_regions_names()` method. Args: data_dir: Module's test data directory fixture. @@ -126,7 +126,7 @@ def test_get_report_regions_names(data_dir: Path, report_file: str, output: List ], ) def test_get_additions(data_dir: Path, report_file: str, gbff_file: str, output: List[str]) -> None: - """Tests the `extend.get_additions` class. + """Tests the `extend.get_additions()` method. Args: data_dir: Module's test data directory fixture. @@ -162,7 +162,7 @@ def test_amend_genome_metadata( genbank_file: str, output_file: str, ) -> None: - """Tests the `extend.amend_genome_metadata` class. + """Tests the `extend.amend_genome_metadata()` method. Args: tmp_path: Test's unique temporary directory fixture. From f28358ed3704ace0c2378e3898b36435295b3cef Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Wed, 6 Mar 2024 12:34:04 +0000 Subject: [PATCH 14/15] update regex to be more adaptive to changes --- src/python/ensembl/io/genomio/genome_metadata/extend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/ensembl/io/genomio/genome_metadata/extend.py b/src/python/ensembl/io/genomio/genome_metadata/extend.py index 571062e2e..7be556952 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/extend.py +++ b/src/python/ensembl/io/genomio/genome_metadata/extend.py @@ -86,9 +86,9 @@ def _report_to_csv(report_path: PathLike) -> Tuple[str, Dict]: for line in report: if line.startswith("#"): # Get metadata values if possible - match = re.search("# (.+?): (.+?)$", line) + match = re.search(r"^#\s*([^:]+?):\s+(.+?)\s*$", line) if match: - metadata[match.group(1)] = match.group(2).strip() + metadata[match.group(1)] = match.group(2) prev_line = line else: if prev_line: From d45b917cc2bd2c3276e008a7486bcce2bb1476b1 Mon Sep 17 00:00:00 2001 From: "J. Alvarez-Jarreta" Date: Wed, 6 Mar 2024 16:08:22 +0000 Subject: [PATCH 15/15] typo: gbff_file not gbff_path Co-authored-by: Dishalodha <87130059+Dishalodha@users.noreply.github.com> --- src/python/tests/genome_metadata/test_extend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/tests/genome_metadata/test_extend.py b/src/python/tests/genome_metadata/test_extend.py index 5556664ab..7ba9c615d 100644 --- a/src/python/tests/genome_metadata/test_extend.py +++ b/src/python/tests/genome_metadata/test_extend.py @@ -41,7 +41,7 @@ def test_get_gbff_regions(data_dir: Path, gbff_file: str, output: List[str]) -> Args: data_dir: Module's test data directory fixture. - gbff_path: GBFF file name. + gbff_file: GBFF file name. output: Expected list of sequence region IDs. """ if gbff_file: