From 87c46bd1f12d67c76f5c3dc00119430231c9aeb5 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Thu, 3 Oct 2024 13:21:24 -0700 Subject: [PATCH 01/23] revisions --- joss_paper/paper.bib | 54 ++++++++++++++++++++++++++++ joss_paper/paper.md | 84 +++++++++++++++++++++++++++----------------- 2 files changed, 105 insertions(+), 33 deletions(-) diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib index 56e687e..dba3826 100644 --- a/joss_paper/paper.bib +++ b/joss_paper/paper.bib @@ -109,6 +109,24 @@ @article {MDverse:2024 publisher = {eLife Sciences Publications, Ltd}, } +@article{MLMDMethods:2023, +author = {Jackson, Nicholas E. and Savoie, Brett M. and Statt, Antonia and Webb, Michael A.}, +title = {Introduction to Machine Learning for Molecular Simulation}, +journal = {Journal of Chemical Theory and Computation}, +volume = {19}, +number = {14}, +pages = {4335-4337}, +year = {2023}, +doi = {10.1021/acs.jctc.3c00735}, + note ={PMID: 37489106}, +URL = { + https://doi.org/10.1021/acs.jctc.3c00735 +}, +eprint = { + https://doi.org/10.1021/acs.jctc.3c00735 +} +} + @Article{NumPy:2020, title = {Array programming with {NumPy}}, author = {Charles R. Harris and K. Jarrod Millman and St{\'{e}}fan J. @@ -166,6 +184,28 @@ @inproceedings{ParallelAnalysis:2010 series = {FAST'10} } +@article{SplitApplyCombine:2011, + title={The Split-Apply-Combine Strategy for Data Analysis}, + volume={40}, + url={https://www.jstatsoft.org/index.php/jss/article/view/v040i01}, + doi={10.18637/jss.v040.i01}, + abstract={Many data analysis problems involve the application of a split-apply-combine strategy, where you break up a big problem into manageable pieces, operate on each piece independently and then put all the pieces back together. This insight gives rise to a new R package that allows you to smoothly apply this strategy, without having to worry about the type of structure in which your data is stored. 
The paper includes two case studies showing how these insights make it easier to work with batting records for veteran baseball players and a large 3d array of spatio-temporal ozone measurements.}, + number={1}, + journal={Journal of Statistical Software}, + author={Wickham, Hadley}, + year={2011}, + pages={1--29} +} + +@article{YiiP:2019, +author = "Shujie Fan and Oliver Beckstein", +title = "{Molecular Dynamics trajectories of membrane protein YiiP}", +year = "2019", +month = "5", +url = "https://figshare.com/articles/dataset/Molecular_Dynamics_trajectories_of_membrane_protein_YiiP/8202149", +doi = "10.6084/m9.figshare.8202149.v1" +} + @misc{Zarr:2024, doi = {10.5281/ZENODO.3773449}, url = {https://zenodo.org/doi/10.5281/zenodo.3773449}, @@ -176,3 +216,17 @@ @misc{Zarr:2024 copyright = {Creative Commons Attribution 4.0 International} } +@misc{Zstandard:2021, + series = {Request for Comments}, + number = 8878, + howpublished = {RFC 8878}, + publisher = {RFC Editor}, + doi = {10.17487/RFC8878}, + url = {https://www.rfc-editor.org/info/rfc8878}, + author = {Yann Collet and Murray Kucherawy}, + title = {{Zstandard Compression and the 'application/zstd' Media Type}}, + pagetotal = 45, + year = 2021, + month = feb, + abstract = {Zstandard, or "zstd" (pronounced "zee standard"), is a lossless data compression mechanism. This document describes the mechanism and registers a media type, content encoding, and a structured syntax suffix to be used when transporting zstd-compressed content via MIME. Despite use of the word "standard" as part of Zstandard, readers are advised that this document is not an Internet Standards Track specification; it is being published for informational purposes only. 
This document replaces and obsoletes RFC 8478.}, +} \ No newline at end of file diff --git a/joss_paper/paper.md b/joss_paper/paper.md index c7da231..f587687 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -9,24 +9,32 @@ tags: authors: - name: Lawson Woods orcid: 0009-0003-0713-4167 - affiliation: 1 + affiliation: [3, 4] - name: Hugo Macdermott-Opeskin orcid: 0000-0002-7393-7457 - affiliation: 1 - name: Edis Jakupovic - affiliation: 1 + affiliation: [1, 2] - name: Yuxuan Zhuang orcid: 0000-0003-4390-8556 - affiliations: 1 + affiliation: [5, 6] - name: Richard J Gowers orcid: 0000-0002-3241-1846 - affiliations: 1 - name: Oliver Beckstein orcid: 000-0003-1340-0831 - affiliation: 1 + affiliation: [1, 2] affiliations: - name: Placeholder + - name: Department of Physics, Arizona State University, Tempe, Arizona, United States of America index: 1 + - name: Center for Biological Physics, Arizona State University, Tempe, Arizona, United States of America + index: 2 + - name: School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, United States of America + index: 3 + - name: School of Molecular Sciences, Arizona State University, Tempe, Arizona, United States of America + index: 4 + - name: Department of Computer Science, Stanford University, Stanford, CA 94305, USA + index: 5 + - name: Departments of Molecular and Cellular Physiology and Structural Biology, Stanford University School of Medicine, Stanford, CA 94305, USA + index: 6 date: 22 September 2024 bibliography: paper.bib --- @@ -56,7 +64,7 @@ new constraints on research in this field. Other groups in the field recognize this same need for adherence to FAIR principles [@FAIR:2019] including the MDDB (Molecular Dynamics Data Bank), an EU-scale repository for biosimulation data [@MDDB:2024] and MDverse, a prototype search engine -for publicly-available Gromacs simulation data [@MDverse:2024]. +for publicly-available GROMACS simulation data [@MDverse:2024]. 
While these efforts currently offer prototype solutions for indexing and searching MD trajectory data, the problem of efficiently distributing the data remains. @@ -67,54 +75,64 @@ so a solution which could prevent this duplication of storage and unnecessary download step would provide greater utility for the computational molecular sciences ecosystem. -Enter `Zarrtraj`, the first fully-functioning tool to our knowledge that allows +Enter *Zarrtraj*, the first fully-functioning tool to our knowledge that allows streaming trajectories into analysis software using an established trajectory format. -`Zarrtraj` is implemented as an `MDAnalysis` [@MDAnalysis:2016] `MDAKit` [@MDAKits:2023] that -enables streaming MD trajectories in the popular `HDF5`-based H5MD format [@H5MD:2014] +*Zarrtraj* is implemented as an MDAnalysis [@MDAnalysis:2016] MDAKit [@MDAKits:2023] that +enables streaming MD trajectories in the popular HDF5-based H5MD format [@H5MD:2014] from AWS S3, Google Cloud Buckets, and Azure Blob Storage & Data Lakes without ever downloading them. -This is possible thanks to the `Zarr` [@Zarr:2024] package which allows +This is possible thanks to the *Zarr* [@Zarr:2024] package which allows streaming array-like data from a variety of storage mediums and [Kerchunk](https://github.com/fsspec/kerchunk), -which extends the capability of `Zarr` by allowing it to read `HDF5` files. -Because it implements the standard `MDAnalysis` trajectory reader API, -`Zarrtraj` can leverage `Zarr`'s ability to read a file in parallel to perform analysis -algorithms in parallel using the "split-apply-combine" paradigm. In addition to the `H5MD` format, -`Zarrtraj` can stream and write trajectories in the experimental `ZarrMD` -format, which ports the `H5MD` layout to the `Zarr` filetype. - -One imported, `Zarrtraj` allows passing trajectory URLs just like ordinary files: +which extends the capability of *Zarr* by allowing it to read HDF5 files. 
+Because it implements the standard MDAnalysis trajectory reader API, +*Zarrtraj* can leverage *Zarr*'s ability to read a slice of a file and even +to read a file in parallel, making it compatible with +analysis algorithms that use the "split-apply-combine" parallelization strategy [@SplitApplyCombine:2011]. +In addition to the H5MD format, +*Zarrtraj* can stream and write trajectories in the experimental ZarrMD +format, which ports the H5MD layout to the *Zarr* filetype. + +Once imported, *Zarrtraj* allows passing trajectory URLs just like ordinary files: ```python import zarrtraj import MDAnalysis as mda -u = mda.Universe("sample_topology.top", "s3://sample-bucket-name/trajectory.h5md") +u = mda.Universe("topology.pdb", "s3://sample-bucket-name/trajectory.h5md") ``` -Initial benchmarks show that `Zarrtraj` can iterate +Initial benchmarks show that *Zarrtraj* can iterate serially through an AWS S3 cloud trajectory (load into memory one frame at a time) at roughly 1/2 or 1/3 the speed it can iterate through the same trajectory from disk and roughly 1/5 to 1/10 the speed it can iterate through the same trajectory on disk in XTC format \autoref{fig:benchmark}. However, it should be noted that this speed is influenced by network latency and that -writing parallelized algorithms can offset this loss of speed. +writing parallelized algorithms can offset this loss of speed as in \autoref{fig:RMSD}. + +![Benchmarks performed on a machine with 2 Intel Xeon 2.00GHz CPUs, 32GB of RAM, and an SSD configured with RAID 0. The trajectory used for benchmarking was the YiiP trajectory from MDAnalysisData [@YiiP:2019], a 9000-frame (90ns), 111,815 particle simulation of a membrane-protein system. The original 3.47GB XTC trajectory was converted into an uncompressed 11.3GB H5MD trajectory and an uncompressed 11.3GB ZarrMD trajectory using the MDAnalysis `H5MDWriter` and *Zarrtraj* `ZarrMD` writers, respectively. 
\label{fig:benchmark}](benchmark.png) -![Benchmarks performed on a machine with 2 Intel Xeon 2.00GHz CPUs, 32GB of RAM, and an SSD configured with RAID 0.\label{fig:benchmark}](benchmark.png) +![RMSD benchmarks performed on the same machine as \autoref{fig:benchmark}. YiiP trajectory aligned to first frame as reference using `MDAnalysis.analysis.align.AlignTraj` and converted to compressed, quantized H5MD (7.8GB) and ZarrMD (4.9GB) trajectories. RMSD performed using development branch of MDAnalysis (2.8.0dev) with "serial" and "dask" backends. See the `figure_2.ipynb` notebook in the paper's source directory for the full benchmark code. \label{fig:RMSD}](RMSD.png) -With `Zarrtraj`, we envision research groups making their data publicly available +*Zarrtraj* is capable of making use of *Zarr*'s powerful compression and quantization when writing ZarrMD trajectories. +The MDAnalysisData YiiP trajectory in ZarrMD format is reduced from 11.3GB uncompressed +to just 4.9GB after compression with the Zstandard algorithm [@Zstandard:2021] +and quantization to 3 digits of precision. See [performance considerations](https://zarrtraj.readthedocs.io/en/latest/performance_considerations.html) +for more. + +This work builds on the existing MDAnalysis `H5MDReader` +[@H5MDReader:2021], and similarly uses *NumPy* [@NumPy:2020] as a common interface in-between MDAnalysis +and the file storage medium. *Zarrtraj* was inspired and made possible by similar efforts in the +geosciences community to align data practices with FAIR principles [@PANGEO:2022]. + +With *Zarrtraj*, we envision research groups making their data publicly available via a cloud URL so that anyone can reuse their trajectories and reproduce their results. Large databases, like MDDB and MDverse, can expose a URL associated with each trajectory in their databases so that users can make a query and immediately use the resulting trajectories to run an analysis on the hits that match their search. 
Groups seeking to -collect a large volume of trajectory data to train machine learning models can make use +collect a large volume of trajectory data to train machine learning models [@MLMDMethods:2023] can make use of our tool to efficiently and inexpensively obtain the data they need from these published URLs. -This work builds on the existing `MDAnalysis` `H5MDReader` -[@H5MDReader:2021], and similarly uses `NumPy` [@NumPy:2020] as a common interface in-between `MDAnalysis` -and the file storage medium. `Zarrtraj` was inspired and made possible by similar efforts in the -geosciences community to align data practices with FAIR principles [@PANGEO:2022]. - # Acknowledgements -Thank you to Dr. Jenna Swarthout Goddard for supporting the GSoC program at MDAnalysis. -Thank you to Martin Durant, author of Kerchunk, for helping refine and merge features in his upstream codebase +We thank Dr. Jenna Swarthout Goddard for supporting the GSoC program at MDAnalysis. +We also thank Martin Durant, author of Kerchunk, for helping refine and merge features in his upstream codebase necessary for this project. LW was a participant in the Google Summer of Code 2024 program. 
# References \ No newline at end of file From 0a912f2588114cf24c34351483f6f22afb96633a Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Thu, 3 Oct 2024 21:57:19 +0000 Subject: [PATCH 02/23] figure 2 --- joss_paper/RMSD.png | Bin 0 -> 49313 bytes joss_paper/figure_2.ipynb | 336 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 336 insertions(+) create mode 100644 joss_paper/RMSD.png create mode 100644 joss_paper/figure_2.ipynb diff --git a/joss_paper/RMSD.png b/joss_paper/RMSD.png new file mode 100644 index 0000000000000000000000000000000000000000..7d5ec9c15d637ee848aa9b8a21353317355bd39b GIT binary patch literal 49313 zcmeFa2UM14mojKgQ`Wb>apiqu6z=D~ulu^r*=O&44!6$BOKw`vxSo!V zZj;oxGm3O{-#?+F`)=QlKj2SzK7_Bvzl7}0s@o}98rnHtwlScSyKHA=W@%?;d}W`5 zfsL)Pr3DX%5C<3gzN>b2R<^>NoaX=d0uDWr&)(tgK&mu}^HK@$Ig%jv)0B0m36a*y+wIGNy}jEs!2-kKEG z;e1J@c-i_c&!tF8k3pfB`)s0elA@5dM9@%cmgLVr|Ga+V#s@WqlLxwezeVWtFCB=CaPU)5k2$_lak=nwc~v-mfw;s*7s+QXjKp&z|%X z&SQsgwuVzjiIry1V?gk56MLj}iBT_WbNX1|zrr z%{PHZ4~dJ5&&oGT!yKzgn4u81ZHdfi!U#-;Awd${} z+oq_+zjUS}>6V{gOSol8(#32Ow!??dEiNvu->^Z+do%O1E3a;Q7Df4d8_Mm{&uV>U z_3?Sz)Z9#8bc$h3*qeYuerEjPs_9yxL-s;Cot>RW)So;wX-ba!_`=(CX>pFu`RRE?~O=%sl{F)=Ybtgz=|5sPtk2-gvf%-Eu$q6gxhi)P`$slrwtyjW#J?me=f zOUWovtng)4$<&u>9dcN+c5RA9M**XdO{|QkWzncaLC`|!*(Aj{{~^z%@#v+iFK_;g zH7LzAFYvV=@Aea~%`w(L<1ox=DcT!9C2cGo^)sIOUX7uxLOt8NeA@(k%7=Dcb*X4E zcf|!AwG18dCf^xFy9XuNu0DEHCHcYZ1l6`}vEjW$15eS8SbLv0m0iqKQO>$y^98&M z*>T6)B5@9Tu0`|(Qr7HqbiwlSU46SRkW*JjL3HxL^A|66?A@EuQRrwo+@62i*EeV0 zuO^jI#341xZLBha<3QEy)KE%QFsEsy|G^{YAC{SK@r@L5Y<_+JfPqhaeT?K}gA8Yq zq~9Ly@!dW{1*vpG9csQ6tTQO4D-j3REP;|0UDfwb{ za=K1&a*-QifcG3(Nwt5p)u|2jH4Ds0>9Q}XT82Dzvc8cGTBD$2?a zhD(>uTX-yd4`z)h6^NIjXt$dra z+YZS;h}Ex(mkZ~|jV@s=rzX4>8FP(DXP?0yL20E>qHY& z(=Jw8hx**vCmRNoyD3m zLG1O+d;ct&eNM1q$s-okePV78mAH461@O7$q+{47VrU+IA>eAzCbGW8T92 zyuW*bzw)C~*O?MoO~vL0d1|6W=X7^ni3{##npC~MVOQ;MCfpopddl((!>?aEJ3Cko z?DhEe2sfQ@`NfTV_x4WLqlPkLS7c*obbx$L*^no>1*(zNm$dMc!|3~P98KA%H~ 
z;aGdLS_en{erajxn+I$Gl(@WCCGK;)m&)j}O`48qFRaDMl8+Sjk1entNcLEmJ%X*@ z={8=o{kzn3?ZW;)wurqQY|X;97BiZ59`8-UDZG96u07S0+7Ksf^Z6#fWoK&n-Q9(5 z{8wK7co4tBC3@Zjs$?0;_08eB#^1hvEEp-7oA~g@mj16FU##D}S>8U%caLZ?E|i3Q z^gh$z&g$>Ji;U(oZa8z=d-Foe+Dw6?T6tE!5rfR4F3J}!TwoNih~Uu*h{aYjDf3#z z#xd9a1)K6M;%}NSvq-WrCEkLe54*KzU?99#W&1&yAUlgf|3*ZfyYyVAwupUu809f* ztdg$Pn!opTFlV$(sego;$tL>aC#O4IGqUEeAS_b&Y?b@JoNeb8_gyfP567;M4(3S1 zi?6C0SdtIL^2D<##p};aY}>Xi@ZP;kt>=$>F3xidNOhOrwIBU-w)pGw4YP=g_?UdA zRF3IF``zqXc~4H9IN@(!sv?ma{pr=66sOTop~+l_@=e9>M~(1irDk##Dz&@$&RPa2 zm5OlN>O_%H6c`P>h30cSETFE!Gp$l zr^)9Z{XXbWczZkBW1V6b>dRZ(I9_0zpThxD#Zl>RN+CHR@Q8}^hMny9?7BHWPPg0o z%`Ysd#7PJ0?+@+X`c2ez>dT*N_bQjT&1|QqPxRjFq3*djlOR)y)i>!Z%E$TW>FZ0l zz2j(qt5I}QPgSt?>;NUfk7UEWp7R&5C!`dh<%H; zW0GZ83CF_AmoLA3^xmtAMb^)4Q+ayjEJ8<358a2WV?6f#%5u?S+DNjhokh+nxfa?( z@3W7lz3P5u*(GNGUso#66L%WNrg=3ySf3BQo zbvaVZUHDZU4kd>aN6kh~t-OageFx_(Vs{6#$w*K3@7>}zF_a@6#GZsbL&Xbm8Lttb z3_2p#7r4!2b~<%$3jX@$Ps1!KR>Q8OE6+Mc*zU_ie4$8q&55aZdQ=PkEQ=0dKJ)jD z`BBQL>KE?czh7Ioo6qFIVDe!$_h7#z>;OjItM?F~0tGB}cCaa)kqhIk$2N^vn4fC1 zto1Va^zv3E$5Vwgsp|Qeag}Tnm6_SuBgnbTA`Xpz{;@WoDFolhEM#*A$9H)(9-mov zmq|#`+S=Ny3+vXc<5*Z2t=QY&5O-(w>eb}MlM|0H zXIvX?rzhU??a{Ytp1Rk-FYFY~l5eN*x>WF%$7>FCc%gEP&yvZ(=tX-I6>&}KSQ|`lXfLgxA zTQmHb#M!eXvJgB$_SDW}7g;pLjE9UebW78v0$7Fo_Tuu*2mDU>n?>s+E3;*rx2X;^ zr%^xyR+pekp<9cUHJWt7erVUEy?`;`kW&qGmu3aN(Kfl81R) zPK{7!c(AF&)Et3vu~GqKO@K1YTQkqLq-#@JFxBY>e=3v9*ZjEYfuh_>ec$kjG`{jK6Q5oOy)X|8@HZ^u^PBP*Q_jRHHW ze$5^p7hk6VH{s^V2PJK+a$$);lMIXUmVr%-%xzP6r;jTG4vFOcA#sgHAy z2H#uSxOk-yN{?pABfu23gr;u7Z=?!c5mk|gpmc{Su7 z_PBlhm25bY|_N$BWupBBnBa-0FMQ^p~{qtqF+i2@@M%n&8&DqM)w z0&u=?M7!V_S;tg$EO`_)-0gVT&`+5@jC>}se5TD`@WdPgxu;#GXK+dhO0^X=efQ*1$--2dzMP5@ zS@!YDqiX4bEzA`{wm?!!MLp}e|D zCV=ah^>W*4MoO24nLOsdjY{~h+pvL$AXD;9pC}^sV6lr7?+ zH%1jeZB7fH8O*Ay?yU*0`F+g>S%AUFg$Ny!m6Fk*?e5_51p$VhmJM z)h{oiu23GxcV=YEx1;)xPE7D34$9e<;FuZx{>LAb!9GLV-fAu+g5CfHF)pBU9^jpH zY`tXio~m4W6Gu?e)mn@cjr%@rHH!!4>_sHx4d*xKMk#ZH0qG(u8lN0I^73P|eQn6@ zDt)}M+G+8Hi2w#ctA`qy`nOO#rjR#2Tr6`E=-Pw7J 
zdugW5Qx=G!8pX(`LC5jl8XVsW-m7oR&YV3PN;WVGV|ppWKYw3SyC@%Z!dUvj;mb(W z)u=O#$>-`geO!Z0ip1|8v7nuA^9tv;{`(((2*b$@MW77_xDWF`D8stDFU@zaxJ3}B zt_4fzp(v;4c*)^dY>gF`fyScf!Nf%;n+`4_68)u9cXheWNyoOyP^QF+{iZM3T z)DY*1ddZsi*`_rWdp*szZr!RF5wGVt_U33t?8RWcXvV#JN5&R}n?o1oro*}P$_4xC zo|d?wZi?3}^^ow_$hC;P{26q_BqvRY#U}x5Rj@$Vdo^>CmrVt!#VV?C2B3TF8PBkaKd% za~>(4uAZG7s5!kwRG%djDT+bQVsM;kU=-jxa$o^vKFZYD>5&mFRcvOP))LF94#%$0 z!r_7{QAr4gBj>W1#>QyhLh06qU z@Rhf}izhMeO)%$3`*26`uwV(Nr>Ez(J$o!j*h8&naPisIkL|<3iG0qH`JG{O)lI;LFZWJWrRQ35+_6`aMb05mBf#UQk>XBkxOH0q#=wpd4q zcd!??jR5-lV2kC?4HwqnmTO3S19X!^sT@(&ck58pasO&mu#s&Q04Y9P3?h=U#$u?b zT@O?iApvkqj&zlF<)vF>Is`#dISkh z)VACVjY5aCP!Y%U%5VY8OxxrM_NoJtz9F@dBDH2`WYI>6KUH(8}WJEj0sJ zc&@&Es9(h`Cf3Pv@SxD#95%O4aBwh#m|GrKc?c+kaBNqlN2k`#ht@1Qwd+f>JG^z3+lN=8AW!}wy$es=b1A| zArBtt<1JlAv0ja&Swn6Mh%>U(6(FaVJ91*MPtm_~e5}oU0J(*gm$$JU6@(!Il3`WQ zv0fkn{&IRAw{KQFMzu^N^ko4A@#vLrR03%+{`!EQU_~h~o)X~Fwyj$~WqRxmcb>QF~v%Q^0h;;FE#@b2%ctxwu~Lph8u1)AG#Dl09`jbmeo%pu`F zvp6>r89h1N(RPj-h*p8WJ#DRjl2QVrl<^IjIZ)<92vdYo!k&zKaJ?TfvCKh5sc}V;- zC|Bg>riSEz>#J;d%K5Zxn7&7GY2?EdkC$0{@!#sJIw(x$Ki<4@o^s1KMqt*R@U-&Lk^S} z$t}T*2SfeDW?#p3A?Sw_O7&vGi!eS@S+FP(RVA1H{Jr;)_a1Hn^<+GK?fbzMSct;yG%;k3g?D* z))Y7l0mV@g6n5(=idoB;yV?%AS}xY(gY?L@$07aLFoJ846blyRPL?Jz2L2l_4*QOy zqjaWsH$n&ar&^*wSb73-=tZBqR49S6N z8Bw=*$3Gxmu3VMt}2Ig5zHp&+L7=3fsWM2${O z@vq&?bk6==_;;z`5JC{f!hYDq7=qm|JXh&!pJ3XOUS3yM7a?r_#y|{g+K0^2`ET8R zcUiI=M>=^+|ENB9tju2@cygTATW&zBubJ-Xfi}xfZUaC)gK7YNrASh4_z(L1Qm#MD z#1%K=bMxkLY%5zTztxA60i3!e`fb!SrnzWMUEiOYnfZishES^s`yo;D*!IdEs04w~ z8Mg$=j6te0N6!0MP!B=-*YB6djxg~ZRcwoAuDGjNS_*21UaHppdbg|m5R+K(DpivV ztU*^?7hp2p%d6Wv;&I31S?0gK*iu_};J^Vy6v@o)NWHm5fL4!^oJ73PrGM?V2@7=kEWKteO^fd*`wKK^>4UQqoUn z{`6d$(+!^1F0i|gpr5sqT2AS8nfUVP-o2lkU0mL@wP`}k@PCjh^}wz>N;+35MfF(U zgcu6f?a>!W^?BXfyRcncqF{7^UeCYd%rm6!G2D8IV>=68Rf)YYp$rgc{pI=qmn$tc zhW|_uZ&7h^4@fLLgfZ%#i$zXPt85BYP~3pcB9!j{P+Frvms>!u!2kk@ima?pZ(Vc- zw!SPV$fovuTa_4zyBl>qOnoZPFHx;~s~JQWzjc#p8VBt2&`{cwCOGQ>BFI1VeD 
znWbV+K1fq=3qv{`&hOiULRxrWZhpLul6ve|eSLj_%cM!>P#yrTNkgnu=FmICtR&RO z938_zMQ!Q14R`0bU%h;J6}te-iDD%=7UWv8``i>nmvfnmt`lEAj%nYxejW9cOYP7C z_=8!Lu@Q3+YQFDF0TdQ?9*f<~EX+bJzdOHMz#Sc$S{JXk9Fy$4ng1Qv#}4CD3Bl zGZuh`vDn!Xq?q{J({qrMle4ELTn($>kmQMHFloB62aij7 z_r1=cX|NW}L_|SYtZZo!cwuie!Niq418wF|w&RG7JMzB21@D3j1i989 z2%>FBHN#5y$ATP8D3oxtW)iW`7T61-s|UfUwIDQ5FSf9@3Ta??`^eoqqq;rWmR&j^ zh3wM1kdzb=S3DMH`r3R2is$AVeeXkNDj9MKx<6j&I#R5L2;G!bCzgU_+mHN^8@V=i zuYk4WjqOL2i3%v}GI6zFRw1Rs4wPfS5U-`!D*Laaqf*Ts6%~>VcLTm^Vvpw)O^=N! zqRxxV)+(@*+Qi69xLWW=u{h$DU0u4UoLZnHgqR&72*hT5w2{~B-Feh%jiwpJ@i;VW z931jw%R#j1n;*4;^s{~B^5-A>nD~UZv#Y=DDejI!rT84}T4#tOK(KBJnX1b^QrL zhBG3qAU30)xmPbKA1rWfl<0tsqQ5^X&v@5Jd1+}y6f=_uNmmEoJtfo+VV`1SS(=iR z4%HZRU-gcT6sQ98Tp;1gl!PzSB0A9e9YncKCF@wkpHBaZ4%d-Wn}uH>^F{S-h6vkN4q(o!Go2~T@m06 zjKOoU$=8jZo&$`ZouDWvZQNopJc9a=O>FzVxQh(~h>P6i`d16>`;>n1V-{g8?1=g# zCdk9oRoE>_NwX*p)|Q@Y1uPjr@lw06kY=&T&!R(DTA*3PF+vbj0cGal&2QhFM8~Un z!npKqfNrsA3(q&QD~)cj(Jpbz2Q~QEdymUG!Xgpoxu+*i$hJ4H{l#b8#Ms!FbPkiH zwk}wo6ofsppCLm@KOLU9W(!h~W6N7at0?f|gx$x+uW`04-6QJq1(~V^l>lVBW6%{8 zpidLrry&Lb!l+={k8Bd)06wb^W%|?I)xP$9_^_m5OAA%dmgeuDNf8PcDcyc%%%B6M z0;90q)f~}Z+2ib4WBP;zWJadKoAfS9)cvf~)<=W{XdVr?f(ip*)F_fsLOp}vW4-4lT|^#%!pXmXk@#KtZd!#L2c1-l>0M>=N4zaL& z!6MXNBLuP!GRFuapad@PQCEn(t=o`#GDSblc;ySwUH^ibltdK zP|Oj)!vrkTkw5!Ug;8DJw*tXG2nh-Kw5i^*8BmmL9F)ZcL)#>Y*a_A`XTvTIWz?&Y z9pJZY+FY0pG30iBn^Xl|QDF7)ROT>s=GXkI^}i4Pkz0F$?E4i5{@?bbACad6AR zQSPso+}rfz@(ZE>1k)Xj+no>%P^ys?lLN86u>IzeA$FhRlq*=DgaQxY)!tw*A8{8fi}xh0G2a178@dWN~S>rdph3#@>wh|r*HT|{FHK= zmzP)Sj!-Fx0F2@uMTXTOAKg~#UF0qhm!}r0Kf8LLXp0~@g6k}ctvwMrp(s8CWzg0m zIhrQ89(8B#kuD6Xw zi!1Z>^6@z8C{2YS{)K&v>#$H5{q?9ur~r-)DnYw-Pkp$^UnQ@4R;b@wbRup@59Jn- z>A<)RXO^}iEf<(d;iNo`Li81WF`yw)duSZ78T+4zY{Krd&&Ews9fsZmi+M;-yX`js zRK9U{kLbm6dLsU|WauhPIYYY;TbNLiveA(!#wH5(C_(wfe8TCix!=9g*0p=xK8!7b zmsf<^$n(-_@Py^UouQ#9<=~$k3blcb!%jG7vIs@Lr#v%OVivSI3C$u>W{H4!Gk(p? 
z3W}jBS*$}Ra^)+!y1LT*uA9Go_#3pjILHREt-Mdc*tvk8X-wA^3I5QhnxGIZKYG82 zEQ{N>Cr}0gVU1~P-mz_43~~S)3_*UPQ|e(0oA-)oIgfp=fB5SkiW_}>xGp}uay>RR zO?R_vM1x3Lz%S8@klz2b~;oNCd-YEcHvxKTpt8bsR ziFo%6dK_p3B~)#SgLPa(pp*wd19B+P`^UmTb(}ID1uC>DU0V~BlA{JIEFM5M_iyjw zjQV6StzkdeuL71#7nFbtkaGurCltWfRaG*j^J6l}U2{X^`tU}nAaLG(Vboh;EYa-B z*7H_X+Sy%ph_iM0-AJ1m|ID{hrDbL9sLfzXAO#-Oux)+tZFbfKQjiH$7HF#@ zW7_%^1(ZB%J$E1~4u#XQ_%)^7RZXEN0q#18F$_1`?-McTQ4j1K$_3=8ZTl{Ea(iQA zj)-H_0kFfm0H?Dfo=YhRu5s@^pWw0ibYe=CK_iITS`p{q8+PBNp-?U%xH}~By-<fsq9D$MbS733RiIHVuS`P7ZjZGMx41FoH|-y zxLe>C>$i#i9lo;x)WAv*S0+&49c5>ig<4d<+p_2p6vu+0wf)~lyCbK^8IYBJ|NZv~ zT=6P04lx0ZbM+06J>S4dVkoN4CfH3Bhep7Mmki}~dM>#a&-8}(*T;AhdmFTrNI~mU zMAE@q%>dDDB_*&Y?S$u|8$3}@^(2XoZSdwnJplPpJs)y^2wkkE62bl`yG$TB_ji?+!mQ;% z!WXDFV!$Z@(Oe9X%0Cw9xW7iAs}bT->ZLdC0!7fi7Hc-pwe=`y%`Xin1$`m zA;cCT2@Tm;ZT|7c^I&>F_r>9niQ;{I{SN7;NlGAH`S7Y1uwRujbc!2`T^%DuT~&#( z3-BJ-@@y+;=zja?v`PKr>%<>}OJBcnqaQf*VwU0EPM3jWBqbvx*)qVW!2?}5=7){+ z50TEVLf{&koFrby$b}*-EEY8o5#bT>QX(YIBFEZ;{C7t>zKF_`g$kmw1yO}K4>mCi zD3hc}?lzHqmp8EQ+WSikKrU27p zqod3F0%bh$Woj%|d4OoZwv7Y0hez{Q*QvpJKxO;EcU%Oo_N)&|C8vv!p&)r|XTh~L zLFCzS`20zPZ6enX%@62bBr5^W(MiDAW+;M5q{;#g5VrnwjR@ZaS`l_MIR(X$e{622 ziwv|)EC$&jN}Ei{Dj;;GfPf&*9@HVvP>|vV4rG_gJn%h_CJ2*ULtpQ1F!Dz~h4{j2H}{+rzWhj5|A2nN97* z*-3`Dp@72eEsSI+L)o;8oGwWEGSwq~ji9}0IB=q`c*2ZXL*=cDG}ggPt<0=9DaMTmxw^&Zz0A9k4vM^Wc7eNr=mm?^-j z!M7IM78N5c@Ovy|ySN$293L|ZmDrm^K^2Ax*~TMs*#oJWB0Pi%+nW^Bx--c~i$#7~ zm?IoE^c5+A+WU?mV&Gjk%))X8IuGH68Z&govE7L&s+j;+vsP|CRLuljL;6d@CMBE{ z7N7VH@O)_icZo=nP-Xma{WM(SjYY00>anru=@zig`>Ey59VoT2uL82HyL$Xs)oLy!RMkA+2*2YDLUbNrmTDS?zJE3 z)K(B*c!m-o9+lu19Ky<3fR5WhXzBQaMC`NL-co8upIWlA6iFd)R(_^ZlRynzg*!Nm z-#iT7zkrU;4=`Rtr@|TZKF2%-HM}Me_gnBClfW3n_=_i~8+`2bLfo^dddVO zk_YS>4d1?f6WrS-j7JKt@IS=L8V2YYj+4be{0c-9!b6Dc%^v>g=btZ=(`zP-H;OpN zUs*5^l+Y&>^>`d9DGbLx6gxKzrdkT(Twg#o6X23D5q60^%q@3OVH3qXO1SSUl-lJFM@AtE3!!K~QR*!g_E7v)OG%|&FXz`SSGR5TU8%*- z=}>(_?jMj0Ll`60klmomp!2QT_Vds82idrNjRkdlM?`=5<$2LWgAB*OWtaJ}Do#Vt 
z9=+mAGk#Coh6sUu&D%Tt=K~UXCoc402gE~cXWh52jMyzsOuRkKieIgn`IE*To1@*Q zLT`a?2hOw>3vvhT!Ld+;9Zv)wB1(`8B_<}?iC#a>!BGn@%o}3#0AdP*L%sq&4(?Z% z1)%d||5!kUs-mbVrp!e;c5-1Aa}i0nKnoB@0f(QG0PCbVYh1;%E|NS9LBhu4rxbAZGB4Y@E$mTkGdpj2vo0Wy- zC9iSAOX4|+D<$@sjm*N0#M0vA>KabuUZ9rocH26ky%|t>4#`IxhrDAmY=oLaAFn>t zY%d~j&Inw2A=s`VP(A43G$Y2EaU|t}iGWV?fo7_Z4#GhE$=_DTW3L;&dvdul=nAiECAvmvU1T91bJb?{1rLoW-1T0xZR0C@h0%sT!~&o2~5 zTx3pWNbr8cHc_AnW8`UbkP4QCMUNjpmW9zw78I8p9CpO{zy=FFG-IR8Lp^|Umhs3E z0V z)K~bx-CG{>4rPHU*dn5!vt4|ah$JVIc}SmF0)%8075y26?GlNTz_N59hl|ymS&htKF{dpO zJGK4!gW5Uij2@JYgQ^zz!C5u{# zmxN^;M<$`QtES=olAwNed1KDdgJuIg#m<&^M~(7VuR`JJ4kt%1yrx%6fa2fbwkS#~y;+UUERjal#0r%%;T0py?-Mksju`ufUSi{JMTpO(xmVNwWehl2voe9mEy z`_!O@5?UOHva)U0F37IESRc>@TqIc3jz=M6g&=1Yp18})#Z^xnTFS67x9WBj*lYWC zz--ow&B%XH`oVD&aafM$h>ZfToMndS0~?&%k;H0v>Q3mvX+qY2-KI@^h#;~#x|PK1 z#vsH=&YfsBBR&?c9{b^xJ3EdVk`>QFJk%d}4TrV4u;Z}VSYO?URw;8sVO=RGb_T91 zXEG<*j~{<)P#V}EY$7{bdkC>YLkXsD5{-!QOxv-;;Kr7{7Q{WpTWT8XR}*69bY|DR z@6dSh5YW9J__KP-b&T${2G|D`9F%N3svN%A)%c2HW$MpdGZJ zj=vXoNV+@v*KMch=)62{p~@YA!lwpY^7-S(M?hOtT>k>n>ADQ|D~R<4Ha)kaoO?r= z(z=0N30y+9=CRvmnbMkV8Vp9=s2B>Z3rqt)W^fy0m%~yO0(g6m5=Yl+hzBBF5pcZ{ zgDxSlh{EzF=va7vP3z@j%PmVwTLJto!rVoea+EPu!ZKnVFe< z@FNKSvZrrsynqNb0b50{olt0RUWP2-mPTFHLnJeRAvyWrRr)E|HO6=b0y{W#iW1?e zNkY|y{s};KOZi}LyNByYw-~;DWEaN#@;wFo%IToCtw+cS2)Fh_C=pB!W#>ilpOf+l0yPU$j zyu98iv=*+{qA-OKbleh}p}&2Vs<2SlrW-eIB*Qi&q_t`JDkp~nTHCm-c@EL8`JdL- z_3yl#21glIP#MG;X)u8a^b53C)IHu>vs~p@pI=*d1c40!2t~z#p{~-UoNgTy+n~A) za@$BN*N&sA`YmR(4=s~p-*pKL1^ZQr*%E$!h+N5}(nG!@?KVwlrE#BZQd@dyMqAc>0cPd&0hF_`Jvd^@2teRY z_z7?({36B!X4`;L{7s`GEY( z-eir9f8^C=rtU%&Ev*Oy*N72{Nz{+aSMeIuqaJ;7V zMwnHT0Id3sBFO*IMBD6T%6Ohqh(&-)GW z-wcd~6JP4VFaJJn4@IWBmb75vS7w>bsvs^2oW^>@gKUW4fxr|!;|SYF@v0#1+Bp=$ zSp)PPPiQ{jPSk@1cyQS9&e_vrMQ}ugbaC=ELXOfdsUOrc{2QT5-M@YG2HFXP8$Uu# z4)*gmkcc3Nkas4-Ssa^bP(>8&(=bNCT+%`v=C?Eq|KV#8WLVKA5MZ6rrInyJAlw=-g)!U6`Oi2v0^85euM_ zgbeT=d8$f9k&BCK?CZDE4aBLIfaaR3NJFHPji~kDu)*qywDr-hP8Vv#Qyvn*@r!^U 
z%3T;fA9Yvw6}AVg^k5oSnWSIIcU|e>q!ak3zD|wdJu<6K|b98&O4zdb>O<%E-ns7N0ljv06YF3y6KmAs;VGJW$CVkCf-g!FRB zW|czpyNbefInFtaULoxx;K86PSS7zWBO!4F0B&aemMxBsGqi_1=dHzjsU})ncL|J* zr7LF&BJpy^7Wm30$d&G(auJ~;(D)nJe;k3n8DT#n1Bf!O(2aJ9*gp`;0(C?j21$C{ zzGFw=$IGH8PAH>9=FKR}Y#YKqwbRHlp~>55-=Ps;no$NtFbaJfk5CL5I;rdVkj~Dx zZ{M0FZlrzDuzvp0C&VfOYDBPWFWLe@i>sVJe-8u@8@0UYJj@kM;1(2_B*FZ_M$UCs zLPFXfT@i_RhD5|n^ei?JD*$?MOh5;r z84IgVFV_*K8~z71P>i+(%ZYR8VZa7+A?@d-5?A+n{Jz@pacb|8{3%-eqod2?`RTv6 z$DOti{~H@Y%vDz@{ffoZ;JYc6f$u4 zN5r|Tq7xeVd`eS3_-n>b?<+pr5x{ZsHoM~*RTnfYsetymkZ;pN8k-}9Y*}j-!4@<@ z0uQzT;(QA?T~?MX?ep_~Bak%`5t$0Rup-z5(vJL8uObibAdjw4NWIWU1kkZVo@Y$g1msx)o8p4|L;a(ZQQ zNLlIb|D=&1v}gR^aQXj0xcv9ejtScEB*(HV9ko*udOL`4pKlX4p0(nt`^!8C+zC!x zXBMZSfvFEGxU zASHsvP4dN%8a5E-Y1yiV}sQr8s_es6P7;aXLfmlYMn zf(t)uB1~@)y-jLtpkU7vvsBUQDvMnFBhpn-UT#>RLwgK;o%F)hRX^)6i@B+zh$i+{ zo)eq@hE0E>eck^Nn}S?UM&22KoJB$992gRkh&&?Y4_W_nclQA){MkRnRZMMAfwT~K zOj8x;*kvfYg{Zz{CP;MjuPZln6Ui4MS_+DX+G&WW_gK&;q=^>1x2XGnks{Jjl@%I8 zVHCQ{BDGSr->E_qY=-1scjW~6NJUtKu=CP~+Hwcb7h2O9O5==92B9h<6Nr4cl02fC zjW#2m+B=J*t*R!3>p)7}3bVBKbHQJ=`-j?p>vSdy*Y%g-u|j8h9Kc2*{FCd~ueWJQ zq+Qc$ZB@e#b?mJMGJOad3b8oBqp1i0vVIpwDv~MjI*_j2LKE7yE@M6nORWzBk1}*> zVqbx2{}xdkkz|mQP7|sYCJ*AM1DZ%800mYmkpWsMSH}DcMeawmEGIz|PlWT5&BFu2 zb21bQ(jGwE>0sxQP%?yaY^N=}W4!EIb?hBEHBWfH{`g_jn?Zfr-&lE91tt7cjwW=} zkPyZ%Ir!{Z`JSJ0zgLGGRgaD}4k^N`5NLoBhWL+~LFBY^tuQ9-kJ`6hA|(W|O|zt_ zKRs1WA1UssL)yha41e~OqFwRB=AIzB0R#&ypFa!H(Lsv&!`K8{$P3C^+H&4IXtC}I zpuk#X7j*mX&gxL)@sHIB_%VVvOb(67mV0%B7 z3Jv$4zVw0S>Qs2|sn{`1pa6+d?CjK@_XGv(@Tb4Zme*VH!2ZgR06JjWAjb^!&;jU^ z=wd0?j#<0>5M|77j!l;lW*y@DAx}?FSPjTV^^Zlh*bl3XLj}uUD{CX;#Rv1bJCyVH zXnwAeYXgh!arW%le7&u-ugN=|AGhu!bhaHkcElktU{ZxpA=>-~pmSMGE?2EzDqqvoUMaa6u@tP_Tp&33+;aBq$~a)wA&&3VO2B0rN6z5v~&a9D|YBq|Jw_8 z)^EEhdV)wkeF5uNwr=(796A|)kC~ob;LY{>RyNF6&b}blYlw<917qtCAt?XjvsaoMRU$f`(?jNKi_`#n@+--0{fmq7ddq#UqFW1$nWPl22k@Jv5+PQ>R9>4OK z+>bI34)53V5s~;SXIo+4qjQZIe2H1^pMNoo(a9u=qCF0lh+t@XRecyHlKan>9Nppt zdc3sHILVDWcbOg@@~x-+0bQlOa2el#A$!NFNT}>67ez}Qivk50cNs! 
zw8ATj)-KE*A`b1iUubW@s_%`R7h%3pIY{}#)AT9lciJ_)!VPfkn>WvmF0K55NPZ9N zus@^)KH?@JqsL%q(h`FgrnvAQ+r$Uk1Wi)tU(_K(-2C0kMOTLV-P^Hen7$HTIXDxx z5P%~sEp6lcvkvV>_8rCfMYShE97WJFL?mcohE6d5U>PcbtQ?Due%}i#o0Oy7W=kqe zTH0GgC+?BHCmg@mZEbBm;|M+ldfc=-&ifAEJBxeGMvK)92H7Bl0>{55TL?RC9m)N_A zYDAl!M;*)Ol0YIqk0u*3XbE}H7!qVI^9VA<_C0$zARE9Zgyiu2*yIXCb4BpGjNTr! zX*@;|G(dc<4%?=|9wxYIfr?Z`V4FlkGyxrzV$m=-#w$dpgRx77FO_(Nh!qY_3buXw z_Pw%}kd!1h?DFyYE@DI^X^3PmC0Qolt?CS+#uWV5CQ2S4PchPy|TQKUz9)I|sd46aYn^e&tqF-4BnMVmkvyhChMDnSfWI1tkJbwnxN2j&}P1Yq_STGaO% zH2D8(U1Y+WkoIP}53GlF8P4Cy#^wG+Noh@y=EW?+1-0(d|q4nq&X zfNv#jWj<{F9CY*vMo&tSokqYQ21aGM!c8*?qh{gX5LM6msk30Z}|Ak~x-Y(v102}_I9r5pnGD>0uNP$Gy05t~e# z>xX@*P+|pP=W&+J!tFE(d`WtaND7%T`||eDaM)k+!g1uto$TrlN%tNc`eY~-0`oXe zc+FodhnK7^@-ke8sGsGr&4Uo~_n56fhjaez{`?qVa~7_g14Xp>?A4Zk&>9tPbP$L3 z-*ZsIwd2^%I8seGOv>=`I=i|Sy218r=P0`(HaYGClqN%bFv=qXr&LBUGdypFfel3w zU#%~6^XPzq}yd-eCitx zy(wb%Ma;NB8hkLwIt-NOSn*d~vli$ZohKfm7*sJ#zhh++JxAI@Thu=xB!=Af@BV^7 z9HAwv2QYG>9;8G%ykLnqy-1u%Q*(g01`dCv*B3wtggFF7$wGyV!b(k)c6F~ih^SDD z)nJpI1Yc|di}-Nr$}6;b(;&L6Zjmtv+Sw~lQmM~Y#$BG5vYGbZ`_5{3tiOCwz;b0r zWFDbRm)RUvOK&$nom#rID3TIDTdXqXBUP)Kv2}OQ(|_^3xbpe>nM{{}Lgc_CTT8A~ z2q(EU3fAtNE~UNGlMdZ%*Jxk;&HCp(c%gw**7*#1=K$EAjFfdqZ8#-HqD*0-(zH0%`yfr9=Ve zj5j-edAlFwSlvtsz<8_b0owDlA5i;2J@*HNWxo_kC=55`^*#z;Euo?A2Clrp4jY4Q z&%?D#J+v{3N)cNI6k5YzF818|O`BAZ7=R0cJQGjRuBm@Ey=e7-A&_$;(o+-M`pR=K z!!>DI7RRBZ|1i3bQCyawAV|hTkTk=#8Fst`V3)o*qOum+(JmBwI(^F%!VwTo4Rt`$ zlv0VCGx|6?XG&mG7WSCW!&I#>6R#a`o&e4i6#vC0$);PJazb7tEvx8rsr&lGzQ3`a z(EbZa+>KA;&*yky@rsa-mz{^TT4Hpr~76N4x|wlgw@_4fhaz{ofw#?Q~M#J7S6 zO>NA*h%pOh<3x3(O-&4Dd0Omt{f4voGpQ!9I1o-kTmVIoc!VQ zdpc7r9IWvdu$}?#4w#4wf>5vvL$I_#DK%h;ILNR?)a)pjF8~-XRr=H3$Gvs@dOnrZ zUu^z>#F26w(Xj-XZ)!gdybVl9+Z&L?w0WBH!cp+wpH3Cbj>1jT_zz^66f? 
z@GA52GQbs+s;9Veg9Q5JUmG9kXJ;OJthxlF8Fh!U?`TGSjn?1O=; z=C-J;V8GNjfdbYU%1Qp<5j6hNdo4559EqSBE51X?rr#7z(Rp!|_OckV4roPqszXgwsU(v8e}v0ssK?}=fdh`$re+cJ}dR*e?a zlq^zEDJ+s-pzl3+x;$iiVVBw8;@c_#WtvLp#SX-1fq#e1^8z`oE@cc!IJz4#7K0@>0I z4QRv;1Dr%b2eXaIUu4j#{PcAnN&g`_MWBM~9>JVv(p-d^o(&C${&p*obB+^pNy#|Q zP3YFeOe9ql#*^rSCUdfc`2GS;URt{@p`nYVrVb6!A*q^}gM(2Xg=Wbs=(3d!FOBY= zl01j(LS@{!(zjx)2{PoV?p6Iq`1t?ApO!#d>i;P~^1s3OUkk?nek~Kn`(JiFH!vCE ztStjSU_FYsG9T^5b05V2w`1dvxT)s zR*tf&Q;*#$hHT983!cV=^7G&xClHqi3NzlU31)%R^1D-(3!wRcf^?w(%E5WlGudqZ zl!J6hZ|v1Bb@U^)`o}(e_^@qC6NW6%uVg4ZhGNM$1g{iT0DZxOPEQ=mq{$z*mW-O($m9eZ_ZNr( zB)Vzy?gggR&J&C(Fhn=or(s!0n$kUn+-9pBkD-`LuUoUcnGFBC6q*kWIC)= z1FB-u^huB%h9a~;pC;Xg1RhrOt`weBazXU;o%2U3{M_IUIkX>yHwz(U0U5152vdZ1 z2WfIuc;QhR93(av;OWJLH!H82wWk2%1Z~?qK!1H1TSV(hGx|;<<#y0kXrD;W`@bsP z549)%bKzhweohD8!iVT4`@)w#yx#MkZh(wM0ojEgL6L$3aupE5c&ZfoU{m6S(_~N$ z#8O_U&Yp?*5l-Y6O45x_z$lU#ETo$pKU&3)OlU!2$;Qgc`f8Ezq-aPZQ_slXf$Lf_ z25EH>dW*Q05bYw|M^G!H6GyGoW1$gU)GGLi1n6N<#>hQhK&b6=(U7!BlQa~eWS~ki zK7jn(832&Q7lE{sy)UN7rohXm06hyBSFX7{Sf&HBBhoSKBVJJU@8FWybOA1VhK3@| zLjH2Y!80N4k6xoM^PcGre8W={I7=(%?Yiv``C%X4jgB5u3aafG7IKm1~}yg;y1J-9GfPCn0Rw0!oGPi5m*WFevn zL4Qj8iF_oQTEy0>EdjVk+9L4V-K1VKcUY94Y} zNNDW14ERcZ*uuCwC=3XE+js6{-QuU!I>xPb3o;tk+HU@vHHZw(gsMN~)cKoVWj@7q z?Vp;Z-Z9YAX5VdkC0~74Re6Cr|57Ttwj)nCDxI(Ux#_VR@XuC%UPn0zK{&>h-mj@MN}hK={G zu@BygnJAWD7|kI&y0{`!J@Ew;m1+R=oq!;4 z73Ox*><{rI9ripjhXXKq>E8~Mvg zTKjg<&OY6e7c7>4N!pwLSQ) zQ+5o8F#+Re4<}U7*#wgLRZ#wFsNgkEa-8+L?x~F3!NP{lA@8HVy&&^H;NgdVM-y^t zHPyyyTgB8T^clkMM1JCvkk-#ERDhOQ%mJRjEH_g?2*?JCAKp1Qz0I_TS!n?6qhS~- zM}8a@AyK@vZqjX7qSy1mZpp?BD~wOPWoWu{6cS_#mX~yG>0m7LDfSKTaxJ4-D&ZnP zDp$iz5fufzYM+(Cy;w9uky?aUVxeCm$!A7ztXqoi_B()%UdSr<;K)qC@bcf;H+-F% zQUwo$p<=wm@_>dPbRvFFM`y!80{oJNuKQ#l3+WO95cH4z|H^yszn=Rxj{9qmkS|Ru zeM3V@Wh9$Qq$n$^(lAb^m5?-$(L|*}C0eq}9+6ThTe2w;Dx;)~=KXv-uY3G*{{i>o z?r}b@^SUniem~>=KHkUidcBU<0eMwUzuDg|!^pO8mLB-X&3-z#$yzx5khb(I$If9U z=}MP58yr!Q^wam;ob~2KuTL#f zBSChxhYgzzsYws%<^{q_ic5&HM@JAdsLcx=?u#sy1mT5-;qP>zBu0E9f_IoZ_M#g% 
zrd7<{g0{q{lPaB0!jyyQNt3tPjYy*R+mpU6czqoM|Brpd{Esh+uL?CNJ>f>VL;JjDKXZa%J+j&z~)4)cP9S|F}mF z1ViX{VX*#=OF}UfGshW%WQ3n|(%Rj;<&`rV<*)p)ciGi!S-|#s0uhNn+m?^pM)V^u z7AH_w+@lz$%?3#y@R_J3JCb%7~W!4oJaPVwv^6R>B$%_tIHH zlOU`szx2Uczfrhonx z>oFr{w3F{MQ#3DvXkb2*_eVKMWZQg%n98$uqxz;1kyc0rXw>aHSd(ZVUE|C>wQ?%cdNktW5vC4rTl za#cjUWvQVLDPRNtwfz87HOw`RJ~r~eyD8!`78YsL8()YPPOBW<&xj-n3JQs|eWqa1 zD-4GE2|2b<{@90Z|1!RN?Q1_C%mky_*IO1>Y-rn3*X1fKE6e>t{_x>Ll5m!RfxTT4*nDl}^-y8%1n0CIHpcrUV*rNz`}~klT|rwHG6zsJ zL5occvF7>S4def=UY`nC}V6Q4(o)<0HNi3Z*u=&}rkHVamyvc~n_N6Tz`PNo+BN&^`ub5|UsSHbI!lNzU|}B;Cl3jCUZJ8v6jTEQhAw749&|jh zb|tfhjlldzSOC0M!)R|F4C4%#^r2*7;x1T=B0m+qp}b(Bb^`{R#Y+{u;?1>Hl^>VJ ze?wzNXr9htAQsoM_?6OKYHziN&WLKC<`$T+zm6zSi1*`3hYWk$>ctf?13z`Twf_C@ zUmq7&@Z3?x>@hIMe&TkyZOMRMNpENG8mK5t&cqlfXcQY(l-Tgt2fTO@lZr6}R4#mJs7qkpiWRl$ei!W zLB9U)?K_e+J>?UQaUvas-49#Y@OJfrfb(?J37IAja8HeIyA>Bye|h5V&Hg*h^Lzbg zq7yn75%q;Whj79gv<8ZO#tb~>G)_Cp`}O=3y8%JL3bOV`8_&z%8L;WK*Q^BuuP0d3 z6BexL>n&uFSN=P%o@F5dPyUW2FD?~}`rP?e7pqv4S@aQyB^LYCThaLOjBUKPs`=&B z$`qYb=R3cBFuiq8t36IFHr=y}(z5O2pPf=DUtLrw|8)Nyo0ZpJw>EAyD&zc_;I$v$j#;vAv{B`kCDW|C zpkYR(tJuqUwFC)~8OuT1v!~poNt31?7}DY6$B(avnQfD?(3@?a(XLXG zBU`FfuZ7 zuPl1=w&gbt#idVAs4`B`3MlaX*|U`j9zQOrsJOk))8KKGzJl3C?PQV>u@eO^CSW$HMqU$s7-n|^eJ6(JB4B|9!CTA;q za$K2DPQ&DG=p<{io3CE=EGjCxwDHFP@eqjshef%W_=VKeOlX|)=MS89Y#TndooRSl zy=INU;>ULsVy$=5$EiMgbno42V{=*6;>}|=(DK5C3spaU+yqV=%%II+6@!+XsYBO? 
zTMqS}`h)1`zkBzs%`I9vIy?K(&Zc2tAVtWOL$zQ74CmtFa^u#mb}X@Nr%&rks$}W= zyf_e+k~{X0Gxn|!F22d?ZMl0n#Ci zgoL5-#FZ-tfn)|V0Q7ihG;O_$h=sTs_n9!emMjUfC~6YpaJVQj0Crb}9_-A!Uo zU>P7h^jYTJL$_YkQw-twI_i*dovl7jR`8ofr_MozeI+8m!Pv zv@ZW4*mbtwRP}ZXu1#}D*!qfT0ar#1AAa#t^xVZ>2&>OfrPq$KGIargHpKf$n*S;e zNJN`Q5Jd@Y3A^n=yd-~fm2uH{3fvx?s2wE9Xv`K$G%=QYOK*b!On;T(#hi#DtsrjV6FtC zu-Mp>_FV_z0!UN%KzX$>L*_-_>F6CN$g^_Vm;L>{C`p|D`A3JeaMrW~2-#%h5)mxf z$;6Dy#R`5~cMTOQP+nes&Dyo$u$Os67Skh-^U3aVNNH&|a@rOdIr`79Xwpith)}^z zqI<3d!W=#i*6%eH-4Z70~Ru z@#hytyNN4PFq6)iGbjJ<-O>x%W-9858M@6e9nr(x1e$mVoo8%p+}F^s8~WK{`;L6V z8E~t=m}f0fIjgNx_cbgsatC?}H4dt!uP*3qPM>^(g&)irqdsO#AM&pm7elR&OeEl` zC~Hmlqj|84oZQ4IQ#vxTN*Xk-HjR1eZ+_n9?*>HvVDUWwFJ!BPHcEj6s)SL*%lY4_A6SJSoxTz zv6FkqDk$hiDnL%&4n=IBIBrSh7B=(igNF|v?h&S}*tyXsJSHZXrIbJtlA4-2j$W?N z=;$Cs^Ak-6qeMHm^ifONOK*|KzAY`yes5*4$W%l%eN_%~rQE{Fu6HX`BNqJG`IG@@ z3)v7kw{K4o%brbXq@sjV+~f}O@}jv3!=w4ZPf86xuzh-nlj3>#)3IEh0hj0IH(75dPevp?ub^qIPFoM?};@VHYjt>f3L3Y#tV1>G?ICR zy&4IF9!P{Txw0j&h=f@T+~X3JCtFxdpR_x$^o;>N0$0+(J={2wG%lR^;%K_?6;EIU zPoPU~q4rp%)G~d9`USb` z%;OY`U;*djj$qpFwC~yT=M_he90|s@+rRW)2P4K|Gi+uru_vJY?x8s!I5fI$*svk( z!i7ix&7eHrk3FPPU``aBk#GHoG`&uDJA zA2UKjxIV8La{BI{gyQeC7YZ}QGIx`5RD-SCwX^i}yyAuXvWRY+DKqq29$k9vQY?^+ zc!^HSmk%Ve*aX@A(a&9S@4zfan|&SQ_*Me>e!3nrTj!E87bIKtR2}TN8OAK7@i}p) zv$=Ma-Hag}>+I*8uv0&E>Qn)V*X2#WurC@PI()zY^Vs5@f#yw*0>v8vf3`pm=>W|T@jt#e{(eR4m;at9aIqbjz9eS`s zsR(Y9!6_MVp>QC20BC~6`!mETJz^J_-9`F?Xle~x7AxMs!yo=A&T0RVeZ_U>URYNg@a({^W9H8dJs!<-iSLQwdkgzqP2MOVkdG$+zJ$uY()Z0}0Jjcl>k7ybqgqI9gVk z@$kpbpGxF6VjE-ma&+33I&k9tv{s6l%R)CD^5V7O&i^X3tN&GK*BoG6Z zEnDVB_$R{@Nb>qBDD-%(Kr!(3?JhONgAE#@^?JW;^ZBI}~rFzDz}?Sy4HWdvuZB}+5~h(a!Xn=+5wU@oI<{Yz`TN%a-zG!(;U zJ>ut?R;w+iMW&HdE0+^ zSi9k-Gebf0$E`Do9kQxu0Y1p5tlJ*gySJZguf)t2?FOx1n;5GSCJa`%V?@Q zUT3f4rLW{I*Q57=urQIUDlb{GL_`w*(vSr)Y>%l7RToH}U!2)?4d28ch;Q50-$!EH zA0(Yg5#%3!jf=@OE85S{@d{3f>P2*tbFS#$iU_qNZL^tvety1`r06K9QD$|6W*Sc& zfJ#Oq*TAapchbA+kJ8U&ki(Z{gKbJcaW%mUyOZ3MJn^Qwkwc#MKVH1DZtUZNf<#WH04CCSy?SvVHTE_6BjT3XgPMGje=I7 
z?Y}DqoiuHooSbYidGZ!gVaZz(qKV|;Pu4N5LEg*;)VGa*q}c2e!9qw^Lr5F@>gsm- z`SWLPU3=H9)<^szA{0d`9vs|B1Y-VC>noeC&_>phxF;D$n&IZ=HXS+3I<=DPwwJ)8 zrdpvKi3t{IIMveNp+jd~Y#|XC0WGlRr2D8_+Am+eTr$qov^8}7RwfU`16#Lk?4vjc zbjZWQ;|7ovnD%%#H$y(`iI4ukfpYBSTRgK6!o7g8x=Q`xGM%kTHh%XA4-ap}>Ona_ zW8o3DdsTJy1TG5)e))j|2XapKmC0K2*K&B9j(l+Q#nJLvgeM4DRm^yDkcdU?e$jT_Q2l;L1e#e~Kb z^fi@Y&b{nm3+w`0NXT_Nz}Tt+7CZ#!|LJVjm;7(E^21-tz@o>{#U{#;cuFI?%WnOP zIJ{jJF9zaKDtmlH9@Kg}MD!vOHWj1Aonm5QBvlJe4!-tWxxD%Ms^T%?SpvmOgcUWw zZJim8eMEQUGiENxpon*IFeK2$nDcUFL3<6w`zfvMs2S)`%_DAiHdil=lOsRtAr`vE z`P6^&z$ds4a{D~r*v+E}3Dk^<95@iN>iBba-Lz@bxvPlR*sq8x1i&K7 zA3(_dfa%P2m4O;{lm1+8wuJ-RY3f38U=PYIEUz>)THhk%+;TE#$!0dqcpHUue8>hH z7;K5xV(W~bFrg(cY%@JoDH>KORj?4!7cXAy8@XW+XpuxRd{t34@ga>)-b+`nI+IlT z2L&b7DetE4X1^*6^&_yJ&s9Rdq zpOY;HYbff?NYDocRE#v_jk8f8wn{<7%}K7@)~@~jHnSBFYi}hbC1U&+ma#`u!w5bl zsp!S^wV%3BTJ1owz&`8SuU~7R9lq#@=S45@AN!4DjZqUa!6UG$Gab)4 ztzSQk!(uQE+>F(25C4&R;lhr1t<-bpTHknYwdnCLhoeW2db|%A{*b8#47Bb7K;K%z zaovkFhpBj5!o$0BTx^4RXeB`?Ls2b9Jg-gP)iH`Ua{s*?Iav~J-`tKv^t$} zYJQC$Nl1S~u03Qb(yki4&%v`z61r(LTj(~%##3kn5<-M)e%{G}jiPhjfW2Dz6!GS? 
z0zi*tK%=c&xAs4O{`|S+&vyOTwbZmJ)DDrr-)%#uEN+2;Tq-_WPxAD1dyZ-P?JO|C zx?ddbx52)1o;;bQtEZR5zODvJz3JF;&gfk;wZ_yne#i;6tGgV*lh?R^|Na?-3(mvN z-hGatgQz`v&+W}|E$m039W9Ahi2!W{kd&8wo3F3wId4Z?MIsf@Es|wN+FDr22y_2s86b(F;FPM2Z@g zTg{`5t?+%IPu!zbr?D_yK(^O+W(??wz=~+UTQ%758?m_H;wlGTaVP|r zzeQ2|Lh-pLPwHtt)|8g?`&Oe_9%R>C_bfx?vUEijUXm!Dq+GH9Mi$wbIKxTV4Ov&~ zCOj2ob-Y%$-o00pMuyop$!3G-X6NLXDVT+V@ddV@794n!^eMZbz?J~7PcEY%YVTqH z?z<9)@#;iDXpUKYFj%v>mAn`Rhwpba+v*Oc7utMFu)9(Mp!;O<(Ht5U0DuRuO$@Qq zs+Cq_wLveSRqgMaJ9{a}gIMqUzcbma8|~E_wMnjbu<&~fq@vLv!l{1JYFk%wo*`bh zIG@s(NXuES$bW4op)*+u=AW9DX0USND&4#LwP@AWk#H!2{LTdB?hIkjwNF=Z z@cQ0H!7LNO%OioYjU3r3Z`1EglFKJMXYTo4_Dk72W;HL(?$T;?!S&#p!N%Qo>wwdt z$K?X#X92ute&o1$^LlWaJtWbqRd2^{ZQH+pe~(S?CUyyI2OvQ%A&Ne$;G;7ZIkpID zI!Apd@G!ch%3huyI5O1f(AYf)3fe#{ZLvPGseEe*Cd!k{&D#ROgcMHEv1pRp1ud^i zWT+T(Gdv-F4vzekZAv(BT1=bfd-Mj$*IYp0pdVUn5!q9wqja1xV}>{*`OK2Eiu9CB`_@ z!H8b$=FU|IzC>I+o{BLuJn-mJj>LB4B|CzGSx}zx$ z#As*R_U)?aai1QvmIS6M>lsz6jjHWk=a3_}=6znq8`z8Oyas1p({q*v`*lg_(}*#? z?wY3#Imwm7|FcsefG112^!)G+U6Dvw*+xqDDiS{_5?bD`qlOTFH?+id0 z2`bed&Zc$VrJM|9&rYjRgzbXmV8?V;8Rao}(jJ9y(rgVat@adnrW78jfd6dw?u5yk z9GHW#$?*DdZ@7O*hzNwiZ(bW(Wjye2FGx8uokdJzAarMAr!KAuU7gC+{jL*$63AKJ zmX-OlH=JNwRVeJZ&=3-5M@}IT-&4K*xKTode0n*v@2(p<8r5Uy-yb#hlJQQpztn8``{kt4z--EDmDj)3ALq2kiLEUmg0~x=Jo5>!Ec7t1^v5Xg|t_%U`m~y zbTp4zrswiU?Z-&qJ~no%Ij0ubXbdH#8t36*yt#Qn>Zc6jPrWt1!U&BpZB4x~(cOIr zR1!piVPmED;>80{crbyp1xGo%_V^Di+kBa9>BB=Upn0Z{Aw1OVe zAAgl zbvtaUsi`^1)D+sYKYBDk{zPgNkf5lsUd4nx7tU88htN=U_FyDrh0?}NcMcR5&SYqb zfW7iT=AL{oiT#$ej!I6B)XV0cHoub06(I?4a1Hh8gqDAWDXNDfl z)TbW+P4tr^qkQTrNB8e<*0xJ8Ee=}2+eSu4ihqeQQ&&#;j4UmlcIkn_=vZq_=iFea znN4Bk{ERm>EP59Vm7H8%`}4|YMBx@~D&&9oG3h@Y7PY>va_oo^b8%_V5m^>h@)V%# z94?({8@j{>BMGy3K|f#Lr|6F) zLi_Q(s$`-N*onLs3x>_K)wSH5X|bKP_zsY(!a_}FL0$2NzN@y0{EtDsTgb4*SF?92 zOk`W}M-Vg?06->7_sOr9FJ2s`6sE4#`}tt{ybr~zK%?XPtb)(nO2(|t66TFew6jy` zGuEe*q;2T^X=DCm=fxdz4^x&^@Q(ch11C`9*Z_MES^l&W-f(@I7*6iQAzraQPtB&1 z0V{n*dE8iPCHU^cirh>G`Gnr;H_92mD7gfY z#zA$P0D%L*DfLHbXIgE(!;9$;@7})cONDOx?B4zRkufp7_A^;4nAs@2 
z?Tw#O+L!%m$l8!FHd(n7dJeLPC*|J^8>(H$;z@cJX2q*^2V=t9M#9R&L^p zUSC;fXC>6#kp!gG6;~KJ2_Z9j#`WU(7?TOko9R|S_q1t``@mze_r>(@yXdTu&kJ#cq1&C5bIvV} z@6B^aM_4RXR#vXvrZsXTi_g|A?s0YO!dWV*K2|9Ng8Si*@DCroV=F4H3BucDO3Ie%0s zt)}rUaC2Xh62s@#Gk>d;7b=l#hX{E{T=M8(_b`Js>Gs-te4SPU-%*u}@~%@rNHz|L z>h9g@j}`JAI*`8&5e)9bhtmbr!bfFs@}_~v)O>vvWfMIi+jvR74dMb1PfwY#vC8`O z>j@0)xQ|6kmh5Cmku&q5o~Lg#D%$v|H`IMA22JbWq$~y;s$MNmvTpKL=d6TqG6x(J zm9}Iv+4T4!pnU(R3|ZP0o<7k!UQQyo?t;V)?|Ye*qLv_?naJ;#R95ch>D{<{w-X1n zIoC?|v`yHauF0a@e23Wo7Gw^WvgFIhD3f6Kf4cvVG33P!GB?RD69wvo!x=sI0;bZd zTtN4VRQiItaNqv@`2sYBt7%&anYl}_`l7%KfO?@wJ2o~bbXQiU@saf(n#cSo&a9`1 z6%^SSge`zJwU!!bfIa<57rU9+xR6WgH|o~a*KGL2UI4F|WmBtd$+DwLVV)>>g+o6#Lny3$i>qI8>7mWEfe0xZ!^$KUx zzxuX8KbO6{Wud9sUCyrmzU$8S!E_K{O(a!ZQdP$xo_+W3RDpn<_s{8mIR2Kn^ar1r zRo5e3xXG7Ooi>1WRMV>Gk1Y>Cow=S9xRk~9x{85AhvjWu_2_NR!Jnng)<2r>H*>qT z=l_}YhnVR~8=|cMCQn^wQ;lHaMj;b+|0^VqRetiBAyyvg!TBIoo>N@%LRMZH*}>Lw zca!|vx5LQk^T~5MMSlLE?^GV7In%wS_eQ8Xv9m>3Vqa7vFFV$5o-~wOc!9(M1jrxW zoZ8Pu@Yamz72}enC9w(d@S4Xy)^KxmrNRBlvkgiPH6$RMnK->uP`cWibJ+1IH5yeciWOhF=t(4As%T*Mssk#`|?GX&@3(&tYD7AeCD4DnT@zY z_H_Z}W7_%i5gcX#{v|oGUvcp_EshKu9DiyBsduk0kI(sqS{L--D)%*v%(#A51<3`K zU1Lc-P_z&pu)YG%dAlmUk9k@9uEh`DiBRbXJg0^M#hjBe0my*M1!6A&*)&gy~05SM^{y?$C ze+izTe!eb>OZH|ZMy9e7%SM(eugxWHhXLPBwzVy=JpDnrcW*ti_CW>1B4-7!{U_YV z7uY`>5IW_R24^0rc6MgwcyH66qr8&0cMomNUsDXVfi?J|OnBGU0d_R;GMUk5-n zO@SWqg9UP=SdVXt1=%O&{ka8MYI0d2(!Z*``Ol9;bo$?AWeNQe%`ZZ9cLs6gi>n(> zi!QaQR`pLdJMC)DX+Dyb!(J}){S_V-CX`>~FOF57Zf<9U(&5^*YeGL#QH{Hm z;_{Jq3`vlqugeR~2uR!<^l$2dGlZiVI_0cq?LX!C71GYpu1!;?mC;$ZERL)K87tdQ@HE1nM z=k3?8oq!Oy_tAd%n5kqz6Rl7`6;9cm27bb;J(BCoW&<~g)WpHcp7&P-}_ z8X?f7jiinOp6^V@b|=9pu0Lz3*!X=cvR4pzQDdlfSC_P;FD8J=1?t0xD~n_mVi{eF9S{92 zG|BS|D|)hOA0dJ((~t1zv(Wwdk!5~OqxAIjI#cXSz|FYu8P$ro2f?~omX*^EGf?;jk!JbQMOea!9?E)R=0 zT3K3lM@^n~_G~y0`1-jbqLesngeH`lBj?Rs!+=ohua2hITtGx#aZlYfViVWtI!1I8 z^29mz8aAh%DS(L8L4v!#T2*7y}ZIi!+ z{dI@t6dXO$pAS^6xfO_=Q6BgPli1A(>isyCr^h{wD5gpgT1G5^r@yw+Tgx%q<$wHk 
zkoST01_hJn-D){oThv6$aMezWwn=Wp9wltgp1z>q?>>JX2$C#dT_KAy9ywBI-N*s= zYMPmqLBooZ;h66OPH3_}!3Lwq$##6hiWB0{UgRdoVUtC;dl=_lbalftO7v11kh@C% zLdcL50L2M6RzOlNIPq2Yj>up^w!)D%^wb^eU3ZSDspEv%)vg`y%}gInv9?x%*A;@O zD_?-D$RRa&EVJaU3k4hT;wW0UPCbX+wY#ThWUM(iKZ`==cj=-6Lk=0^#>_ehFI&vZ zg~B0p{>%%!d_8cr?q)Uu5MM|ZF@UINtGV>as^bt`0O=dc)9%gu?U6RePy}gKjSkOQ zA+-6+K*$<#K$Ba#j8EHVP;k^LIAyuKAjye>PKn`=tEp20#Mwu(Dk#9`R@y&QJ`Fjz zcdx=mRNfd?nKD-I_#pkV`)`0}by&3(W^EOF6MY>>ZKz570h-dQZx57(T`Vs96$|_? zA|DIvtpBX3|Gi04byN!fy~+L^+Q&wLvQ@ReOB{ZVQ6J|<^U^TN+@-D=)7vmYQ1*it9({$6;HwIl>4 z{D`bM^Fe%8PSEHT^{X`Fh@xRidh;ye#&~zsb$Uao_WY&YQ=X+B&r11KTcGQFiUoBapbwkZ2|B46_($ zJWVX)H*+|5Bn?Ym**vnaw$4SYvJ1~5MfAt*_WX(sYEM1gQKG^SK`K`R-zG1Po*ri} zyG{@Qz$lN+l)@SWPH<*{(u0_G0`8MEkB~R^X)vaxt;PKL^PPuXzH4{IU0Xol2qjxc zM2iCot_4@zJAfAYZQtHTkch@3MjS@i1kIOAXGIXd;IVO|>g8Hjv2>+U4ve=bFfREt znh3fNT#x132FXY2nKQP}@JcaY-3Gx*m+)Kd8CC0a=jdId*&l*a&N;=L^$;>wLFS0w zI3bbo7Xs2m8kedOQ3YBQX&oXVokHJgDWYY=#cS{Mx<3hf)XHXD#F_NhS;)EAjp&8+#>nK%J&wU*D6nUn| zwS>w|2t1iWJC{VP(^JtzFg0$>LQ)VYuhr2CE!b&un!X+`wx<6$iefS!bX;4uF-KP< zVZ<*12Q2iZh!jNMm_Aok1{WOdjU1*ExlYgja-AtUN~!)SruBFp=%FG|0tXaRXs(Q+ zj&E$HQ})L$dud6|!S&aE+ieU@uLN5fJNr{04>RFZ?N6FOaR_IbUbjstRPvJZ>}H_Y=ljeHuF~nKC6%NW5Uzo^*dKWRxHvW~PY*TcONEq9inm0*#^TGM}O3 z!B2ml*+#YLWV)~(>YsPhznw=vdf?CXQEh_VBYP1Y+BsLe%o%gi#gXXH zUA%+BOB*T=;x=xE=&T%cPdbm8ghxhBXU0}*9 zE}$tj5CMaa_irz`K}Jj+atq+03eM3PO(6=S*|TTU=H$0?XFFy%IG0YdSwF(W?!^_dnY&6j$fpYL5K&&~*#j;B zAhhCEa|x?YY7tCH#3T-t;VXN6d=Ph9>2yjJ64m8}DN;~Op*}f(XrBYnmym6s^6f0C zq7Mjx*VI@OMbM~s7Yt+s0B))k(ruFbY+&P%H;INhMMZsB@&?Ey($;>mMr7KWqbCcJ zW-zThlIe2uXCzO}Bpe1(O}fQtw`7}I`iAR}cq{AMZ;RLJD)mAFC?rs{CyTm-hNV74 zjR{oh-*ILJmQhk$1~QnY!0fjpYdf^Zq{s#&&L2dv&Q)(S&;vEQol_1e%aHF^2DRLRSK< z^AA5D+Aa1BuU;Y8(9xblI2`2Y`*I#~zR)@GY|SFmHU~#HgS3=aaw8<3QY7} zJFq3$E}EEjykv<)oR?5Hxe&Gn0KoQB71X^GjX=R~+zGX&6=vP#fPfCP3U`3Vs%^~j z5BZon)@acE$%_~8v_9lmEFqKoPBXVNP;bR@&p#Tmyo9!Y|LQ@x=>eG|QvLl?@~WI3 zT&mGbU{889{5HTa@u%(gz(8vR!k&lQN!Tv11l#@J5`!QI9(^Mxc!*$1oN2L^<~9oU 
z)4N{Vyf#c#l)73=$DH9=XWqNl8NZxNv4s8%Y9X3%%3fX6rwj@q#0xzT=#FjBuGfQR z+OBS`#NBHrWhA_0OeNg92Gl8JW80j82hS<>Ha7_fZ0lT+n>og6Bw^=FAYn%qZ#%`; zwxYbegPBb#@+CpS<_y#zD@v&njNq5Y!9k(cIzcMx{bVwo4}#_-+dAvv>JseUO>vOY z>0lE5HRNipQaL-y{iGP`R$;Q<6svh6_i)C(<&dK^s10L z`}e6IvVzSzp3@T?LZ0^3w@DV77d!twr~xQdJ5=uIZnG?F%sr2qVlhBO+NV#KzN=b4 zWrTuVdc3WlbONWgOrs>HwKQS?*Tnbg$kFbJ%Pv1D3L@|u5%~Z6nWAcLa`c>m!TMJJ z`#(}vr+Jtqb^d$H;sQ+*mx-14-%n_=IFbJMb0D`RFXg|VWj+4?^B11R?cKjW0KIZw zBcsR9NrJ^v+xBU{sp(9mhBxnHWis2m6*>{r`-@hr2!VD(vY~o)gm`K*r&Nssp1Hj> zV^jM3_N>wWehP;CU+=@;;_=R=&V$`_i+v)o* zIW4)0h=?{4Ak`hA^CyT8Pm(?`3h+#%bN8qL=5Ys`x|k(|
    af=B z=7ur<&+ju182SJH8Y651;~fEZw_jiBgRyZYXHfH}Uy6~afZhsK@uMbh?xF58kVt!U zGIa0s)K!C90O0v2^^52~j@Q~Jx^k*<2j~h7fI$_2(eL{Dn=fDX-~`!my zdkbB5R|SREG%o?nw?OTnf@LVmT!{pral-WJU3>NJjSe7y`0ET8L>k^SQZ3Hk{s!yN zM%$1sUadfcuZ}BMcA=62JnMjoF)|<{yI-IY`mqg)$&exMKz?99_FNIFLP4l7RjVWM z6jon-$PfkOE3=>dX|@&|?QtJ-?&GKrp5j78W-+>$ss)2j}mPj@CfcJhq{#6~xW92V?3x!JlOl0pxys z?yK{IWh3Ew7LfL_WX4fsiRuTXZXa<*;-JN_#q~Gt>Yo0*!OcylV7(~rL}zoad%ZHwo~}M85&r6jbwr-Vd7+T8DGUCRbyV4KAPtaGFE1 zB;WV+=GuloYkQ6L>BZS2sqQ$78An}k#|=DpZirY6P(}3Bb%ovP)VK2EJW@~5#aLU+ zK$$+IPd^-9eWyUkdhtS4WXM-e0K&IT$U+Sg!eTk@yMf}C*G|GXN2|Dv^g{`IFWxp6 z{En88l-VbH0F?wHtd)m7_}gv|Jh|>#L)s1cGljSuNT(t%g`%wanXfn|SG&85AAu^r%sV;kQB1RT{j^Z0Njf z4Shz9D-q{6fBQQRL1YYVKmV+FA_p$md~>)3J2&?NITS$sjhi=H2{#7nYtz=_>>uT$ zD(t-?A}nkx)rb??FHuROA88@sg__ewXVUTx1U<{@7HNBv`$%-yt1F8}i^b(zH<}lk zL+ZVo)4ZwtSJy|)yE!-G4G+P8E+x68C#$2EH-rsoW;>yBhjkbI@T=FZ-L!19G!ubF zDzLV&h@fdsyXq6JmJ&ssnd1cH=*p8tp`6KQ7j}QB+!)c_@)|CcP7qq+9wQjdVnRdk0j$3Ea(MdO@`D?G}$aXwObKm z<93q2xF$3{qC754sK!!x+G9sef)?r9mfl^iKr9SyN;Lb+#T-0D-w?8~ZC9_3WN1YQ zPe!TW;&@YuIKo`bxxqCk(_1=R^{yW#fK_@kAQ0nut7z6zGfg0NDjK?c|1Na8&8M41 zZ&XQHSxRoZ7R=d^2!Pgp;|E0q-U%#kA$VjT9y;}7j3M~PIG;!_A(s|%Ua%=g2xD4`vneP$J$(anj1@tEkY5pV zelvZgQq1<9M8*d-h1)o{qJ<{#&($ir0No#8x-rJjZfv zs>Qs~(C~0Sa=%QNW@MQ{#l^y_0^XNwY;%Ho2Q;)3`)I~{PoS9`1W#2k9muyWLgYWr z0hFZcye$DCiy*v)vYPt;kNm5d^x0G9cOtjFiG$Cc(9kVBEtL~907}HA2_Y0C6Sr}U zs|0t{jL+Elcj>>Qi#mljz2!8nXbPm)H$=sRCI(c|@y6yBZ9j`+g?uds#I>o|_fAof z{pd_D8y@{&1QxpU&b1eXM^Mlm%MDu1w*!)zjONXqdzdo6;tVqhT1voin~HyRhd^8N zp|n&qqC;?I5lfyl3r#-h`oGUpTZ}8ZJOQnADi$z;sL^vAF><7cru^;%b93`8ZB8_x z0-TO^6&kCleAr8ls24B($QNtK#Ho1t@c=gSu4!bpI;|M1B9JOdt=f!J&iJLUvMB1E zxEoOslJmmhx9hvgu!dWD7cs|v3+S@RiHyF83NQIKkKzAqJAeMG<}3C6+ zr)dsNBIn2ygu4o`Jx?rufBHP)5oap5ay1k4|Oq@Ww+qZA8m9Br_Z-o=6D8l^AegtJ< z4 11\u001b[0m R \u001b[38;5;241m=\u001b[39m RMSD(xtc_u, xtc_u,\n\u001b[1;32m 12\u001b[0m select\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbackbone\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 13\u001b[0m 
)\n\u001b[1;32m 14\u001b[0m R\u001b[38;5;241m.\u001b[39mrun(backend\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdask\u001b[39m\u001b[38;5;124m'\u001b[39m, n_workers\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m)\n\u001b[1;32m 16\u001b[0m stop \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n", + "\u001b[0;31mNameError\u001b[0m: name 'xtc_u' is not defined" + ] + } + ], + "source": [ + "from MDAnalysis.analysis.rms import RMSD\n", + "import time\n", + "\n", + "\n", + "## Dask \n", + "\n", + "### XTC \n", + "\n", + "start = time.time()\n", + "\n", + "R = RMSD(xtc_u, xtc_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='dask', n_workers=4)\n", + "\n", + "stop = time.time()\n", + "\n", + "xtc_time_dask = stop-start\n", + "\n", + "### ZarrMD, disk\n", + "start = time.time()\n", + "\n", + "R = RMSD(zarrmd_disk_u, zarrmd_disk_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='dask', n_workers=4)\n", + "\n", + "stop = time.time()\n", + "\n", + "zarrmd_disk_time_dask = stop-start\n", + "\n", + "## H5MD, disk\n", + "\n", + "start = time.time()\n", + "\n", + "R = RMSD(h5md_disk_u, h5md_disk_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='dask', n_workers=4)\n", + "\n", + "stop = time.time()\n", + "\n", + "h5md_disk_time_dask = stop-start\n", + "\n", + "## ZarrMD, S3\n", + "start = time.time()\n", + "\n", + "R = RMSD(zarrm_s3_u, zarrm_s3_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='dask', n_workers=4)\n", + "\n", + "stop = time.time()\n", + "\n", + "zarrmd_s3_time_dask = stop-start\n", + "\n", + "## H5MD, S3\n", + "\n", + "start = time.time()\n", + "\n", + "R = RMSD(h5md_s3_u, h5md_s3_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='dask', n_workers=4)\n", + "\n", + "stop = time.time()\n", + "\n", + "h5md_s3_time_dask = stop-start\n", + "\n", + "## Serial\n", + "\n", + "## XTC\n", + "\n", + "start = time.time()\n", + "\n", + "R = RMSD(xtc_u, 
xtc_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='serial')\n", + "\n", + "stop = time.time()\n", + "\n", + "xtc_time_serial = stop-start\n", + "\n", + "\n", + "## ZarrMD, disk\n", + "start = time.time()\n", + "\n", + "R = RMSD(zarrmd_disk_u, zarrmd_disk_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='serial')\n", + "\n", + "\n", + "stop = time.time()\n", + "\n", + "zarrmd_disk_time_serial = stop-start\n", + "\n", + "## H5MD, disk\n", + "\n", + "\n", + "start = time.time()\n", + "\n", + "R = RMSD(h5md_disk_u, h5md_disk_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='serial')\n", + "\n", + "\n", + "stop = time.time()\n", + "\n", + "h5md_disk_time_serial = stop-start\n", + "\n", + "## ZarrMD, S3\n", + "\n", + "start = time.time()\n", + "\n", + "R = RMSD(zarrm_s3_u, zarrm_s3_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='serial')\n", + "\n", + "stop = time.time()\n", + "\n", + "zarrm_s3_time_serial = stop-start\n", + "\n", + "## H5MD, S3\n", + "\n", + "start = time.time()\n", + "\n", + "R = RMSD(h5md_s3_u, h5md_s3_u,\n", + " select=\"backbone\", \n", + ")\n", + "R.run(backend='serial')\n", + "\n", + "\n", + "stop = time.time()\n", + "\n", + "h5md_s3_time_serial = stop-start" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.5079891562461853\n", + "1.5919220209121705\n", + "4.785086027781168\n", + "15.081525770823161\n", + "0.8399416049321492\n", + "1.9880372802416484\n", + "5.266235820452372\n", + "19.71386777162552\n", + "2.5871417999267576\n", + "5.6665968219439184\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2862458/849001616.py:51: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. 
after set_ticks() or using a FixedLocator.\n", + " ax1.set_xticklabels(labels, rotation=45, ha='right')\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
    " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.ticker as ticker\n", + "\n", + "labels = [\n", + "'XTC, Dask', \n", + "'XTC, Serial', \n", + "'H5MD, Dask', \n", + "'H5MD, Serial', \n", + "'ZarrMD, Dask', \n", + "'ZarrMD, Serial', \n", + "# '',\n", + "'H5MD, Dask ', \n", + "'H5MD, Serial ', \n", + "'ZarrMD, Dask ', \n", + "'ZarrMD, Serial ']\n", + "\n", + "values = [\n", + "# XTC, disk\n", + "30.479349374771118 / 60.0,\n", + "95.51532125473022 / 60.0,\n", + "# H5MD, disk\n", + "287.1051616668701 / 60.0,\n", + "904.8915462493896 / 60.0,\n", + "# ZarrMD, disk\n", + "50.396496295928955 / 60.0,\n", + "119.2822368144989 / 60.0,\n", + "# Sep\n", + "# 0,\n", + "# H5MD, S3\n", + "315.97414922714233 / 60.0,\n", + "1182.8320662975311 / 60.0,\n", + "# ZarrMD, S3\n", + "155.22850799560547 / 60.0,\n", + "339.99580931663513 / 60.0,\n", + "]\n", + "\n", + "for value in values:\n", + " print(value)\n", + "colors = [\n", + "'#009e73', '#009e73', \n", + "'#e69f00', '#e69f00', \n", + "'#56b4e9', '#56b4e9', \n", + "# 'none',\n", + "'#e69f00', '#e69f00', \n", + "'#56b4e9', '#56b4e9']\n", + "\n", + "\n", + "fig1, ax1 = plt.subplots(figsize=(12, 8))\n", + "ax1.bar(labels, values, color=colors)\n", + "ax1.set_xticklabels(labels, rotation=45, ha='right')\n", + "ax1.set_ylabel('Time (minutes)')\n", + "\n", + "# Axis 2 (labels)\n", + "ax2 = ax1.twiny()\n", + "ax2.spines[\"bottom\"].set_position((\"axes\", -0.20))\n", + "ax2.tick_params('both', length=0, width=0, which='minor')\n", + "ax2.tick_params('both', direction='in', which='major')\n", + "ax2.xaxis.set_ticks_position(\"bottom\")\n", + "ax2.xaxis.set_label_position(\"bottom\")\n", + "\n", + "ax2.set_xticks([0.0, 0.6, 1.0])\n", + "ax2.xaxis.set_major_formatter(ticker.NullFormatter())\n", + "ax2.xaxis.set_minor_locator(ticker.FixedLocator([0.3, 0.8]))\n", + 
"ax2.xaxis.set_minor_formatter(ticker.FixedFormatter(['Disk (SSD)', 'AWS S3']))\n", + "\n", + "\n", + "\n", + "plt.title('Comparison of RMSD Calculation Speed for Different Storage Backends, Trajectory Formats, and Execution Strategies')\n", + "\n", + "plt.tight_layout()\n", + "\n", + "plt.savefig('RMSD.png')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "zarrtraj", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From d4ab7710ec63813750d7224fe09bf5843e513570 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Thu, 3 Oct 2024 14:59:33 -0700 Subject: [PATCH 03/23] update citations --- joss_paper/paper.bib | 65 ++++++++++++++++++++++++++++++++++++++++++++ joss_paper/paper.md | 58 +++++++++++++++++++++++---------------- 2 files changed, 100 insertions(+), 23 deletions(-) diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib index dba3826..b0f0132 100644 --- a/joss_paper/paper.bib +++ b/joss_paper/paper.bib @@ -19,6 +19,36 @@ @misc{FoldingAtHome:2020 note = {Accessed: September 25, 2024} } +@article{GPCRmd:2019, + title = {Bringing Molecular Dynamics Simulation Data into View}, + volume = {44}, + ISSN = {0968-0004}, + url = {http://dx.doi.org/10.1016/j.tibs.2019.06.004}, + DOI = {10.1016/j.tibs.2019.06.004}, + number = {11}, + journal = {Trends in Biochemical Sciences}, + publisher = {Elsevier BV}, + author = {Hildebrand, Peter W. and Rose, Alexander S. 
and Tiemann, Johanna K.S.}, + year = {2019}, + month = nov, + pages = {902–913} +} + +@article{GPCRome:2020, + title = {GPCRmd uncovers the dynamics of the 3D-GPCRome}, + volume = {17}, + ISSN = {1548-7105}, + url = {http://dx.doi.org/10.1038/s41592-020-0884-y}, + DOI = {10.1038/s41592-020-0884-y}, + number = {8}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media LLC}, + author = {Rodríguez-Espigares, Ismael and Torrens-Fontanals, Mariona and Tiemann, Johanna K. S. and Aranda-García, David and Ramírez-Anguita, Juan Manuel and Stepniewski, Tomasz Maciej and Worp, Nathalie and Varela-Rial, Alejandro and Morales-Pastor, Adrián and Medel-Lacruz, Brian and Pándy-Szekeres, Gáspár and Mayol, Eduardo and Giorgino, Toni and Carlsson, Jens and Deupi, Xavier and Filipek, Slawomir and Filizola, Marta and Gómez-Tamayo, José Carlos and Gonzalez, Angel and Gutiérrez-de-Terán, Hugo and Jiménez-Rosés, Mireia and Jespers, Willem and Kapla, Jon and Khelashvili, George and Kolb, Peter and Latek, Dorota and Marti-Solano, Maria and Matricon, Pierre and Matsoukas, Minos-Timotheos and Miszta, Przemyslaw and Olivella, Mireia and Perez-Benito, Laura and Provasi, Davide and Ríos, Santiago and R. Torrecillas, Iván and Sallander, Jessica and Sztyler, Agnieszka and Vasile, Silvana and Weinstein, Harel and Zachariae, Ulrich and Hildebrand, Peter W. and De Fabritiis, Gianni and Sanz, Ferran and Gloriam, David E. 
and Cordomi, Arnau and Guixà-González, Ramon and Selent, Jana}, + year = {2020}, + month = jul, + pages = {777–787} +} + @article{H5MD:2014, title = {H5MD: A structured, efficient, and portable file format for molecular data}, journal = {Computer Physics Communications}, @@ -89,6 +119,23 @@ @article{MDAnalysis:2011 year = {2011} } +@article{MDSsrv:2022, + author = {Kampfrath, Michelle and Staritzbichler, René and Hernández, Guillermo Pérez and Rose, Alexander S and Tiemann, Johanna K S and Scheuermann, Gerik and Wiegreffe, Daniel and Hildebrand, Peter W}, + title = "{MDsrv: visual sharing and analysis of molecular dynamics simulations}", + journal = {Nucleic Acids Research}, + volume = {50}, + number = {W1}, + pages = {W483-W489}, + year = {2022}, + month = {05}, + abstract = "{Molecular dynamics simulation is a proven technique for computing and visualizing the time-resolved motion of macromolecules at atomic resolution. The MDsrv is a tool that streams MD trajectories and displays them interactively in web browsers without requiring advanced skills, facilitating interactive exploration and collaborative visual analysis. We have now enhanced the MDsrv to further simplify the upload and sharing of MD trajectories and improve their online viewing and analysis. With the new instance, the MDsrv simplifies the creation of sessions, which allows the exchange of MD trajectories with preset representations and perspectives. An important innovation is that the MDsrv can now access and visualize trajectories from remote datasets, which greatly expands its applicability and use, as the data no longer needs to be accessible on a local server. In addition, initial analyses such as sequence or structure alignments, distance measurements, or RMSD calculations have been implemented, which optionally support visual analysis. 
Finally, based on Mol*, MDsrv now provides faster and more efficient visualization of even large trajectories compared to its predecessor tool NGL.}", + issn = {0305-1048}, + doi = {10.1093/nar/gkac398}, + url = {https://doi.org/10.1093/nar/gkac398}, + eprint = {https://academic.oup.com/nar/article-pdf/50/W1/W483/44375694/gkac398.pdf}, +} + + @article {MDverse:2024, article_type = {journal}, title = {MDverse, shedding light on the dark matter of molecular dynamics simulations}, @@ -184,6 +231,24 @@ @inproceedings{ParallelAnalysis:2010 series = {FAST'10} } +@article{SharingMD:2019, +author = {Abraham, Mark and Apostolov, Rossen and Barnoud, Jonathan and Bauer, Paul and Blau, Christian and Bonvin, Alexandre M.J.J. and Chavent, Matthieu and Chodera, John and Čondić-Jurkić, Karmen and Delemotte, Lucie and Grubmüller, Helmut and Howard, Rebecca J. and Jordan, E. Joseph and Lindahl, Erik and Ollila, O. H. Samuli and Selent, Jana and Smith, Daniel G. A. and Stansfeld, Phillip J. and Tiemann, Johanna K.S. and Trellet, Mikael and Woods, Christopher and Zhmurov, Artem}, +title = {Sharing Data from Molecular Simulations}, +journal = {Journal of Chemical Information and Modeling}, +volume = {59}, +number = {10}, +pages = {4093-4099}, +year = {2019}, +doi = {10.1021/acs.jcim.9b00665}, + note ={PMID: 31525920}, +URL = { + https://doi.org/10.1021/acs.jcim.9b00665 +}, +eprint = { + https://doi.org/10.1021/acs.jcim.9b00665 +} +} + @article{SplitApplyCombine:2011, title={The Split-Apply-Combine Strategy for Data Analysis}, volume={40}, diff --git a/joss_paper/paper.md b/joss_paper/paper.md index f587687..351d359 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -59,14 +59,22 @@ The computing power in HPC environments has increased to the point where running simulation algorithms is often no longer the constraint in obtaining scientific insights from molecular dynamics trajectory data. 
Instead, the ability to process, analyze and share large volumes of data provide -new constraints on research in this field. +new constraints on research in this field [@SharingMD:2019]. Other groups in the field recognize this same need for adherence to -FAIR principles [@FAIR:2019] including the MDDB (Molecular Dynamics Data Bank), an EU-scale -repository for biosimulation data [@MDDB:2024] and MDverse, a prototype search engine -for publicly-available GROMACS simulation data [@MDverse:2024]. -While these efforts currently offer prototype solutions for indexing and -searching MD trajectory data, the problem of efficiently distributing the data remains. +FAIR principles [@FAIR:2019] including +MDsrv, a tool that can stream MD trajectories into a web browser for visual exploration [@MDsrv:2022], +GCPRmd, a web service that builds on MDsrv to provide a predefined set of analysis results and simple +geometric features for G-protein-coupled receptors [@GPCRmd:2019] [@GPCRome:2020], +MDDB (Molecular Dynamics Data Bank), an EU-scale +repository for biosimulation data [@MDDB:2024], +and MDverse, a prototype search engine +for publicly-available GROMACS simulation data [@MDverse:2024]. + +While these efforts currently offer solutions for indexing, +searching, and vizualizing MD trajectory data, the problem of distributing trajectories +in way that enables *NumPy*-like slicing and parallel reading for use in arbitrary analysis +tasks remains. Though exposing download links on the open internet offers a simple solution to this problem, on-disk representations of molecular dynamics trajectories often range in size @@ -84,13 +92,29 @@ This is possible thanks to the *Zarr* [@Zarr:2024] package which allows streaming array-like data from a variety of storage mediums and [Kerchunk](https://github.com/fsspec/kerchunk), which extends the capability of *Zarr* by allowing it to read HDF5 files. 
Because it implements the standard MDAnalysis trajectory reader API, -*Zarrtraj* can leverage *Zarr*'s ability to read a slice of a file and even +*Zarrtraj* can leverage *Zarr*'s ability to read a slice of a file and to read a file in parallel, making it compatible with analysis algorithms that use the "split-apply-combine" parallelization strategy [@SplitApplyCombine:2011]. In addition to the H5MD format, *Zarrtraj* can stream and write trajectories in the experimental ZarrMD format, which ports the H5MD layout to the *Zarr* filetype. +This work builds on the existing MDAnalysis `H5MDReader` +[@H5MDReader:2021], and similarly uses *NumPy* [@NumPy:2020] as a common interface in-between MDAnalysis +and the file storage medium. *Zarrtraj* was inspired and made possible by similar efforts in the +geosciences community to align data practices with FAIR principles [@PANGEO:2022]. + +With *Zarrtraj*, we envision research groups making their data publicly available +via a cloud URL so that anyone can reuse their trajectories and reproduce their results. +Large databases, like MDDB and MDverse, can expose a URL associated with each +trajectory in their databases so that users can make a query and immediately use the resulting +trajectories to run an analysis on the hits that match their search. Groups seeking to +collect a large volume of trajectory data to train machine learning models [@MLMDMethods:2023] can make use +of our tool to efficiently and inexpensively obtain the data they need from these published +URLs. 
+ +# Features and Benchmarks + Once imported, *Zarrtraj* allows passing trajectory URLs just like ordinary files: ```python import zarrtraj @@ -98,6 +122,7 @@ import MDAnalysis as mda u = mda.Universe("topology.pdb", "s3://sample-bucket-name/trajectory.h5md") ``` + Initial benchmarks show that *Zarrtraj* can iterate serially through an AWS S3 cloud trajectory (load into memory one frame at a time) at roughly 1/2 or 1/3 the speed it can iterate through the same trajectory from disk and roughly @@ -115,24 +140,11 @@ to just 4.9GB after compression with the Zstandard algorithm [@Zstandard:2021] and quantization to 3 digits of precision. See [performance considerations](https://zarrtraj.readthedocs.io/en/latest/performance_considerations.html) for more. -This work builds on the existing MDAnalysis `H5MDReader` -[@H5MDReader:2021], and similarly uses *NumPy* [@NumPy:2020] as a common interface in-between MDAnalysis -and the file storage medium. *Zarrtraj* was inspired and made possible by similar efforts in the -geosciences community to align data practices with FAIR principles [@PANGEO:2022]. - -With *Zarrtraj*, we envision research groups making their data publicly available -via a cloud URL so that anyone can reuse their trajectories and reproduce their results. -Large databases, like MDDB and MDverse, can expose a URL associated with each -trajectory in their databases so that users can make a query and immediately use the resulting -trajectories to run an analysis on the hits that match their search. Groups seeking to -collect a large volume of trajectory data to train machine learning models [@MLMDMethods:2023] can make use -of our tool to efficiently and inexpensively obtain the data they need from these published -URLs. - # Acknowledgements -We thank Dr. Jenna Swarthout Goddard for supporting the GSoC program at MDAnalysis. -We also thank Martin Durant, author of Kerchunk, for helping refine and merge features in his upstream codebase + +We thank Dr. 
Jenna Swarthout Goddard for supporting the GSoC program at MDAnalysis and +Martin Durant, author of Kerchunk, for helping refine and merge features in his upstream codebase necessary for this project. LW was a participant in the Google Summer of Code 2024 program. # References \ No newline at end of file From 9e009129941a1289e9766a854704d51da843215a Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Thu, 3 Oct 2024 15:05:50 -0700 Subject: [PATCH 04/23] semantics --- joss_paper/paper.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 351d359..9c2f9cf 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -130,9 +130,9 @@ at roughly 1/2 or 1/3 the speed it can iterate through the same trajectory from However, it should be noted that this speed is influenced by network latency and that writing parallelized algorithms can offset this loss of speed as in \autoref{fig:RMSD}. -![Benchmarks performed on a machine with 2 Intel Xeon 2.00GHz CPUs, 32GB of RAM, and an SSD configured with RAID 0. The trajectory used for benchmarking was the YiiP trajectory from MDAnalysisData [@YiiP:2019], a 9000-frame (90ns), 111,815 particle simulation of a membrane-protein system. The original 3.47GB XTC trajectory was converted into an uncompressed 11.3GB H5MD trajectory and an uncompressed 11.3GB ZarrMD trajectory using the MDAnalysis `H5MDWriter` and *Zarrtraj* `ZarrMD` writers, respectively. \label{fig:benchmark}](benchmark.png) +![Benchmarks performed on a machine with 2 Intel Xeon 2.00GHz CPUs, 32GB of RAM, and an SSD configured with RAID 0. The trajectory used for benchmarking was the YiiP trajectory from MDAnalysisData [@YiiP:2019], a 9000-frame (90ns), 111,815 particle simulation of a membrane-protein system. 
The original 3.47GB XTC trajectory was converted into an uncompressed 11.3GB H5MD trajectory and an uncompressed 11.3GB ZarrMD trajectory using the MDAnalysis `H5MDWriter` and *Zarrtraj* `ZarrMD` writers, respectively. XTC trajectory read using the MDAnalysis `XTCReader` for comparison. \label{fig:benchmark}](benchmark.png) -![RMSD benchmarks performed on the same machine as \autoref{fig:benchmark}. YiiP trajectory aligned to first frame as reference using `MDAnalysis.analysis.align.AlignTraj` and converted to compressed, quantized H5MD (7.8GB) and ZarrMD (4.9GB) trajectories. RMSD performed using development branch of MDAnalysis (2.8.0dev) with "serial" and "dask" backends. See [this notebook]() for full benchmark codes. \label{fig:RMSD}](rmsd.png) +![RMSD benchmarks performed on the same machine as \autoref{fig:benchmark}. YiiP trajectory aligned to first frame as reference using `MDAnalysis.analysis.align.AlignTraj` and converted to compressed, quantized H5MD (7.8GB) and ZarrMD (4.9GB) trajectories. RMSD performed using development branch of MDAnalysis (2.8.0dev) with "serial" and "dask" backends. See [this notebook](https://github.com/Becksteinlab/zarrtraj/blob/d4ab7710ec63813750d7224fe09bf5843e513570/joss_paper/figure_2.ipynb) for full benchmark codes. \label{fig:RMSD}](RMSD.png) *Zarrtraj* is capable of making use of *Zarr*'s powerful compression and quantization when writing ZarrMD trajectories. 
The uncompressed MDAnalysisData YiiP trajectory in ZarrMD format is reduced from 11.3GB uncompressed From b7bb1f9f4791f55f6b96fee595fa25697d091b08 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Thu, 3 Oct 2024 15:07:24 -0700 Subject: [PATCH 05/23] citation typo --- joss_paper/paper.bib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib index b0f0132..f4243a5 100644 --- a/joss_paper/paper.bib +++ b/joss_paper/paper.bib @@ -119,7 +119,7 @@ @article{MDAnalysis:2011 year = {2011} } -@article{MDSsrv:2022, +@article{MDsrv:2022, author = {Kampfrath, Michelle and Staritzbichler, René and Hernández, Guillermo Pérez and Rose, Alexander S and Tiemann, Johanna K S and Scheuermann, Gerik and Wiegreffe, Daniel and Hildebrand, Peter W}, title = "{MDsrv: visual sharing and analysis of molecular dynamics simulations}", journal = {Nucleic Acids Research}, From 1f2142c1e4aad11f938c16bd711d3cc8897cb119 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Thu, 3 Oct 2024 15:07:56 -0700 Subject: [PATCH 06/23] missing paranethis --- joss_paper/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 9c2f9cf..57f4607 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -126,7 +126,7 @@ u = mda.Universe("topology.pdb", "s3://sample-bucket-name/trajectory.h5md") Initial benchmarks show that *Zarrtraj* can iterate serially through an AWS S3 cloud trajectory (load into memory one frame at a time) at roughly 1/2 or 1/3 the speed it can iterate through the same trajectory from disk and roughly -1/5 to 1/10 the speed it can iterate through the same trajectory on disk in XTC format \autoref{fig:benchmark}. +1/5 to 1/10 the speed it can iterate through the same trajectory on disk in XTC format (\autoref{fig:benchmark}). 
However, it should be noted that this speed is influenced by network latency and that writing parallelized algorithms can offset this loss of speed as in \autoref{fig:RMSD}. From 09380479f353ff6341b34bd54626f0dd402b5dbc Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Thu, 3 Oct 2024 15:16:06 -0700 Subject: [PATCH 07/23] MDDB, typos --- joss_paper/paper.bib | 10 ++++++++++ joss_paper/paper.md | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib index f4243a5..e094f20 100644 --- a/joss_paper/paper.bib +++ b/joss_paper/paper.bib @@ -119,6 +119,16 @@ @article{MDAnalysis:2011 year = {2011} } +@misc{MDDB:2024, + title={The need to implement FAIR principles in biomolecular simulations}, + author={Rommie Amaro and Johan Åqvist and Ivet Bahar and Federica Battistini and Adam Bellaiche and Daniel Beltran and Philip C. Biggin and Massimiliano Bonomi and Gregory R. Bowman and Richard Bryce and Giovanni Bussi and Paolo Carloni and David Case and Andrea Cavalli and Chie-En A. Chang and Cheatham, III, Thomas E. and Margaret S. Cheung and Cris Chipot and Lillian T. Chong and Preeti Choudhary and Gerardo Andres Cisneros and Cecilia Clementi and Rosana Collepardo-Guevara and Peter Coveney and Roberto Covino and T. Daniel Crawford and Matteo Dal Peraro and Bert de Groot and Lucie Delemotte and Marco De Vivo and Jonathan Essex and Franca Fraternali and Jiali Gao and Josep Lluís Gelpí and Francesco Luigi Gervasio and Fernando Danilo Gonzalez-Nilo and Helmut Grubmüller and Marina Guenza and Horacio V. Guzman and Sarah Harris and Teresa Head-Gordon and Rigoberto Hernandez and Adam Hospital and Niu Huang and Xuhui Huang and Gerhard Hummer and Javier Iglesias-Fernández and Jan H. Jensen and Shantenu Jha and Wanting Jiao and William L.
Jorgensen and Shina Caroline Lynn Kamerlin and Syma Khalid and Charles Laughton and Michael Levitt and Vittorio Limongelli and Erik Lindahl and Kresten Lindorff-Larsen and Sharon Loverde and Magnus Lundborg and Yun Lyna Luo and Francisco Javier Luque and Charlotte I. Lynch and Alexander MacKerell and Alessandra Magistrato and Siewert J. Marrink and Hugh Martin and J. Andrew McCammon and Kenneth Merz and Vicent Moliner and Adrian Mulholland and Sohail Murad and Athi N. Naganathan and Shikha Nangia and Frank Noe and Agnes Noy and Julianna Oláh and Megan O'Mara and Mary Jo Ondrechen and José N. Onuchic and Alexey Onufriev and Silvia Osuna and Anna R. Panchenko and Sergio Pantano and Carol Parish and Michele Parrinello and Alberto Perez and Tomas Perez-Acle and Juan R. Perilla and B. Montgomery Pettitt and Adriana Pietropalo and Jean-Philip Piquemal and Adolfo Poma and Matej Praprotnik and Maria J. Ramos and Pengyu Ren and Nathalie Reuter and Adrian Roitberg and Edina Rosta and Carme Rovira and Benoit Roux and Ursula Röthlisberger and Karissa Y. Sanbonmatsu and Tamar Schlick and Alexey K. Shaytan and Carlos Simmerling and Jeremy C. Smith and Yuji Sugita and Katarzyna Świderek and Makoto Taiji and Peng Tao and Irina G. Tikhonova and Julian Tirado-Rives and Inaki Tunón and Marc W. Van Der Kamp and David Van der Spoel and Sameer Velankar and Gregory A. Voth and Rebecca Wade and Ariel Warshel and Valerie Vaissier Welborn and Stacey Wetmore and Chung F. 
Wong and Lee-Wei Yang and Martin Zacharias and Modesto Orozco}, + year={2024}, + eprint={2407.16584}, + archivePrefix={arXiv}, + primaryClass={q-bio.BM}, + url={https://arxiv.org/abs/2407.16584}, +} + @article{MDsrv:2022, author = {Kampfrath, Michelle and Staritzbichler, René and Hernández, Guillermo Pérez and Rose, Alexander S and Tiemann, Johanna K S and Scheuermann, Gerik and Wiegreffe, Daniel and Hildebrand, Peter W}, title = "{MDsrv: visual sharing and analysis of molecular dynamics simulations}", diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 57f4607..242a487 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -16,7 +16,7 @@ authors: affiliation: [1, 2] - name: Yuxuan Zhuang orcid: 0000-0003-4390-8556 - affiliations: [5, 6] + affiliation: [5, 6] - name: Richard J Gowers orcid: 0000-0002-3241-1846 - name: Oliver Beckstein @@ -127,7 +127,7 @@ Initial benchmarks show that *Zarrtraj* can iterate serially through an AWS S3 cloud trajectory (load into memory one frame at a time) at roughly 1/2 or 1/3 the speed it can iterate through the same trajectory from disk and roughly 1/5 to 1/10 the speed it can iterate through the same trajectory on disk in XTC format (\autoref{fig:benchmark}). -However, it should be noted that this speed is influenced by network latency and that +However, it should be noted that this speed is influenced by network bandwidth and that writing parallelized algorithms can offset this loss of speed as in \autoref{fig:RMSD}. ![Benchmarks performed on a machine with 2 Intel Xeon 2.00GHz CPUs, 32GB of RAM, and an SSD configured with RAID 0. The trajectory used for benchmarking was the YiiP trajectory from MDAnalysisData [@YiiP:2019], a 9000-frame (90ns), 111,815 particle simulation of a membrane-protein system. 
The original 3.47GB XTC trajectory was converted into an uncompressed 11.3GB H5MD trajectory and an uncompressed 11.3GB ZarrMD trajectory using the MDAnalysis `H5MDWriter` and *Zarrtraj* `ZarrMD` writers, respectively. XTC trajectory read using the MDAnalysis `XTCReader` for comparison. \label{fig:benchmark}](benchmark.png) From 6532e43b6d64335777a666f75fe3bf6941bd5c70 Mon Sep 17 00:00:00 2001 From: ljwoods2 Date: Sun, 20 Oct 2024 12:21:25 -0700 Subject: [PATCH 08/23] added example --- docs/source/index.rst | 2 +- docs/source/yiip_example.rst | 29 +++++++++++++++++++++++++++++ joss_paper/paper.md | 29 ++++++++++++++++++----------- 3 files changed, 48 insertions(+), 12 deletions(-) create mode 100644 docs/source/yiip_example.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 919eb21..f507eab 100755 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -16,9 +16,9 @@ This means users can interact with massive trajectory files without ever storing :caption: Contents: installation + yiip_example walkthrough api performance_considerations - zarrmd-file-spec/v0.2.0 benchmarks diff --git a/docs/source/yiip_example.rst b/docs/source/yiip_example.rst new file mode 100644 index 0000000..02d75e7 --- /dev/null +++ b/docs/source/yiip_example.rst @@ -0,0 +1,29 @@ +YiiP Protein Example +==================== + +To get started immediately with *Zarrtraj*, we have made the topology and trajectory of the +[YiiP protein in a POPC membrane](https://www.mdanalysis.org/MDAnalysisData/yiip_equilibrium.html) +publicly available for streaming. The trajectory is stored in in the `zarrmd` format +for optimal streaming performance. 
+ +To access the trajectory, follow this example: + +```python +import zarrtraj +import MDAnalysis as mda +import fsspec + + +with fsspec.open("gcs://zarrtraj-test-data/YiiP_system.pdb", "r") as top: + + u = mda.Universe( + top, "gcs://zarrtraj-test-data/yiip.zarrmd", topology_format="PDB" + ) + + for ts in u.trajectory: + # Do something +``` + +While there is not yet an officially recommended way to access cloud-stored topologies, this +method of opening a Python `File`-like object from the topology URL in PDB format using *FSSpec* +works with MDAnalysis 2.7.0. Check back later for further development! \ No newline at end of file diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 242a487..51a16dd 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -9,32 +9,35 @@ tags: authors: - name: Lawson Woods orcid: 0009-0003-0713-4167 - affiliation: [3, 4] - - name: Hugo Macdermott-Opeskin + affiliation: [1, 2] + - name: Hugo MacDermott-Opeskin orcid: 0000-0002-7393-7457 + affiliation: [3] - name: Edis Jakupovic - affiliation: [1, 2] + affiliation: [4, 5] - name: Yuxuan Zhuang orcid: 0000-0003-4390-8556 - affiliation: [5, 6] + affiliation: [6, 7] - name: Richard J Gowers orcid: 0000-0002-3241-1846 - name: Oliver Beckstein orcid: 000-0003-1340-0831 - affiliation: [1, 2] + affiliation: [4, 5] affiliations: - - name: Department of Physics, Arizona State University, Tempe, Arizona, United States of America + - name: School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, United States of America index: 1 - - name: Center for Biological Physics, Arizona State University, Tempe, AZ, United States of America + - name: School of Molecular Sciences, Arizona State University, Tempe, Arizona, United States of America index: 2 - - name: School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, United States of America + - name: Open Molecular Software Foundation, Davis, CA, United States of America 
index: 3 - - name: School of Molecular Sciences, Arizona State University, Tempe, Arizona, United States of America + - name: Center for Biological Physics, Arizona State University, Tempe, AZ, United States of America index: 4 - - name: Department of Computer Science, Stanford University, Stanford, CA 94305, USA. + - name: Department of Physics, Arizona State University, Tempe, Arizona, United States of America index: 5 - - name: Departments of Molecular and Cellular Physiology and Structural Biology, Stanford University School of Medicine, Stanford, CA 94305, USA. + - name: Department of Computer Science, Stanford University, Stanford, CA 94305, USA. index: 6 + - name: Departments of Molecular and Cellular Physiology and Structural Biology, Stanford University School of Medicine, Stanford, CA 94305, USA. + index: 7 date: 22 September 2024 bibliography: paper.bib --- @@ -140,6 +143,10 @@ to just 4.9GB after compression with the Zstandard algorithm [@Zstandard:2021] and quantization to 3 digits of precision. See [performance considerations](https://zarrtraj.readthedocs.io/en/latest/performance_considerations.html) for more. +# Example + +The YiiP membrane protein trajectory [@YiiP:2019] used for benchmarking in this paper is publicly available for streaming from the Google Cloud Bucket *"gcs://zarrtraj-test-data"*. To access it, +follow the up-to-date instructions [here](https://zarrtraj.readthedocs.io/en/latest/yiip_example.html). 
# Acknowledgements From 656a21c86e738197c242173526c71c6934914f0c Mon Sep 17 00:00:00 2001 From: ljwoods2 Date: Sun, 20 Oct 2024 12:28:43 -0700 Subject: [PATCH 09/23] rst formatting --- docs/source/yiip_example.rst | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/source/yiip_example.rst b/docs/source/yiip_example.rst index 02d75e7..6e88ae8 100644 --- a/docs/source/yiip_example.rst +++ b/docs/source/yiip_example.rst @@ -2,27 +2,28 @@ YiiP Protein Example ==================== To get started immediately with *Zarrtraj*, we have made the topology and trajectory of the -[YiiP protein in a POPC membrane](https://www.mdanalysis.org/MDAnalysisData/yiip_equilibrium.html) +`YiiP protein in a POPC membrane <https://www.mdanalysis.org/MDAnalysisData/yiip_equilibrium.html>`_ publicly available for streaming. The trajectory is stored in in the `zarrmd` format for optimal streaming performance. To access the trajectory, follow this example: -```python -import zarrtraj -import MDAnalysis as mda -import fsspec +.. code-block:: python + import zarrtraj + import MDAnalysis as mda + import fsspec -with fsspec.open("gcs://zarrtraj-test-data/YiiP_system.pdb", "r") as top: - u = mda.Universe( - top, "gcs://zarrtraj-test-data/yiip.zarrmd", topology_format="PDB" - ) + with fsspec.open("gcs://zarrtraj-test-data/YiiP_system.pdb", "r") as top: + + u = mda.Universe( + top, "gcs://zarrtraj-test-data/yiip.zarrmd", topology_format="PDB" + ) + + for ts in u.trajectory: + # Do something - for ts in u.trajectory: - # Do something -``` While there is not yet an officially recommended way to access cloud-stored topologies, this method of opening a Python `File`-like object from the topology URL in PDB format using *FSSpec* From 70a1b6a1f6256287806f43d747d3b747334f9259 Mon Sep 17 00:00:00 2001 From: ljwoods2 Date: Sun, 20 Oct 2024 12:33:28 -0700 Subject: [PATCH 10/23] add docs --- docs/source/yiip_example.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/yiip_example.rst
b/docs/source/yiip_example.rst index 6e88ae8..30a6a32 100644 --- a/docs/source/yiip_example.rst +++ b/docs/source/yiip_example.rst @@ -26,5 +26,6 @@ To access the trajectory, follow this example: While there is not yet an officially recommended way to access cloud-stored topologies, this -method of opening a Python `File`-like object from the topology URL in PDB format using *FSSpec* +method of opening a Python `File`-like object from the topology URL in PDB format using +`FSSpec <https://filesystem-spec.readthedocs.io/en/latest/>`_ works with MDAnalysis 2.7.0. Check back later for further development! \ No newline at end of file From fa339302bd0b4c658d883d27bd478c1896d1b389 Mon Sep 17 00:00:00 2001 From: ljwoods2 Date: Sun, 20 Oct 2024 12:53:57 -0700 Subject: [PATCH 11/23] replace removed file --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index f507eab..8651a10 100755 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -20,5 +20,6 @@ This means users can interact with massive trajectory files without ever storing walkthrough api performance_considerations + zarrmd-file-spec/v0.2.0 benchmarks From 39cb6def52484bb997426950a67a37584171d8df Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 23 Oct 2024 17:17:25 -0700 Subject: [PATCH 12/23] edits for better transitions and smoother flow --- joss_paper/paper.md | 58 +++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 51a16dd..9a3fe8d 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -38,15 +38,15 @@ affiliations: index: 6 - name: Departments of Molecular and Cellular Physiology and Structural Biology, Stanford University School of Medicine, Stanford, CA 94305, USA.
index: 7 -date: 22 September 2024 +date: 23 October 2024 bibliography: paper.bib --- # Summary Molecular dynamics (MD) simulations provide a microscope into the behavior of -atomic-scale environments otherwise prohibitively difficult to observe, however, -the resulting trajectory data is too often siloed in a single institutions' +atomic-scale environments otherwise prohibitively difficult to observe. However, +the resulting trajectory data are too often siloed in a single institutions' HPC environment, rendering it unusable by the broader scientific community. Zarrtraj enables these trajectories to be read directly from cloud storage providers like AWS, Google Cloud, and Microsoft Azure into MDAnalysis, a popular Python @@ -70,40 +70,46 @@ MDsrv, a tool that can stream MD trajectories into a web browser for visual expl GCPRmd, a web service that builds on MDsrv to provide a predefined set of analysis results and simple geometric features for G-protein-coupled receptors [@GPCRmd:2019] [@GPCRome:2020], MDDB (Molecular Dynamics Data Bank), an EU-scale -repository for biosimulation data [@MDDB:2024], +repository for bio-simulation data [@MDDB:2024], and MDverse, a prototype search engine for publicly-available GROMACS simulation data [@MDverse:2024]. While these efforts currently offer solutions for indexing, -searching, and vizualizing MD trajectory data, the problem of distributing trajectories +searching, and visualizing MD trajectory data, the problem of distributing trajectories in way that enables *NumPy*-like slicing and parallel reading for use in arbitrary analysis tasks remains. 
-Though exposing download links on the open internet offers a simple solution to this problem, +Although exposing download links on the open internet offers a simple solution to this problem, on-disk representations of molecular dynamics trajectories often range in size -with large datasets up to TBs in scale [@ParallelAnalysis:2010] [@FoldingAtHome:2020], +up to TBs in scale [@ParallelAnalysis:2010] [@FoldingAtHome:2020], so a solution which could prevent this duplication of storage and unnecessary download step would provide greater utility -for the computational molecular sciences ecosystem. - -Enter *Zarrtraj*, the first fully-functioning tool to our knowledge that allows -streaming trajectories into analysis software using an established trajectory format. -*Zarrtraj* is implemented as an MDAnalysis [@MDAnalysis:2016] MDAKit [@MDAKits:2023] that -enables streaming MD trajectories in the popular HDF5-based H5MD format [@H5MD:2014] -from AWS S3, Google Cloud Buckets, and Azure Blob Storage & Data Lakes without ever downloading them. -This is possible thanks to the *Zarr* [@Zarr:2024] package which allows -streaming array-like data from a variety of storage mediums and [Kerchunk](https://github.com/fsspec/kerchunk), +for the computational molecular sciences ecosystem, especially if it +provides access to slices or subsampled portions of these large files. + +To address this need, we developed *Zarrtraj* as a prototype for streaming +trajectories into analysis software using an established trajectory +format. *Zarrtraj* extends MDAnalysis [@MDAnalysis:2016], a popular +Python-based library for the analysis of molecular simulation data in a wide +range of formats, to also accept remote file locations for trajectories instead +of local filenames. Instead of being integrated directly into MDAnalysis, +*Zarrtraj* is built as an external MDAKit [@MDAKits:2023] that automatically +registers its capabilities with MDAnalysis on import and thus acts as a plugin. 
+*Zarrtraj* enables streaming MD trajectories in the popular HDF5-based H5MD format [@H5MD:2014] +from AWS S3, Google Cloud Buckets, and Azure Blob Storage and Data Lakes without ever downloading them. +*Zarrtraj* relies on the *Zarr* [@Zarr:2024] package for +streaming array-like data from a variety of storage mediums and on [Kerchunk](https://github.com/fsspec/kerchunk), which extends the capability of *Zarr* by allowing it to read HDF5 files. -Because it implements the standard MDAnalysis trajectory reader API, -*Zarrtraj* can leverage *Zarr*'s ability to read a slice of a file and -to read a file in parallel, making it compatible with -analysis algorithms that use the "split-apply-combine" parallelization strategy [@SplitApplyCombine:2011]. -In addition to the H5MD format, -*Zarrtraj* can stream and write trajectories in the experimental ZarrMD -format, which ports the H5MD layout to the *Zarr* filetype. +*Zarrtraj* leverages *Zarr*'s ability to read a slice of a file and to read a +file in parallel and it implements the standard MDAnalysis trajectory reader +API, which taken together make it compatible with analysis algorithms that use +the "split-apply-combine" parallelization strategy [@SplitApplyCombine:2011]. +In addition to the H5MD format, *Zarrtraj* can stream and write trajectories in +the experimental ZarrMD format, which ports the H5MD layout to the *Zarr* +file type. This work builds on the existing MDAnalysis `H5MDReader` -[@H5MDReader:2021], and similarly uses *NumPy* [@NumPy:2020] as a common interface in-between MDAnalysis +[@H5MDReader:2021], and uses *NumPy* [@NumPy:2020] as a common interface in-between MDAnalysis and the file storage medium. *Zarrtraj* was inspired and made possible by similar efforts in the geosciences community to align data practices with FAIR principles [@PANGEO:2022]. @@ -151,7 +157,7 @@ follow the up-to-date instructions [here](https://zarrtraj.readthedocs.io/en/lat # Acknowledgements We thank Dr. 
Jenna Swarthout Goddard for supporting the GSoC program at MDAnalysis and -Martin Durant, author of Kerchunk, for helping refine and merge features in his upstream codebase +Martin Durant, author of Kerchunk, for helping refine and merge features in his upstream code base necessary for this project. LW was a participant in the Google Summer of Code 2024 program. -# References \ No newline at end of file +# References From 1eca65b79666e14e2fc2162e442ba36e437f0678 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 23 Oct 2024 18:10:36 -0700 Subject: [PATCH 13/23] extended example - show remote top with fsspec - show analysis example --- joss_paper/paper.bib | 13 +++++++++- joss_paper/paper.md | 59 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib index e094f20..b742aae 100644 --- a/joss_paper/paper.bib +++ b/joss_paper/paper.bib @@ -304,4 +304,15 @@ @misc{Zstandard:2021 year = 2021, month = feb, abstract = {Zstandard, or "zstd" (pronounced "zee standard"), is a lossless data compression mechanism. This document describes the mechanism and registers a media type, content encoding, and a structured syntax suffix to be used when transporting zstd-compressed content via MIME. Despite use of the word "standard" as part of Zstandard, readers are advised that this document is not an Internet Standards Track specification; it is being published for informational purposes only. 
This document replaces and obsoletes RFC 8478.}, -} \ No newline at end of file +} + + +@article{Liu:2010, + author = {Liu, Pu and Agrafiotis, Dimitris K and Theobald, Douglas L}, + journal = {J Comput Chem}, + month = {May}, + number = 7, + pages = {1561-1563}, + title = {Fast determination of the optimal rotational matrix for macromolecular superpositions}, + volume = 31, + year = 2010} diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 9a3fe8d..827ad1f 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -151,8 +151,63 @@ for more. # Example -The YiiP membrane protein trajectory [@YiiP:2019] used for benchmarking in this paper is publicly available for streaming from the Google Cloud Bucket *"gcs://zarrtraj-test-data"*. To access it, -follow the up-to-date instructions [here](https://zarrtraj.readthedocs.io/en/latest/yiip_example.html). +The YiiP membrane protein trajectory [@YiiP:2019] used for benchmarking in this +paper is publicly available for streaming from the Google Cloud Bucket +*gcs://zarrtraj-test-data/yiip.zarrmd*. The topology file in PDB format, which contains +information about the chemical composition of the system, can also be accessed +remotely from the same bucket (*gcs://zarrtraj-test-data/YiiP_system.pdb*) using +[fsspec](https://filesystem-spec.readthedocs.io/en/latest/). + +In the following example, we access the topology file and the trajectory from +the *gcs://zarrtraj-test-data* cloud bucket. We initially create an +`MDAnalysis.Universe`, the basic object in MDAnalysis that ties static topology +data and dynamic trajectory data together and manages access to all data. 
We +iterate through a slice of the trajectory, starting from frame index 100 and +skipping forward in steps of 20 frames: + +```python +import zarrtraj +import MDAnalysis as mda +import fsspec + +with fsspec.open("gcs://zarrtraj-test-data/YiiP_system.pdb", "r") as top: + u = mda.Universe(top, "gcs://zarrtraj-test-data/yiip.zarrmd", + topology_format="PDB") + + for ts in u.trajectory[100::20]: + print(ts) +``` + +Inside the loop over trajectory frames we print information for the current +frame `ts` although in principle, any kind of analysis code can run here and +process the coordinates available in `u.atoms.positions`. + +The `Universe` object can be used as if the underlying trajectory file were a +local file. For example, we can use `u` from the preceding example with one of +the standard analysis tools in MDAnalysis, the calculation of the root mean +square distance (RMSD) after optimal structural superposition [@Liu:2010] in +the `MDAnalysis.analysis.rms.RMSD` class. In the example below we select only the +C$_\alpha$ atoms of the protein with an MDAnalysis selection. We run the +analysis with the `.run()` method while stepping through the trajectory at +increments of 100 frames.
We then print the first and last data point from the +results array: + +```python +>>> import MDAnalysis.analysis.rms +>>> R = MDAnalysis.analysis.rms.RMSD(u, select="protein and name CA").run(step=100, verbose=True) +100%|██████████████████████████████████████████| 91/91 [00:28<00:00, 3.21it/s] +>>> print(f"Initial RMSD (frame={R.results.rmsd[0, 0]:g}): {R.results.rmsd[0, 2]:.3f} Å") +Initial RMSD (frame=0) : 0.000 Å +>>> print(f"Final RMSD (frame={R.results.rmsd[-1, 0]:g}): {R.results.rmsd[-1, 2]:.3f} Å") +Final RMSD (frame=9000) : 2.373 Å +``` + +This example demonstrates that the *Zarrtraj* interface enables seamless use of +cloud-hosted trajectories with the standard tools that are either available +with MDAnalysis itself, through MDAKits [@MDAKits:2023] (see the [MDAKit +registry](https://mdakits.mdanalysis.org/mdakits.html) for available packages), +or any script or package that uses MDAnalysis for file I/O. + # Acknowledgements From a7a737498b618ccfac447a9863b7e03bf36b2d41 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 23 Oct 2024 18:20:06 -0700 Subject: [PATCH 14/23] YiiP example: caveats and link --- joss_paper/paper.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 827ad1f..791d09f 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -156,14 +156,16 @@ paper is publicly available for streaming from the Google Cloud Bucket *gcs://zarrtraj-test-data/yiip.zarrmd*. The topology file in PDB format, which contains information about the chemical composition of the system, can also be accessed remotely from the same bucket (*gcs://zarrtraj-test-data/YiiP_system.pdb*) using -[fsspec](https://filesystem-spec.readthedocs.io/en/latest/). - -In the following example, we access the topology file and the trajectory from -the *gcs://zarrtraj-test-data* cloud bucket. 
We initially create an -`MDAnalysis.Universe`, the basic object in MDAnalysis that ties static topology -data and dynamic trajectory data together and manages access to all data. We -iterate through a slice of the trajectory, starting from frame index 100 and -skipping forward in steps of 20 frames: +[fsspec](https://filesystem-spec.readthedocs.io/en/latest/), although this is +currently an experimental feature and details may change. + +In the following example (see also the [YiiP Example in the zarrtraj +docs](https://zarrtraj.readthedocs.io/en/latest/yiip_example.html)), we access +the topology file and the trajectory from the *gcs://zarrtraj-test-data* cloud +bucket. We initially create an `MDAnalysis.Universe`, the basic object in +MDAnalysis that ties static topology data and dynamic trajectory data together +and manages access to all data. We iterate through a slice of the trajectory, +starting from frame index 100 and skipping forward in steps of 20 frames: ```python import zarrtraj From 3f5480bf27f3126a695d4ba48852622aef549e78 Mon Sep 17 00:00:00 2001 From: ljwoods2 Date: Thu, 24 Oct 2024 17:36:59 -0700 Subject: [PATCH 15/23] citation tff --- AUTHORS.md | 3 +++ CHANGELOG.md | 8 +++++++ CITATION.cff | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 18 +++++++++++++++ 4 files changed, 92 insertions(+) create mode 100644 CITATION.cff diff --git a/AUTHORS.md b/AUTHORS.md index 32aeff8..8f79b3b 100755 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -7,6 +7,9 @@ All contributing authors are listed in this file below. The repository history at https://github.com/ljwoods2/zarrtraj and the CHANGELOG show individual code contributions. +New contributors should add themselves to the end of this file AND to +the file CITATION.cff at the end of the top-level authors list. 
+ ## Chronological list of authors +## [0.3.0] 2024-10-24 + +## Authors +- ljwoods2 + +## Added +- added CITATION.cff file (issue #69, PR #68) + ## [0.2.1] 2024-07-28 diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..1b494ea --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,63 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: 'Zarrtraj: A Python package for streaming molecular dynamics trajectories from cloud services' +message: >- + If you use this software, please cite it using the + metadata from this file. +type: software +authors: + - given-names: Lawson + email: lawsonw84@gmail.com + family-names: Woods + orcid: 'https://orcid.org/0009-0003-0713-4167' + affiliation: >- + School of Computing and Augmented Intelligence, + Arizona State University, Tempe, Arizona, United + States of America + - given-names: 'Hugo ' + family-names: MacDermott-Opeskin + orcid: 'https://orcid.org/0000-0002-7393-7457' + affiliation: >- + Open Molecular Software Foundation, Davis, CA, United + States of America + email: hugomacdermott-opeskin@mdanalysis.org + - given-names: 'Edis ' + family-names: 'Jakupovic ' + affiliation: >- + Center for Biological Physics, Arizona State + University, Tempe, AZ, United States of America + - given-names: 'Yuxuan ' + orcid: 'https://orcid.org/0000-0003-4390-8556' + family-names: Zhuang + affiliation: >- + Department of Computer Science, Stanford University, + Stanford, CA 94305, USA. 
+ - given-names: 'Richard ' + orcid: 'https://orcid.org/0000-0002-3241-1846' + family-names: Gowers + name-particle: 'J ' + - given-names: 'Oliver ' + family-names: Beckstein + affiliation: >- + Center for Biological Physics, Arizona State + University, Tempe, AZ, United States of America + orcid: 'https://orcid.org/0000-0003-1340-0831' +identifiers: + - type: doi + value: 10.5281/zenodo.13887976 +repository-code: 'https://github.com/Becksteinlab/zarrtraj' +url: 'https://zarrtraj.readthedocs.io/en/latest/index.html' +abstract: >- + Zarrtraj is an MDAnalysis MDAKit for streaming H5MD and + ZarrMD trajectory files from cloud storage like AWS S3, + Google Cloud Buckets, and Azure Data lakes and Blob + Storage +keywords: + - streaming + - molecular-dynamics + - file-format + - mdanalysis + - zarr +license: GPL-3.0-or-later diff --git a/pyproject.toml b/pyproject.toml index 3996118..c3e52c4 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,3 +106,21 @@ line_length = 80 COLUMN_LIMIT = 80 INDENT_WIDTH = 4 USE_TABS = false + +classifiers = [ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', + 'Operating System :: POSIX', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: Microsoft :: Windows', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Bio-Informatics', + 'Topic :: Scientific/Engineering :: Chemistry', + 'Topic :: Software Development :: Libraries :: Python Modules', +] \ No newline at end of file From 32775862f6a9f17fa3db7593f781fad2d52d3596 Mon Sep 17 00:00:00 2001 From: ljwoods2 Date: Thu, 24 Oct 2024 17:38:14 -0700 Subject: [PATCH 16/23] formatting --- CITATION.cff | 14 +++++++------- 1 file changed, 7 
insertions(+), 7 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 1b494ea..9baac3d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -16,29 +16,29 @@ authors: School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, United States of America - - given-names: 'Hugo ' + - given-names: Hugo family-names: MacDermott-Opeskin orcid: 'https://orcid.org/0000-0002-7393-7457' affiliation: >- Open Molecular Software Foundation, Davis, CA, United States of America email: hugomacdermott-opeskin@mdanalysis.org - - given-names: 'Edis ' - family-names: 'Jakupovic ' + - given-names: Edis + family-names: Jakupovic affiliation: >- Center for Biological Physics, Arizona State University, Tempe, AZ, United States of America - - given-names: 'Yuxuan ' + - given-names: Yuxuan orcid: 'https://orcid.org/0000-0003-4390-8556' family-names: Zhuang affiliation: >- Department of Computer Science, Stanford University, Stanford, CA 94305, USA. - - given-names: 'Richard ' + - given-names: Richard orcid: 'https://orcid.org/0000-0002-3241-1846' family-names: Gowers - name-particle: 'J ' - - given-names: 'Oliver ' + name-particle: J + - given-names: Oliver family-names: Beckstein affiliation: >- Center for Biological Physics, Arizona State From fc07891e40a258c2ad15d779d93b17b843596286 Mon Sep 17 00:00:00 2001 From: ljwoods2 Date: Thu, 24 Oct 2024 17:59:52 -0700 Subject: [PATCH 17/23] ORCID typo --- joss_paper/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 791d09f..fccf839 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -21,7 +21,7 @@ authors: - name: Richard J Gowers orcid: 0000-0002-3241-1846 - name: Oliver Beckstein - orcid: 000-0003-1340-0831 + orcid: 0000-0003-1340-0831 affiliation: [4, 5] affiliations: - name: School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, United States of America From 
e0e15fb6ce2a93bf46fb36bf38ce2f1065092472 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Sat, 26 Oct 2024 15:06:27 -0700 Subject: [PATCH 18/23] add NSF grant to acknowledgements --- joss_paper/paper.md | 1 + 1 file changed, 1 insertion(+) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index fccf839..2917017 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -216,5 +216,6 @@ or any script or package that uses MDAnalysis for file I/O. We thank Dr. Jenna Swarthout Goddard for supporting the GSoC program at MDAnalysis and Martin Durant, author of Kerchunk, for helping refine and merge features in his upstream code base necessary for this project. LW was a participant in the Google Summer of Code 2024 program. +Some work on *Zarrtraj* was supported by the National Science Foundation under grant number 2311372. # References From 91f76ade637c637acfe7d4d69901ec91354e50ba Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Sat, 26 Oct 2024 15:07:30 -0700 Subject: [PATCH 19/23] consistently emphasize *Zarrtraj* --- joss_paper/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 2917017..07f3c9d 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -48,7 +48,7 @@ Molecular dynamics (MD) simulations provide a microscope into the behavior of atomic-scale environments otherwise prohibitively difficult to observe. However, the resulting trajectory data are too often siloed in a single institutions' HPC environment, rendering it unusable by the broader scientific community. -Zarrtraj enables these trajectories to be read directly from cloud storage providers +*Zarrtraj* enables these trajectories to be read directly from cloud storage providers like AWS, Google Cloud, and Microsoft Azure into MDAnalysis, a popular Python package for analyzing trajectory data, providing a method to open up access to trajectory data to anyone with an internet connection. 
Enabling cloud streaming From 1472cafd3496948767cc6586b18da4689fd1dba2 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Wed, 30 Oct 2024 21:37:20 +0000 Subject: [PATCH 20/23] add rjg affiliation --- joss_paper/paper.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 07f3c9d..3272cc9 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -20,6 +20,7 @@ authors: affiliation: [6, 7] - name: Richard J Gowers orcid: 0000-0002-3241-1846 + affiliation: [8] - name: Oliver Beckstein orcid: 0000-0003-1340-0831 affiliation: [4, 5] @@ -38,6 +39,8 @@ affiliations: index: 6 - name: Departments of Molecular and Cellular Physiology and Structural Biology, Stanford University School of Medicine, Stanford, CA 94305, USA. index: 7 + - name: Charm Therapeutics, London, United Kingdom + index: 8 date: 23 October 2024 bibliography: paper.bib --- From 03c0b955f5d2b78c94ad8806f93a796f838ab6e1 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Wed, 30 Oct 2024 15:50:22 -0700 Subject: [PATCH 21/23] revisions --- joss_paper/paper.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 3272cc9..bf1ef43 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -51,6 +51,8 @@ Molecular dynamics (MD) simulations provide a microscope into the behavior of atomic-scale environments otherwise prohibitively difficult to observe. However, the resulting trajectory data are too often siloed in a single institutions' HPC environment, rendering it unusable by the broader scientific community. +Additionally, it is increasingly common for trajectory data to be stored entirely +with a cloud storage provider rather than at a traditional on-premises storage site.
*Zarrtraj* enables these trajectories to be read directly from cloud storage providers like AWS, Google Cloud, and Microsoft Azure into MDAnalysis, a popular Python package for analyzing trajectory data, providing a method to open up access to @@ -179,12 +181,12 @@ with fsspec.open("gcs://zarrtraj-test-data/YiiP_system.pdb", "r") as top: u = mda.Universe(top, "gcs://zarrtraj-test-data/yiip.zarrmd", topology_format="PDB") - for ts in u.trajectory[100::20]: - print(ts) + for timestep in u.trajectory[100::20]: + print(timestep) ``` Inside the loop over trajectory frames we print information for the current -frame `ts` although in principle, any kind of analysis code can run here and +frame `timestep`, although in principle any kind of analysis code can run here and process the coordinates available in `u.atoms.positions`. The `Universe` object can be used as if the underlying trajectory file were a @@ -217,7 +219,7 @@ or any script or package that uses MDAnalysis for file I/O. # Acknowledgements We thank Dr. Jenna Swarthout Goddard for supporting the GSoC program at MDAnalysis and -Martin Durant, author of Kerchunk, for helping refine and merge features in his upstream code base +Dr. Martin Durant, author of Kerchunk, for helping refine and merge features in his upstream code base necessary for this project. LW was a participant in the Google Summer of Code 2024 program. Some work on *Zarrtraj* was supported by the National Science Foundation under grant number 2311372.
From b0d3fc385620619e9ded96200fee0ce9641620d7 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Tue, 5 Nov 2024 11:51:01 -0700 Subject: [PATCH 22/23] add missing affiliations --- CITATION.cff | 2 ++ joss_paper/paper.md | 1 + 2 files changed, 3 insertions(+) diff --git a/CITATION.cff b/CITATION.cff index 9baac3d..40ae659 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -25,6 +25,7 @@ authors: email: hugomacdermott-opeskin@mdanalysis.org - given-names: Edis family-names: Jakupovic + orcid: 'https://orcid.org/0000-0001-8813-6356' affiliation: >- Center for Biological Physics, Arizona State University, Tempe, AZ, United States of America @@ -38,6 +39,7 @@ authors: orcid: 'https://orcid.org/0000-0002-3241-1846' family-names: Gowers name-particle: J + affiliation: Charm Therapeutics, London, United Kingdom - given-names: Oliver family-names: Beckstein affiliation: >- diff --git a/joss_paper/paper.md b/joss_paper/paper.md index bf1ef43..3b7fe85 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -15,6 +15,7 @@ authors: affiliation: [3] - name: Edis Jakupovic affiliation: [4, 5] + orcid: 0000-0001-8813-6356 - name: Yuxuan Zhuang orcid: 0000-0003-4390-8556 affiliation: [6, 7] From 1df5c285670395dafc3388523260618ea8a728df Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Tue, 5 Nov 2024 12:00:41 -0700 Subject: [PATCH 23/23] line length --- joss_paper/paper.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 3b7fe85..e442bbe 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -202,11 +202,14 @@ results array: ```python >>> import MDAnalysis.analysis.rms ->>> R = MDAnalysis.analysis.rms.RMSD(u, select="protein and name CA").run(step=100, verbose=True) +>>> R = MDAnalysis.analysis.rms.RMSD(u, select="protein and name CA").run( + step=100, verbose=True) 100%|██████████████████████████████████████████| 91/91 [00:28<00:00, 3.21it/s] ->>> print(f"Initial RMSD (frame={R.results.rmsd[0, 
0]:g}): {R.results.rmsd[0, 2]:.3f} Å") +>>> print(f"Initial RMSD (frame={R.results.rmsd[0, 0]:g}): " + f"{R.results.rmsd[0, 2]:.3f} Å") Initial RMSD (frame=0) : 0.000 Å ->>> print(f"Final RMSD (frame={R.results.rmsd[-1, 0]:g}): {R.results.rmsd[-1, 2]:.3f} Å") +>>> print(f"Final RMSD (frame={R.results.rmsd[-1, 0]:g}): " + f"{R.results.rmsd[-1, 2]:.3f} Å") Final RMSD (frame=9000) : 2.373 Å ```