From bb4acf978165278a1dac14b86c98ffecd1534b29 Mon Sep 17 00:00:00 2001 From: tdayris Date: Fri, 30 Aug 2024 09:47:07 +0200 Subject: [PATCH] feat: Bwameth index (#3162) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### QC * [x] I confirm that I have followed the [documentation for contributing to `snakemake-wrappers`](https://snakemake-wrappers.readthedocs.io/en/stable/contributing.html). While the contributions guidelines are more extensive, please particularly ensure that: * [x] `test.py` was updated to call any added or updated example rules in a `Snakefile` * [x] `input:` and `output:` file paths in the rules can be chosen arbitrarily * [x] wherever possible, command line arguments are inferred and set automatically (e.g. based on file extensions in `input:` or `output:`) * [x] temporary files are either written to a unique hidden folder in the working directory, or (better) stored where the Python function `tempfile.gettempdir()` points to * [x] the `meta.yaml` contains a link to the documentation of the respective tool or command under `url:` * [x] conda environments use a minimal amount of channels and packages, in recommended ordering ## Summary by CodeRabbit - **New Features** - Introduced `environment.linux-64.pin.txt` and `environment.yaml` files for simplified conda environment setup tailored for bioinformatics applications. - Added `meta.yaml` file containing essential metadata for the `bwa-meth index` functionality, enhancing user guidance. - Implemented a new Snakefile for testing `bwameth` indexing functionality, ensuring reliable performance. - Created a new genomic FASTA file for sample data storage and analysis. - Developed a wrapper script to automate the BWA-Meth indexing process, improving user experience. - **Tests** - Added a new test function to validate the `bwameth` indexing command, ensuring reliability in processing. 
--------- Co-authored-by: tdayris Co-authored-by: tdayris Co-authored-by: Johannes Köster Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: snakedeploy-bot[bot] <115615832+snakedeploy-bot[bot]@users.noreply.github.com> Co-authored-by: Felix Mölder Co-authored-by: Christopher Schröder Co-authored-by: Filipe G. Vieira <1151762+fgvieira@users.noreply.github.com> --- .../index/environment.linux-64.pin.txt | 46 +++++++++++++++++++ bio/bwameth/index/environment.yaml | 7 +++ bio/bwameth/index/meta.yaml | 15 ++++++ bio/bwameth/index/test/Snakefile | 40 ++++++++++++++++ bio/bwameth/index/test/genome.fasta | 2 + bio/bwameth/index/wrapper.py | 38 +++++++++++++++ test.py | 25 ++++++++++ 7 files changed, 173 insertions(+) create mode 100644 bio/bwameth/index/environment.linux-64.pin.txt create mode 100644 bio/bwameth/index/environment.yaml create mode 100644 bio/bwameth/index/meta.yaml create mode 100644 bio/bwameth/index/test/Snakefile create mode 100644 bio/bwameth/index/test/genome.fasta create mode 100644 bio/bwameth/index/wrapper.py diff --git a/bio/bwameth/index/environment.linux-64.pin.txt b/bio/bwameth/index/environment.linux-64.pin.txt new file mode 100644 index 00000000000..799bd84aca6 --- /dev/null +++ b/bio/bwameth/index/environment.linux-64.pin.txt @@ -0,0 +1,46 @@ +# This file may be used to create an environment using: +# $ conda create --name <env> --file <this file> +# platform: linux-64 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.7.4-hbcca054_0.conda#23ab7665c5f63cfb9f1f6195256daac6 +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_7.conda#b80f2f396ca2c28b8c14c437a4ed1e74 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h8827d51_1.conda#8bfdead4e0fff0383ae4c9c50d0531bd 
+https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.1.0-h77fa898_0.conda#ae061a5ed5f05818acdf9adab72c146d +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.1.0-h77fa898_0.conda#ca0fad6a41ddaef54a153b78eccb5037 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.33.1-heb4867d_0.conda#0d3c60291342c0c025db231353376dfb +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.21-h4bc722e_0.conda#36ce76665bf67f5aac36be7a0d21b7f3 +https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.1.0-hc0a3c3a_0.conda#1cb187a157136398ddbaae90713e2498 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h4ab18f5_6.conda#27329162c0dc732bcf67a4e0cd488125 +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-he02047a_1.conda#70caf8bb6cf39a0b6b7efc885f51c0fe +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.1-hb9d3cd8_3.conda#6c566a46baae794daf34775d41eb180a 
+https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/bioconda/linux-64/bwa-mem2-2.2.1-hd03093a_5.tar.bz2#a8de1b9c3db53f3436e06f86997b5316 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.0-hde9e2c9_0.conda#18aa975d2094c34aef978060ae7da7d8 +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe +https://conda.anaconda.org/conda-forge/linux-64/perl-5.32.1-7_hd590300_perl5.conda#f2cfec9406850991f4e3d960cc9e3321 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/bioconda/linux-64/bwa-0.7.18-he4a0461_1.tar.bz2#4ecde7d4a03fabe440a90aea1389d62d +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 +https://conda.anaconda.org/conda-forge/linux-64/python-3.12.3-hab00c5b_0_cpython.conda#2540b74d304f71d3e89c81209db4db84 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.8.0-hca28451_1.conda#b8afb3e3cb3423cc445cf611ab95fdb0 +https://conda.anaconda.org/conda-forge/noarch/setuptools-72.2.0-pyhd8ed1ab_0.conda#1462aa8b243aad09ef5d0841c745eb89 +https://conda.anaconda.org/bioconda/noarch/toolshed-0.4.6-pyh864c0ab_3.tar.bz2#61065996a8b60a54d5350427f51a54f3 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.44.0-pyhd8ed1ab_0.conda#d44e3b085abcaef02983c6305b84b584 
+https://conda.anaconda.org/bioconda/linux-64/htslib-1.20-h5efdd21_2.tar.bz2#f0923cdb85b44dc77bdead8018645ea9 +https://conda.anaconda.org/conda-forge/noarch/pip-24.2-pyhd8ed1ab_0.conda#6721aef6bfe5937abe70181545dd2c51 +https://conda.anaconda.org/bioconda/linux-64/samtools-1.20-h50ea8bc_1.tar.bz2#1a4c52f8079fea1b18b3e4bd329bab3a +https://conda.anaconda.org/bioconda/noarch/bwameth-0.2.7-pyh7cba7a3_0.tar.bz2#ab1318507241aab310382b26aaf429e2 diff --git a/bio/bwameth/index/environment.yaml b/bio/bwameth/index/environment.yaml new file mode 100644 index 00000000000..22ad7716083 --- /dev/null +++ b/bio/bwameth/index/environment.yaml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - bwameth =0.2.7 + - bwa-mem2 =2.2.1 diff --git a/bio/bwameth/index/meta.yaml b/bio/bwameth/index/meta.yaml new file mode 100644 index 00000000000..e741c5b0dc1 --- /dev/null +++ b/bio/bwameth/index/meta.yaml @@ -0,0 +1,15 @@ +name: bwa-meth index +url: https://github.com/brentp/bwa-meth +description: > + Index a reference sequence for future BS-Seq mapping. +authors: + - Thibault Dayris +input: + - Path to reference fasta file. +output: + - List of paths to index files. +params: + No optional argument used, everything is handled by the wrapper. +notes: > + While bwa-meth explicitly writes the index alongside the reference file, this wrapper lets users freely choose the output directory. 
# coding: utf-8

"""Snakemake wrapper for BWA-Meth index.

The reference (first input) is symlinked into a temporary directory and
indexed there with ``bwameth.py index`` or ``bwameth.py index-mem2``. The
resulting ``<reference>.bwameth.c2t*`` files are then moved into the directory
holding the declared output files.

Expected Snakemake attributes:
    input[0]: path to the reference FASTA file.
    output:   paths to the index files; they must all share one directory.
    log:      optional log file, receives stdout and stderr (appended).

Raises:
    ValueError: if the declared output files are spread over several
        directories (the index can only be delivered to a single one).
"""

__author__ = "Thibault Dayris"
__mail__ = "thibault.dayris@gustaveroussy.fr"
__copyright__ = "Copyright 2024, Thibault Dayris"
__license__ = "MIT"

import os
import os.path

from tempfile import TemporaryDirectory
from snakemake import shell

log = snakemake.log_fmt_shell(stdout=True, stderr=True, append=True)

output_paths = [str(outfile) for outfile in snakemake.output]

# Automatic detection of aligner based on one output file:
# a `.0123` file is only expected from the bwa-mem2 flavoured index.
subcommand = "index"
if any(path.endswith(".0123") for path in output_paths):
    subcommand = "index-mem2"

# Find user-defined output directory.
# Directories are compared as whole path components: a character-wise common
# prefix of the output paths can end in the middle of a directory name and
# silently point to the wrong place.
out_dirs = {os.path.normpath(os.path.dirname(path)) for path in output_paths}
if len(out_dirs) > 1:
    raise ValueError(
        "All bwameth index output files must be located in the same "
        f"directory, got: {sorted(out_dirs)}"
    )
out_dir = out_dirs.pop() if out_dirs else "."

with TemporaryDirectory() as tempdir:
    # Create symlink to avoid bwa-meth index to be written
    # next to the input reference file
    ref_basename = os.path.basename(snakemake.input[0])
    used_reference = os.path.join(tempdir, ref_basename)
    os.symlink(os.path.abspath(snakemake.input[0]), used_reference)

    # Run bwameth index command.
    # `:q` makes Snakemake shell-quote the path, so reference names holding
    # spaces or shell metacharacters do not break the command line.
    shell("bwameth.py {subcommand} {used_reference:q} {log}")

    # Return index files to user where they expect them.
    # The quoted prefix is followed by an unquoted glob, so the shell still
    # expands `.bwameth.c2t*` to every index file that was produced.
    os.unlink(used_reference)
    shell("mv -v {used_reference:q}.bwameth.c2t* {out_dir:q} {log}")