diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 70e0ec0ac..96b09dc60 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,13 +37,13 @@ jobs: - name: Build new docker image if: env.MATCHED_FILES - run: docker build --no-cache . -t nfcore/eager:2.4.1 + run: docker build --no-cache . -t nfcore/eager:2.4.2 - name: Pull docker image if: ${{ !env.MATCHED_FILES }} run: | docker pull nfcore/eager:dev - docker tag nfcore/eager:dev nfcore/eager:2.4.1 + docker tag nfcore/eager:dev nfcore/eager:2.4.2 - name: Install Nextflow env: diff --git a/CHANGELOG.md b/CHANGELOG.md index 31d4adf35..e479bc5aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,20 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [2.4.2] - 2022-01-24 + +### `Added` + +### `Fixed` + +- [#824](https://github.com/nf-core/eager/issues/824) Fixed large memory footprint of bedtools coverage calculation. 
+- [#822](https://github.com/nf-core/eager/issues/822) Fixed post-adapterremoval trimmed files not being lane-merged and included in downstream analyses +- Fixed a couple of software version reporting commands + +### `Dependencies` + +### `Deprecated` + ## [2.4.1] - 2021-11-30 ### `Added` diff --git a/Dockerfile b/Dockerfile index 1354408d3..3cc4ec4ab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ COPY environment.yml / RUN conda env create --quiet -f /environment.yml && conda clean -a # Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-eager-2.4.1/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-eager-2.4.2/bin:$PATH # Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-eager-2.4.1 > nf-core-eager-2.4.1.yml \ No newline at end of file +RUN conda env export --name nf-core-eager-2.4.2 > nf-core-eager-2.4.2.yml \ No newline at end of file diff --git a/README.md b/README.md index 284f9046d..1b4ff5b36 100644 --- a/README.md +++ b/README.md @@ -171,9 +171,10 @@ Those who have provided conceptual guidance, suggestions, bug reports etc. 
* [Işın Altınkaya](https://github.com/isinaltinkaya) * [Johan Nylander](https://github.com/nylander) * [Katerine Eaton](https://github.com/ktmeaton) -* [Katrin Nägele](https://github.com/KathrinNaegele) +* [Kathrin Nägele](https://github.com/KathrinNaegele) * [Luc Venturini](https://github.com/lucventurini) * [Marcel Keller](https://github.com/marcel-keller) +* [Megan Michel](https://github.com/meganemichel) * [Pierre Lindenbaum](https://github.com/lindenb) * [Pontus Skoglund](https://github.com/pontussk) * [Raphael Eisenhofer](https://github.com/EisenRa) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index ed7008381..41e4f2bcb 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -9,13 +9,13 @@ "FastQC": ["v_fastqc.txt", r"FastQC v(\S+)"], "MultiQC": ["v_multiqc.txt", r"multiqc, version (\S+)"], 'AdapterRemoval':['v_adapterremoval.txt', r"AdapterRemoval ver. (\S+)"], - 'Picard MarkDuplicates': ['v_markduplicates.txt', r"(\S+)"], + 'Picard MarkDuplicates': ['v_markduplicates.txt', r"Version:(\S+)"], 'Samtools': ['v_samtools.txt', r"samtools (\S+)"], 'Preseq': ['v_preseq.txt', r"Version: (\S+)"], 'BWA': ['v_bwa.txt', r"Version: (\S+)"], 'Bowtie2': ['v_bowtie2.txt', r"bowtie2-([0-9]+\.[0-9]+\.[0-9]+) -fdebug"], 'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"], - 'GATK HaplotypeCaller': ['v_gatk.txt', r" v(\S+)"], + 'GATK HaplotypeCaller': ['v_gatk.txt', r"The Genome Analysis Toolkit \(GATK\) v(\S+)"], 'GATK UnifiedGenotyper': ['v_gatk3.txt', r"(\S+)"], 'bamUtil' : ['v_bamutil.txt', r"Version: (\S+);"], 'fastP': ['v_fastp.txt', r"([\d\.]+)"], diff --git a/environment.yml b/environment.yml index db6c11b10..0db40a045 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-eager-2.4.1 +name: nf-core-eager-2.4.2 channels: - conda-forge - bioconda diff --git 
a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index c2390f966..52ee73043 100644 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -259,7 +259,7 @@ class NfcoreSchema { return new_params } - /* + /* * This method tries to read a JSON params file */ private static LinkedHashMap params_load(String json_schema) { diff --git a/main.nf b/main.nf index 804b44609..6f89cd0fa 100644 --- a/main.nf +++ b/main.nf @@ -1039,8 +1039,7 @@ if ( params.skip_collapse ){ // Inline barcode removal bypass when not running it if (params.run_post_ar_trimming) { - ch_adapterremoval_for_skip_post_ar_trimming - .dump(tag: "inline_removal_bypass") + ch_post_ar_trimming_for_lanemerge .into { ch_inlinebarcoderemoval_for_fastqc_after_clipping; ch_inlinebarcoderemoval_for_lanemerge; } } else { ch_adapterremoval_for_skip_post_ar_trimming @@ -2055,8 +2054,12 @@ process bedtools { script: """ - bedtools coverage -nonamecheck -a ${anno_file} -b $bam | pigz -p ${task.cpus - 1} > "${bam.baseName}".breadth.gz - bedtools coverage -nonamecheck -a ${anno_file} -b $bam -mean | pigz -p ${task.cpus - 1} > "${bam.baseName}".depth.gz + ## Create genome file from bam header + samtools view -H ${bam} | grep '@SQ' | sed 's#@SQ\tSN:\\|LN:##g' > genome.txt + + ## Run bedtools + bedtools coverage -nonamecheck -g genome.txt -sorted -a ${anno_file} -b ${bam} | pigz -p ${task.cpus - 1} > "${bam.baseName}".breadth.gz + bedtools coverage -nonamecheck -g genome.txt -sorted -a ${anno_file} -b ${bam} -mean | pigz -p ${task.cpus - 1} > "${bam.baseName}".depth.gz """ } @@ -3085,7 +3088,7 @@ process get_software_versions { ( exec 7>&1; picard MarkDuplicates --version 2>&1 >&7 | grep -v '/' >&2 ) 2> v_markduplicates.txt || true qualimap --version &> v_qualimap.txt 2>&1 || true preseq &> v_preseq.txt 2>&1 || true - gatk --version 2>&1 | head -n 1 > v_gatk.txt 2>&1 || true + gatk --version 2>&1 | grep '(GATK)' > v_gatk.txt 2>&1 || true gatk3 --version 2>&1 | head -n 1 > v_gatk3.txt 2>&1 || true freebayes 
--version &> v_freebayes.txt 2>&1 || true bedtools --version &> v_bedtools.txt 2>&1 || true diff --git a/nextflow.config b/nextflow.config index 20076e27e..36c2a0355 100644 --- a/nextflow.config +++ b/nextflow.config @@ -284,7 +284,7 @@ params { // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'nfcore/eager:2.4.1' +process.container = 'nfcore/eager:2.4.2' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -414,7 +414,7 @@ manifest { description = 'A fully reproducible and state-of-the-art ancient DNA analysis pipeline' mainScript = 'main.nf' nextflowVersion = '>=20.07.1' - version = '2.4.1' + version = '2.4.2' } // Function to ensure that resource requirements don't go beyond