From eb594057f35172c4601294a935b4df4d1bfc4dc6 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 1 Feb 2019 12:26:42 +0100 Subject: [PATCH 01/56] Extra clarifications for indices, FastP and general cleanup --- main.nf | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf index f25ad3e79..826b0bff5 100644 --- a/main.nf +++ b/main.nf @@ -26,9 +26,9 @@ def helpMessage() { Mandatory arguments: --reads Path to input data (must be surrounded with quotes) - -profile Hardware config to use (e.g. standard, docker, singularity, conda, aws). Ask your system admin if unsure, or check documentatoin. + -profile Institution or personal hardware config to use (e.g. standard, docker, singularity, conda, aws). Ask your system admin if unsure, or check documentation. --singleEnd Specifies that the input is single end reads (required if not pairedEnd) - --pairedEnd Specifies that the input is paired end reads (required if not singleend) + --pairedEnd Specifies that the input is paired end reads (required if not singleEnd) --bam Specifies that the input is in BAM format --fasta Path to Fasta reference (required if not iGenome reference) --genome Name of iGenomes reference (required if not fasta reference) @@ -36,11 +36,11 @@ def helpMessage() { Input Data Additional Options: --snpcapture Runs in SNPCapture mode (specify a BED file if you do this!) - References If not specified in the configuration file or you wish to overwrite any of the references. - --bwa_index Path to BWA index + References If not specified in the configuration file, or you wish to overwrite any of the references. 
+ --bwa_index Path to directory containing BWA index files --bedfile Path to BED file for SNPCapture methods - --seq_dict Path to sequence dictionary file - --fasta_index Path to FastA index + --seq_dict Path to picard sequence dictionary file (typically ending in '.dict') + --fasta_index Path to samtools FASTA index (typically ending in '.fai') --saveReference Saves reference genome indices for later reusage Skipping Skip any of the mentioned steps @@ -50,8 +50,8 @@ def helpMessage() { --skip_deduplication Complexity Filtering - --complexity_filtering Run complexity filtering on FastQ files - --complexity_filter_poly_g_min Specify poly-g min filter (default: 10) for filtering + --complexity_filtering Run poly-G removal on FASTQ files + --complexity_filter_poly_g_min Specify length of poly-g min for clipping to be performed (default: 10) Clipping / Merging --clip_forward_adaptor Specify adapter sequence to be clipped off (forward) @@ -61,23 +61,23 @@ def helpMessage() { --min_adap_overlap Specify minimum adapter overlap BWA Mapping - --bwaalnn Specify the -n parameter for BWA aln + --bwaalnn Specify the -n parameter for BWA aln. --bwaalnk Specify the -k parameter for BWA aln --bwaalnl Specify the -l parameter for BWA aln CircularMapper --circularmapper Turn on CircularMapper (CM) - --circularextension Specify the number of bases to extend + --circularextension Specify the number of bases to extend reference by --circulartarget Specify the target chromosome for CM --circularfilter Specify to filter off-target reads BWA Mem Mapping - --bwamem Turn on BWA Mem instead of CM/BWA aln for mapping + --bwamem Turn on BWA Mem instead of BWA aln for mapping BAM Filtering - --bam_discard_unmapped Discards unmapped reads in either FASTQ or BAM format, depending on choice. - --bam_unmapped_type Defines whether to discard all unmapped reads, keep only bam and/or keep only fastq format (options: discard, bam, fastq, both). 
--bam_mapping_quality_threshold Minimum mapping quality for reads filter, default 0. + --bam_discard_unmapped Discards unmapped reads in either FASTQ or BAM format, depending on choice (see --bam_unmapped_type). + --bam_unmapped_type Defines whether to discard all unmapped reads, keep only bam and/or keep only fastq format (options: discard, bam, fastq, both). DeDuplication --dedupper Deduplication method to use (options: dedup, markduplicates). Default: dedup @@ -101,10 +101,6 @@ def helpMessage() { --bamutils_clip_left / --bamutils_clip_right Specify the number of bases to clip off reads --bamutils_softclip Use softclip instead of hard masking - - For a full list and more information of available parameters, consider the documentation. - - Other options: --outdir The output directory where the results will be saved --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits @@ -114,6 +110,9 @@ def helpMessage() { --max_time Time limit for each step of the pipeline. Should be in form e.g. --max_memory '2.h' --max_cpus Maximum number of CPUs to use for each step of the pipleine. Should be in form e.g. --max_cpus 1 + For a full list and more information of available parameters, consider the documentation (https://github.com/nf-core/eager/). + + """.stripIndent() } /* From f43f4b09c343eecafb0def62ec6af02b4af5cc4e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Sat, 2 Feb 2019 14:18:32 +0100 Subject: [PATCH 02/56] Further clarification on `--max_cpus` Emphasis on each process, not the overall nextflow submission. --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 6f0838d7d..c242216f0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -237,7 +237,7 @@ Use to set a top-limit for the default time requirement for each process. Should be a string in the format integer-unit. eg. `--max_time '2.h'`.
If not specified, will be taken from the configuration in the `-profile` flag. ### `--max_cpus` -Use to set a top-limit for the default CPU requirement for each process. +Use to set a top-limit for the default CPU requirement for each **process**. This is not the maximum number of CPUs that can be used for the whole pipeline, but the maximum number of CPUs each program can use for each program submission (known as a process). Should be a string in the format integer-unit. eg. `--max_cpus 1`. If not specified, will be taken from the configuration in the `-profile` flag. ### `--email` From 45ce5479efc2232b50e04ff7afe444a17df6fb15 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 2 Feb 2019 21:10:11 +0100 Subject: [PATCH 03/56] Update docs/usage.md Co-Authored-By: jfy133 --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index c242216f0..dbb3889a1 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -237,7 +237,7 @@ Use to set a top-limit for the default time requirement for each process. Should be a string in the format integer-unit. eg. `--max_time '2.h'`. If not specified, will be taken from the configuration in the `-profile` flag. ### `--max_cpus` -Use to set a top-limit for the default CPU requirement for each **process**. This is not the maximum number of CPUs that can be used for the whole pipeline, but the maximum number of CPUs each program can use for each program submission (known as a process). +Use to set a top-limit for the default CPU requirement for each **process**. This is not the maximum number of CPUs that can be used for the whole pipeline, but the maximum number of CPUs each program can use for each program submission (known as a process). Do not set this higher than what is available on your workstation or computing node can provide. If you're unsure, ask your local IT administrator for details on compute node capabilities! Should be a string in the format integer-unit. eg. 
`--max_cpus 1`. If not specified, will be taken from the configuration in the `-profile` flag. ### `--email` From 91d05cbba0cd860eee240a24dba99380997f528e Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 16:48:58 +0100 Subject: [PATCH 04/56] add noCollapse option --- .travis.yml | 6 +++++ main.nf | 74 +++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 67 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 459e925cb..ecb69d6d6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,14 +40,20 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --saveReference # Run the basic pipeline with single end data (pretending its single end actually) - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/ + # Run the basic pipeline with paired end data + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --saveReference # Run the same pipeline testing optional step: fastp, complexity - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test BAM Trimming - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --trim_bam --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test running with CircularMapper - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --circularmapper --circulartarget 'NC_007596.2' + # Test running with CircularMapper on paired end without collapsing + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --circularmapper --circulartarget 'NC_007596.2' # Test running with BWA Mem - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/ + # Test running with BWA Mem on paired end without collapsing + - nextflow run ${TRAVIS_BUILD_DIR} -profile
test,docker --pairedEnd --noCollapse --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test with zipped reference input - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --fasta 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Test.fasta.gz' # Run the basic pipeline with the bam input profile diff --git a/main.nf b/main.nf index f25ad3e79..12878e70c 100644 --- a/main.nf +++ b/main.nf @@ -28,7 +28,8 @@ def helpMessage() { --reads Path to input data (must be surrounded with quotes) -profile Hardware config to use (e.g. standard, docker, singularity, conda, aws). Ask your system admin if unsure, or check documentatoin. --singleEnd Specifies that the input is single end reads (required if not pairedEnd) - --pairedEnd Specifies that the input is paired end reads (required if not singleend) + --pairedEnd Specifies that the input is paired end reads (required if not singleEnd) + --noCollapse Specifies to avoid merging Forward and Reverse reads together. (Only for pairedEnd samples) --bam Specifies that the input is in BAM format --fasta Path to Fasta reference (required if not iGenome reference) --genome Name of iGenomes reference (required if not fasta reference) @@ -131,6 +132,7 @@ if (params.help){ params.name = false params.singleEnd = false params.pairedEnd = false +params.noCollapse = false params.genome = "Custom" params.snpcapture = false params.bedfile = '' @@ -265,6 +267,11 @@ if( params.singleEnd || params.pairedEnd || params.bam){ exit 1, "Please specify either --singleEnd, --pairedEnd to execute the pipeline on FastQ files and --bam for previously processed BAM files!" } +//Validate that noCollase is only set to True for pairedEnd reads! +if (params.noCollapse && params.singleEnd){ + exit 1, "--noCollapse can only be set for pairedEnd samples!" 
+} + //AWSBatch sanity checking if(workflow.profile == 'awsbatch'){ @@ -349,6 +356,7 @@ summary['Reads'] = params.reads summary['Fasta Ref'] = params.fasta if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' +summary['Collapse'] = !params.noCollapse ? 'Yes' : 'No' summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -508,7 +516,7 @@ process convertBam { output: set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp) - file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) + set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) script: base = "${bam.baseName}" @@ -594,19 +602,25 @@ process adapter_removal { //Readprefixing only required for PE data with merging fixprefix = (params.singleEnd) ? 
"" : "AdapterRemovalFixPrefix ${prefix}.combined.fq.gz ${prefix}.combined.prefixed.fq.gz" - if( !params.singleEnd ){ + if( !params.singleEnd && !params.noCollapse){ """ - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${prefix} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse #Combine files - zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${prefix}.combined.fq.gz - ${fixprefix} - rm ${prefix}.combined.fq.gz + zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${name}.combined.fq.gz """ - } else { + } else if (!params.singleEnd && params.noCollapse) { + """ + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + #Rename files + mv ${name}.pair1.truncated.gz ${name}.pair1.combined.fq.gz + mv ${name}.pair2.truncated.gz ${name}.pair2.combined.fq.gz """ - AdapterRemoval --file1 ${reads[0]} --basename ${prefix} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} 
--minquality ${params.clip_min_read_quality} + } + else { + """ + AdapterRemoval --file1 ${reads[0]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} # Pseudo-Combine - mv *.truncated.gz ${prefix}.combined.fq.gz + mv *.truncated.gz ${name}.combined.fq.gz """ } } @@ -615,7 +629,7 @@ process adapter_removal { * STEP 2.1 - FastQC after clipping/merging (if applied!) */ process fastqc_after_clipping { - tag "${reads[0].baseName}" + tag "${prefix}" publishDir "${params.outdir}/FastQC/after_clipping", mode: 'copy', saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"} @@ -628,6 +642,7 @@ process fastqc_after_clipping { file "*_fastqc.{zip,html}" optional true into ch_fastqc_after_clipping script: + prefix = reads[0].toString().tokenize('.')[0] """ fastqc -q $reads """ @@ -654,13 +669,24 @@ process bwa { script: + fasta = "${index}/*.fasta" + if (!params.singleEnd && params.noCollapse){ + prefix = reads[0].toString().tokenize('.')[0] + """ + bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai + bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai + bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} | samtools sort -@ ${task.cpus} -O bam - > ${prefix}.sorted.bam + samtools index ${prefix}.sorted.bam + """ + } else { prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ - fasta = "${index}/*.fasta" """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta 
"${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index "${prefix}".sorted.bam """ + } + } process circulargenerator{ @@ -708,9 +734,21 @@ process circularmapper{ script: filter = "${params.circularfilter}" ? '' : '-f true -x false' - prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ + fasta = "${index}/*_*.fasta" + if (!params.singleEnd && params.noCollapse){ + prefix = reads[0].toString().tokenize('.')[0] + """ + bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai + bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai + bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r1.sai ${reads[0]} ${reads[0]} > tmp.out + realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter + samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > ${prefix}.sorted.bam + samtools index ${prefix}.sorted.bam + """ + } else { + prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out @@ -718,6 +756,8 @@ process circularmapper{ samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam samtools index "${prefix}".sorted.bam """ + } + } process bwamem { @@ -738,10 +778,18 @@ process bwamem { script: prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*.fasta" + if (!params.singleEnd && params.noCollapse){ + """ + bwa mem -t ${task.cpus} $fasta ${reads[0]} 
${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam + samtools index -@ ${task.cpus} "${prefix}".sorted.bam + """ + } else { """ bwa mem -t ${task.cpus} $fasta $reads -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index -@ ${task.cpus} "${prefix}".sorted.bam """ + } + } /* From 758fb7fa53b7711e4563ed6bb059cea372c21087 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 16:53:37 +0100 Subject: [PATCH 05/56] fix nf-PE read index --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 12878e70c..b6d9a32bf 100644 --- a/main.nf +++ b/main.nf @@ -741,8 +741,8 @@ process circularmapper{ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai - bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai - bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r1.sai ${reads[0]} ${reads[0]} > tmp.out + bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai + bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > ${prefix}.sorted.bam samtools index ${prefix}.sorted.bam From e2aaba071e8595d2c3df0f403cb8fc443ad13d74 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 16:58:24 +0100 Subject: [PATCH 06/56] update doc with noCollapse --- docs/usage.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/docs/usage.md b/docs/usage.md index 6f0838d7d..02000d1b5 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -162,6 +162,14 @@ A normal glob pattern, enclosed in quotation marks, can then be used for `--read --pairedEnd --reads '*.fastq' ``` +### `--noCollapse` +If you have paired-end data, but you don't want to merge them, add the commind line argument `--noCollapse`. + +For example +```bash +--pairedEnd --noCollapse '*.fastq' +``` + ### `--fasta` If you prefer, you can specify the full path to your reference genome when you run the pipeline: From 55743dd25b85562549d290cbdca93b11b4153858 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 17:31:39 +0100 Subject: [PATCH 07/56] main.nf to dev --- main.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/main.nf b/main.nf index 090d86e00..b71ca88b4 100644 --- a/main.nf +++ b/main.nf @@ -29,10 +29,7 @@ def helpMessage() { -profile Institution or personal hardware config to use (e.g. standard, docker, singularity, conda, aws). Ask your system admin if unsure, or check documentation. --singleEnd Specifies that the input is single end reads (required if not pairedEnd) --pairedEnd Specifies that the input is paired end reads (required if not singleEnd) -<<<<<<< HEAD -======= --noCollapse Specifies to avoid merging Forward and Reverse reads together. 
(Only for pairedEnd samples) ->>>>>>> master --bam Specifies that the input is in BAM format --fasta Path to Fasta reference (required if not iGenome reference) --genome Name of iGenomes reference (required if not fasta reference) From 8ec9d674a94016369190ba04ab21a09da9018ce7 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 11 Feb 2019 18:14:19 +0100 Subject: [PATCH 08/56] fix funky prefix --- main.nf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index b71ca88b4..6a179c0dd 100644 --- a/main.nf +++ b/main.nf @@ -678,11 +678,11 @@ process bwa { samtools index ${prefix}.sorted.bam """ } else { - prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ + prefix = reads[0].toString().tokenize('.')[0] """ - bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" - bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam - samtools index "${prefix}".sorted.bam + bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai + bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam + samtools index ${prefix}.sorted.bam """ } @@ -747,13 +747,13 @@ process circularmapper{ samtools index ${prefix}.sorted.bam """ } else { - prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ + prefix = reads[0].toString().tokenize('.')[0] """ - bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" - bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai 
$reads > tmp.out + bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai + bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter - samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam - samtools index "${prefix}".sorted.bam + samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > ${prefix}.sorted.bam + samtools index ${prefix}.sorted.bam """ } From 6d864e10dfef753edd9a8678b63e2fc906388611 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 11 Feb 2019 23:49:00 +0100 Subject: [PATCH 09/56] Update docs/usage.md Co-Authored-By: maxibor --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 8a17ecee5..34dd9f9bb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -163,7 +163,7 @@ A normal glob pattern, enclosed in quotation marks, can then be used for `--read ``` ### `--noCollapse` -If you have paired-end data, but you don't want to merge them, add the commind line argument `--noCollapse`. +If you have paired-end data, but you don't want to merge them, add the command line argument `--noCollapse`. 
For example ```bash From f3a6ff7adf80ed9820a092f6b15fe21b1549d3f2 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 12 Feb 2019 14:52:16 +0100 Subject: [PATCH 10/56] Update docs/usage.md Co-Authored-By: maxibor --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 34dd9f9bb..fecaa9c13 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -167,7 +167,7 @@ If you have paired-end data, but you don't want to merge them, add the command l For example ```bash ---pairedEnd --noCollapse '*.fastq' +--pairedEnd --noCollapse --reads '*.fastq' ``` ### `--fasta` From 0e03a000d6e3fe11dc91657215074a30645d096e Mon Sep 17 00:00:00 2001 From: maxibor Date: Tue, 12 Feb 2019 17:32:34 +0100 Subject: [PATCH 11/56] skip trimming and collapsing --- docs/usage.md | 28 ++++++++++++++------- main.nf | 69 +++++++++++++++++++++++++++++++++------------------ 2 files changed, 64 insertions(+), 33 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index fecaa9c13..bee24f9ba 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -162,14 +162,6 @@ A normal glob pattern, enclosed in quotation marks, can then be used for `--read --pairedEnd --reads '*.fastq' ``` -### `--noCollapse` -If you have paired-end data, but you don't want to merge them, add the command line argument `--noCollapse`. - -For example -```bash ---pairedEnd --noCollapse --reads '*.fastq' -``` - ### `--fasta` If you prefer, you can specify the full path to your reference genome when you run the pipeline: @@ -287,12 +279,30 @@ This part of the documentation contains a list of user-adjustable parameters in ## Step skipping parameters -Some of the steps in the pipeline can be executed optionally. If you specify specific steps to be skipped, there won't be any output related to these modules. +Some of the steps in the pipeline can be executed optionally. If you specify specific steps to be skipped, there won't be any output related to these modules. 
### `--skip_preseq` Turns off the computation of library complexity estimation. +### `--skip_collapse` +If you have paired-end data, but you don't want to merge them, add the command line argument `--noCollapse`. + +For example +```bash +--pairedEnd --skip_collapse --reads '*.fastq' +``` + +### `--skip_trimming` + +Turns off the adaptor and quality trimming + +For example +```bash +--pairedEnd --skip_trimming --reads '*.fastq' +``` + + ### `--skip_damage_calculation` Turns off the DamageProfiler module to compute DNA damage profiles. diff --git a/main.nf b/main.nf index 6a179c0dd..c98aadee6 100644 --- a/main.nf +++ b/main.nf @@ -29,7 +29,6 @@ def helpMessage() { -profile Institution or personal hardware config to use (e.g. standard, docker, singularity, conda, aws). Ask your system admin if unsure, or check documentation. --singleEnd Specifies that the input is single end reads (required if not pairedEnd) --pairedEnd Specifies that the input is paired end reads (required if not singleEnd) - --noCollapse Specifies to avoid merging Forward and Reverse reads together. (Only for pairedEnd samples) --bam Specifies that the input is in BAM format --fasta Path to Fasta reference (required if not iGenome reference) --genome Name of iGenomes reference (required if not fasta reference) @@ -45,6 +44,8 @@ def helpMessage() { --saveReference Saves reference genome indices for later reusage Skipping Skip any of the mentioned steps + --skip_collapse Skip merging Forward and Reverse reads together. 
(Only for pairedEnd samples) + --skip_trim Skip adaptor and quality trimming --skip_preseq --skip_damage_calculation --skip_qualimap @@ -131,7 +132,6 @@ if (params.help){ params.name = false params.singleEnd = false params.pairedEnd = false -params.noCollapse = false params.genome = "Custom" params.snpcapture = false params.bedfile = '' @@ -147,6 +147,8 @@ params.email = false params.plaintext_email = false // Skipping parts of the pipeline for impatient users +params.skip_collapse = false +params.skip_trim = false params.skip_preseq = false params.skip_damage_calculation = false params.skip_qualimap = false @@ -266,12 +268,11 @@ if( params.singleEnd || params.pairedEnd || params.bam){ exit 1, "Please specify either --singleEnd, --pairedEnd to execute the pipeline on FastQ files and --bam for previously processed BAM files!" } -//Validate that noCollase is only set to True for pairedEnd reads! -if (params.noCollapse && params.singleEnd){ +//Validate that skip_collapse is only set to True for pairedEnd reads! +if (params.skip_collapse && params.singleEnd){ exit 1, "--noCollapse can only be set for pairedEnd samples!" } - //AWSBatch sanity checking if(workflow.profile == 'awsbatch'){ if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" @@ -355,7 +356,8 @@ summary['Reads'] = params.reads summary['Fasta Ref'] = params.fasta if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Collapse'] = !params.noCollapse ? 'Yes' : 'No' +summary['Skip Collapsing'] = params.skip_collapse ? 'Yes' : 'No' +summary['Skip Trimming'] = params.skip_trim ? 
'Yes' : 'No' summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -375,7 +377,7 @@ if(workflow.profile == 'awsbatch'){ summary['AWS Queue'] = params.awsqueue } if(params.email) summary['E-mail Address'] = params.email -log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n") +log.info summary.collect { k,v -> "${k.padRight(35)}: $v" }.join("\n") log.info "=========================================" @@ -587,6 +589,8 @@ process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' + echo true + when: !params.bam input: @@ -594,32 +598,49 @@ process adapter_removal { output: file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) - file "*.settings" into ch_adapterremoval_logs + file("*.settings") optional true into ch_adapterremoval_logs script: - prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ - //Readprefixing only required for PE data with merging - fixprefix = (params.singleEnd) ? 
"" : "AdapterRemovalFixPrefix ${prefix}.combined.fq.gz ${prefix}.combined.prefixed.fq.gz" + base = reads[0].baseName - if( !params.singleEnd && !params.noCollapse){ + if( !params.singleEnd && !params.skip_collapse && !params.skip_trim){ """ - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse #Combine files - zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${name}.combined.fq.gz + zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${base}.combined.fq.gz + """ + } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) { + """ + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + #Rename files + mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz + mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz """ - } else if (!params.singleEnd && params.noCollapse) { + } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) { + bogus_adaptor = 
"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --basename ${base} --collapse --adapter1 $bogus_adaptor --adapter2 $bogus_adaptor #Rename files - mv ${name}.pair1.truncated.gz ${name}.pair1.combined.fq.gz - mv ${name}.pair2.truncated.gz ${name}.pair2.combined.fq.gz + mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz + mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz """ - } + } else if (params.singleEnd && params.skip_collapse && params.skip_trim){ + """ + mv ${reads[0]} ${base}.combined.fq.gz + echo "Skipped trimming and merging by AdapterRemoval" + """ + } else if (params.pairedEnd && params.skip_collapse && params.skip_trim){ + """ + mv ${reads[0]} ${base}.pair1.combined.fq.gz + mv ${reads[1]} ${base}.pair2.combined.fq.gz + echo "Skipped trimming and merging by AdapterRemoval" + """ + } else { """ - AdapterRemoval --file1 ${reads[0]} --basename ${name} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} + AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} # Pseudo-Combine - mv *.truncated.gz ${name}.combined.fq.gz + mv *.truncated.gz ${base}.combined.fq.gz """ } } @@ -669,7 +690,7 @@ process bwa { script: fasta = "${index}/*.fasta" - if (!params.singleEnd && params.noCollapse){ + if (!params.singleEnd && 
params.skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -736,7 +757,7 @@ process circularmapper{ fasta = "${index}/*_*.fasta" - if (!params.singleEnd && params.noCollapse){ + if (!params.singleEnd && params.skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -777,7 +798,7 @@ process bwamem { script: prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*.fasta" - if (!params.singleEnd && params.noCollapse){ + if (!params.singleEnd && params.skip_collapse ){ """ bwa mem -t ${task.cpus} $fasta ${reads[0]} ${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index -@ ${task.cpus} "${prefix}".sorted.bam From 757e930003034026af27cc851b849edadc59f585 Mon Sep 17 00:00:00 2001 From: maxibor Date: Tue, 12 Feb 2019 17:45:07 +0100 Subject: [PATCH 12/56] update test --- .travis.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index ecb69d6d6..7fe187219 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,20 +40,18 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --saveReference # Run the basic pipeline with single end data (pretending its single end actually) - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/ - # Run the basic pipeline with paired end data - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --saveReference + # Run the basic pipeline with paired end data without collapsing + - nextflow run ${TRAVIS_BUILD_DIR} -profile 
test,docker --pairedEnd --skip_collapse --saveReference + # Run the basic pipeline with paired end data without collapsing nor trimming + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --skip_trim --saveReference # Run the same pipeline testing optional step: fastp, complexity - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test BAM Trimming - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --trim_bam --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test running with CircularMapper - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --circularmapper --circulartarget 'NC_007596.2' - # Test running with CircularMapper on paired end without collapsing - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --circularmapper --circulartarget 'NC_007596.2' # Test running with BWA Mem - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/ - # Test running with BWA Mem on paired end without collapsing - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --noCollapse --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test with zipped reference input - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --fasta 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Test.fasta.gz' # Run the basic pipeline with the bam input profile From 949fd28017b7bb1c0abcc2f22e6e5fd590b50945 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 19 Feb 2019 20:30:28 +0100 Subject: [PATCH 13/56] Address issue with picard memory --- CHANGELOG.md | 6 ++++++ conf/base.config | 5 ++++- main.nf | 4 ++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 047f5b207..315efb773 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unpublished / Dev Branch] +### `Added` + +### `Fixed` +* [#145](https://github.com/nf-core/eager/issues/145) - Added Picard Memory Handling [fix](https://github.com/nf-core/eager/issues/144) + + ## [2.0.5] - 2019-01-28 ### `Added` diff --git a/conf/base.config b/conf/base.config index c65d74400..8c2868214 100644 --- a/conf/base.config +++ b/conf/base.config @@ -31,7 +31,10 @@ process { withName:convertBam { cpus = { check_max(8 * task.attempt, 'cpus') } } - + withName:makeSeqDict { + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + withName:bwa { memory = { check_max( 16.GB * task.attempt, 'memory' ) } cpus = { check_max(8 * task.attempt, 'cpus') } diff --git a/main.nf b/main.nf index 826b0bff5..7513f0649 100644 --- a/main.nf +++ b/main.nf @@ -488,7 +488,7 @@ process makeSeqDict { mkdir -p seq_dict mv $fasta "seq_dict/${base}" cd seq_dict - picard CreateSequenceDictionary R=$base O="${fasta.baseName}.dict" + picard -Xmx${task.memory.toMega()}M -Xms${task.memory.toMega()}M CreateSequenceDictionary R=$base O="${fasta.baseName}.dict" """ } @@ -970,7 +970,7 @@ process markDup{ script: prefix = "${bam.baseName}" """ - picard MarkDuplicates INPUT=$bam OUTPUT=${prefix}.markDup.bam REMOVE_DUPLICATES=TRUE AS=TRUE METRICS_FILE="${prefix}.markdup.metrics" VALIDATION_STRINGENCY=SILENT + picard -Xmx${task.memory.toMega()}M -Xms${task.memory.toMega()}M MarkDuplicates INPUT=$bam OUTPUT=${prefix}.markDup.bam REMOVE_DUPLICATES=TRUE AS=TRUE METRICS_FILE="${prefix}.markdup.metrics" VALIDATION_STRINGENCY=SILENT """ } From b311d614dc93bf5a22a6e0a121bf6f1bdfa7e4fb Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 21 Feb 2019 13:11:38 +0100 Subject: [PATCH 14/56] Use CSI indices wherever possible --- main.nf | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf 
index 826b0bff5..9a64570d6 100644 --- a/main.nf +++ b/main.nf @@ -649,7 +649,7 @@ process bwa { output: file "*.sorted.bam" into ch_mapped_reads_idxstats,ch_mapped_reads_filter,ch_mapped_reads_preseq, ch_mapped_reads_damageprofiler - file "*.bai" into ch_bam_index_for_damageprofiler + file "*.csi" into ch_bam_index_for_damageprofiler script: @@ -658,7 +658,7 @@ process bwa { """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam - samtools index "${prefix}".sorted.bam + samtools index -c "${prefix}".sorted.bam """ } @@ -703,7 +703,7 @@ process circularmapper{ output: file "*.sorted.bam" into ch_mapped_reads_idxstats_cm,ch_mapped_reads_filter_cm,ch_mapped_reads_preseq_cm, ch_mapped_reads_damageprofiler_cm - file "*.bai" + file "*.csi" script: filter = "${params.circularfilter}" ? 
'' : '-f true -x false' @@ -715,7 +715,7 @@ process circularmapper{ bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam - samtools index "${prefix}".sorted.bam + samtools index -c "${prefix}".sorted.bam """ } @@ -731,7 +731,7 @@ process bwamem { output: file "*.sorted.bam" into ch_bwamem_mapped_reads_idxstats,ch_bwamem_mapped_reads_filter,ch_bwamem_mapped_reads_preseq, ch_bwamem_mapped_reads_damageprofiler - file "*.bai" + file "*.csi" script: @@ -739,7 +739,7 @@ process bwamem { fasta = "${index}/*.fasta" """ bwa mem -t ${task.cpus} $fasta $reads -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam - samtools index -@ ${task.cpus} "${prefix}".sorted.bam + samtools index -c -@ ${task.cpus} "${prefix}".sorted.bam """ } @@ -786,7 +786,7 @@ process samtools_filter { file "*filtered.bam" into ch_bam_filtered_qualimap, ch_bam_filtered_dedup, ch_bam_filtered_markdup, ch_bam_filtered_pmdtools, ch_bam_filtered_angsd, ch_bam_filtered_gatk file "*.fastq.gz" optional true file "*.unmapped.bam" optional true - file "*.bai" + file "*.csi" script: prefix="$bam" - ~/(\.bam)?/ @@ -794,30 +794,30 @@ process samtools_filter { if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "discard"){ """ samtools view -h -b $bam -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam - samtools index ${prefix}.filtered.bam + samtools index -c ${prefix}.filtered.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "bam"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q 
${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index ${prefix}.filtered.bam + samtools index -c ${prefix}.filtered.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "fastq"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index ${prefix}.filtered.bam + samtools index -c ${prefix}.filtered.bam samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz rm ${prefix}.unmapped.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "both"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index ${prefix}.filtered.bam + samtools index -c ${prefix}.filtered.bam samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz """ } else { //Only apply quality filtering, default """ samtools view -h -b $bam -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam - samtools index ${prefix}.filtered.bam + samtools index -c ${prefix}.filtered.bam """ } } @@ -841,7 +841,7 @@ process dedup{ file "*.hist" into ch_hist_for_preseq file "*.log" into ch_dedup_results_for_multiqc file "${prefix}.sorted.bam" into ch_dedup_bam - file "*.bai" + file "*.csi" script: prefix="${bam.baseName}" @@ -852,14 +852,14 @@ process dedup{ dedup -i $bam $treat_merged -o . -u mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam - samtools index "$prefix".sorted.bam + samtools index -c "$prefix".sorted.bam """ } else { """ dedup -i $bam $treat_merged -o . 
-u mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam - samtools index "$prefix".sorted.bam + samtools index -c "$prefix".sorted.bam """ } } @@ -1037,7 +1037,7 @@ process bam_trim { output: file "*.trimmed.bam" into ch_trimmed_bam_for_genotyping - file "*.bai" + file "*.csi" script: prefix="${bam.baseName}" @@ -1045,7 +1045,7 @@ process bam_trim { """ bam trimBam $bam tmp.bam -L ${params.bamutils_clip_left} -R ${params.bamutils_clip_right} ${softclip} samtools sort -@ ${task.cpus} tmp.bam -o ${prefix}.trimmed.bam - samtools index ${prefix}.trimmed.bam + samtools index -c ${prefix}.trimmed.bam """ } From 9eb36c7d7f8b154f8db429cb7b25254d76604ff7 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 21 Feb 2019 13:14:20 +0100 Subject: [PATCH 15/56] Add proper changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 315efb773..0d2c7f047 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` ### `Fixed` -* [#145](https://github.com/nf-core/eager/issues/145) - Added Picard Memory Handling [fix](https://github.com/nf-core/eager/issues/144) - +* [#145](https://github.com/nf-core/eager/pull/145) - Added Picard Memory Handling [fix](https://github.com/nf-core/eager/issues/144) +* [#147](https://github.com/nf-core/eager/pull/147) - Fix Samtools Index for [large references](https://github.com/nf-core/eager/issues/146) ## [2.0.5] - 2019-01-28 From 8d26b3391f7a513bb450758f8836490c4aceca2e Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 21 Feb 2019 13:22:06 +0100 Subject: [PATCH 16/56] Added a contributor section to README --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index d602fd450..02626dd76 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,18 @@ James Fellows Yates, Raphael Eisenhofer and Judith Neukamm. If you want to contribute, please open an issue and ask to be added to the project - happy to do so and everyone is welcome to contribute here! +## Contributors + +- [James A. Fellows-Yates](https://github.com/jfy133) +- [Stephen Clayton](https://github.com/sc13-bioinf) +- [Judith Neukamm](https://github.com/JudithNeukamm) +- [Raphael Eisenhofer](https://github.com/EisenRa) +- [Maxime Garcia](https://github.com/MaxUlysse) +- [Luc Venturini](https://github.com/lucventurini) +- [Hester van Schalkwyk](https://github.com/hesterjvs) + +If you've contributed and you're missing in here, please let me know and I'll add you in. + ## Tool References * **EAGER v1**, CircularMapper, DeDup* Peltzer, A., Jäger, G., Herbig, A., Seitz, A., Kniep, C., Krause, J., & Nieselt, K. (2016). EAGER: efficient ancient genome reconstruction. Genome Biology, 17(1), 1–14. 
[https://doi.org/10.1186/s13059-016-0918-z](https://doi.org/10.1186/s13059-016-0918-z) Download: [https://github.com/apeltzer/EAGER-GUI](https://github.com/apeltzer/EAGER-GUI) and [https://github.com/apeltzer/EAGER-CLI](https://github.com/apeltzer/EAGER-CLI) From 8e29effb93cd612a1125cc49a5a86889e327cfaf Mon Sep 17 00:00:00 2001 From: Evan Floden Date: Thu, 21 Feb 2019 13:23:49 +0100 Subject: [PATCH 17/56] Update README.md with instructions for test data --- README.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d602fd450..b5b8298b0 100644 --- a/README.md +++ b/README.md @@ -45,20 +45,28 @@ Additional functionality contained by the pipeline currently includes: ## Quick Start 1. Install [`nextflow`](docs/installation.md) + 2. Install one of [`docker`](https://docs.docker.com/engine/installation/), [`singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`conda`](https://conda.io/miniconda.html) + 3. Download the EAGER pipeline ```bash nextflow pull nf-core/eager ``` -4. Set up your job with default parameters +4. Test the pipeline using the provided test data + +```bash +nextflow run nf-core/eager -profile ,test --pairedEnd +``` + +5. Start running your own ancient DNA analysis! ```bash -nextflow run nf-core -profile --reads'*_R{1,2}.fastq.gz' --fasta '.fasta' +nextflow run nf-core/eager -profile --reads'*_R{1,2}.fastq.gz' --fasta '.fasta' ``` -5. See the overview of the run with under `/MultiQC/multiqc_report.html` +NB. You can see an overview of the run in the MultiQC report located at `/MultiQC/multiqc_report.html` Modifications to the default pipeline are easily made using various options as described in the documentation. 
From abf46427569d25642ed76c40c96309863776daa0 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 21 Feb 2019 13:59:49 +0100 Subject: [PATCH 18/56] Add docs on this --- docs/usage.md | 4 ++++ main.nf | 40 ++++++++++++++++++++++------------------ nextflow.config | 1 + 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index dbb3889a1..e4d3409a0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -170,6 +170,10 @@ If you prefer, you can specify the full path to your reference genome when you r ``` > If you don't specify appropriate `--bwa_index`, `--fasta_index` parameters, the pipeline will create these indices for you automatically. Note, that saving these for later has to be turned on using `--saveReference`. You may also specify the path to a gzipped (`*.gz` file extension) FastA as reference genome - this will be uncompressed by the pipeline automatically for you. Note that other file extensions such as `.fna`, `.fa` are also supported but will be renamed to `.fasta` automatically by the pipeline. +### `--size` + +This parameter is automatically set by the pipeline depending on the size of your chosen reference FastA genome. If this is larger than 3.5GB, the `samtools index` calls in the pipeline automatically generate `CSI` indices instead of `BAI` indices to accompensate for the size of the reference genome. Shouldn't be required for smaller genomes, but `>4GB` genomes have been shown to need `CSI` indices. You cannot set this parameter yourselves, but it is nevertheless documented for the sake of completeness in here. + ### `--genome` (using iGenomes) The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. 
diff --git a/main.nf b/main.nf index 64b34a61c..5e449d769 100644 --- a/main.nf +++ b/main.nf @@ -240,7 +240,10 @@ if("${params.fasta}".endsWith(".gz")){ .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"} .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper_index} } - + + +//Check genome size for large reference genomes +params.size = (file("${params.fasta}").size() > 3500000000) ? "-c" : "" @@ -346,6 +349,7 @@ summary['Pipeline Version'] = workflow.manifest.version summary['Run Name'] = custom_runName ?: workflow.runName summary['Reads'] = params.reads summary['Fasta Ref'] = params.fasta +summary['BAM Index Type'] = (params.size == "") ? 'BAI' : 'CSI' if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' summary['Max Memory'] = params.max_memory @@ -649,7 +653,7 @@ process bwa { output: file "*.sorted.bam" into ch_mapped_reads_idxstats,ch_mapped_reads_filter,ch_mapped_reads_preseq, ch_mapped_reads_damageprofiler - file "*.csi" into ch_bam_index_for_damageprofiler + file "*.{bai,csi}" into ch_bam_index_for_damageprofiler script: @@ -658,7 +662,7 @@ process bwa { """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam - samtools index -c "${prefix}".sorted.bam + samtools index ${params.size} "${prefix}".sorted.bam """ } @@ -703,7 +707,7 @@ process circularmapper{ output: file "*.sorted.bam" into ch_mapped_reads_idxstats_cm,ch_mapped_reads_filter_cm,ch_mapped_reads_preseq_cm, ch_mapped_reads_damageprofiler_cm - file "*.csi" + file "*.{bai,csi}" script: filter = 
"${params.circularfilter}" ? '' : '-f true -x false' @@ -715,7 +719,7 @@ process circularmapper{ bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam - samtools index -c "${prefix}".sorted.bam + samtools index ${params.size} "${prefix}".sorted.bam """ } @@ -731,7 +735,7 @@ process bwamem { output: file "*.sorted.bam" into ch_bwamem_mapped_reads_idxstats,ch_bwamem_mapped_reads_filter,ch_bwamem_mapped_reads_preseq, ch_bwamem_mapped_reads_damageprofiler - file "*.csi" + file "*.{bai,csi}" script: @@ -786,7 +790,7 @@ process samtools_filter { file "*filtered.bam" into ch_bam_filtered_qualimap, ch_bam_filtered_dedup, ch_bam_filtered_markdup, ch_bam_filtered_pmdtools, ch_bam_filtered_angsd, ch_bam_filtered_gatk file "*.fastq.gz" optional true file "*.unmapped.bam" optional true - file "*.csi" + file "*.{bai,csi}" script: prefix="$bam" - ~/(\.bam)?/ @@ -794,30 +798,30 @@ process samtools_filter { if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "discard"){ """ samtools view -h -b $bam -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam - samtools index -c ${prefix}.filtered.bam + samtools index ${params.size} ${prefix}.filtered.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "bam"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index -c ${prefix}.filtered.bam + samtools index ${params.size} ${prefix}.filtered.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "fastq"){ """ samtools view -h $bam | tee >(samtools 
view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index -c ${prefix}.filtered.bam + samtools index ${params.size} ${prefix}.filtered.bam samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz rm ${prefix}.unmapped.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "both"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index -c ${prefix}.filtered.bam + samtools index ${params.size} ${prefix}.filtered.bam samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz """ } else { //Only apply quality filtering, default """ samtools view -h -b $bam -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam - samtools index -c ${prefix}.filtered.bam + samtools index ${params.size} ${prefix}.filtered.bam """ } } @@ -841,25 +845,25 @@ process dedup{ file "*.hist" into ch_hist_for_preseq file "*.log" into ch_dedup_results_for_multiqc file "${prefix}.sorted.bam" into ch_dedup_bam - file "*.csi" + file "*.{bai,csi}" script: prefix="${bam.baseName}" treat_merged="${params.dedup_all_merged}" ? '-m' : '' - + if(params.singleEnd) { """ dedup -i $bam $treat_merged -o . -u mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam - samtools index -c "$prefix".sorted.bam + samtools index ${params.size} "$prefix".sorted.bam """ } else { """ dedup -i $bam $treat_merged -o . 
-u mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam - samtools index -c "$prefix".sorted.bam + samtools index ${params.size} "$prefix".sorted.bam """ } } @@ -1037,7 +1041,7 @@ process bam_trim { output: file "*.trimmed.bam" into ch_trimmed_bam_for_genotyping - file "*.csi" + file "*.{bai,csi}" script: prefix="${bam.baseName}" @@ -1045,7 +1049,7 @@ process bam_trim { """ bam trimBam $bam tmp.bam -L ${params.bamutils_clip_left} -R ${params.bamutils_clip_right} ${softclip} samtools sort -@ ${task.cpus} tmp.bam -o ${prefix}.trimmed.bam - samtools index -c ${prefix}.trimmed.bam + samtools index ${params.size} ${prefix}.trimmed.bam """ } diff --git a/nextflow.config b/nextflow.config index f4ac236ad..057876140 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,6 +23,7 @@ params { tracedir = "${params.outdir}/pipeline_info" readPaths = false bam = false + size = "" //More defaults complexity_filter = false From 8a6dccb4a8480a5f0ae6ea66638cab6a4eb29663 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 25 Feb 2019 14:43:57 +0100 Subject: [PATCH 19/56] Update new parameter `large_ref` --- docs/usage.md | 4 ++-- main.nf | 39 ++++++++++++++++++--------------------- nextflow.config | 4 ++-- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index e4d3409a0..60c65e6a0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -170,9 +170,9 @@ If you prefer, you can specify the full path to your reference genome when you r ``` > If you don't specify appropriate `--bwa_index`, `--fasta_index` parameters, the pipeline will create these indices for you automatically. Note, that saving these for later has to be turned on using `--saveReference`. You may also specify the path to a gzipped (`*.gz` file extension) FastA as reference genome - this will be uncompressed by the pipeline automatically for you. 
Note that other file extensions such as `.fna`, `.fa` are also supported but will be renamed to `.fasta` automatically by the pipeline. -### `--size` +### `--large_ref` -This parameter is automatically set by the pipeline depending on the size of your chosen reference FastA genome. If this is larger than 3.5GB, the `samtools index` calls in the pipeline automatically generate `CSI` indices instead of `BAI` indices to accompensate for the size of the reference genome. Shouldn't be required for smaller genomes, but `>4GB` genomes have been shown to need `CSI` indices. You cannot set this parameter yourselves, but it is nevertheless documented for the sake of completeness in here. +This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `CSI` indices instead of `BAI` indices to accompensate for the size of the reference genome. This parameter is not required for smaller references (including a human `hg19` or `grch37`/`grch38` reference), but `>4GB` genomes have been shown to need `CSI` indices. ### `--genome` (using iGenomes) diff --git a/main.nf b/main.nf index 5e449d769..1bfe1e926 100644 --- a/main.nf +++ b/main.nf @@ -241,15 +241,6 @@ if("${params.fasta}".endsWith(".gz")){ .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper_index} } - -//Check genome size for large reference genomes -params.size = (file("${params.fasta}").size() > 3500000000) ? "-c" : "" - - - - - - //Index files provided? Then check whether they are correct and complete if (params.aligner != 'bwa' && !params.circularmapper && !params.bwamem){ exit 1, "Invalid aligner option. Default is bwa, but specify --circularmapper or --bwamem to use these." 
@@ -349,7 +340,7 @@ summary['Pipeline Version'] = workflow.manifest.version summary['Run Name'] = custom_runName ?: workflow.runName summary['Reads'] = params.reads summary['Fasta Ref'] = params.fasta -summary['BAM Index Type'] = (params.size == "") ? 'BAI' : 'CSI' +summary['BAM Index Type'] = (params.large_ref == "") ? 'BAI' : 'CSI' if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' summary['Max Memory'] = params.max_memory @@ -659,10 +650,11 @@ process bwa { script: prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*.fasta" + size = ${params.large_ref} ? '-c' : '' """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam - samtools index ${params.size} "${prefix}".sorted.bam + samtools index $size "${prefix}".sorted.bam """ } @@ -713,13 +705,14 @@ process circularmapper{ filter = "${params.circularfilter}" ? '' : '-f true -x false' prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*_*.fasta" + size = ${params.large_ref} ? 
'-c' : '' """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam - samtools index ${params.size} "${prefix}".sorted.bam + samtools index $size "${prefix}".sorted.bam """ } @@ -741,9 +734,10 @@ process bwamem { script: prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*.fasta" + size = ${params.large_ref} ? '-c' : '' """ bwa mem -t ${task.cpus} $fasta $reads -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam - samtools index -c -@ ${task.cpus} "${prefix}".sorted.bam + samtools index $size -@ ${task.cpus} "${prefix}".sorted.bam """ } @@ -794,34 +788,35 @@ process samtools_filter { script: prefix="$bam" - ~/(\.bam)?/ + size = ${params.large_ref} ? 
'-c' : '' if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "discard"){ """ samtools view -h -b $bam -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam - samtools index ${params.size} ${prefix}.filtered.bam + samtools index $size ${prefix}.filtered.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "bam"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index ${params.size} ${prefix}.filtered.bam + samtools index $size ${prefix}.filtered.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "fastq"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index ${params.size} ${prefix}.filtered.bam + samtools index $size ${prefix}.filtered.bam samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz rm ${prefix}.unmapped.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "both"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index ${params.size} ${prefix}.filtered.bam + samtools index $size ${prefix}.filtered.bam samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz """ } else { //Only apply quality filtering, default """ samtools view -h -b $bam -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o 
${prefix}.filtered.bam - samtools index ${params.size} ${prefix}.filtered.bam + samtools index $size ${prefix}.filtered.bam """ } } @@ -850,20 +845,21 @@ process dedup{ script: prefix="${bam.baseName}" treat_merged="${params.dedup_all_merged}" ? '-m' : '' + size = ${params.large_ref} ? '-c' : '' if(params.singleEnd) { """ dedup -i $bam $treat_merged -o . -u mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam - samtools index ${params.size} "$prefix".sorted.bam + samtools index $size "$prefix".sorted.bam """ } else { """ dedup -i $bam $treat_merged -o . -u mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam - samtools index ${params.size} "$prefix".sorted.bam + samtools index $size "$prefix".sorted.bam """ } } @@ -1046,10 +1042,11 @@ process bam_trim { script: prefix="${bam.baseName}" softclip = "${params.bamutils_softclip}" ? '-c' : '' + size = ${params.large_ref} ? '-c' : '' """ bam trimBam $bam tmp.bam -L ${params.bamutils_clip_left} -R ${params.bamutils_clip_right} ${softclip} samtools sort -@ ${task.cpus} tmp.bam -o ${prefix}.trimmed.bam - samtools index ${params.size} ${prefix}.trimmed.bam + samtools index $size ${prefix}.trimmed.bam """ } diff --git a/nextflow.config b/nextflow.config index 057876140..150d9bc25 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,8 +23,8 @@ params { tracedir = "${params.outdir}/pipeline_info" readPaths = false bam = false - size = "" - + large_ref = false + //More defaults complexity_filter = false complexity_filter_poly_g_min = 10 From b6f65b179ee1b4aec6b18016ac448ba4d25cda63 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 25 Feb 2019 15:01:58 +0100 Subject: [PATCH 20/56] Fixing indices hopefully --- main.nf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/main.nf b/main.nf index 1bfe1e926..4cb76fd20 100644 --- a/main.nf +++ b/main.nf @@ -654,7 +654,7 @@ process bwa { """ bwa aln -t 
${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam - samtools index $size "${prefix}".sorted.bam + samtools index "${size}" "${prefix}".sorted.bam """ } @@ -712,7 +712,7 @@ process circularmapper{ bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam - samtools index $size "${prefix}".sorted.bam + samtools index "${size}" "${prefix}".sorted.bam """ } @@ -737,7 +737,7 @@ process bwamem { size = ${params.large_ref} ? '-c' : '' """ bwa mem -t ${task.cpus} $fasta $reads -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam - samtools index $size -@ ${task.cpus} "${prefix}".sorted.bam + samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam """ } @@ -793,30 +793,30 @@ process samtools_filter { if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "discard"){ """ samtools view -h -b $bam -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam - samtools index $size ${prefix}.filtered.bam + samtools index "${size}" ${prefix}.filtered.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "bam"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index $size ${prefix}.filtered.bam + samtools index "${size}" ${prefix}.filtered.bam """ } else 
if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "fastq"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index $size ${prefix}.filtered.bam + samtools index "${size}" ${prefix}.filtered.bam samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz rm ${prefix}.unmapped.bam """ } else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "both"){ """ samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam) - samtools index $size ${prefix}.filtered.bam + samtools index "${size}" ${prefix}.filtered.bam samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz """ } else { //Only apply quality filtering, default """ samtools view -h -b $bam -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam - samtools index $size ${prefix}.filtered.bam + samtools index "${size}" ${prefix}.filtered.bam """ } } @@ -852,14 +852,14 @@ process dedup{ dedup -i $bam $treat_merged -o . -u mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam - samtools index $size "$prefix".sorted.bam + samtools index "${size}" "$prefix".sorted.bam """ } else { """ dedup -i $bam $treat_merged -o . 
-u mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam - samtools index $size "$prefix".sorted.bam + samtools index "${size}" "$prefix".sorted.bam """ } } @@ -1046,7 +1046,7 @@ process bam_trim { """ bam trimBam $bam tmp.bam -L ${params.bamutils_clip_left} -R ${params.bamutils_clip_right} ${softclip} samtools sort -@ ${task.cpus} tmp.bam -o ${prefix}.trimmed.bam - samtools index $size ${prefix}.trimmed.bam + samtools index "${size}" ${prefix}.trimmed.bam """ } From f9ac1d462fc4adb2d51b3740f25f82b6d79cfabe Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 25 Feb 2019 15:07:59 +0100 Subject: [PATCH 21/56] Fixing indexing --- main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index 4cb76fd20..2c8a586d8 100644 --- a/main.nf +++ b/main.nf @@ -650,7 +650,7 @@ process bwa { script: prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*.fasta" - size = ${params.large_ref} ? '-c' : '' + size = "${params.large_ref}" ? '-c' : '' """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam @@ -705,7 +705,7 @@ process circularmapper{ filter = "${params.circularfilter}" ? '' : '-f true -x false' prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*_*.fasta" - size = ${params.large_ref} ? '-c' : '' + size = "${params.large_ref}" ? 
'-c' : '' """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" @@ -734,7 +734,7 @@ process bwamem { script: prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ fasta = "${index}/*.fasta" - size = ${params.large_ref} ? '-c' : '' + size = "${params.large_ref}" ? '-c' : '' """ bwa mem -t ${task.cpus} $fasta $reads -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam @@ -788,7 +788,7 @@ process samtools_filter { script: prefix="$bam" - ~/(\.bam)?/ - size = ${params.large_ref} ? '-c' : '' + size = "${params.large_ref}" ? '-c' : '' if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "discard"){ """ @@ -845,7 +845,7 @@ process dedup{ script: prefix="${bam.baseName}" treat_merged="${params.dedup_all_merged}" ? '-m' : '' - size = ${params.large_ref} ? '-c' : '' + size = "${params.large_ref}" ? '-c' : '' if(params.singleEnd) { """ @@ -1042,7 +1042,7 @@ process bam_trim { script: prefix="${bam.baseName}" softclip = "${params.bamutils_softclip}" ? '-c' : '' - size = ${params.large_ref} ? '-c' : '' + size = "${params.large_ref}" ? '-c' : '' """ bam trimBam $bam tmp.bam -L ${params.bamutils_clip_left} -R ${params.bamutils_clip_right} ${softclip} samtools sort -@ ${task.cpus} tmp.bam -o ${prefix}.trimmed.bam From 7405c8ece148227eae671375a026ccec34ed00a3 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 25 Feb 2019 17:21:54 +0100 Subject: [PATCH 22/56] Fix for post-dup steps --- CHANGELOG.md | 1 + main.nf | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 047f5b207..28b97779b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Fixed` * [#128](https://github.com/nf-core/eager/issues/128) - Fixed reference genome handling errors +* [#140](https://github.com/nf-core/eager/issues/140) - Fixed post-deduplication step errors ### `Dependencies` * Picard Tools 2.18.21 -> 2.18.23 diff --git a/main.nf b/main.nf index 826b0bff5..d0e0c4f38 100644 --- a/main.nf +++ b/main.nf @@ -907,7 +907,7 @@ process damageprofiler { input: file bam from ch_mapped_reads_damageprofiler.mix(ch_mapped_reads_damageprofiler_cm,ch_bwamem_mapped_reads_damageprofiler) - file fasta from ch_fasta_for_damageprofiler + file fasta from ch_fasta_for_damageprofiler.first() file bai from ch_bam_index_for_damageprofiler @@ -934,7 +934,7 @@ process qualimap { input: file bam from ch_bam_filtered_qualimap - file fasta from ch_fasta_for_qualimap + file fasta from ch_fasta_for_qualimap.first() output: file "*" into ch_qualimap_results From 7e4035d74f119be16b3bf0b4838dc41c6fda7acd Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 25 Feb 2019 17:24:24 +0100 Subject: [PATCH 23/56] Nicer changelog [skip ci] --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28b97779b..a5b579f5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Fixed` * [#128](https://github.com/nf-core/eager/issues/128) - Fixed reference genome handling errors -* [#140](https://github.com/nf-core/eager/issues/140) - Fixed post-deduplication step errors +* [#151](https://github.com/nf-core/eager/pull/151) - Fixed [post-deduplication step errors](https://github.com/nf-core/eager/issues/128) ### `Dependencies` * Picard Tools 2.18.21 -> 2.18.23 From 76e0cbd859f935196e03dcc5003f5e33257d9781 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 25 Feb 2019 17:25:44 +0100 Subject: [PATCH 24/56] Its unpublished stuff [skip ci] --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5b579f5e..9389bdc0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unpublished / Dev Branch] +### `Fixed` +* [#151](https://github.com/nf-core/eager/pull/151) - Fixed [post-deduplication step errors](https://github.com/nf-core/eager/issues/128 + ## [2.0.5] - 2019-01-28 ### `Added` @@ -15,7 +18,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Fixed` * [#128](https://github.com/nf-core/eager/issues/128) - Fixed reference genome handling errors -* [#151](https://github.com/nf-core/eager/pull/151) - Fixed [post-deduplication step errors](https://github.com/nf-core/eager/issues/128) +) ### `Dependencies` * Picard Tools 2.18.21 -> 2.18.23 From e971cacb14b021a695b1203f5774f73a4b888905 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Mon, 25 Feb 2019 21:11:56 +0100 Subject: [PATCH 25/56] Made polyG param clearer what it is --- main.nf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/main.nf b/main.nf index 7513f0649..8d7c9050f 100644 --- a/main.nf +++ b/main.nf @@ -50,7 +50,7 @@ def helpMessage() { --skip_deduplication Complexity Filtering - --complexity_filtering Run poly-G removal on FASTQ files + --complexity_filter_poly_g Run poly-G removal on FASTQ files --complexity_filter_poly_g_min Specify length of poly-g min for clipping to be performed (default: 10) Clipping / Merging @@ -151,7 +151,7 @@ params.skip_qualimap = false params.skip_deduplication = false //Complexity filtering reads -params.complexity_filter = false +params.complexity_filter_poly_g = false params.complexity_filter_poly_g_min = 10 //Read clipping and merging parameters @@ -291,14 +291,14 @@ if( params.readPaths ){ .from( params.readPaths ) .map { row -> [ row[0], [ file( row[1][0] ) ] ] } .ifEmpty { exit 1, "params.readPaths or params.bams was empty - no input files supplied!" } - .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering } + .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filter_poly_g } ch_bam_to_fastq_convert = Channel.empty() } else if (!params.bam){ Channel .from( params.readPaths ) .map { row -> [ row[0], [ file( row[1][0] ), file( row[1][1] ) ] ] } .ifEmpty { exit 1, "params.readPaths or params.bams was empty - no input files supplied!" 
} - .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering } + .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filter_poly_g } ch_bam_to_fastq_convert = Channel.empty() } else { Channel @@ -310,7 +310,7 @@ if( params.readPaths ){ //Set up clean channels ch_read_files_fastqc = Channel.empty() - ch_read_files_complexity_filtering = Channel.empty() + ch_read_files_complexity_filter_poly_g = Channel.empty() ch_read_files_clip = Channel.empty() } } else if (!params.bam){ @@ -318,7 +318,7 @@ if( params.readPaths ){ .fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 ) .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs" + "to be enclosed in quotes!\nNB: Path requires at least one * wildcard!\nIf this is single-end data, please specify --singleEnd on the command line." } - .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering } + .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filter_poly_g } ch_bam_to_fastq_convert = Channel.empty() } else { Channel @@ -331,7 +331,7 @@ if( params.readPaths ){ //Set up clean channels ch_read_files_fastqc = Channel.empty() - ch_read_files_complexity_filtering = Channel.empty() + ch_read_files_complexity_filter_poly_g = Channel.empty() ch_read_files_clip = Channel.empty() } @@ -548,13 +548,13 @@ process fastp { tag "$name" publishDir "${params.outdir}/FastP", mode: 'copy' - when: params.complexity_filter + when: params.complexity_filter_poly_g input: - set val(name), file(reads) from ch_read_files_complexity_filtering.mix(ch_read_files_converted_fastp) + set val(name), file(reads) from ch_read_files_complexity_filter_poly_g.mix(ch_read_files_converted_fastp) output: - set val(name), file("*pG.fq.gz") into ch_clipped_reads_complexity_filtered + set val(name), file("*pG.fq.gz") into ch_clipped_reads_complexity_filtered_poly_g file("*.json") into ch_fastp_for_multiqc script: @@ -582,7 
+582,7 @@ process adapter_removal { when: !params.bam input: - set val(name), file(reads) from ( params.complexity_filter ? ch_clipped_reads_complexity_filtered : ch_read_files_clip ) + set val(name), file(reads) from ( params.complexity_filter_poly_g ? ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip ) output: file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) From 0feeb191517554876034bbfcd47ee42c8b8505fd Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 25 Feb 2019 21:31:54 +0100 Subject: [PATCH 26/56] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 315efb773..c01691fce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Fixed` * [#145](https://github.com/nf-core/eager/issues/145) - Added Picard Memory Handling [fix](https://github.com/nf-core/eager/issues/144) - +* Clarified `--complexity_filter` flag to be specifically for poly G trimming. ## [2.0.5] - 2019-01-28 From 29306df77f93746c15bebaed2a734441e6115c3e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 25 Feb 2019 21:33:15 +0100 Subject: [PATCH 27/56] Updated ploy G trim flag --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index dbb3889a1..d2caf6821 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -299,7 +299,7 @@ Turns off duplicate removal methods DeDup and MarkDuplicates respectively. No du ## Complexity Filtering Options -### `--complexity_filter` +### `--complexity_filter_poly_g` Performs a poly-G tail removal step in the beginning of the pipeline, if turned on. 
This can be useful for trimming ploy-G tails from short-fragments sequenced on two-colour Illumina chemistry such as NextSeqs (where no-fluorescence is read as a G on two-colour chemistry), which can inflate reported GC content values. From bc02be25023f53d8a7c4cd8d8160bc172a72d3ea Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 25 Feb 2019 21:36:22 +0100 Subject: [PATCH 28/56] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c01691fce..a5bf6fe11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Fixed` * [#145](https://github.com/nf-core/eager/issues/145) - Added Picard Memory Handling [fix](https://github.com/nf-core/eager/issues/144) -* Clarified `--complexity_filter` flag to be specifically for poly G trimming. ## [2.0.5] - 2019-01-28 From 7836ab8df5f8d64638a87f7fb7a67d6a6e4b549a Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 25 Feb 2019 21:41:51 +0100 Subject: [PATCH 29/56] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5bf6fe11..9084fd20d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Fixed` * [#145](https://github.com/nf-core/eager/issues/145) - Added Picard Memory Handling [fix](https://github.com/nf-core/eager/issues/144) +* Clarified `--complexity_filter` flag to be specifically for poly G trimming. + ## [2.0.5] - 2019-01-28 From 6d13545b86d3636e8d6aae0465e7cf61adbd8dc1 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Mon, 25 Feb 2019 21:46:56 +0100 Subject: [PATCH 30/56] Added fastP position to MultiQC config --- conf/multiqc_config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/multiqc_config.yaml b/conf/multiqc_config.yaml index 8e561e902..1b6bdaa63 100644 --- a/conf/multiqc_config.yaml +++ b/conf/multiqc_config.yaml @@ -9,6 +9,7 @@ top_modules: - '*_fastqc.zip' path_filters_exclude: - '*.combined.prefixed_fastqc.zip' + - 'fastp' - 'adapterRemoval' - 'fastqc': name: 'FastQC (post-AdapterRemoval)' From fabd1a3dbdcb715211dcf9d70494b2d57295cd7a Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 27 Feb 2019 00:08:51 +0100 Subject: [PATCH 31/56] Should fix the remaining issues --- main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index d0e0c4f38..5c053ef28 100644 --- a/main.nf +++ b/main.nf @@ -1139,12 +1139,12 @@ process multiqc { file multiqc_config from ch_multiqc_config.collect().ifEmpty([]) file ('fastqc_raw/*') from ch_fastqc_results.collect().ifEmpty([]) file('fastqc/*') from ch_fastqc_after_clipping.collect().ifEmpty([]) - file ('software_versions/*') from software_versions_yaml.collect().ifEmpty([]) + file ('software_versions/software_versions_mqc*') from software_versions_yaml.collect().ifEmpty([]) file ('adapter_removal/*') from ch_adapterremoval_logs.collect().ifEmpty([]) file ('idxstats/*') from ch_idxstats_for_multiqc.collect().ifEmpty([]) file ('preseq/*') from ch_preseq_results.collect().ifEmpty([]) - file ('damageprofiler/*') from ch_damageprofiler_results.collect().ifEmpty([]) - file ('qualimap/*') from ch_qualimap_results.collect().ifEmpty([]) + file ('damageprofiler/dmgprof*') from ch_damageprofiler_results.collect().ifEmpty([]) + file ('qualimap*') from ch_qualimap_results.collect().ifEmpty([]) file ('markdup/*') from ch_markdup_results_for_multiqc.collect().ifEmpty([]) file ('dedup*/*') from ch_dedup_results_for_multiqc.collect().ifEmpty([]) file ('fastp/*') from 
ch_fastp_for_multiqc.collect().ifEmpty([]) From c69261d268b2633dee44e4f518d0544e6f45b048 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 27 Feb 2019 00:17:58 +0100 Subject: [PATCH 32/56] Address issues with qualimap / multiqc / multiple samples and reporting --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index f30b22ea2..b594697c7 100644 --- a/main.nf +++ b/main.nf @@ -1144,8 +1144,8 @@ process multiqc { file ('adapter_removal/*') from ch_adapterremoval_logs.collect().ifEmpty([]) file ('idxstats/*') from ch_idxstats_for_multiqc.collect().ifEmpty([]) file ('preseq/*') from ch_preseq_results.collect().ifEmpty([]) - file ('damageprofiler/dmgprof*') from ch_damageprofiler_results.collect().ifEmpty([]) - file ('qualimap*') from ch_qualimap_results.collect().ifEmpty([]) + file ('damageprofiler/dmgprof*/*') from ch_damageprofiler_results.collect().ifEmpty([]) + file ('qualimap/qualimap*/*') from ch_qualimap_results.collect().ifEmpty([]) file ('markdup/*') from ch_markdup_results_for_multiqc.collect().ifEmpty([]) file ('dedup*/*') from ch_dedup_results_for_multiqc.collect().ifEmpty([]) file ('fastp/*') from ch_fastp_for_multiqc.collect().ifEmpty([]) From 817b9e004b841d23e3be94483498b810f4c443e4 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 1 Mar 2019 16:31:28 +0100 Subject: [PATCH 33/56] Adding in publishing dedup log files as well --- CHANGELOG.md | 3 ++- main.nf | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 550ca6319..661f73ea6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Added` -* Clarified `--complexity_filter` flag to be specifically for poly G trimming. +* [#152](https://github.com/nf-core/eager/pull/152) - Clarified `--complexity_filter` flag to be specifically for poly G trimming. 
+* [#155](https://github.com/nf-core/eager/pull/155) - Added [Dedup log to output folders](https://github.com/nf-core/eager/issues/154) ### `Fixed` diff --git a/main.nf b/main.nf index 2fa36ad26..ee465329a 100644 --- a/main.nf +++ b/main.nf @@ -828,7 +828,9 @@ Step 6: DeDup / MarkDuplicates process dedup{ tag "${bam.baseName}" - publishDir "${params.outdir}/deduplication/dedup" + publishDir "${params.outdir}/deduplication/dedup", mode: 'copy', + saveAs: {filename -> (filename.endsWith(".hist") || filename.endsWith(".log")) ? "${prefix}/$filename" : "$filename"} + when: !params.skip_deduplication && params.dedupper == 'dedup' From 3320464a5586077ceb714d8bee5b95c74953093e Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 11:19:14 +0100 Subject: [PATCH 34/56] move size out of the if clause --- main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 5b323b662..c8781788b 100644 --- a/main.nf +++ b/main.nf @@ -685,9 +685,10 @@ process bwa { script: fasta = "${index}/*.fasta" + size = "${params.large_ref}" ? '-c' : '' + if (!params.singleEnd && params.skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] - size = "${params.large_ref}" ? 
'-c' : '' """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai From def1b350c4836c123aca29dded1a0207cfe0faaf Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 17:21:08 +0100 Subject: [PATCH 35/56] match skip_* pattern --- docs/usage.md | 37 ++++++++++++++++++-------------- main.nf | 59 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 61 insertions(+), 35 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 1c8894865..2d78d15d0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -289,23 +289,10 @@ Some of the steps in the pipeline can be executed optionally. If you specify spe Turns off the computation of library complexity estimation. -### `--skip_collapse` -If you have paired-end data, but you don't want to merge them, add the command line argument `--noCollapse`. - -For example -```bash ---pairedEnd --skip_collapse --reads '*.fastq' -``` - -### `--skip_trimming` - -Turns off the adaptor and quality trimming - -For example -```bash ---pairedEnd --skip_trimming --reads '*.fastq' -``` +### `--skip_adapterremoval` +Turns off adaptor trimming and paired-end read merging. +Equivalent to setting both `--skip_collapse` and `--skip_trim` ### `--skip_damage_calculation` @@ -351,6 +338,24 @@ Defines the minimum read quality per base that is required for a base to be kept ### `--clip_min_adap_overlap` 1 Sets the minimum overlap between two reads when read merging is performed. Default is set to `1` base overlap. +### `--skip_collapse` + +Turns off the paired-end read merging. + +For example +```bash +--pairedEnd --skip_collapse --reads '*.fastq' +``` + +### `--skip_trim` + +Turns off the adaptor and quality trimming. 
+ +For example +```bash +--pairedEnd --skip_trim --reads '*.fastq' +``` + ## Read Mapping Parameters ## BWA (default) diff --git a/main.nf b/main.nf index c8781788b..81540b346 100644 --- a/main.nf +++ b/main.nf @@ -44,8 +44,7 @@ def helpMessage() { --saveReference Saves reference genome indices for later reusage Skipping Skip any of the mentioned steps - --skip_collapse Skip merging Forward and Reverse reads together. (Only for pairedEnd samples) - --skip_trim Skip adaptor and quality trimming + --skip_adapterremoval --skip_preseq --skip_damage_calculation --skip_qualimap @@ -61,6 +60,8 @@ def helpMessage() { --clip_readlength Specify read minimum length to be kept for downstream analysis --clip_min_read_quality Specify minimum base quality for not trimming off bases --min_adap_overlap Specify minimum adapter overlap + --skip_collapse Skip merging Forward and Reverse reads together. (Only for pairedEnd samples) + --skip_trim Skip adaptor and quality trimming BWA Mapping --bwaalnn Specify the -n parameter for BWA aln. @@ -147,8 +148,7 @@ params.email = false params.plaintext_email = false // Skipping parts of the pipeline for impatient users -params.skip_collapse = false -params.skip_trim = false +params.skip_adapterremoval = false params.skip_preseq = false params.skip_damage_calculation = false params.skip_qualimap = false @@ -164,6 +164,8 @@ params.clip_reverse_adaptor = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA" params.clip_readlength = 30 params.clip_min_read_quality = 20 params.min_adap_overlap = 1 +params.skip_collapse = false +params.skip_trim = false //Read mapping parameters (default = BWA aln default) params.bwaalnn = 0.04 @@ -267,6 +269,25 @@ if (params.skip_collapse && params.singleEnd){ exit 1, "--noCollapse can only be set for pairedEnd samples!" 
} + + +//Skip adapterremoval compatibility with skip_trim and skip_collapse + +skip_collapse = params.skip_collapse +skip_trim = params.skip_trim +skip_adapterremoval = params.skip_adapterremoval + +if (params.skip_collapse && params.skip_trim){ + skip_adapterremoval = true +} + +if (params.skip_adapterremoval){ + skip_adapterremoval = true + skip_collapse = true + skip_trim = true +} + + //AWSBatch sanity checking if(workflow.profile == 'awsbatch'){ if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" @@ -351,8 +372,8 @@ summary['Fasta Ref'] = params.fasta summary['BAM Index Type'] = (params.large_ref == "") ? 'BAI' : 'CSI' if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Skip Collapsing'] = params.skip_collapse ? 'Yes' : 'No' -summary['Skip Trimming'] = params.skip_trim ? 'Yes' : 'No' +summary['Skip Collapsing'] = skip_collapse ? 'Yes' : 'No' +summary['Skip Trimming'] = skip_trim ? 
'Yes' : 'No' summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -579,7 +600,6 @@ process fastp { * STEP 2 - Adapter Clipping / Read Merging */ - process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' @@ -598,20 +618,20 @@ process adapter_removal { script: base = reads[0].baseName - if( !params.singleEnd && !params.skip_collapse && !params.skip_trim){ + if( !params.singleEnd && !skip_collapse && !params.skip_trim && !skip_adapterremoval){ """ AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse #Combine files zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${base}.combined.fq.gz """ - } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) { + } else if (!params.singleEnd && skip_collapse && !params.skip_trim && !skip_adapterremoval) { """ AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} #Rename files mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz """ - } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) { + } else if (!params.singleEnd && !skip_collapse && params.skip_trim && !skip_adapterremoval) { bogus_adaptor = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ AdapterRemoval --file1 ${reads[0]} 
--file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --basename ${base} --collapse --adapter1 $bogus_adaptor --adapter2 $bogus_adaptor @@ -619,19 +639,18 @@ process adapter_removal { mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz """ - } else if (params.singleEnd && params.skip_collapse && params.skip_trim){ + } else if (params.singleEnd && skip_adapterremoval){ """ mv ${reads[0]} ${base}.combined.fq.gz echo "Skipped trimming and merging by AdapterRemoval" """ - } else if (params.pairedEnd && params.skip_collapse && params.skip_trim){ + } else if (params.pairedEnd && skip_adapterremoval){ """ mv ${reads[0]} ${base}.pair1.combined.fq.gz mv ${reads[1]} ${base}.pair2.combined.fq.gz echo "Skipped trimming and merging by AdapterRemoval" """ - } - else { + } else { """ AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} # Pseudo-Combine @@ -640,9 +659,11 @@ process adapter_removal { } } + + /* - * STEP 2.1 - FastQC after clipping/merging (if applied!) - */ +* STEP 2.1 - FastQC after clipping/merging (if applied!) +*/ process fastqc_after_clipping { tag "${prefix}" publishDir "${params.outdir}/FastQC/after_clipping", mode: 'copy', @@ -687,7 +708,7 @@ process bwa { fasta = "${index}/*.fasta" size = "${params.large_ref}" ? '-c' : '' - if (!params.singleEnd && params.skip_collapse ){ + if (!params.singleEnd && skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -755,7 +776,7 @@ process circularmapper{ fasta = "${index}/*_*.fasta" size = "${params.large_ref}" ? 
'-c' : '' - if (!params.singleEnd && params.skip_collapse ){ + if (!params.singleEnd && skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -798,7 +819,7 @@ process bwamem { fasta = "${index}/*.fasta" size = "${params.large_ref}" ? '-c' : '' - if (!params.singleEnd && params.skip_collapse ){ + if (!params.singleEnd && skip_collapse ){ """ bwa mem -t ${task.cpus} $fasta ${reads[0]} ${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam From 2bef839be75de13538d21d7d70a7196770a741fc Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 17:21:24 +0100 Subject: [PATCH 36/56] update travis test --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7fe187219..dab87ae31 100644 --- a/.travis.yml +++ b/.travis.yml @@ -42,8 +42,10 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/ # Run the basic pipeline with paired end data without collapsing - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --saveReference - # Run the basic pipeline with paired end data without collapsing nor trimming - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --skip_trim --saveReference + # Run the basic pipeline with paired end data without trimming + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_trim --saveReference + # Run the basic pipeline with paired end data without adapterRemoval + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_adapterremoval --saveReference # Run the same pipeline testing optional step: fastp, complexity - 
nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/ # Test BAM Trimming From e62b5032be1d65ad3b8354dfebadf00ba0ab669b Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 17:21:52 +0100 Subject: [PATCH 37/56] local executor for skipping AR process --- nextflow.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nextflow.config b/nextflow.config index 150d9bc25..5a701555c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,6 +38,14 @@ params { custom_config_version = 'master' } +if(params.skip_adapterremoval){ + process { + withName: adapter_removal{ + executor = 'local' + } + } +} + // Load base.config by default for all pipelines includeConfig 'conf/base.config' From e807c5859de9edbe6e9c6d3e62f2be4ad1145b98 Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 17:33:29 +0100 Subject: [PATCH 38/56] initialize skip_adapterremoval --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 5a701555c..584710829 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,6 +14,7 @@ params { //Pipeline options aligner = 'bwa' + skip_adapterremoval = false saveReference = false saveTrimmed = true saveAlignedIntermediates = false From 01d056c613b616bbf0d474990696fad727b1309d Mon Sep 17 00:00:00 2001 From: Maxime Date: Mon, 4 Mar 2019 18:26:02 +0100 Subject: [PATCH 39/56] fix bam test --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 81540b346..fd6d1f663 100644 --- a/main.nf +++ b/main.nf @@ -533,7 +533,7 @@ process convertBam { output: set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp) - set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) + file("*.fastq.gz") into 
(ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) script: base = "${bam.baseName}" From a38c5ff8c3895db8a7131a82ae88ee8de7ccbb86 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 21:22:08 +0100 Subject: [PATCH 40/56] Define defaults in nextflow.config --- nextflow.config | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 584710829..86736297f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { //Pipeline options aligner = 'bwa' - skip_adapterremoval = false + saveReference = false saveTrimmed = true saveAlignedIntermediates = false @@ -31,6 +31,11 @@ params { complexity_filter_poly_g_min = 10 trim_bam = false + //Skipping adapterremoval, trimming or collapsing defaults + skip_adapterremoval = false + skip_trim = false + skip_adapterremoval = false + // AWS Batch awsqueue = false awsregion = 'eu-west-1' From 20177fa6d421b1ef966b2eaec8a54129591ab04c Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 21:22:31 +0100 Subject: [PATCH 41/56] Starting to polish PR --- main.nf | 64 ++++++++++++++++++++++----------------------------------- 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/main.nf b/main.nf index fd6d1f663..b38a4ba51 100644 --- a/main.nf +++ b/main.nf @@ -60,7 +60,7 @@ def helpMessage() { --clip_readlength Specify read minimum length to be kept for downstream analysis --clip_min_read_quality Specify minimum base quality for not trimming off bases --min_adap_overlap Specify minimum adapter overlap - --skip_collapse Skip merging Forward and Reverse reads together. (Only for pairedEnd samples) + --skip_collapse Skip merging Forward and Reverse reads together. 
(Only for PE samples) --skip_trim Skip adaptor and quality trimming BWA Mapping @@ -266,17 +266,9 @@ if( params.singleEnd || params.pairedEnd || params.bam){ //Validate that skip_collapse is only set to True for pairedEnd reads! if (params.skip_collapse && params.singleEnd){ - exit 1, "--noCollapse can only be set for pairedEnd samples!" + exit 1, "--skip_collapse can only be set for pairedEnd samples!" } - - -//Skip adapterremoval compatibility with skip_trim and skip_collapse - -skip_collapse = params.skip_collapse -skip_trim = params.skip_trim -skip_adapterremoval = params.skip_adapterremoval - if (params.skip_collapse && params.skip_trim){ skip_adapterremoval = true } @@ -372,8 +364,8 @@ summary['Fasta Ref'] = params.fasta summary['BAM Index Type'] = (params.large_ref == "") ? 'BAI' : 'CSI' if(params.bwa_index) summary['BWA Index'] = params.bwa_index summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Skip Collapsing'] = skip_collapse ? 'Yes' : 'No' -summary['Skip Trimming'] = skip_trim ? 'Yes' : 'No' +summary['Skip Collapsing'] = params.skip_collapse ? 'Yes' : 'No' +summary['Skip Trimming'] = params.skip_trim ? 'Yes' : 'No' summary['Max Memory'] = params.max_memory summary['Max CPUs'] = params.max_cpus summary['Max Time'] = params.max_time @@ -604,57 +596,49 @@ process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' - echo true - - when: !params.bam + when: !params.bam && !params.skip_adapterremoval input: set val(name), file(reads) from ( params.complexity_filter_poly_g ? 
ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip ) output: - file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) - file("*.settings") optional true into ch_adapterremoval_logs + file "output/*.fq.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) + file("*.settings") into ch_adapterremoval_logs script: base = reads[0].baseName - if( !params.singleEnd && !skip_collapse && !params.skip_trim && !skip_adapterremoval){ + //PE, collapse & trim reads + if( !params.singleEnd && !params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval){ """ + mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse #Combine files - zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > ${base}.combined.fq.gz + zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > output/${base}.combined.fq.gz """ - } else if (!params.singleEnd && skip_collapse && !params.skip_trim && !skip_adapterremoval) { + //PE, don't collapse, but trim reads + } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval) { """ + mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap 
${params.min_adap_overlap} - #Rename files - mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz - mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz + mv ${base}.pair*.truncated.gz output/ """ - } else if (!params.singleEnd && !skip_collapse && params.skip_trim && !skip_adapterremoval) { + //PE, collapse, but don't trim reads + } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim && !params.skip_adapterremoval) { bogus_adaptor = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ + mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --basename ${base} --collapse --adapter1 $bogus_adaptor --adapter2 $bogus_adaptor - #Rename files - mv ${base}.pair1.truncated.gz ${base}.pair1.combined.fq.gz - mv ${base}.pair2.truncated.gz ${base}.pair2.combined.fq.gz - """ - } else if (params.singleEnd && skip_adapterremoval){ - """ - mv ${reads[0]} ${base}.combined.fq.gz - echo "Skipped trimming and merging by AdapterRemoval" - """ - } else if (params.pairedEnd && skip_adapterremoval){ - """ - mv ${reads[0]} ${base}.pair1.combined.fq.gz - mv ${reads[1]} ${base}.pair2.combined.fq.gz - echo "Skipped trimming and merging by AdapterRemoval" + + mv ${base}.pair*.truncated.gz output/ """ } else { + //SE, collapse not possible, trim reads """ + mkdir -p output AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} - # Pseudo-Combine - mv *.truncated.gz ${base}.combined.fq.gz + + mv *.truncated.gz output/ """ } } From f9b79c2e47a70ab4cf459231effcf3944771bdc9 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 21:49:43 +0100 Subject: [PATCH 42/56] Some more fixes for channel handling --- main.nf | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/main.nf 
b/main.nf index b38a4ba51..8bfa5049a 100644 --- a/main.nf +++ b/main.nf @@ -270,13 +270,13 @@ if (params.skip_collapse && params.singleEnd){ } if (params.skip_collapse && params.skip_trim){ - skip_adapterremoval = true + params.skip_adapterremoval = true } if (params.skip_adapterremoval){ - skip_adapterremoval = true - skip_collapse = true - skip_trim = true + params.skip_adapterremoval = true + params.skip_collapse = true + params.skip_trim = true } @@ -591,7 +591,10 @@ process fastp { /* * STEP 2 - Adapter Clipping / Read Merging */ - +//Initialize empty channel if we skip adapterremoval entirely +if(params.skip_adapterremoval) { + ch_clipped_reads = Channel.empty() +} process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' @@ -602,7 +605,7 @@ process adapter_removal { set val(name), file(reads) from ( params.complexity_filter_poly_g ? ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip ) output: - file "output/*.fq.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) + set val(base), file "output/*.gz" into (ch_clipped_reads,ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) file("*.settings") into ch_adapterremoval_logs script: @@ -653,7 +656,7 @@ process fastqc_after_clipping { publishDir "${params.outdir}/FastQC/after_clipping", mode: 'copy', saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"} - when: !params.bam + when: !params.bam && !params.skip_adapterremoval input: file(reads) from ch_clipped_reads_for_fastqc @@ -679,7 +682,8 @@ process bwa { when: !params.circularmapper && !params.bwamem input: - file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa) + set val(name), file(reads) from ( params.skip_adapterremoval ? 
ch_read_files_clip : ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa) ) + file index from ch_bwa_index.first() @@ -692,7 +696,8 @@ process bwa { fasta = "${index}/*.fasta" size = "${params.large_ref}" ? '-c' : '' - if (!params.singleEnd && skip_collapse ){ + //PE data without merging, PE data without any AR applied + if (!params.singleEnd && (params.skip_collapse || params.skip_adapterremoval)){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai @@ -701,6 +706,7 @@ process bwa { samtools index "${size}" "${prefix}".sorted.bam """ } else { + //PE collapsed, or SE data prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai @@ -747,7 +753,7 @@ process circularmapper{ when: params.circularmapper input: - file reads from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm) + set val(name), file reads from (params.skip_adapterremoval ? ch_clipped_reads : ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm) ) file index from ch_circularmapper_indices.first() output: @@ -790,7 +796,7 @@ process bwamem { when: params.bwamem && !params.circularmapper input: - file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem) + set val(name), file(reads) from (params.skip_adapterremoval ? 
ch_clipped_reads : ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem) ) file index from ch_bwa_index_bwamem.first() output: From 5caefbfe758ad483bc8fd4bc36b6ffe7e2803ecd Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 22:23:16 +0100 Subject: [PATCH 43/56] Fix being able to handle modern data too --- main.nf | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/main.nf b/main.nf index 8bfa5049a..7c755d9f1 100644 --- a/main.nf +++ b/main.nf @@ -269,17 +269,6 @@ if (params.skip_collapse && params.singleEnd){ exit 1, "--skip_collapse can only be set for pairedEnd samples!" } -if (params.skip_collapse && params.skip_trim){ - params.skip_adapterremoval = true -} - -if (params.skip_adapterremoval){ - params.skip_adapterremoval = true - params.skip_collapse = true - params.skip_trim = true -} - - //AWSBatch sanity checking if(workflow.profile == 'awsbatch'){ if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" @@ -593,8 +582,11 @@ process fastp { */ //Initialize empty channel if we skip adapterremoval entirely if(params.skip_adapterremoval) { - ch_clipped_reads = Channel.empty() -} + //No logs if no AR is run + ch_adapterremoval_logs = Channel.empty() + //Either coming from complexity filtering, or directly use reads normally directed to clipping first and push them through to the other channels downstream! + ch_clipped_reads_complexity_filtered_poly_g.mix(ch_read_files_clip).into { ch_clipped_reads;ch_clipped_reads_for_fastqc;ch_clipped_reads_circularmapper;ch_clipped_reads_bwamem } +} else { process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' @@ -605,7 +597,7 @@ process adapter_removal { set val(name), file(reads) from ( params.complexity_filter_poly_g ? 
ch_clipped_reads_complexity_filtered_poly_g : ch_read_files_clip ) output: - set val(base), file "output/*.gz" into (ch_clipped_reads,ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) + set val(base), file("output/*.gz") into (ch_clipped_reads,ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) file("*.settings") into ch_adapterremoval_logs script: @@ -645,6 +637,7 @@ process adapter_removal { """ } } +} @@ -659,7 +652,7 @@ process fastqc_after_clipping { when: !params.bam && !params.skip_adapterremoval input: - file(reads) from ch_clipped_reads_for_fastqc + set val(name), file(reads) from ch_clipped_reads_for_fastqc output: file "*_fastqc.{zip,html}" optional true into ch_fastqc_after_clipping @@ -682,7 +675,7 @@ process bwa { when: !params.circularmapper && !params.bwamem input: - set val(name), file(reads) from ( params.skip_adapterremoval ? ch_read_files_clip : ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa) ) + set val(name), file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa) file index from ch_bwa_index.first() @@ -753,7 +746,7 @@ process circularmapper{ when: params.circularmapper input: - set val(name), file reads from (params.skip_adapterremoval ? ch_clipped_reads : ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm) ) + set val(name), file(reads) from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm) file index from ch_circularmapper_indices.first() output: @@ -796,7 +789,7 @@ process bwamem { when: params.bwamem && !params.circularmapper input: - set val(name), file(reads) from (params.skip_adapterremoval ? 
ch_clipped_reads : ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem) ) + set val(name), file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem) file index from ch_bwa_index_bwamem.first() output: From 48075bfd94c346c06897938032e03a533a2f65cd Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 4 Mar 2019 22:36:11 +0100 Subject: [PATCH 44/56] Remove if clause here --- nextflow.config | 7 ------- 1 file changed, 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index 86736297f..9fcbb2404 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,13 +44,6 @@ params { custom_config_version = 'master' } -if(params.skip_adapterremoval){ - process { - withName: adapter_removal{ - executor = 'local' - } - } -} // Load base.config by default for all pipelines includeConfig 'conf/base.config' From 990140c14316fd8e47dfc961040211e01bebb32d Mon Sep 17 00:00:00 2001 From: phue Date: Tue, 5 Mar 2019 08:45:41 +0100 Subject: [PATCH 45/56] Update main.nf Co-Authored-By: apeltzer --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 7c755d9f1..05d69596f 100644 --- a/main.nf +++ b/main.nf @@ -60,7 +60,7 @@ def helpMessage() { --clip_readlength Specify read minimum length to be kept for downstream analysis --clip_min_read_quality Specify minimum base quality for not trimming off bases --min_adap_overlap Specify minimum adapter overlap - --skip_collapse Skip merging Forward and Reverse reads together. (Only for PE samples) + --skip_collapse Skip merging forward and reverse reads together. 
(Only for PE samples) --skip_trim Skip adaptor and quality trimming BWA Mapping From 9f45104f29d28e66a4f9ee4fb4de614c6d4b03dc Mon Sep 17 00:00:00 2001 From: phue Date: Tue, 5 Mar 2019 08:45:48 +0100 Subject: [PATCH 46/56] Update main.nf Co-Authored-By: apeltzer --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 05d69596f..b833a1cd7 100644 --- a/main.nf +++ b/main.nf @@ -604,7 +604,7 @@ process adapter_removal { base = reads[0].baseName //PE, collapse & trim reads - if( !params.singleEnd && !params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval){ + if (!params.singleEnd && !params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval){ """ mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse From 68fa27df622c67b1483de2e058a967fb9c47ac8d Mon Sep 17 00:00:00 2001 From: phue Date: Tue, 5 Mar 2019 08:45:58 +0100 Subject: [PATCH 47/56] Update main.nf Co-Authored-By: apeltzer --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index b833a1cd7..2193d3f10 100644 --- a/main.nf +++ b/main.nf @@ -802,7 +802,7 @@ process bwamem { fasta = "${index}/*.fasta" size = "${params.large_ref}" ? 
'-c' : '' - if (!params.singleEnd && skip_collapse ){ + if (!params.singleEnd && params.skip_collapse){ """ bwa mem -t ${task.cpus} $fasta ${reads[0]} ${reads[1]} -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam samtools index "${size}" -@ ${task.cpus} "${prefix}".sorted.bam From b2b9189de3dc30d15ce67c691363c47a8b1e4783 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 08:47:16 +0100 Subject: [PATCH 48/56] Fix remaining issue with circularmapper --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 2193d3f10..ac5ca98ad 100644 --- a/main.nf +++ b/main.nf @@ -759,7 +759,7 @@ process circularmapper{ fasta = "${index}/*_*.fasta" size = "${params.large_ref}" ? '-c' : '' - if (!params.singleEnd && skip_collapse ){ + if (!params.singleEnd && params.skip_collapse ){ prefix = reads[0].toString().tokenize('.')[0] """ bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai From 5b924db9fd7fe4d98a5826dc2fa87ddd74b5e9b9 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 10:42:09 +0100 Subject: [PATCH 49/56] Fix cardinality issues --- main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/main.nf b/main.nf index ac5ca98ad..59dfabd46 100644 --- a/main.nf +++ b/main.nf @@ -513,8 +513,7 @@ process convertBam { file bam from ch_bam_to_fastq_convert output: - set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp) - file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) + set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp, ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) script: 
base = "${bam.baseName}" From 712535fa18b8c4d391a6baeb34be315f19f0f6c5 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 12:38:34 +0100 Subject: [PATCH 50/56] No need to check for AR skipping here --- main.nf | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 59dfabd46..203889f92 100644 --- a/main.nf +++ b/main.nf @@ -601,9 +601,11 @@ process adapter_removal { script: base = reads[0].baseName + //This checks whether we skip trimming and defines a variable respectively + trim_me = params.skip_trim ? '' : '' //PE, collapse & trim reads - if (!params.singleEnd && !params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval){ + if (!params.singleEnd && !params.skip_collapse && !params.skip_trim){ """ mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse @@ -611,14 +613,14 @@ process adapter_removal { zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > output/${base}.combined.fq.gz """ //PE, don't collapse, but trim reads - } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim && !params.skip_adapterremoval) { + } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) { """ mkdir -p output AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} mv ${base}.pair*.truncated.gz output/ """ //PE, 
collapse, but don't trim reads - } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim && !params.skip_adapterremoval) { + } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) { bogus_adaptor = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ mkdir -p output From f6b7313c14bd90fd8d327e35eb3a6f2764085e93 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 12:51:40 +0100 Subject: [PATCH 51/56] Polish PR even further --- main.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/main.nf b/main.nf index 203889f92..5b807c8ef 100644 --- a/main.nf +++ b/main.nf @@ -602,13 +602,14 @@ process adapter_removal { script: base = reads[0].baseName //This checks whether we skip trimming and defines a variable respectively - trim_me = params.skip_trim ? '' : '' + trim_me = params.skip_trim ? '' : "--trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap}" + collapse_me = params.skip_collapse ? 
'' : '--collapse' - //PE, collapse & trim reads + //PE mode, dependent on trim_me and collapse_me the respective procedure is run or not :-) if (!params.singleEnd && !params.skip_collapse && !params.skip_trim){ """ mkdir -p output - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} --collapse + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} ${trim_me} --gzip --threads ${task.cpus} ${collapse_me} #Combine files zcat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz | gzip > output/${base}.combined.fq.gz """ @@ -616,15 +617,14 @@ process adapter_removal { } else if (!params.singleEnd && params.skip_collapse && !params.skip_trim) { """ mkdir -p output - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me} ${collapse_me} mv ${base}.pair*.truncated.gz output/ """ //PE, collapse, but don't trim reads } else if (!params.singleEnd && !params.skip_collapse && params.skip_trim) { - bogus_adaptor = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" """ mkdir -p output - AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename ${base} --gzip --threads ${task.cpus} --basename ${base} --collapse --adapter1 $bogus_adaptor --adapter2 $bogus_adaptor + AdapterRemoval --file1 ${reads[0]} --file2 ${reads[1]} --basename 
${base} --gzip --threads ${task.cpus} --basename ${base} ${collapse_me} ${trim_me} mv ${base}.pair*.truncated.gz output/ """ @@ -632,7 +632,7 @@ process adapter_removal { //SE, collapse not possible, trim reads """ mkdir -p output - AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} + AdapterRemoval --file1 ${reads[0]} --basename ${base} --gzip --threads ${task.cpus} ${trim_me} mv *.truncated.gz output/ """ From cb72002746969c0ca348dd14a9470108b55b044d Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 12:55:47 +0100 Subject: [PATCH 52/56] Typos fixed in usage --- docs/usage.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 2d78d15d0..5a848760b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -291,8 +291,7 @@ Turns off the computation of library complexity estimation. ### `--skip_adapterremoval` -Turns off adaptor trimming and paired-end read merging. -Equivalent to setting both `--skip_collapse` and `--skip_trim` +Turns off adaptor trimming and paired-end read merging. Equivalent to setting both `--skip_collapse` and `--skip_trim`. ### `--skip_damage_calculation` @@ -340,7 +339,7 @@ Sets the minimum overlap between two reads when read merging is performed. Defau ### `--skip_collapse` -Turns off the paired-end read merging. +Turns off the paired-end read merging. For example ```bash @@ -349,9 +348,9 @@ For example ### `--skip_trim` -Turns off the adaptor and quality trimming. +Turns off adaptor and quality trimming. 
-For example +For example: ```bash --pairedEnd --skip_trim --reads '*.fastq' ``` From 2099a36573407a6acc115ca90b3488371990130a Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 12:57:20 +0100 Subject: [PATCH 53/56] Add proper changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 661f73ea6..e281225a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#152](https://github.com/nf-core/eager/pull/152) - Clarified `--complexity_filter` flag to be specifically for poly G trimming. * [#155](https://github.com/nf-core/eager/pull/155) - Added [Dedup log to output folders](https://github.com/nf-core/eager/issues/154) +* [#159](https://github.com/nf-core/eager/pull/159) - Added Possibility to skip AdapterRemoval, skip merging, skip trimming fixing [#64](https://github.com/nf-core/eager/issues/64),[#137](https://github.com/nf-core/eager/issues/137) - thanks to @maxibor, @jfy133 ### `Fixed` From 4dad40ecd826c3e18a03e2d09f67d3c887778653 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 22:34:18 +0100 Subject: [PATCH 54/56] PR for release 2.0.6 --- .travis.yml | 2 +- CHANGELOG.md | 11 +++++++++-- Dockerfile | 2 +- Singularity | 4 ++-- environment.yml | 8 ++++---- nextflow.config | 4 ++-- 6 files changed, 19 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index dab87ae31..e165c7afd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ before_install: # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/eager:dev # Fake the tag locally so that the pipeline runs properly - - docker tag nfcore/eager:dev nfcore/eager:2.0.5 + - docker tag nfcore/eager:dev nfcore/eager:2.0.6 install: # Install Nextflow diff --git a/CHANGELOG.md b/CHANGELOG.md index e281225a3..967aa4466 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ and 
this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unpublished / Dev Branch] +## [2.0.6] - 2019-03-05 + ### `Added` * [#152](https://github.com/nf-core/eager/pull/152) - Clarified `--complexity_filter` flag to be specifically for poly G trimming. @@ -18,6 +20,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#147](https://github.com/nf-core/eager/pull/147) - Fix Samtools Index for [large references](https://github.com/nf-core/eager/issues/146) * [#145](https://github.com/nf-core/eager/pull/145) - Added Picard Memory Handling [fix](https://github.com/nf-core/eager/issues/144) +### `Dependencies` +* Picard Tools 2.18.23 -> 2.18.27 +* GATK 4.0.12.0 -> 4.1.0.0 +* FastP 0.19.6 -> 0.19.7 + ## [2.0.5] - 2019-01-28 ### `Added` @@ -30,8 +37,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Dependencies` * Picard Tools 2.18.21 -> 2.18.23 -* R-Markdown 1.10 -> 1.11 -* FastP 0.19.5 -> 0.19.6 +* R-Markdown 1.10 -> 1.11 +* FastP 0.19.5 -> 0.19.6 ## [2.0.4] - 2019-01-09 diff --git a/Dockerfile b/Dockerfile index 396593bd5..5a3050fff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,4 +3,4 @@ FROM nfcore/base LABEL description="Docker image containing all requirements for nf-core/eager pipeline" COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-eager-2.0.5/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-eager-2.0.6/bin:$PATH diff --git a/Singularity b/Singularity index eb302a82d..43d148bb9 100644 --- a/Singularity +++ b/Singularity @@ -4,10 +4,10 @@ Bootstrap:docker %labels MAINTAINER Alexander Peltzer DESCRIPTION Container image containing all requirements for the nf-core/eager pipeline - VERSION 2.0.5 + VERSION 2.0.6 %environment - PATH=/opt/conda/envs/nf-core-eager-2.0.5/bin:$PATH + PATH=/opt/conda/envs/nf-core-eager-2.0.6/bin:$PATH export PATH %files diff --git a/environment.yml b/environment.yml index 
9a191fefe..c8c0338c9 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: nf-core-eager-2.0.5 +name: nf-core-eager-2.0.6 channels: - defaults - bioconda @@ -9,12 +9,12 @@ dependencies: - bioconda::adapterremoval=2.2.2 - bioconda::adapterremovalfixprefix=0.0.4 - bioconda::bwa=0.7.17 - - bioconda::picard=2.18.23 + - bioconda::picard=2.18.27 - bioconda::samtools=1.9 - bioconda::dedup=0.12.3 - bioconda::angsd=0.923 - bioconda::circularmapper=1.93.4 - - bioconda::gatk4=4.0.12.0 + - bioconda::gatk4=4.1.0.0 - bioconda::qualimap=2.2.2b - bioconda::vcf2genome=0.91 - bioconda::damageprofiler=0.4.4 @@ -25,6 +25,6 @@ dependencies: - conda-forge::pigz=2.3.4 - bioconda::sequencetools=1.2.2 - bioconda::preseq=2.0.3 - - bioconda::fastp=0.19.6 + - bioconda::fastp=0.19.7 - bioconda::bamutil=1.0.14 #Missing Schmutzi,snpAD diff --git a/nextflow.config b/nextflow.config index 9fcbb2404..170e234e8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,7 +10,7 @@ // Global default params, used in configs params { - container = 'nfcore/eager:2.0.5' + container = 'nfcore/eager:2.0.6' //Pipeline options aligner = 'bwa' @@ -95,7 +95,7 @@ manifest { name = 'nf-core/eager' author = 'Alexander Peltzer, Stephen Clayton, James A Fellows-Yates' homePage = 'https://github.com/nf-core/eager' - version = '2.0.5' + version = '2.0.6' description = 'A fully reproducible and modern ancient DNA pipeline in Nextflow and with cloud support.' mainScript = 'main.nf' nextflowVersion = '>=0.32.0' From b493e12a591731d986f15a2125ff735e1c4d68a0 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 22:37:11 +0100 Subject: [PATCH 55/56] Missing some details --- CHANGELOG.md | 2 -- README.md | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 967aa4466..345f5ada3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,6 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [Unpublished / Dev Branch] - ## [2.0.6] - 2019-03-05 ### `Added` diff --git a/README.md b/README.md index 3485ac55c..df99db945 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,7 @@ do so and everyone is welcome to contribute here! - [James A. Fellows-Yates](https://github.com/jfy133) - [Stephen Clayton](https://github.com/sc13-bioinf) +- [Maxime Borry](https://github.com/maxibor) - [Judith Neukamm](https://github.com/JudithNeukamm) - [Raphael Eisenhofer](https://github.com/EisenRa) - [Maxime Garcia](https://github.com/MaxUlysse) From 3f9d682032f9ff06af132ea60a98fa1553ec601d Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Mar 2019 23:35:46 +0100 Subject: [PATCH 56/56] Fix custom config version --- nextflow.config | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 170e234e8..539785beb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,7 @@ params { igenomesIgnore = false custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" } @@ -49,7 +50,11 @@ params { includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions -includeConfig "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}/nfcore_custom.config" +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} profiles { awsbatch { includeConfig 'conf/awsbatch.config' }