diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 7a38bd0..dccc7cf 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -2,53 +2,53 @@ name: Bug report
description: Report something that is broken or incorrect
labels: bug
body:
-- type: textarea
- id: description
- attributes:
- label: Description of the bug
- description: A clear and concise description of what the bug is.
- validations:
- required: true
-- type: textarea
- id: command_used
- attributes:
- label: Command used and terminal output
- description: Steps to reproduce the behaviour. Please paste the command you used
- to launch the pipeline and the output from your terminal.
- render: console
- placeholder: '$ nextflow run ...
-
-
- Some output where something broke
-
- '
-- type: textarea
- id: files
- attributes:
- label: Relevant files
- description: 'Please drag and drop the relevant files here. Create a `.zip` archive
- if the extension is not allowed.
-
- Your verbose log file `.nextflow.log` is often useful _(this is a hidden file
- in the directory where you launched the pipeline)_ as well as custom Nextflow
- configuration files.
-
- '
-- type: textarea
- id: system
- attributes:
- label: System information
- description: '* Nextflow version _(eg. 23.04.0)_
-
- * Hardware _(eg. HPC, Desktop, Cloud)_
-
- * Executor _(eg. slurm, local, awsbatch)_
-
- * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud,
- or Apptainer)_
-
- * OS _(eg. CentOS Linux, macOS, Linux Mint)_
-
- * Version of mskcc/sif _(eg. 1.1, 1.5, 1.8.2)_
-
- '
+ - type: textarea
+ id: description
+ attributes:
+ label: Description of the bug
+ description: A clear and concise description of what the bug is.
+ validations:
+ required: true
+ - type: textarea
+ id: command_used
+ attributes:
+ label: Command used and terminal output
+ description: Steps to reproduce the behaviour. Please paste the command you used
+ to launch the pipeline and the output from your terminal.
+ render: console
+ placeholder: "$ nextflow run ...
+
+
+ Some output where something broke
+
+ "
+ - type: textarea
+ id: files
+ attributes:
+ label: Relevant files
+ description: "Please drag and drop the relevant files here. Create a `.zip` archive
+ if the extension is not allowed.
+
+ Your verbose log file `.nextflow.log` is often useful _(this is a hidden file
+ in the directory where you launched the pipeline)_ as well as custom Nextflow
+ configuration files.
+
+ "
+ - type: textarea
+ id: system
+ attributes:
+ label: System information
+ description: "* Nextflow version _(eg. 23.04.0)_
+
+ * Hardware _(eg. HPC, Desktop, Cloud)_
+
+ * Executor _(eg. slurm, local, awsbatch)_
+
+ * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud,
+ or Apptainer)_
+
+ * OS _(eg. CentOS Linux, macOS, Linux Mint)_
+
+ * Version of mskcc/sif _(eg. 1.1, 1.5, 1.8.2)_
+
+ "
diff --git a/.nf-core.yml b/.nf-core.yml
index d35f6c0..4d32798 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,23 +1,23 @@
lint:
files_exist:
- - CODE_OF_CONDUCT.md
- - assets/nf-core-sif_logo_light.png
- - docs/images/nf-core-sif_logo_light.png
- - docs/images/nf-core-sif_logo_dark.png
- - .github/ISSUE_TEMPLATE/config.yml
- - .github/workflows/awstest.yml
- - .github/workflows/awsfulltest.yml
+ - CODE_OF_CONDUCT.md
+ - assets/nf-core-sif_logo_light.png
+ - docs/images/nf-core-sif_logo_light.png
+ - docs/images/nf-core-sif_logo_dark.png
+ - .github/ISSUE_TEMPLATE/config.yml
+ - .github/workflows/awstest.yml
+ - .github/workflows/awsfulltest.yml
files_unchanged:
- - CODE_OF_CONDUCT.md
- - assets/nf-core-sif_logo_light.png
- - docs/images/nf-core-sif_logo_light.png
- - docs/images/nf-core-sif_logo_dark.png
- - .github/ISSUE_TEMPLATE/bug_report.yml
+ - CODE_OF_CONDUCT.md
+ - assets/nf-core-sif_logo_light.png
+ - docs/images/nf-core-sif_logo_light.png
+ - docs/images/nf-core-sif_logo_dark.png
+ - .github/ISSUE_TEMPLATE/bug_report.yml
multiqc_config:
- - report_comment
+ - report_comment
nextflow_config:
- - manifest.name
- - manifest.homePage
+ - manifest.name
+ - manifest.homePage
repository_type: pipeline
template:
prefix: mskcc
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 658754f..bf90038 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,8 +1,8 @@
report_comment: >
-
+
This report has been generated by the mskcc/sif
analysis pipeline.
-
+
report_section_order:
"mskcc-sif-methods-description":
order: -1000
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 4a758fe..8e44707 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -24,39 +24,32 @@ class RowChecker:
"""
- VALID_FORMATS = (
- ".fq.gz",
- ".fastq.gz",
- )
+ VALID_FORMATS = ".bam"
def __init__(
self,
- sample_col="sample",
- first_col="fastq_1",
- second_col="fastq_2",
- single_col="single_end",
+ pairId="pairId",
+ tumorBam="tumorBam",
+ normalBam="normalBam",
+ assay="assay",
+ normalType="normalType",
+ bedFile="bedFile",
**kwargs,
):
"""
Initialize the row checker with the expected column names.
Args:
- sample_col (str): The name of the column that contains the sample name
- (default "sample").
- first_col (str): The name of the column that contains the first (or only)
- FASTQ file path (default "fastq_1").
- second_col (str): The name of the column that contains the second (if any)
- FASTQ file path (default "fastq_2").
- single_col (str): The name of the new column that will be inserted and
- records whether the sample contains single- or paired-end sequencing
- reads (default "single_end").
+            pairId (str): The name of the column that contains the tumor/normal pair ID
+                (default "pairId").
+            tumorBam (str): The name of the column that contains the tumor BAM path
+                (default "tumorBam").
+            normalBam (str): The name of the column that contains the normal BAM path
+                (default "normalBam").
+            assay (str): The name of the column that contains the assay / bait set
+                (default "assay").
+            normalType (str): The name of the column that contains the normal type
+                (default "normalType").
+            bedFile (str): The name of the column that contains an optional BED file path
+                (default "bedFile").
"""
super().__init__(**kwargs)
- self._sample_col = sample_col
- self._first_col = first_col
- self._second_col = second_col
- self._single_col = single_col
+ self._pairId = pairId
+ self._tumorBam = tumorBam
+ self._normalBam = normalBam
+ self._assay = assay
+ self._normalType = normalType
+ self._bedFile = bedFile
self._seen = set()
self.modified = []
@@ -69,65 +62,53 @@ def validate_and_transform(self, row):
(values).
"""
- self._validate_sample(row)
- self._validate_first(row)
- self._validate_second(row)
- self._validate_pair(row)
- self._seen.add((row[self._sample_col], row[self._first_col]))
+ self._validate_names(row)
+ self._validate_bams(row)
+ self._validate_normalType(row)
+ self._validate_bed_format(row)
+ self._seen.add((row[self._pairId]))
self.modified.append(row)
- def _validate_sample(self, row):
- """Assert that the sample name exists and convert spaces to underscores."""
- if len(row[self._sample_col]) <= 0:
- raise AssertionError("Sample input is required.")
- # Sanitize samples slightly.
- row[self._sample_col] = row[self._sample_col].replace(" ", "_")
+ def _validate_names(self, row):
+ """Assert that the sample names exist"""
+ if len(row[self._pairId]) <= 0:
+ raise AssertionError("pairId is required.")
+
+ def _validate_pairId_format(self, row):
+ id_value = row[self._pairId]
+ if "." in id_value:
+ raise AssertionError("pairId:{} cannot contain any periods ('.') ".format(id_value))
- def _validate_first(self, row):
+ def _validate_bams(self, row):
"""Assert that the first FASTQ entry is non-empty and has the right format."""
- if len(row[self._first_col]) <= 0:
- raise AssertionError("At least the first FASTQ file is required.")
- self._validate_fastq_format(row[self._first_col])
-
- def _validate_second(self, row):
- """Assert that the second FASTQ entry has the right format if it exists."""
- if len(row[self._second_col]) > 0:
- self._validate_fastq_format(row[self._second_col])
-
- def _validate_pair(self, row):
- """Assert that read pairs have the same file extension. Report pair status."""
- if row[self._first_col] and row[self._second_col]:
- row[self._single_col] = False
- first_col_suffix = Path(row[self._first_col]).suffixes[-2:]
- second_col_suffix = Path(row[self._second_col]).suffixes[-2:]
- if first_col_suffix != second_col_suffix:
- raise AssertionError("FASTQ pairs must have the same file extensions.")
- else:
- row[self._single_col] = True
-
- def _validate_fastq_format(self, filename):
+ if len(row[self._tumorBam]) <= 0 or len(row[self._normalBam]) <= 0:
+ raise AssertionError("Both bam files are required.")
+ self._validate_bam_format(row[self._tumorBam])
+ self._validate_bam_format(row[self._normalBam])
+
+ def _validate_normalType(self, row):
+ """Assert that bait set exists."""
+ if len(row[self._normalType]) <= 0:
+ raise AssertionError("normalType is required.")
+
+ def _validate_bam_format(self, filename):
"""Assert that a given filename has one of the expected FASTQ extensions."""
if not any(filename.endswith(extension) for extension in self.VALID_FORMATS):
raise AssertionError(
- f"The FASTQ file has an unrecognized extension: {filename}\n"
+ f"The BAM file has an unrecognized extension: {filename}\n"
f"It should be one of: {', '.join(self.VALID_FORMATS)}"
)
- def validate_unique_samples(self):
- """
- Assert that the combination of sample name and FASTQ filename is unique.
-
- In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the
- number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment.
-
- """
- if len(self._seen) != len(self.modified):
- raise AssertionError("The pair of sample name and FASTQ must be unique.")
- seen = Counter()
- for row in self.modified:
- sample = row[self._sample_col]
- seen[sample] += 1
- row[self._sample_col] = f"{sample}_T{seen[sample]}"
+ def _validate_bed_format(self, row):
+ """Assert that a given filename has one of the expected BED extensions."""
+ filename = row[self._bedFile]
+ if filename and filename != "NONE":
+ if not filename.endswith(".bed"):
+ raise AssertionError(
+ f"The BED file has an unrecognized extension: {filename}\n"
+ f"It should be .bed\n"
+ f"If you would like one generated for you leave it bank or enter 'NONE'\n"
+ )
def read_head(handle, num_lines=10):
@@ -164,10 +145,9 @@ def sniff_format(handle):
def check_samplesheet(file_in, file_out):
"""
- Check that the tabular samplesheet has the structure expected by nf-core pipelines.
+ Check that the tabular samplesheet has the structure expected by the ODIN pipeline.
- Validate the general shape of the table, expected columns, and each row. Also add
- an additional column which records whether one or two FASTQ reads were found.
+ Validate the general shape of the table, expected columns, and each row.
Args:
file_in (pathlib.Path): The given tabular samplesheet. The format can be either
@@ -179,19 +159,14 @@ def check_samplesheet(file_in, file_out):
This function checks that the samplesheet follows the following structure,
see also the `viral recon samplesheet`_::
- sample,fastq_1,fastq_2
- SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz
- SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz
- SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,
-
- .. _viral recon samplesheet:
- https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
+ pairId,tumorBam,normalBam,assay,normalType,bedFile
+ SAMPLE_TUMOR.SAMPLE_NORMAL,BAM_TUMOR,BAM_NORMAL,BAITS,NORMAL_TYPE,BED_FILE
"""
- required_columns = {"sample", "fastq_1", "fastq_2"}
+ required_columns = {"pairId", "tumorBam", "normalBam", "assay", "normalType", "bedFile"}
# See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
with file_in.open(newline="") as in_handle:
- reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle))
+ reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle), delimiter=",")
# Validate the existence of the expected header columns.
if not required_columns.issubset(reader.fieldnames):
req_cols = ", ".join(required_columns)
@@ -205,9 +180,7 @@ def check_samplesheet(file_in, file_out):
except AssertionError as error:
logger.critical(f"{str(error)} On line {i + 2}.")
sys.exit(1)
- checker.validate_unique_samples()
header = list(reader.fieldnames)
- header.insert(1, "single_end")
# See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
with file_out.open(mode="w", newline="") as out_handle:
writer = csv.DictWriter(out_handle, header, delimiter=",")
diff --git a/bin/concat_with_comments.sh b/bin/concat_with_comments.sh
new file mode 100755
index 0000000..74ce43f
--- /dev/null
+++ b/bin/concat_with_comments.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+set -eux
+
+# This script will concatenate table files, preserving the unique comment lines from all input files and adding a new comment line
+#
+# USAGE:
+# $ concat_with_comments.sh comment_label comment_value output.txt input1.txt input2.txt ... inputn.txt
+#
+# EXAMPLE:
+# $ bin/concat_with_comments.sh helix_filters_01 concat-with-comments-0-ga478e4e output.txt ../test_data/maf/*.muts.maf
+#
+# EXTENDED EXAMPLE:
+# $ cat input1.txt
+# # comment 1
+# HEADER
+# foo1
+# bar1
+#
+# $ cat input2.txt
+# # comment 2
+# HEADER
+# foo2
+# bar2
+#
+# $ bin/concat_with_comments.sh label value output.txt input1.txt input2.txt
+#
+# $ cat output.txt
+# # comment 1
+# # comment 2
+# #label: value
+# HEADER
+# foo1
+# bar1
+# foo2
+# bar2
+
+comment_key="${1}"
+comment_value="${2}"
+output_file="${3}"
+shift
+shift
+shift
+
+# all the remaining args should be filenames
+input_files=( "$@" )
+# echo ${input_files[@]}
+
+# get the unique header lines from all files
+# NOTE: grep exits with code 1 if no comments are found
+if grep -q '#' ${input_files[@]}; then
+ printf "%s\n" "$(grep --no-filename '#' ${input_files[@]} | sort -u)" > "$output_file"
+fi
+
+# make new comment line
+new_comment_line="#${comment_key}: ${comment_value}"
+echo "${new_comment_line}" >> "$output_file"
+
+# turn off set -e; grep's non-zero exit status when it finds no matches seems to cause problems when the input files lack comments
+set +e
+
+# add the header line from the first file
+grep -v '#' ${input_files[0]} | head -1 >> "$output_file"
+
+# get all the non-comment, non-header lines from all files
+for i in ${input_files[@]}; do
+ grep -v '#' "$i" | tail -n +2 >> "$output_file"
+done
diff --git a/bin/format_maf.sh b/bin/format_maf.sh
new file mode 100755
index 0000000..692ae6f
--- /dev/null
+++ b/bin/format_maf.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+
+#USAGE: format_maf.sh [prefix] [input_maf]
+
+## Remove comments
+
+grep \
+ '^[^#;]' \
+ $2 \
+ > \
+ $1.grepped.txt
+
+## Extract columns
+
+awk \
+ -F "\t" \
+ 'NR==1 { for(i=1;i<=NF;i++) \
+ { \
+ f[$i]=i \
+ } \
+ print "Hugo_Symbol\tEntrez_Gene_Id\tCenter\tTumor_Sample_Barcode\tFusion\tMethod\tFrame" \
+ } \
+ NR>1 \
+ { \
+ print $(f["Hugo_Symbol"])"\t"$(f["Entrez_Gene_Id"])"\t"$(f["Center"])"\t"$(f["Tumor_Sample_Barcode"])"\t"$(f["Fusion"])"\t"$(f["Method"])"\t"$(f["Frame"]) \
+ }' \
+ $1.grepped.txt \
+ > \
+ $1.extracted.txt
+
+## Add two columns - RNA_support and no, DNA_support and yes
+
+sed \
+ '1s/$/\tDNA_support\tRNA_support/;2,$s/$/\tyes\tno/' \
+ $1.extracted.txt \
+ > \
+ $1.columns_added.txt
+
+## Portal format output
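+## (NR==1 builds a column-name -> index map; since the print block runs for every row, the header row is re-emitted too, in the reordered column order)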
+awk \
+ -F "\t" \
+ 'NR==1 \
+ { \
+ for(i=1;i<=NF;i++) \
+ { \
+ f[$i]=i \
+ } \
+ } \
+ { \
+ print $(f["Hugo_Symbol"])"\t"$(f["Entrez_Gene_Id"])"\t"$(f["Center"])"\t"$(f["Tumor_Sample_Barcode"])"\t"$(f["Fusion"])"\t"$(f["DNA_support"])"\t"$(f["RNA_support"])"\t"$(f["Method"])"\t"$(f["Frame"]) \
+ }' \
+ $1.columns_added.txt \
+ > \
+ $1.portal.txt
diff --git a/conf/juno_resources.config b/conf/juno_resources.config
new file mode 100644
index 0000000..f908e1f
--- /dev/null
+++ b/conf/juno_resources.config
@@ -0,0 +1,178 @@
+genome_resources {
+ genomes {
+ 'GRCh37' {
+ fasta = '/juno/work/ci/resources/genomes/GRCh37/fasta/b37.fasta'
+ dbsnp = '/juno/work/ci/resources/genomes/GRCh37/dbsnp/129/dbsnp_138.b37.excluding_sites_after_129.vcf'
+ facets_snps = '/juno/work/ci/resources/genomes/GRCh37/facets_snps/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf'
+ delly = '/juno/work/ci/resources/genomes/GRCh37/delly/human.hg19.excl.tsv'
+ cosmic = '/juno/work/ci/resources/genomes/GRCh37/cosmic/67/CosmicCodingMuts_v67_b37_20131024__NDS.vcf'
+ intervals = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT']
+ }
+ }
+
+ resources {
+ hotspot = '/juno/work/ci/resources/qc_resources/hotspot-list-union-v1-v2.vcf'
+ exac_filter = '/juno/work/ci/resources/vep/cache/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz'
+ }
+
+ assay_coverage = [
+ 'IMPACT341' : 896637,
+ 'IMPACT410' : 1016335,
+ 'IMPACT468' : 1139294,
+ 'IMPACT505' : 1213770,
+ 'HemePACT_v4' : 1412046,
+ 'IMPACT_Heme' : 1314920
+ ]
+
+ curated_bams = [
+ 'IMPACT468' : '/juno/work/ci/resources/curated_bams/IMPACT468_b37',
+ 'AgilentExon_51MB' : '/juno/work/ci/resources/curated_bams/AgilentExon_51MB_b37_v3',
+ 'IDT_Exome' : '/juno/work/ci/resources/curated_bams/IDT_Exome_v1_FP_b37',
+ 'IMPACT_Heme' : '/juno/work/ci/resources/curated_bams/IMPACT-Heme_v2_BAITS'
+ ]
+
+ targets {
+ 'AgilentExon_51MB' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_51MB_b37_v3/b37/AgilentExon_51MB_b37_v3_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_51MB_b37_v3/b37/AgilentExon_51MB_b37_v3_FP_tiling_intervals.intervals'
+ baits_bed = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_51MB_b37_v3/b37/AgilentExon_51MB_b37_v3_baits.bed'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_51MB_b37_v3/b37/AgilentExon_51MB_b37_v3_baits.intervals'
+ targets_bed = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_51MB_b37_v3/b37/AgilentExon_51MB_b37_v3_targets.bed'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_51MB_b37_v3/b37/AgilentExon_51MB_b37_v3_targets.intervals'
+ }
+
+ 'IDT_Exome' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/IDT_Exome_v1_FP/b37/FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/IDT_Exome_v1_FP/b37/FP_tiling_intervals.intervals'
+ baits_bed = '/juno/work/ci/resources/genomic_resources/targets/IDT_Exome_v1_FP/b37/IDT_Exome_v1_FP_b37_baits.bed'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/IDT_Exome_v1_FP/b37/IDT_Exome_v1_FP_b37_baits.ilist'
+ targets_bed = '/juno/work/ci/resources/genomic_resources/targets/IDT_Exome_v1_FP/b37/IDT_Exome_v1_FP_b37_targets.bed'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/IDT_Exome_v1_FP/b37/IDT_Exome_v1_FP_b37_targets.ilist'
+ }
+
+ 'E90_NimbleGeneV3_WES' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/E90_NimbleGeneV3_WES/b37/E90_NimbleGeneV3_WES_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/E90_NimbleGeneV3_WES/b37/E90_NimbleGeneV3_WES_FP_tiling_intervals.intervals'
+ baits_bed = '/juno/work/ci/resources/genomic_resources/targets/E90_NimbleGeneV3_WES/b37/E90_NimbleGeneV3_WES_b37_baits.bed'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/E90_NimbleGeneV3_WES/b37/E90_NimbleGeneV3_WES_b37_baits.ilist'
+ targets_bed = '/juno/work/ci/resources/genomic_resources/targets/E90_NimbleGeneV3_WES/b37/E90_NimbleGeneV3_WES_b37_targets.bed'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/E90_NimbleGeneV3_WES/b37/E90_NimbleGeneV3_WES_b37_targets.ilist'
+ }
+
+ 'IMPACT341' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/IMPACT341/b37/FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/IMPACT341/b37/FP_tiling_intervals.list'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT341/b37/picard_baits.interval_list'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT341/b37/picard_targets.interval_list'
+ }
+
+ 'IMPACT410' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/IMPACT410/b37/FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/IMPACT410/b37/FP_tiling_intervals.list'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT410/b37/picard_baits.interval_list'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT410/b37/picard_targets.interval_list'
+ }
+
+ 'IMPACT468' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468/b37/FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468/b37/FP_tiling_intervals.list'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468/b37/picard_baits.interval_list'
+ baits_bed = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468/b37/IMPACT468_b37_baits.bed'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468/b37/picard_targets.interval_list'
+ }
+
+ 'IMPACT468_mm10' {
+ FP_genotypes = '/juno/work/ci/resources/genomes/GRCh37_mm10/targets/FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomes/GRCh37_mm10/targets/FP_tiling_intervals.list'
+ baits_list = '/juno/work/ci/resources/genomes/GRCh37_mm10/targets/picard_baits.interval_list'
+ targets_list = '/juno/work/ci/resources/genomes/GRCh37_mm10/targets/picard_targets.interval_list'
+ }
+
+ 'IMPACT468_08390' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468_08390/b37/IMPACT468_08390_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468_08390/b37/IMPACT468_08390_FP_tiling_intervals.intervals'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468_08390/b37/IMPACT468_08390_b37_baits.ilist'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468_08390/b37/IMPACT468_08390_b37_targets.ilist'
+ }
+
+ 'IMPACT468_08050' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468_08050/b37/IMPACT468_08050_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468_08050/b37/IMPACT468_08050_FP_tiling_intervals.intervals'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468_08050/b37/IMPACT468_08050_b37_baits.ilist'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT468_08050/b37/IMPACT468_08050_b37_targets.ilist'
+ }
+
+ 'IMPACT505' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/IMPACT505/b37/IMPACT505_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/IMPACT505/b37/IMPACT505_FP_tiling_intervals.intervals'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT505/b37/IMPACT505_b37_baits.ilist'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT505/b37/IMPACT505_b37_targets.ilist'
+ }
+
+ 'Agilent_v4_51MB_Human' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/Agilent_v4_51MB_Human/b37/Agilent_v4_51MB_Human_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/Agilent_v4_51MB_Human/b37/Agilent_v4_51MB_Human_FP_tiling_intervals.intervals'
+ baits_bed = '/juno/work/ci/resources/genomic_resources/targets/Agilent_v4_51MB_Human/b37/Agilent_v4_51MB_Human_b37_baits.bed'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/Agilent_v4_51MB_Human/b37/Agilent_v4_51MB_Human_b37_baits.ilist'
+ targets_bed = '/juno/work/ci/resources/genomic_resources/targets/Agilent_v4_51MB_Human/b37/Agilent_v4_51MB_Human_b37_targets.bed'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/Agilent_v4_51MB_Human/b37/Agilent_v4_51MB_Human_b37_targets.ilist'
+ }
+
+ 'AgilentExon_v2' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_FP_tiling_intervals.intervals'
+ baits_bed = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_b37_baits.bed'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_b37_baits.ilist'
+ targets_bed = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_b37_targets.bed'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_b37_targets.ilist'
+ }
+
+ 'AgilentExon_v5' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v5/b37/AgilentExon_v5_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v5/b37/AgilentExon_v5_FP_tiling_intervals.intervals'
+ baits_bed = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v5/b37/AgilentExon_v5_b37_baits.bed'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v5/b37/AgilentExon_v5_b37_baits.ilist'
+ targets_bed = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v5/b37/AgilentExon_v5_b37_targets.bed'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/AgilentExon_v5/b37/AgilentExon_v5_b37_targets.ilist'
+ }
+
+ 'IlluminaExome_38MB' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/IlluminaExome_38MB/b37/IlluminaExome_38MB_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/IlluminaExome_38MB/b37/IlluminaExome_38MB_FP_tiling_intervals.intervals'
+ baits_bed = '/juno/work/ci/resources/genomic_resources/targets/IlluminaExome_38MB/b37/IlluminaExome_38MB_b37_baits.bed'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/IlluminaExome_38MB/b37/IlluminaExome_38MB_b37_baits.ilist'
+ targets_bed = '/juno/work/ci/resources/genomic_resources/targets/IlluminaExome_38MB/b37/IlluminaExome_38MB_b37_targets.bed'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/IlluminaExome_38MB/b37/IlluminaExome_38MB_b37_targets.ilist'
+ }
+
+ 'SeqCap_EZ_Exome' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/SeqCap_EZ_Exome_v3/b37/SeqCap_EZ_Exome_v3_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/SeqCap_EZ_Exome_v3/b37/SeqCap_EZ_Exome_v3_FP_tiling_intervals.intervals'
+ baits_bed = '/juno/work/ci/resources/genomic_resources/targets/SeqCap_EZ_Exome_v3/b37/SeqCap_EZ_Exome_v3_b37_baits.bed'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/SeqCap_EZ_Exome_v3/b37/SeqCap_EZ_Exome_v3_b37_baits.ilist'
+ targets_bed = '/juno/work/ci/resources/genomic_resources/targets/SeqCap_EZ_Exome_v3/b37/SeqCap_EZ_Exome_v3_b37_targets.bed'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/SeqCap_EZ_Exome_v3/b37/SeqCap_EZ_Exome_v3_b37_targets.ilist'
+ }
+
+ 'HemePACT_v3' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/HemePACT_v3/b37/HemePACT_v3_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/HemePACT_v3/b37/HemePACT_v3_FP_tiling_intervals.intervals'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/HemePACT_v3/b37/HemePACT_v3_b37_baits.ilist'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/HemePACT_v3/b37/HemePACT_v3_b37_targets.ilist'
+ }
+
+ 'HemePACT_v4' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/HemePACT_v4/b37/HemePACT_v4_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/HemePACT_v4/b37/HemePACT_v4_FP_tiling_intervals.intervals'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/HemePACT_v4/b37/HemePACT_v4_b37_baits.ilist'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/HemePACT_v4/b37/HemePACT_v4_b37_targets.ilist'
+ }
+
+ 'IMPACT-Heme' {
+ FP_genotypes = '/juno/work/ci/resources/genomic_resources/targets/IMPACT-Heme/IMPACT-Heme_v4/b37/IMPACT-Heme_v4_FP_tiling_genotypes.txt'
+ FP_intervals = '/juno/work/ci/resources/genomic_resources/targets/IMPACT-Heme/IMPACT-Heme_v4/b37/IMPACT-Heme_v4_FP_tiling_intervals.intervals'
+ baits_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT-Heme/IMPACT-Heme_v4/b37/IMPACT-Heme_v4_baits.ilist'
+ targets_list = '/juno/work/ci/resources/genomic_resources/targets/IMPACT-Heme/IMPACT-Heme_v4/b37/IMPACT-Heme_v4_targets.ilist'
+ }
+ }
+}
diff --git a/conf/modules.config b/conf/modules.config
index d91c6ab..bf2f1d2 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -11,7 +11,6 @@
*/
process {
-
publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode,
@@ -26,8 +25,27 @@ process {
]
}
- withName: FASTQC {
- ext.args = '--quiet'
+ withName: 'SAMTOOLS_HEADER_VIEW' {
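+        // ext.args drives samtools view; ext.args2/ext.args3 are the grep pattern and sed expression used to extract SM: sample names from the BAM header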
+ ext.args = 'view -h -H'
+ ext.args2 = '\\(SM:\\S*\\)'
+ ext.args3 = 's/SM://g'
+
+ }
+
+ withName: 'VCF2MAF' {
+ ext.args = '--ncbi-build GRCh37 --species homo_sapiens --maf-center mskcc.org --max-filter-ac 10 --min-hom-vaf 0.7 --cache-version 86 --buffer-size 5000 --vep-data /var/cache --vep-path /usr/bin/vep --custom-enst /usr/bin/vcf2maf/data/isoform_overrides_at_mskcc'
+ }
+
+ withName: 'BCFTOOLS_CONCAT' {
+ ext.args = '--output-type v --allow-overlaps --rm-dups all'
+ }
+
+ withName: 'DELLY_CALL' {
+ ext.args = '--map-qual 1 --mad-cutoff 9 --geno-qual 5'
+ }
+
+ withName: 'DELLY_FILTER' {
+ ext.args = '--filter somatic --altaf 0.04 --minsize 500 --maxsize 500000000 --ratiogeno 0.0 --pass --coverage 10 --controlcontamination 0 --gq 15 --rddel 0.800000012 --rddup 1.20000005'
}
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
@@ -37,14 +55,4 @@ process {
pattern: '*_versions.yml'
]
}
-
- withName: 'MULTIQC' {
- ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
- publishDir = [
- path: { "${params.outdir}/multiqc" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
-
}
diff --git a/conf/test_juno.config b/conf/test_juno.config
new file mode 100644
index 0000000..ba1a535
--- /dev/null
+++ b/conf/test_juno.config
@@ -0,0 +1,58 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Defines input files and everything required to run a fast and simple pipeline test.
+
+ Use as follows:
+        nextflow run mskcc/sif -profile test_juno,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+includeConfig 'juno_resources.config'
+
+process {
+ beforeScript = "module load singularity/3.7.1; unset R_LIBS; catch_term () { echo 'caught USR2/TERM signal'; set +e; false; on_exit ; } ; trap catch_term USR2 TERM"
+ maxRetries = 3
+
+//    executor = "lsf"
+//    clusterOptions = "-sla CMOVOYAGER"
+//    queueSize = 500
+//    perJobMemLimit = true
+}
+
+// singularity settings are a top-level config scope, not process directives
+singularity.cacheDir = '/juno/work/ci/dev/dev_phoenix/singularity_cachedir'
+
+params {
+ config_profile_name = 'Juno profile'
+ config_profile_description = 'Minimal Juno profile'
+
+ // Limit resources so that this can run on GitHub Actions
+
+ // max_cpus = 2
+ // max_memory = '6.GB'
+ // max_time = '6.h'
+
+ // Genome references
+ genome = 'GRCh37'
+
+ // Ignore MultiQC
+
+ multiqc_config = null
+ multiqc_logo = null
+ multiqc_methods_description = null
+
+ //Ignore igenomes
+ igenomes_ignore = true
+
+ // Reference genome options
+ // genome = null
+ fasta = "${genome_resources.genomes[ params.genome ][ 'fasta' ]}"
+ fasta_index_dict = "${fasta.replaceAll('fasta$','dict')}"
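+    // companion index files expected next to the fasta: samtools .fai, the BWA index set, and the picard/GATK .dict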
+ fasta_index = ["${fasta}.fai","${fasta}.amb","${fasta}.ann","${fasta}.bwt","${fasta}.pac","${fasta}.sa","${fasta_index_dict}"]
+ exac_filter = "${genome_resources.resources.exac_filter}"
+ exac_filter_index = "${exac_filter}.tbi"
+ delly_type = ['DUP', 'DEL', 'INV', 'INS', 'BND']
+ delly_exclude = "${genome_resources.genomes[params.genome]['delly']}"
+}
diff --git a/lib/WorkflowSif.groovy b/lib/WorkflowSif.groovy
index b9aba05..964b6b7 100755
--- a/lib/WorkflowSif.groovy
+++ b/lib/WorkflowSif.groovy
@@ -12,12 +12,12 @@ class WorkflowSif {
//
public static void initialise(params, log) {
- genomeExistsError(params, log)
+ // genomeExistsError(params, log)
- if (!params.fasta) {
- Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
- }
+ //if (!params.fasta) {
+ // Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
+ //}
}
//
diff --git a/modules/local/add_maf_comment.nf b/modules/local/add_maf_comment.nf
new file mode 100644
index 0000000..5cbb667
--- /dev/null
+++ b/modules/local/add_maf_comment.nf
@@ -0,0 +1,54 @@
+process ADD_MAF_COMMENT {
+ tag "$meta.id"
+ label 'process_single'
+
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker://mskcc/alpine:3.19-with-bash':
+ 'docker.io/mskcc/alpine:3.19-with-bash' }"
+
+ publishDir "${params.outdir}/${meta.id}/", pattern: "${meta.id}.*", mode: params.publish_dir_mode
+
+ containerOptions "--bind $projectDir"
+
+ input:
+ tuple val(meta), path(input_maf)
+ val(tool_name)
+ val(tool_version)
+
+ output:
+ tuple val(meta), path("*.svs.maf") , emit: maf
+ path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def tool_name_trim = "${tool_name}".trim()
+ def tool_version_trim = "${tool_version}".trim()
+
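+    // concat_with_comments.sh appends a "#<tool_name>: <tool_version>" comment line while keeping any existing MAF comment lines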
+ """
+ $projectDir/bin/concat_with_comments.sh \\
+ ${tool_name_trim} \\
+ ${tool_version_trim} \\
+ ${prefix}.svs.maf \\
+ ${input_maf}
+
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ grep: BusyBox v1.36.1
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+
+ touch ${prefix}.svs.maf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ grep: BusyBox v1.36.1
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/bcftools_concat.nf b/modules/local/bcftools_concat.nf
new file mode 100644
index 0000000..6413c7f
--- /dev/null
+++ b/modules/local/bcftools_concat.nf
@@ -0,0 +1,49 @@
+process BCFTOOLS_CONCAT {
+
+
+ tag "$meta.id"
+ label 'process_medium'
+
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker://mskcc/htslib:1.9':
+ 'docker.io/mskcc/htslib:1.9' }"
+
+ input:
+ tuple val(meta), path(inputVcfs), path(inputVcfTbis)
+
+ output:
+ tuple val(meta), path("*.vcf") , emit: vcf
+ path "versions.yml" , emit: versions
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def input_vcfs = inputVcfs.join(" ")
+ def output_vcf = "${prefix}.combined.svs.vcf"
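+    // ${args} is supplied via conf/modules.config (--output-type v --allow-overlaps --rm-dups all)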
+ """
+ /usr/bin/bcftools concat \\
+ ${input_vcfs} \\
+ ${args} \\
+ --output ${output_vcf}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: 1.9
+ htslib: 1.9
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.combined.svs.vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: 1.9
+ htslib: 1.9
+ END_VERSIONS
+ """
+
+}
diff --git a/modules/local/delly_call.nf b/modules/local/delly_call.nf
new file mode 100644
index 0000000..97f21ef
--- /dev/null
+++ b/modules/local/delly_call.nf
@@ -0,0 +1,58 @@
+process DELLY_CALL {
+
+
+ tag "$meta.id"
+ label 'process_medium'
+
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker://mskcc/delly:1.2.6':
+ 'docker.io/mskcc/delly:1.2.6' }"
+
+ input:
+ tuple val(meta), path(normal), path(normal_index)
+ tuple val(meta2), path(tumor), path(tumor_index)
+ tuple val(meta3), path(fasta)
+ tuple val(meta4), path(fai)
+    tuple val(meta5), path(exclude)
+ each delly_type
+
+ output:
+ tuple val(meta), val(delly_type), path("*.bcf"), path("*.bcf.csi") , emit: sv_output
+ path "versions.yml" , emit: versions
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+
+ """
+ /opt/delly/bin/delly \\
+ call \\
+ ${args} \\
+ --genome ${fasta} \\
+ --exclude ${exclude} \\
+ --outfile ${prefix}.${delly_type}.bcf \\
+ ${tumor} \\
+ ${normal}
+
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ delly: 1.2.6
+ htslib: 1.15.1
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.${delly_type}.bcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ delly: 1.2.6
+ htslib: 1.15.1
+ END_VERSIONS
+ """
+
+}
diff --git a/modules/local/delly_filter.nf b/modules/local/delly_filter.nf
new file mode 100644
index 0000000..637815b
--- /dev/null
+++ b/modules/local/delly_filter.nf
@@ -0,0 +1,56 @@
+process DELLY_FILTER {
+
+
+ tag "$meta.id"
+ label 'process_medium'
+
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker://mskcc/delly:1.2.6':
+ 'docker.io/mskcc/delly:1.2.6' }"
+
+ input:
+ tuple val(meta), val(delly_type), path(sv_output), path(sv_index)
+
+ output:
+ tuple val(meta), path("*.bcf"), path("*.bcf.csi") , emit: sv_pass_output
+ path "versions.yml" , emit: versions
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def pair_file_name = "tn_pair.txt"
+
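+    // delly filter in somatic mode (see conf/modules.config) needs a sample list marking which sample is tumor and which is control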
+ """
+ cat <<-END_PAIR > ${pair_file_name}
+ ${meta.tumorSampleName} tumor
+ ${meta.normalSampleName} control
+ END_PAIR
+
+ /opt/delly/bin/delly \\
+ filter \\
+ ${args} \\
+ --samples ${pair_file_name} \\
+ --outfile ${prefix}.${delly_type}.pass.bcf \\
+ ${sv_output}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ delly: 1.2.6
+ htslib: 1.15.1
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.${delly_type}.pass.bcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ delly: 1.2.6
+ htslib: 1.15.1
+ END_VERSIONS
+ """
+
+}
diff --git a/modules/local/format_maf.nf b/modules/local/format_maf.nf
new file mode 100644
index 0000000..904908c
--- /dev/null
+++ b/modules/local/format_maf.nf
@@ -0,0 +1,53 @@
+process FORMAT_MAF {
+
+
+ tag "$meta.id"
+ label 'process_medium'
+
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker://mskcc/alpine:3.19-with-bash':
+ 'docker.io/mskcc/alpine:3.19-with-bash' }"
+
+ containerOptions "--bind $projectDir"
+
+ publishDir "${params.outdir}/${meta.id}/", pattern: "${meta.id}.portal.txt", mode: params.publish_dir_mode
+
+ input:
+ tuple val(meta), path(inputMaf)
+
+ output:
+ tuple val(meta), path("*.portal.txt") , emit: portal
+ path "versions.yml" , emit: versions
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+
+ """
+ $projectDir/bin/format_maf.sh \\
+ ${prefix} \\
+ ${inputMaf}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ grep: BusyBox v1.36.1
+ awk: BusyBox v1.36.1
+ sed: BusyBox v1.36.1
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.portal.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ grep: BusyBox v1.36.1
+ awk: BusyBox v1.36.1
+ sed: BusyBox v1.36.1
+ END_VERSIONS
+ """
+
+}
diff --git a/modules/local/get_bam_header.nf b/modules/local/get_bam_header.nf
new file mode 100644
index 0000000..909eb2a
--- /dev/null
+++ b/modules/local/get_bam_header.nf
@@ -0,0 +1,36 @@
+process SAMTOOLS_HEADER_VIEW {
+
+
+ tag "$meta.id"
+ label 'process_medium'
+
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker://mskcc/htslib:1.9':
+ 'docker.io/mskcc/htslib:1.9' }"
+
+ input:
+ tuple val(meta), path(bam)
+
+ output:
+ tuple val(meta), stdout , emit: sample_name
+ path "versions.yml" , emit: versions
+
+ script:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ def args3 = task.ext.args3 ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+
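+    // print the SM: sample name(s) from the BAM header; the view flags, grep pattern and sed substitution come from ext.args/args2/args3 in conf/modules.config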
+ """
+ /usr/local/bin/samtools \\
+ ${args} \\
+ ${bam} | grep -o '${args2}' | sed '${args3}'
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ samtools: 1.9
+ htslib: 1.9
+ END_VERSIONS
+ """
+
+}
diff --git a/modules/local/get_tool_version.nf b/modules/local/get_tool_version.nf
new file mode 100644
index 0000000..b55cfaa
--- /dev/null
+++ b/modules/local/get_tool_version.nf
@@ -0,0 +1,44 @@
+process GET_TOOL_VERSION {
+
+
+ tag "get_version_$tool"
+ label 'process_single'
+
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker://mskcc/alpine:3.19-with-bash':
+ 'docker.io/mskcc/alpine:3.19-with-bash' }"
+
+ input:
+ val(tool)
+ path(version_yaml), stageAs: "tool_version.yml"
+
+ output:
+ stdout emit: tool_version
+ path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+ def prefix = task.ext.prefix
+
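+    // extract the "<tool>: <version>" line from the staged versions.yml and print only the version string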
+ """
+ grep '${tool}:' tool_version.yml | tail -n1 | awk '{ print \$2}'
+
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ grep: BusyBox v1.36.1
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix
+ """
+ echo "1.0"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ grep: BusyBox v1.36.1
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/vcf2maf.nf b/modules/local/vcf2maf.nf
new file mode 100644
index 0000000..f2a402b
--- /dev/null
+++ b/modules/local/vcf2maf.nf
@@ -0,0 +1,52 @@
+process VCF2MAF {
+
+
+ tag "$meta.id"
+ label 'process_medium'
+
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker://mskcc/vcf2maf:1.6.17':
+ 'docker.io/mskcc/vcf2maf:1.6.17' }"
+
+ input:
+ tuple val(meta), path(inputVcf)
+ tuple val(meta2), path(fasta)
+ tuple val(meta3), path(fai)
+ tuple val(meta4), path(exac_filter)
+ tuple val(meta5), path(exac_filter_tbi)
+
+ output:
+ tuple val(meta), path("*.maf") , emit: maf
+ path "versions.yml" , emit: versions
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def forks = task.cpus * 2
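+    // remaining vcf2maf/VEP options (${args}) are set in conf/modules.config; forks doubles the allotted CPUs for --vep-forks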
+
+ """
+ perl /usr/bin/vcf2maf/vcf2maf.pl \\
+ ${args} \\
+ --input-vcf ${inputVcf} \\
+ --ref-fasta ${fasta} \\
+ --vcf-tumor-id ${meta.tumorSampleName} \\
+ --tumor-id ${meta.tumorSampleName} \\
+ --vcf-normal-id ${meta.normalSampleName} \\
+ --normal-id ${meta.normalSampleName} \\
+ --filter-vcf ${exac_filter} \\
+ --vep-forks ${forks} \\
+ --output-maf ${prefix}.maf
+
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ perl: \$(perl -v | head -n 2 | grep -o '(v.*)' | sed 's/[()]//g')
+ vcf2maf: 1.6.17
+ VEP: 86
+ htslib: 1.9
+ samtools: 1.9
+ bcftools: 1.9
+ END_VERSIONS
+ """
+
+}
diff --git a/nextflow.config b/nextflow.config
index 1e79485..c6c2ac9 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -16,7 +16,7 @@ params {
genome = null
igenomes_base = 's3://ngi-igenomes/igenomes/'
igenomes_ignore = false
-
+
// MultiQC options
multiqc_config = null
@@ -43,7 +43,7 @@ params {
custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
config_profile_contact = null
config_profile_url = null
-
+
// Max resource options
// Defaults only, expecting to be overwritten
@@ -170,6 +170,7 @@ profiles {
}
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
+ test_juno { includeConfig 'conf/test_juno.config' }
}
// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 0aecf87..f212995 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -3,6 +3,7 @@
//
include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
+include { SAMTOOLS_HEADER_VIEW as normal_header; SAMTOOLS_HEADER_VIEW as tumor_header} from '../../modules/local/get_bam_header'
workflow INPUT_CHECK {
take:
@@ -12,33 +13,121 @@ workflow INPUT_CHECK {
SAMPLESHEET_CHECK ( samplesheet )
.csv
.splitCsv ( header:true, sep:',' )
- .map { create_fastq_channel(it) }
- .set { reads }
+ .map { create_bam_channel(it) }
+ .set { bam_files }
+ tumor_sample = bam_files
+ .map {
+ new Tuple(it[0],it[1][0])
+ }
+ normal_sample = bam_files
+ .map {
+ new Tuple(it[0],it[1][1])
+ }
+ tumor_header( tumor_sample )
+ normal_header( normal_sample )
+
+ combined_bams = tuple_join(bam_files, tumor_header.out.sample_name)
+ combined_bams = tuple_join(combined_bams,normal_header.out.sample_name )
+
+ bams = combined_bams
+ .map{ set_samplename_meta(it) }
+
+ ch_versions = Channel.empty()
+ ch_versions = ch_versions.mix(SAMPLESHEET_CHECK.out.versions)
+ ch_versions = ch_versions.mix(tumor_header.out.versions)
+ ch_versions = ch_versions.mix(normal_header.out.versions)
emit:
- reads // channel: [ val(meta), [ reads ] ]
- versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
+ bams = bams // channel: [ val(meta), [ bams ] ]
+ versions = ch_versions // channel: [ versions.yml ]
}
-// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
-def create_fastq_channel(LinkedHashMap row) {
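+// Join two [ meta, ... ] channels on meta.id and append the second channel's payload
+// (here, a sample name string emitted by SAMTOOLS_HEADER_VIEW) to the first channel's tuple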
+def tuple_join(first, second) {
+ first_channel = first
+ .map{
+ new Tuple(it[0].id,it)
+ }
+ second_channel = second
+ .map{
+ new Tuple(it[0].id,it)
+ }
+ mergedWithKey = first_channel
+ .join(second_channel)
+ merged = mergedWithKey
+ .map{
+ it[1] + it[2][1]
+ }
+ return merged
+
+}
+
+// Function to get list of [ meta, [ tumorBam, normalBam ], [ tumorBai, normalBai ] ]
+def create_bam_channel(LinkedHashMap row) {
// create meta map
def meta = [:]
- meta.id = row.sample
- meta.single_end = row.single_end.toBoolean()
-
- // add path(s) of the fastq file(s) to the meta map
- def fastq_meta = []
- if (!file(row.fastq_1).exists()) {
- exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}"
- }
- if (meta.single_end) {
- fastq_meta = [ meta, [ file(row.fastq_1) ] ]
- } else {
- if (!file(row.fastq_2).exists()) {
- exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
+ meta.id = row.pairId
+ meta.assay = row.assay
+ meta.normalType = row.normalType
+
+ // add path(s) of the bam files to the meta map
+ def bams = []
+ def bedFile = null
+ if (!file(row.tumorBam).exists()) {
+ exit 1, "ERROR: Please check input samplesheet -> Tumor BAM file does not exist!\n${row.tumorBam}"
+ }
+ if (!file(row.normalBam).exists()) {
+ exit 1, "ERROR: Please check input samplesheet -> Normal BAM file does not exist!\n${row.normalBam}"
+ }
+
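+    // accept either <name>.bam.bai or <name>.bai index naming for each BAM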
+ def tumorBai = "${row.tumorBam}.bai"
+ def normalBai = "${row.normalBam}.bai"
+ def tumorBaiAlt = "${row.tumorBam}".replaceAll('bam$', 'bai')
+ def normalBaiAlt = "${row.normalBam}".replaceAll('bam$', 'bai')
+
+ def foundTumorBai = ""
+ def foundNormalBai = ""
+
+
+ if (file(tumorBai).exists()) {
+ foundTumorBai = tumorBai
+ }
+ else{
+ if(file(tumorBaiAlt).exists()){
+ foundTumorBai = tumorBaiAlt
+ }
+ else{
+ exit 1, "ERROR: Please verify inputs -> Tumor BAI file does not exist!\n${row.tumorBam}"
+ }
+ }
+ if (file(normalBai).exists()) {
+ foundNormalBai = normalBai
+ }
+ else{
+ if(file(normalBaiAlt).exists()){
+ foundNormalBai = normalBaiAlt
+ }
+ else{
+ exit 1, "ERROR: Please verify inputs -> Normal BAI file does not exist!\n${row.normalBam}"
}
- fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
}
- return fastq_meta
+
+
+ bams = [ meta, [ file(row.tumorBam), file(row.normalBam) ], [ file(foundTumorBai), file(foundNormalBai) ]]
+ return bams
+}
+
+def set_samplename_meta(List bams) {
+    def meta = bams[0]
+ def tumorSample = bams[3]
+ def normalSample = bams[4]
+ if( tumorSample == null || tumorSample.isEmpty() ){
+ exit 1, "ERROR: No sample name found for tumor sample, please make sure the SM tag is set in the bam\n${tumorBam}"
+ }
+ if( normalSample == null || normalSample.isEmpty() ){
+ exit 1, "ERROR: No sample name found for normal sample, please make sure the SM tag is set in the bam\n${normalBam}"
+ }
+ meta.tumorSampleName = tumorSample.trim()
+ meta.normalSampleName = normalSample.trim()
+ return [ meta, bams[1], bams[2] ]
+
}
diff --git a/subworkflows/local/sv.nf b/subworkflows/local/sv.nf
new file mode 100644
index 0000000..fa50d35
--- /dev/null
+++ b/subworkflows/local/sv.nf
@@ -0,0 +1,105 @@
+include { DELLY_CALL } from '../../modules/local/delly_call'
+include { DELLY_FILTER } from '../../modules/local/delly_filter'
+include { BCFTOOLS_CONCAT as concat_sv; BCFTOOLS_CONCAT as concat_filtered_sv} from '../../modules/local/bcftools_concat'
+include { VCF2MAF } from '../../modules/local/vcf2maf'
+include { FORMAT_MAF } from '../../modules/local/format_maf'
+include { GET_TOOL_VERSION } from '../../modules/local/get_tool_version'
+include { ADD_MAF_COMMENT } from '../../modules/local/add_maf_comment'
+
+
+workflow SV {
+ take:
+ ch_normal // normal bam
+ ch_tumor // tumor bam
+ ch_fasta_ref // fasta path
+ ch_fasta_fai_ref // fasta_fai path
+ ch_delly_exclude // delly exclude file
+ delly_type // delly type list
+ ch_exac_filter // Exac filter vcf
+ ch_exac_filter_index // Exac filter index
+
+ main:
+
+ DELLY_CALL (
+ ch_normal,
+ ch_tumor,
+ ch_fasta_ref,
+ ch_fasta_fai_ref,
+ ch_delly_exclude,
+ delly_type
+ )
+
+ delly_call_output = DELLY_CALL.out.sv_output.transpose()
+
+ DELLY_FILTER (
+ delly_call_output
+ )
+
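+    // regroup the per-SV-type BCFs (and their .csi indexes) into a single [ meta, [bcfs], [indexes] ] tuple per pair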
+ combined_sv = delly_call_output
+ .map{
+ new Tuple(it[0].id,it[0],it[2],it[3])
+ }
+ .groupTuple()
+ .map{
+ new Tuple(it[1][0],it[2], it[3])
+ }
+
+ combined_filtered_sv = DELLY_FILTER.out.sv_pass_output
+ .map{
+ new Tuple(it[0].id,it[0],it[1], it[2])
+ }
+ .groupTuple()
+ .map{
+ new Tuple(it[1][0],it[2], it[3])
+ }
+
+ concat_sv (
+ combined_sv
+ )
+
+ concat_filtered_sv (
+ combined_filtered_sv
+ )
+
+ VCF2MAF (
+ concat_filtered_sv.out.vcf,
+ ch_fasta_ref,
+ ch_fasta_fai_ref,
+ ch_exac_filter,
+ ch_exac_filter_index
+ )
+
+ delly_tool = Channel.value("delly")
+
+ GET_TOOL_VERSION (
+ delly_tool,
+ DELLY_CALL.out.versions
+ )
+
+ ADD_MAF_COMMENT (
+ VCF2MAF.out.maf,
+ delly_tool,
+ GET_TOOL_VERSION.out.tool_version
+ )
+
+ FORMAT_MAF (
+ VCF2MAF.out.maf
+ )
+
+ ch_versions = Channel.empty()
+ ch_versions = ch_versions.mix(DELLY_CALL.out.versions)
+ ch_versions = ch_versions.mix(DELLY_FILTER.out.versions)
+ ch_versions = ch_versions.mix(concat_sv.out.versions)
+ ch_versions = ch_versions.mix(concat_filtered_sv.out.versions)
+ ch_versions = ch_versions.mix(VCF2MAF.out.versions)
+ ch_versions = ch_versions.mix(GET_TOOL_VERSION.out.versions)
+ ch_versions = ch_versions.mix(ADD_MAF_COMMENT.out.versions)
+ ch_versions = ch_versions.mix(FORMAT_MAF.out.versions)
+
+ emit:
+ sv = concat_sv.out.vcf
+ sv_filtered = concat_filtered_sv.out.vcf
+ maf_file = ADD_MAF_COMMENT.out.maf
+ portal = FORMAT_MAF.out.portal
+ versions = ch_versions
+}
diff --git a/workflows/sif.nf b/workflows/sif.nf
index 61a2220..2c960e1 100644
--- a/workflows/sif.nf
+++ b/workflows/sif.nf
@@ -35,7 +35,9 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
+
include { INPUT_CHECK } from '../subworkflows/local/input_check'
+include { SV } from '../subworkflows/local/sv'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -46,8 +48,6 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
//
// MODULE: Installed directly from nf-core/modules
//
-include { FASTQC } from '../modules/nf-core/fastqc/main'
-include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
/*
@@ -56,9 +56,6 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-// Info required for completion email and summary
-def multiqc_report = []
-
workflow SIF {
ch_versions = Channel.empty()
@@ -69,45 +66,42 @@ workflow SIF {
INPUT_CHECK (
file(params.input)
)
- ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
- // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input")
- // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
- // ! There is currently no tooling to help you write a sample sheet schema
- //
- // MODULE: Run FastQC
- //
- FASTQC (
- INPUT_CHECK.out.reads
+ ch_fasta_ref = Channel.value([ "reference_genome", file(params.fasta) ])
+ ref_index_list = []
+ for(single_genome_ref in params.fasta_index){
+ ref_index_list.add(file(single_genome_ref))
+ }
+    ch_fasta_fai_ref = Channel.value([ "reference_genome_index", ref_index_list ])
+ ch_delly_exclude = Channel.value([ "delly_exclude", file(params.delly_exclude) ])
+ ch_exac_filter = Channel.value(["exac_filter", file(params.exac_filter)])
+ ch_exac_filter_index = Channel.value(["exac_filter_index", file(params.exac_filter_index)])
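+    // INPUT_CHECK.out.bams emits [ meta, [ tumorBam, normalBam ], [ tumorBai, normalBai ] ]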
+ ch_normal = INPUT_CHECK.out.bams
+ .map{
+ new Tuple(it[0],it[1][1], it[2][1])
+ }
+ ch_tumor = INPUT_CHECK.out.bams
+ .map{
+ new Tuple(it[0],it[1][0], it[2][0])
+ }
+
+ SV (
+ ch_normal,
+ ch_tumor,
+ ch_fasta_ref,
+ ch_fasta_fai_ref,
+ ch_delly_exclude,
+ params.delly_type,
+ ch_exac_filter,
+ ch_exac_filter_index
)
- ch_versions = ch_versions.mix(FASTQC.out.versions.first())
+
+ ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
+ ch_versions = ch_versions.mix(SV.out.versions)
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)
-
- //
- // MODULE: MultiQC
- //
- workflow_summary = WorkflowSif.paramsSummaryMultiqc(workflow, summary_params)
- ch_workflow_summary = Channel.value(workflow_summary)
-
- methods_description = WorkflowSif.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
- ch_methods_description = Channel.value(methods_description)
-
- ch_multiqc_files = Channel.empty()
- ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
- ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
- ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
- ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
-
- MULTIQC (
- ch_multiqc_files.collect(),
- ch_multiqc_config.toList(),
- ch_multiqc_custom_config.toList(),
- ch_multiqc_logo.toList()
- )
- multiqc_report = MULTIQC.out.report.toList()
}
/*