Skip to content

Commit

Permalink
Merge branch 'develop' into feature/make_cff
Browse files Browse the repository at this point in the history
  • Loading branch information
pintoa1-mskcc authored Aug 2, 2023
2 parents 452bba0 + b53bb98 commit 5bfb0d3
Show file tree
Hide file tree
Showing 19 changed files with 351 additions and 53 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pytest-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ concurrency:
jobs:
test:
runs-on: ubuntu-20.04
timeout-minutes: 45

name: ${{ matrix.tags }} ${{ matrix.profile }}
strategy:
Expand Down
3 changes: 3 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@ show_analysis_paths: False
extra_fn_clean_trim:
- "htseq.count"
export_plots: true

use_filename_as_sample_name:
- kallisto
3 changes: 2 additions & 1 deletion bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ def sniff_format(handle):
sniffer = csv.Sniffer()
if not sniffer.has_header(peek):
logger.critical("The given sample sheet does not appear to contain a header.")
sys.exit(1)
## The exit below is disabled because csv.Sniffer.has_header() sometimes reports a missing header for valid sample sheets; the header names are validated again downstream, which makes this check essentially redundant.
#sys.exit(1)
dialect = sniffer.sniff(peek)
return dialect

Expand Down
4 changes: 4 additions & 0 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ params {
fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta"
gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf"
refflat = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/refFlat.txt.gz"
cdna = "https://ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.75.cdna.all.fa.gz"
starfusion_url = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh37_gencode_v19_CTAT_lib_Mar012021.plug-n-play.tar.gz"
arriba_blacklist = "/usr/local/var/lib/arriba/blacklist_hg19_hs37d5_GRCh37_v2.3.0.tsv.gz"
arriba_known_fusions = "/usr/local/var/lib/arriba/known_fusions_hg19_hs37d5_GRCh37_v2.3.0.tsv.gz"
Expand Down Expand Up @@ -44,6 +45,7 @@ params {
arriba_blacklist = "/usr/local/var/lib/arriba/blacklist_hg38_GRCh38_v2.3.0.tsv.gz"
arriba_known_fusions = "/usr/local/var/lib/arriba/known_fusions_hg38_GRCh38_v2.3.0.tsv.gz"
arriba_protein_domains = "/usr/local/var/lib/arriba/protein_domains_hg38_GRCh38_v2.3.0.gff3"
cdna = "https://ftp.ensembl.org/pub/release-86/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"
}
'smallGRCh37' {
fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta"
Expand All @@ -56,6 +58,8 @@ params {
genebed = "/juno/work/ccs/pintoa1/references/meta_fusion_bed_generation/v75_gene.bed"
info = "/juno/work/ccs/pintoa1/fusion_report/metafusion/MetaFusion/gene_info_20230714.txt"
block = "/juno/work/ccs/pintoa1/fusion_report/metafusion/MetaFusion/reference_files/blocklist_breakpoints.bedpe"
cdna = "https://ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.75.cdna.all.fa.gz"

}
/*
'hg38' {
Expand Down
44 changes: 44 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ process {
storeDir = { "${params.reference_base}/${params.genome}/star_index" }
}

withName: KALLISTO_INDEX {
// Keep the KALLISTO_INDEX outputs in a "kallisto" directory under
// <params.reference_base>/<params.genome> via Nextflow's storeDir directive,
// the same layout the neighbouring reference-building processes use.
storeDir = { "${params.reference_base}/${params.genome}/kallisto" }
}

withName: STARFUSION_DOWNLOAD {
// Keep the STARFUSION_DOWNLOAD outputs in a "starfusion" directory under
// <params.reference_base>/<params.genome> via Nextflow's storeDir directive.
storeDir = { "${params.reference_base}/${params.genome}/starfusion" }
}
Expand Down Expand Up @@ -329,13 +333,53 @@ process {

withName: PICARD_COLLECTRNASEQMETRICS {
// Select --STRAND_SPECIFICITY from the sample metadata:
//   meta.single_end is truthy or meta.strand == "forward" -> FIRST_READ_TRANSCRIPTION_STRAND
//   meta.strand == "reverse"                              -> SECOND_READ_TRANSCRIPTION_STRAND
//   anything else                                         -> NONE
// NOTE(review): `||` binds tighter than `?:`, so every single-end sample gets
// FIRST_READ_TRANSCRIPTION_STRAND regardless of meta.strand -- confirm this is intended.
ext.args = { "--STRAND_SPECIFICITY ${meta.single_end || meta.strand == "forward" ? "FIRST_READ_TRANSCRIPTION_STRAND" : meta.strand == "reverse" ? "SECOND_READ_TRANSCRIPTION_STRAND" : "NONE" }" }
// Publish path: <params.outdir>/[analysis/<meta.sample>/ when meta is set]<a>/<b>, where
//   <a> = lower-cased first '_'-separated token of the last ':'-separated part of task.process
//   <b> = lower-cased second '_'-separated token of the second-to-last ':'-separated part
// The [-2] index relies on the process being nested at least one workflow level deep.
// saveAs returns null for versions.yml, so that file is not published.
publishDir = [
path: { "${params.outdir}/${meta ? "analysis/" + meta.sample + "/" : "" }${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}/${task.process.tokenize(':')[-2].tokenize('_')[1].toLowerCase()}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: PICARD_COLLECTHSMETRICS {
// Publish path: <params.outdir>/[analysis/<meta.sample>/ when meta is set]<a>/<b>, where
//   <a> = lower-cased first '_'-separated token of the last ':'-separated part of task.process
//   <b> = lower-cased second '_'-separated token of the second-to-last ':'-separated part
// The [-2] index relies on the process being nested at least one workflow level deep.
// saveAs returns null for versions.yml, so that file is not published.
publishDir = [
path: { "${params.outdir}/${meta ? "analysis/" + meta.sample + "/" : "" }${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}/${task.process.tokenize(':')[-2].tokenize('_')[1].toLowerCase()}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'RSEQC_.*' {
// Applies to every process whose name matches the pattern RSEQC_.*
// Publish path: <params.outdir>/[analysis/<meta.sample>/ when meta is set]<a>/<b>, where
//   <a> = lower-cased first '_'-separated token of the last ':'-separated part of task.process
//   <b> = lower-cased second '_'-separated token of the third-to-last ':'-separated part
// Note the [-3] index here, where the PICARD_* selectors use [-2]; it relies on the
// process being nested at least two workflow levels deep.
// saveAs returns null for versions.yml, so that file is not published.
publishDir = [
path: { "${params.outdir}/${meta ? "analysis/" + meta.sample + "/" : "" }${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}/${task.process.tokenize(':')[-3].tokenize('_')[1].toLowerCase()}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: HTSEQ_COUNT {
// Build "-s <value> -r pos", where <value> comes from the sample metadata:
//   meta.single_end is truthy or meta.strand == "forward" -> yes
//   meta.strand == "reverse"                              -> reverse
//   anything else                                         -> no
// NOTE(review): `||` binds tighter than `?:`, so every single-end sample gets "-s yes"
// regardless of meta.strand -- confirm this is intended.
ext.args = { "-s ${meta.single_end || meta.strand == "forward" ? "yes" : meta.strand == "reverse" ? "reverse" : "no" } -r pos" }
// Request 20 hours multiplied by the attempt number, passed through check_max
// (defined outside this section; presumably caps the value at the configured maximum).
time = { check_max( 20.h * task.attempt, 'time' ) }
}

withName: KALLISTO_QUANT {
    // The output prefix is the sample name taken from the metadata map.
    ext.prefix = { "$meta.sample" }

    // Extra command-line arguments: the fixed options "--bias" and "-b 100",
    // followed by a strandedness flag derived from meta.strand.
    ext.args = {
        // Stays empty unless meta.strand is "forward" or "reverse".
        def strandFlag = ""
        if (meta.strand == "forward") {
            strandFlag = "--fr-stranded"
        } else if (meta.strand == "reverse") {
            strandFlag = "--rf-stranded"
        }
        return ["--bias", "-b 100", strandFlag].join(" ")
    }

    // Fragment length and standard deviation come straight from the pipeline params.
    ext.fragment_len = params.kallisto_fragment_len
    ext.sd = params.kallisto_fragment_sd
}

withName: ARRIBA {
ext.args = {
[
Expand Down
1 change: 1 addition & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf')
params.starfusion_url = WorkflowMain.getGenomeAttribute(params, 'starfusion_url')
params.refflat = WorkflowMain.getGenomeAttribute(params, 'refflat')
params.baits = WorkflowMain.getGenomeAttribute(params, 'baits')
params.cdna = WorkflowMain.getGenomeAttribute(params, 'cdna')
params.arriba_blacklist = WorkflowMain.getGenomeAttribute(params, 'arriba_blacklist')
params.arriba_known_fusions = WorkflowMain.getGenomeAttribute(params, 'arriba_known_fusions')
params.arriba_protein_domains = WorkflowMain.getGenomeAttribute(params, 'arriba_protein_domains')
Expand Down
10 changes: 10 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
},
"kallisto/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"kallisto/quant": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"multiqc": {
"branch": "master",
"git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba",
Expand Down
2 changes: 1 addition & 1 deletion modules/local/htseq/count/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process HTSEQ_COUNT {
tag "$meta.id"
label 'process_medium'
label 'process_low'

conda "bioconda::htseq=2.0.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
Expand Down
2 changes: 1 addition & 1 deletion modules/local/starfusion/detect/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process STARFUSION {
tag "$meta.id"
label 'process_high'
label 'process_low'

conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.10.0 bioconda::trinity=date.2011_11_2 bioconda::samtools=1.9 bioconda::star=2.7.8a"
container "docker.io/trinityctat/starfusion:1.10.1"
Expand Down
44 changes: 44 additions & 0 deletions modules/nf-core/kallisto/index/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions modules/nf-core/kallisto/index/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 56 additions & 0 deletions modules/nf-core/kallisto/quant/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

70 changes: 70 additions & 0 deletions modules/nf-core/kallisto/quant/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ params {
save_align_intermeds = false

// Fusion
dedup_umi_for_fusions = false
run_oncokb_fusionannotator = false
cosmic_usr = null
fusion_report_cutoff = 1
Expand All @@ -40,6 +39,11 @@ params {
// rseqc_modules can include ['bam_stat','inner_distance','infer_experiment','junction_annotation','junction_saturation','read_distribution','read_duplication','tin']
rseqc_modules = ['bam_stat','inner_distance','infer_experiment','junction_annotation','junction_saturation','read_distribution','read_duplication']

// Quantification
dedup_umi_for_kallisto = true
kallisto_fragment_len = 500
kallisto_fragment_sd = 150

// MultiQC options
multiqc_config = null
multiqc_title = null
Expand Down
Loading

0 comments on commit 5bfb0d3

Please sign in to comment.