Skip to content

Commit

Permalink
Merge pull request #2 from mskilab-org/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
tanubrata authored Oct 10, 2023
2 parents 304c2ae + 770715d commit b1e333b
Show file tree
Hide file tree
Showing 108 changed files with 8,861 additions and 286 deletions.
162 changes: 156 additions & 6 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,73 @@
"items": {
"type": "object",
"properties": {
"patient": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "Patient ID must be provided and cannot contain spaces",
"meta": ["patient"]
},
"sample": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "Sample name must be provided and cannot contain spaces"
"errorMessage": "Sample ID must be provided and cannot contain spaces",
"meta": ["sample"]
},
"fastq_1": {
"sex": {
"errorMessage": "Sex cannot contain spaces",
"meta": ["sex"],
"default": "NA",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+$"
},
{
"type": "string",
"maxLength": 0
}
]
},
"status": {
"type": "integer",
"errorMessage": "Status can only be 0 (normal) or 1 (tumor). Defaults to 0, if none is supplied.",
"meta": ["status"],
"default": "0",
"minimum": 0,
"maximum": 1
},
"lane": {
"type": "string",
"pattern": "^\\S+\\.f(ast)?q\\.gz$",
"errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
"pattern": "^\\S+$",
"unique": ["patient", "sample"],
"anyOf": [
{
"dependentRequired": ["fastq_1"]
},
{
"dependentRequired": ["bam"]
}
],
"meta": ["lane"]
},
"fastq_1": {
"errorMessage": "FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.f(ast)?q\\.gz$"
},
{
"type": "string",
"maxLength": 0
}
],
"format": "file-path",
"exists": true
},
"fastq_2": {
"errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
"dependentRequired": ["fastq_1"],
"anyOf": [
{
"type": "string",
Expand All @@ -28,9 +83,104 @@
"type": "string",
"maxLength": 0
}
]
],
"format": "file-path",
"exists": true
},
"table": {
"errorMessage": "Recalibration table cannot contain spaces and must have extension '.table'",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.table$"
},
{
"type": "string",
"maxLength": 0
}
],
"format": "file-path",
"exists": true
},
"cram": {
"errorMessage": "CRAM file cannot contain spaces and must have extension '.cram'",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.cram$"
},
{
"type": "string",
"maxLength": 0
}
],
"format": "file-path",
"exists": true
},
"crai": {
"errorMessage": "CRAM index file cannot contain spaces and must have extension '.crai'",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.crai$"
},
{
"type": "string",
"maxLength": 0
}
],
"format": "file-path",
"exists": true
},
"bam": {
"errorMessage": "BAM file cannot contain spaces and must have extension '.bam'",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.bam$"
},
{
"type": "string",
"maxLength": 0
}
],
"format": "file-path",
"exists": true
},
"bai": {
"errorMessage": "BAM index file cannot contain spaces and must have extension '.bai'",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.bai$"
},
{
"type": "string",
"maxLength": 0
}
],
"format": "file-path",
"exists": true
},
"vcf": {
"errorMessage": "VCF file cannot contain spaces and must have extension '.vcf' or '.vcf.gz'",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.vcf(\\.gz)?$"
},
{
"type": "string",
"maxLength": 0
}
],
"format": "file-path",
"exists": true
},
"variantcaller": {
"type": "string"
}
},
"required": ["sample", "fastq_1"]
"required": ["patient", "sample"]
}
}
48 changes: 48 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,54 @@ process {
// Processes labelled 'process_high_memory' request 200 GB multiplied by the
// retry attempt number; the value is passed through check_max (defined outside
// this view — presumably clamps to the configured maximum; confirm).
withLabel:process_high_memory {
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
}

// Selector for a group of utility processes (pattern alternation on process name).
// Each matching process requests 1 CPU and 1 GB, both multiplied by task.attempt
// and passed through check_max (helper defined outside this view).
withName: 'UNZIP.*|UNTAR.*|TABIX.*|BUILD_INTERVALS|CREATE_INTERVALS_BED|CUSTOM_DUMPSOFTWAREVERSIONS|VCFTOOLS|BCFTOOLS.*|SAMTOOLS_INDEX' {
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 1.GB * task.attempt, 'memory' ) }
}
// FASTQC: 4 CPUs and 4 GB per attempt (values scale with task.attempt and go
// through check_max).
withName: 'FASTQC'{
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
}
// FASTP: 12 CPUs and 4 GB per attempt (values scale with task.attempt and go
// through check_max).
withName: 'FASTP'{
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
}
// BWAMEM1_MEM and BWAMEM2_MEM share one setting: 24 CPUs and 30 GB per attempt
// (values scale with task.attempt and go through check_max).
withName: 'BWAMEM1_MEM|BWAMEM2_MEM' {
cpus = { check_max( 24 * task.attempt, 'cpus' ) }
memory = { check_max( 30.GB * task.attempt, 'memory' ) }
}
// GATK4_MARKDUPLICATES and its Spark variant: 6 CPUs and 30 GB per attempt
// (values scale with task.attempt and go through check_max).
withName: 'GATK4_MARKDUPLICATES|GATK4_MARKDUPLICATESSPARK' {
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
memory = { check_max( 30.GB * task.attempt, 'memory' ) }
}
// Base-recalibration related GATK4 processes (ApplyBQSR, BaseRecalibrator, their
// Spark variants, and GatherBQSRReports): 2 CPUs and 4 GB per attempt
// (values scale with task.attempt and go through check_max).
withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|GATK4_BASERECALIBRATOR_SPARK|GATK4_GATHERBQSRREPORTS'{
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
}
// MOSDEPTH: 4 CPUs and 4 GB per attempt (values scale with task.attempt and go
// through check_max).
withName:'MOSDEPTH'{
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
}
// Any process whose name starts with STRELKA or MANTA: 10 CPUs and 8 GB per
// attempt (values scale with task.attempt and go through check_max).
withName:'STRELKA.*|MANTA.*' {
cpus = { check_max( 10 * task.attempt, 'cpus' ) }
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
}
// SAMTOOLS_CONVERT: only memory is overridden here (4 GB per attempt); this
// block sets no cpus value, so CPUs come from whatever other setting applies.
withName:'SAMTOOLS_CONVERT'{
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
}
// GATK4_MERGEVCFS: 2 CPUs and 4 GB per attempt (values scale with task.attempt
// and go through check_max).
withName:'GATK4_MERGEVCFS'{
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
}
// MULTIQC: 4 CPUs and 12 GB per attempt (values scale with task.attempt and go
// through check_max).
withName: 'MULTIQC' {
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
}
// SVABA: 12 CPUs and 16 GB per attempt (values scale with task.attempt and go
// through check_max).
withName: 'SVABA' {
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
}
// Processes labelled 'error_ignore' use the 'ignore' error strategy.
withLabel:error_ignore {
errorStrategy = 'ignore'
}
Expand Down
70 changes: 70 additions & 0 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,76 @@
params {
// illumina iGenomes reference file paths
genomes {
// Reference bundle registered under the key 'GATK.GRCh37'.
// Most entries are paths below ${params.igenomes_base}/Homo_sapiens/GATK/GRCh37;
// the last five entries point at files shipped under ${projectDir}/data.
'GATK.GRCh37' {
fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta"
fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai"
chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Chromosomes"
dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict"
bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/"
dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz"
dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz.tbi"
// Single-quoted literal (no interpolation): a command-line style resource argument, not a path.
dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_138.b37.vcf.gz'
known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz"
known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi"
// NOTE(review): the {a,b} segment looks like a glob alternation naming two files — confirm the consumer expands it.
known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz"
known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi"
germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz"
germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz.tbi"
intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list"
mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem"
ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_alleles_hg19.zip"
ascat_genome = 'hg19'
ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_loci_hg19.zip"
ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/GC_G1000_hg19.zip"
ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/RT_G1000_hg19.zip"
snpeff_db = 87
snpeff_genome = 'GRCh37'
vep_cache_version = 110
vep_genome = 'GRCh37'
vep_species = 'homo_sapiens'
// Resources resolved relative to the pipeline project directory rather than igenomes_base.
indel_mask = "${projectDir}/data/snowman_blacklist.bed"
germ_sv_db = "${projectDir}/data/snowman_germline_mini_160413.bed"
simple_seq_db = "${projectDir}/data/repeat_masker_hg19_Simple.bed"
blacklist_gridss = "${projectDir}/data/ENCFF001TDO_hg19_nochr.bed"
pon_gridss = "${projectDir}/data/GRIDSS/pon/hg19/"
}
// Reference bundle registered under the key 'GATK.GRCh38'.
// Most entries are paths below ${params.igenomes_base}/Homo_sapiens/GATK/GRCh38;
// the last five entries point at files shipped under ${projectDir}/data.
'GATK.GRCh38' {
fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta"
fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai"
chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes"
dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict"
bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/"
bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/"
cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len"
dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz"
dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi"
known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz"
known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi"
// NOTE(review): the {a,b} segment looks like a glob alternation naming two files — confirm the consumer expands it.
known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz"
germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz.tbi"
intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed"
mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem"
pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz"
pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi"
ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_alleles_hg38.zip"
ascat_genome = 'hg38'
ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_loci_hg38.zip"
ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/GC_G1000_hg38.zip"
ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/RT_G1000_hg38.zip"
snpeff_db = 105
snpeff_genome = 'GRCh38'
vep_cache_version = 110
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
// Resources resolved relative to the pipeline project directory rather than igenomes_base.
indel_mask = "${projectDir}/data/snowman_blacklist.hg38.bed"
germ_sv_db = "${projectDir}/data/snowman_germline_mini_hg38.bed"
simple_seq_db = "${projectDir}/data/repeat_masker_hg38_simple.bed"
blacklist_gridss = "${projectDir}/data/ENCFF356LFX_hg38.bed"
pon_gridss = "${projectDir}/data/GRIDSS/pon/hg38/"
}

'GRCh37' {
fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/"
Expand Down
52 changes: 52 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,58 @@ process {

// FASTQC module options: passes '--quiet' as extra arguments and publishes
// files matching "*{html,zip}" to <outdir>/QCreports/FastQC/<meta.id> using the
// publish mode from params.publish_dir_mode.
withName: FASTQC {
ext.args = '--quiet'
publishDir = [
[
path: { "${params.outdir}/QCreports/FastQC/${meta.id}" },
mode: params.publish_dir_mode,
pattern: "*{html,zip}"
]
]
}

// SAMTOOLS_STATS as invoked inside the CRAM_QC_NO_MD subworkflow (fully qualified name).
withName: 'NFCORE_HEISENBIO:HEISENBIO:CRAM_QC_NO_MD:SAMTOOLS_STATS' {
// Enabled unless the comma-separated params.skip_tools list contains 'samtools'.
ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) }
ext.prefix = { "${meta.id}.sorted.cram" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/QCreports/samtools/${meta.id}" },
// saveAs returns null for 'versions.yml' (per Nextflow publishDir semantics, null skips that file).
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// MOSDEPTH module options: extra arguments, output prefix, enable condition and publishing.
withName: 'MOSDEPTH' {
// When params.wes is falsy, pass "-n --fast-mode --by 500"; otherwise pass no extra arguments.
ext.args = { !params.wes ? "-n --fast-mode --by 500" : ""}
// Output prefix suffix is chosen from params.tools / params.skip_tools:
//   tools contains 'sentieon_dedup'        -> "<id>.dedup"
//   skip_tools contains 'markduplicates'   -> "<id>.sorted"
//   otherwise                              -> "<id>.md"
// The sentieon_dedup check takes precedence because it is tested first.
ext.prefix = {
if (params.tools && params.tools.split(',').contains('sentieon_dedup')) {
"${meta.id}.dedup"
} else if (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) {
"${meta.id}.sorted"
} else {
"${meta.id}.md"
}
}
// Enabled unless the comma-separated params.skip_tools list contains 'mosdepth'.
ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('mosdepth')) }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/QCreports/mosdepth/${meta.id}" },
// saveAs returns null for 'versions.yml' (per Nextflow publishDir semantics, null skips that file).
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

// Overrides for the CRAM_QC_RECAL subworkflow. They are only registered when
// params.step is one of alignment / markduplicates / prepare_recalibration /
// recalibrate AND params.skip_tools does not contain 'baserecalibrator'.
if ((params.step == 'alignment' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) {
// Only the prefix is set for this MOSDEPTH invocation in this block.
withName: 'NFCORE_HEISENBIO:HEISENBIO:CRAM_QC_RECAL:MOSDEPTH' {
ext.prefix = { "${meta.id}.recal" }
}

withName: 'NFCORE_HEISENBIO:HEISENBIO:CRAM_QC_RECAL:SAMTOOLS_STATS' {
ext.prefix = { "${meta.id}.recal.cram" }
// Enabled unless the comma-separated params.skip_tools list contains 'samtools'.
ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/QCreports/samtools/${meta.id}" },
// saveAs returns null for 'versions.yml' (per Nextflow publishDir semantics, null skips that file).
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}

withName: CUSTOM_DUMPSOFTWAREVERSIONS {
Expand Down
Loading

0 comments on commit b1e333b

Please sign in to comment.