diff --git a/conf/conda.config b/conf/conda.config
index e6a5678..1d6f5ee 100644
--- a/conf/conda.config
+++ b/conf/conda.config
@@ -6,4 +6,7 @@ process {
     withName: mafstats {
         conda = "$projectDir/assets/maf-environment.yml"
     }
+    withName: maf2mfa {
+        conda = "$projectDir/assets/maf-environment.yml"
+    }
 }
diff --git a/main.nf b/main.nf
index e20c82d..2d48ec8 100644
--- a/main.nf
+++ b/main.nf
@@ -117,6 +117,7 @@ include {PREPROC} from './modules/subworkflows/preprocess' params(params)
 include {LIFTOVER} from './modules/subworkflows/liftover' params(params)
 include {DATA} from './modules/subworkflows/data' params(params)
 include {make_report} from './modules/processes/postprocess' params(params)
+include {maf2mfa; mfa2vcf} from "./modules/processes/postprocess" params(params)
 workflow {
     DATA()
     ch_source = DATA.out.ch_source
@@ -134,5 +135,10 @@ workflow {
     if (params.mafTools || params.annotation || workflow.containerEngine){
         rmd = Channel.fromPath("${baseDir}/assets/gatherMetrics.Rmd")
         make_report(ALIGNER.out.mafs, ALIGNER.out.mafc, ALIGNER.out.mafi, liftstats, rmd)
+    }
+
+    // Make VCF file
+    if (params.vcf){
+        maf2mfa(ALIGNER.out.maf, ch_source, ch_target) | mfa2vcf
     }
 }
diff --git a/modules/processes/postprocess.nf b/modules/processes/postprocess.nf
index c10a70b..7f77ba9 100644
--- a/modules/processes/postprocess.nf
+++ b/modules/processes/postprocess.nf
@@ -354,6 +354,60 @@ process mafstats {
     """
 }
 
+// Stitch the final MAF alignment into a multi-FASTA file with mafToFastaStitcher
+process maf2mfa {
+    tag "maf2mfa"
+    publishDir "${params.outdir}/maf", mode: 'copy', overwrite: true
+    label 'medium'
+
+    input:
+    path final_maf
+    val sourceFa
+    val targetFa
+
+    output:
+    path "${final_maf.baseName}.mfa"
+
+    stub:
+    """
+    touch ${final_maf.baseName}.mfa
+    """
+
+    script:
+    """
+    mafToFastaStitcher --maf ${final_maf} --seqs ${sourceFa},${targetFa} --breakpointPenalty 5 --interstitialSequence 20 --outMfa ${final_maf.baseName}.mfa
+    """
+}
+
+// Convert the stitched multi-FASTA into a bgzip-compressed, tabix-indexed VCF
+process mfa2vcf {
+    tag "mfa2vcf"
+    publishDir "${params.outdir}/vcf", mode: 'copy', overwrite: true
+    label 'medium'
+    conda "bioconda::ucsc-fatovcf bioconda::tabix"
+
+    input:
+    path mfa
+
+    output:
+    path "${mfa.baseName}.vcf.gz"
+    path "${mfa.baseName}.vcf.gz.tbi"
+
+    stub:
+    """
+    touch ${mfa.baseName}.vcf.gz
+    touch ${mfa.baseName}.vcf.gz.tbi
+    """
+
+    script:
+    """
+    faToVcf ${mfa} ${mfa.baseName}.vcf
+    bgzip ${mfa.baseName}.vcf
+    tabix -p vcf ${mfa.baseName}.vcf.gz
+    """
+}
+
+
 // Liftover functions
 process liftover{
     tag "liftover"
diff --git a/modules/subworkflows/GSAlign.nf b/modules/subworkflows/GSAlign.nf
index 3d69b3d..9ac459d 100644
--- a/modules/subworkflows/GSAlign.nf
+++ b/modules/subworkflows/GSAlign.nf
@@ -65,10 +65,10 @@ workflow GSALIGN {
         net_ch = netSynt.out
     }
     chainsubset(net_ch, chainMerge.out)
-    if(!params.no_maf){
+    if(!params.no_maf || params.vcf){
         chain2maf( chainsubset.out[0], twoBitS, twoBitT, twoBitSN, twoBitTN )
-        name_maf_seq( chain2maf.out )
-        mafstats( name_maf_seq.out, ch_source.simpleName, ch_target.simpleName )
+        maf = name_maf_seq( chain2maf.out )
+        mafstats( maf, ch_source.simpleName, ch_target.simpleName )
         mafs = mafstats.out[0]
         mafc = mafstats.out[1]
         mafi = mafstats.out[2]
@@ -84,4 +84,5 @@ workflow GSALIGN {
         mafs
         mafc
         mafi
+        maf
 }
diff --git a/modules/subworkflows/blat.nf b/modules/subworkflows/blat.nf
index 59d13f7..1922997 100644
--- a/modules/subworkflows/blat.nf
+++ b/modules/subworkflows/blat.nf
@@ -69,10 +69,10 @@ workflow BLAT {
         net_ch = netSynt.out
     }
     chainsubset(net_ch, chainMerge.out)
-    if(!params.no_maf){
+    if(!params.no_maf || params.vcf){
         chain2maf( chainsubset.out[0], twoBitS, twoBitT, twoBitSN, twoBitTN )
-        name_maf_seq( chain2maf.out )
-        mafstats( name_maf_seq.out, ch_source.simpleName, ch_target.simpleName )
+        maf = name_maf_seq( chain2maf.out )
+        mafstats( maf, ch_source.simpleName, ch_target.simpleName )
         mafs = mafstats.out[0]
         mafc = mafstats.out[1]
         mafi = mafstats.out[2]
@@ -88,4 +88,5 @@ workflow BLAT {
         mafs
         mafc
         mafi
+        maf
 }
diff --git a/modules/subworkflows/lastz.nf b/modules/subworkflows/lastz.nf
index 5d22992..5b66103 100644
--- a/modules/subworkflows/lastz.nf
+++ b/modules/subworkflows/lastz.nf
@@ -77,10 +77,10 @@ workflow LASTZ {
         net_ch = netSynt.out
     }
     chainsubset(net_ch, chainMerge.out)
-    if(!params.no_maf){
+    if(!params.no_maf || params.vcf){
         chain2maf( chainsubset.out[0], twoBitS, twoBitT, twoBitSN, twoBitTN )
-        name_maf_seq( chain2maf.out )
-        mafstats( name_maf_seq.out, ch_source.simpleName, ch_target.simpleName )
+        maf = name_maf_seq( chain2maf.out )
+        mafstats( maf, ch_source.simpleName, ch_target.simpleName )
         mafs = mafstats.out[0]
         mafc = mafstats.out[1]
         mafi = mafstats.out[2]
@@ -96,4 +96,5 @@ workflow LASTZ {
         mafs
         mafc
         mafi
+        maf
 }
diff --git a/modules/subworkflows/minimap2.nf b/modules/subworkflows/minimap2.nf
index a25ad8d..47ceb7b 100644
--- a/modules/subworkflows/minimap2.nf
+++ b/modules/subworkflows/minimap2.nf
@@ -59,10 +59,10 @@ workflow MINIMAP2 {
         net_ch = netSynt.out
     }
     chainsubset(net_ch, chainMerge.out)
-    if(!params.no_maf){
+    if(!params.no_maf || params.vcf){
         chain2maf( chainsubset.out[0], twoBitS, twoBitT, twoBitSN, twoBitTN )
-        name_maf_seq( chain2maf.out )
-        mafstats( name_maf_seq.out, ch_source.simpleName, ch_target.simpleName )
+        maf = name_maf_seq( chain2maf.out )
+        mafstats( maf, ch_source.simpleName, ch_target.simpleName )
         mafs = mafstats.out[0]
         mafc = mafstats.out[1]
         mafi = mafstats.out[2]
@@ -78,4 +78,5 @@ workflow MINIMAP2 {
         mafs
         mafc
         mafi
+        maf
 }
diff --git a/nextflow.config b/nextflow.config
index 8cac66b..c6b4022 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -34,6 +34,7 @@ params {
     igenomes_base = 's3://ngi-igenomes/igenomes/'
     igenomes_ignore = false
     no_maf = false
+    vcf = false
     no_netsynt = false
     mafTools = null
     reciprocal_best = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index f0e8f58..f52defd 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -22,36 +22,36 @@
         },
         "source": {
           "type": "string",
-          "default": "null"
+          "default": null
         },
         "target": {
           "type": "string",
-          "default": "null"
+          "default": null
         },
         "ncbi_source": {
           "type": "boolean",
-          "default": "false"
+          "default": false
         },
         "ncbi_target": {
           "type": "boolean",
-          "default": "false"
+          "default": false
         },
         "igenomes_source": {
           "type": "boolean",
-          "default": "false"
+          "default": false
         },
         "igenomes_target": {
           "type": "boolean",
-          "default": "false"
+          "default": false
         },
         "annotation": {
           "type": "string",
-          "default": "null"
+          "default": null
         },
         "annotation_format": {
           "type": "string",
-          "default": "null",
-          "enum": ["null", "gff", "bed", "gtf", "vcf", "bam", "maf"]
+          "default": null,
+          "enum": [null, "gff", "bed", "gtf", "vcf", "bam", "maf"]
         }
       }
     },
@@ -93,11 +93,11 @@
         },
         "qscores": {
           "type": "string",
-          "default": "null"
+          "default": null
         },
         "custom": {
           "type": "string",
-          "default": "null"
+          "default": null
         }
       }
     },
@@ -109,7 +109,7 @@
       "properties": {
         "chainCustom": {
           "type": "string",
-          "default": "null"
+          "default": null
         },
         "chain_name": {
           "type": "string",
@@ -117,7 +117,7 @@
         },
         "no_netsynt": {
           "type": "boolean",
-          "default": "false"
+          "default": false
         }
       }
     },
@@ -146,11 +146,15 @@
         },
         "no_maf": {
           "type": "boolean",
-          "default": "false"
+          "default": false
+        },
+        "vcf": {
+          "type": "boolean",
+          "default": false
         },
         "mafTools": {
           "type": "string",
-          "default": "null"
+          "default": null
         }
       }
     },
@@ -252,7 +256,7 @@
         "mamba": {
           "description": "Use mamba instead of conda to create the anaconda environment.",
           "type": "boolean",
-          "default": "false"
+          "default": false
         },
         "extra_cluster_options": {
           "description": "Additional cluster options to be used; valid in some clusters only.",