diff --git a/docs/index.rst b/docs/index.rst
index be63c31..589b96f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,5 +1,5 @@
 Metagenome Assembly Workflow (v1.0.2)
-========================================
+=====================================

 .. image:: workflow_assembly.png
    :scale: 60%
diff --git a/docs/workflow_assembly.png b/docs/workflow_assembly.png
index c555347..dedce08 100644
Binary files a/docs/workflow_assembly.png and b/docs/workflow_assembly.png differ
diff --git a/input.json b/input.json
index 78bdb32..2b3ee69 100755
--- a/input.json
+++ b/input.json
@@ -1,7 +1,7 @@
 {
-  "jgi_metaAssembly.input_files":["https://data.microbiomedata.org/data/test_data/11809.7.220839.TCCTGAG-ACTGCAT.fastq.gz"],
+  "jgi_metaAssembly.input_files":["/global/cfs/cdirs/m3408/www/test_data/SRR13128014.pacbio.subsample.ccs.fastq.gz"],
   "jgi_metaAssembly.proj":"nmdc:503125_160870",
   "jgi_metaAssembly.memory": "105G",
   "jgi_metaAssembly.threads": "16",
-  "jgi_metaAssembly.shortRead": true
+  "jgi_metaAssembly.shortRead": false
 }
diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl
index 3076f92..a18c20b 100644
--- a/jgi_assembly.wdl
+++ b/jgi_assembly.wdl
@@ -3,7 +3,7 @@ import "shortReads_assembly.wdl" as srma
 import "make_interleaved_WDL/make_interleaved_reads.wdl" as int
 import "https://code.jgi.doe.gov/BFoster/jgi_meta_wdl/-/raw/bc7c4371ea0fa83355bada341ec353b9feb3eff2/metagenome_improved/metaflye.wdl" as lrma

-workflow jgi_metaAssembly{
+workflow jgi_metaAssembly {
     input {
         Boolean shortRead
         String proj
@@ -20,62 +20,60 @@ workflow jgi_metaAssembly{
         String minimap2_container = "staphb/minimap2:2.25"
         String minimap2_parameters = "-a -x map-hifi -t 32"
         String samtools_container = "staphb/samtools:1.18"
-        String bbtools_container = "microbiomedata/bbtools:38.96"
+        String bbtools_container = "microbiomedata/bbtools:39.03"
         String spades_container="staphb/spades:4.0.0"
     }

     if (shortRead) {
-        if (length(input_files) > 1){
-            call int.make_interleaved_reads{
-              input:
-                input_files = input_files,
-                container = bbtools_container
-            
+        if (length(input_files) > 1) {
+            call int.make_interleaved_reads {
+                input:
+                    input_files = input_files,
+                    container = "microbiomedata/bbtools:38.96"
             }
         }
-        call srma.jgi_metaASM{
+        call srma.jgi_metaASM {
             input:
-              memory = memory,
-              threads = threads,
-              input_file = if length(input_files) > 1 then make_interleaved_reads.interleaved_fastq else input_files[0],
-              proj = proj,
-              bbtools_container = bbtools_container,
-              spades_container = spades_container
-            
+                memory = memory,
+                threads = threads,
+                input_file = if length(input_files) > 1 then make_interleaved_reads.interleaved_fastq else input_files[0],
+                proj = proj,
+                bbtools_container = "microbiomedata/bbtools:38.96",
+                spades_container = spades_container
         }
     }

     if (!shortRead) {
-        call lrma.metaflye{
+        call lrma.metaflye {
             input:
-              proj = proj,
-              input_fastq = input_files,
-              flye_container = flye_container,
-              flye_parameters = flye_parameters,
-              smrtlink_container = smrtlink_container,
-              racon_container = racon_container,
-              minimap2_container = minimap2_container,
-              minimap2_parameters = minimap2_parameters,
-              samtools_container = samtools_container,
-              bbtools_container = bbtools_container
+                proj = proj,
+                input_fastq = input_files,
+                flye_container = flye_container,
+                flye_parameters = flye_parameters,
+                smrtlink_container = smrtlink_container,
+                racon_container = racon_container,
+                minimap2_container = minimap2_container,
+                minimap2_parameters = minimap2_parameters,
+                samtools_container = samtools_container,
+                bbtools_container = bbtools_container
         }

-        call finish_lrasm{
+        call finish_lrasm {
             input:
-              proj = proj,
-              prefix = prefix,
-              container = bbtools_container,
-              contigs = metaflye.final_contigs,
-              bam = metaflye.final_bam,
-              scaffolds = metaflye.final_scaffolds,
-              agp = metaflye.final_agp,
-              legend = metaflye.final_legend,
-              basecov = metaflye.final_basecov,
-              sam = metaflye.final_sam,
-              output_file = metaflye.final_output_file,
-              stats = metaflye.final_stats,
-              summary_stats = metaflye.final_summary_stats,
-              pileup_out = metaflye.final_pileup_out
+                proj = proj,
+                prefix = prefix,
+                container = bbtools_container,
+                contigs = metaflye.final_contigs,
+                bam = metaflye.final_bam,
+                scaffolds = metaflye.final_scaffolds,
+                agp = metaflye.final_agp,
+                legend = metaflye.final_legend,
+                basecov = metaflye.final_basecov,
+                sam = metaflye.final_sam,
+                output_file = metaflye.final_output_file,
+                stats = metaflye.final_stats,
+                summary_stats = metaflye.final_summary_stats,
+                pileup_out = metaflye.final_pileup_out
         }
     }
     output {
@@ -100,33 +98,34 @@ workflow jgi_metaAssembly{
         File? sr_bam=jgi_metaASM.bam
         File? sr_samgz=jgi_metaASM.samgz
         File? sr_covstats=jgi_metaASM.covstats
-        File? sr_asmstats=jgi_metaASM.asmstats
         File? sr_asminfo=jgi_metaASM.asminfo
         File? sr_bbcms_fq = jgi_metaASM.bbcms_fastq
-        
+
+        #Both
+        File? stats = if (shortRead) then jgi_metaASM.asmstats else finish_lrasm.asm_stats
     }
 }


 task finish_lrasm {
     input {
-      File contigs
-      File bam
-      File scaffolds
-      File agp
-      File legend
-      File basecov
-      File sam
-      File output_file
-      File stats
-      File summary_stats
-      File pileup_out
-      String container
-      String proj
-      String prefix
-      String orig_prefix="scaffold"
-      String sed="s/~{orig_prefix}_/~{proj}_/g"
-      # String start
+        File contigs
+        File bam
+        File scaffolds
+        File agp
+        File legend
+        File basecov
+        File sam
+        File output_file
+        File stats
+        File summary_stats
+        File pileup_out
+        String container
+        String proj
+        String prefix
+        String orig_prefix="scaffold"
+        String sed="s/~{orig_prefix}_/~{proj}_/g"
+        # String start
     }

     command<<<
@@ -144,12 +143,17 @@ task finish_lrasm {
         cat ~{basecov} | sed ~{sed} > ~{prefix}_contigs.sorted.bam.pileup.basecov
         cat ~{pileup_out} | sed ~{sed} > ~{prefix}_contigs.sorted.bam.pileup.out

-      ## Bam file
-      samtools view -h ~{bam} | sed ~{sed} | \
-      samtools view -hb -o ~{prefix}_pairedMapped_sorted.bam
-      ## Sam.gz file
-      samtools view -h ~{sam} | sed ~{sed} | \
-      gzip -c - > ~{prefix}_pairedMapped.sam.gz
+        ## Bam file
+        samtools view -h ~{bam} | sed ~{sed} | \
+        samtools view -hb -o ~{prefix}_pairedMapped_sorted.bam
+        ## Sam.gz file
+        samtools view -h ~{sam} | sed ~{sed} | \
+        gzip -c - > ~{prefix}_pairedMapped.sam.gz
+
+        # stats file (jq writes to a temp file first: redirecting onto its own input would truncate stats.json before it is read)
+        bbstats.sh format=8 in=~{scaffolds} out=stats.json
+        sed -i 's/l_gt50k/l_gt50K/g' stats.json
+        jq 'del(.filename)' stats.json > stats.tmp.json && mv stats.tmp.json stats.json
     >>>

     output {
@@ -164,6 +168,7 @@ task finish_lrasm {
         File final_stats = "~{prefix}_contigs.sam.stats"
         File final_summary_stats = "~{prefix}_summary.stats"
         File final_pileup_out = "~{prefix}_contigs.sorted.bam.pileup.out"
+        File asm_stats = "stats.json"
     }

     runtime {
diff --git a/shortReads_assembly.wdl b/shortReads_assembly.wdl
index 9f7741a..511b845 100755
--- a/shortReads_assembly.wdl
+++ b/shortReads_assembly.wdl
@@ -1,6 +1,6 @@
 version 1.0
 workflow jgi_metaASM {
-    input{
+    input {
         # String? outdir
         String? memory
         String? threads
@@ -11,7 +11,7 @@ workflow jgi_metaASM {
         # Float uniquekmer=1000
         String bbtools_container="microbiomedata/bbtools:38.96"
         String spades_container="staphb/spades:4.0.0"
-        String worflowmeta_container="microbiomedata/workflowmeta:1.1.1"
+        String workflowmeta_container="microbiomedata/workflowmeta:1.1.1"
         Boolean paired = true
     }

@@ -65,7 +65,7 @@ workflow jgi_metaASM {
             proj=proj,
             prefix=prefix,
             # start=stage.start,
-            container=worflowmeta_container,
+            container=workflowmeta_container,
             fasta=create_agp.outcontigs,
             scaffold=create_agp.outscaffolds,
             agp=create_agp.outagp,
@@ -93,7 +93,7 @@ workflow jgi_metaASM {
     #         samgz_name=basename(finish_asm.outcontigs),
     #         covstats_name=basename(finish_asm.outcontigs),
     #         asmstats_name=basename(finish_asm.outcontigs),
-    #         container = worflowmeta_container
+    #         container = workflowmeta_container
     # }

     output {
@@ -117,13 +117,14 @@ workflow jgi_metaASM {
 }

 task stage {
-    input{
+    input {
         String container
         String? input_file
         String memory = "4G"
         String target = "staged.fastq.gz"
         String output1 = "input.left.fastq.gz"
-        String output2 = "input.right.fastq.gz"}
+        String output2 = "input.right.fastq.gz"
+    }

     command <<<
         set -euo pipefail
@@ -143,7 +144,7 @@ task stage {

     >>>

-    output{
+    output {
         Array[File] assembly_input = [output1, output2]
         String start = read_string("start.txt")
     }
@@ -187,7 +188,7 @@ task make_info_file {
 }

 task finish_asm {
-    input{
+    input {
         File fasta
         File scaffold
         File? agp
@@ -203,6 +204,7 @@ task finish_asm {
         String sed="s/~{orig_prefix}_/~{proj}_/g"
         # String start
     }
+
     command<<<
         set -euo pipefail

@@ -250,7 +252,7 @@ task finish_asm {


 task read_mapping_pairs{
-    input{
+    input {
         Array[File] reads
         File ref
         String container
@@ -274,7 +276,7 @@ task read_mapping_pairs{
         cpu: 16
         maxRetries: 1
     }
-    command{
+    command<<<
         set -euo pipefail
         if [[ ~{reads[0]} == *.gz ]] ; then
             cat ~{sep=" " reads} > infile.fastq.gz
@@ -314,8 +316,9 @@ task read_mapping_pairs{

         ln -s ~{filename_cov} mapping_stats.txt
         rm $mapping_input
-    }
-    output{
+
+    >>>
+    output {
         File outbamfile = filename_sorted
         File outbamfileidx = filename_sorted_idx
         File outcovfile = filename_cov
@@ -324,7 +327,7 @@ task read_mapping_pairs{
 }

 task create_agp {
-    input{
+    input {
         File scaffolds_in
         String? memory
         String container
@@ -340,7 +343,7 @@ task create_agp {
         memory: "120 GiB"
         cpu: 16
     }
-    command{
+    command<<<
         set -euo pipefail
         fungalrelease.sh \
         ~{if (defined(memory)) then "-Xmx" + memory else "-Xmx105G" } \
@@ -361,8 +364,10 @@ task create_agp {
         fi
         bbstats.sh format=8 in=~{filename_scaffolds} out=stats.json
         sed -i 's/l_gt50k/l_gt50K/g' stats.json
-    }
-    output{
+
+    >>>
+
+    output {
         File outcontigs = filename_contigs
         File outscaffolds = filename_scaffolds
         File outagp = filename_agp
@@ -385,11 +390,11 @@ task assy {
         Boolean paired = true
     }
     runtime {
-      docker: container
-      memory: "120 GiB"
-      cpu: 16
+        docker: container
+        memory: "120 GiB"
+        cpu: 16
     }
-    command{
+    command <<<
         set -euo pipefail
         if ~{paired}; then
             spades.py \
@@ -410,7 +415,8 @@ task assy {
             -t ~{spades_cpu} \
             -s ~{infile1}
         fi
-    }
+    >>>
+
     output {
         File out = filename_outfile
         File outlog = filename_spadeslog
@@ -433,12 +439,12 @@ task bbcms {
         String filename_counts="counts.metadata.json"
     }
     runtime {
-      docker: container
-      memory: "120 GiB"
+        docker: container
+        memory: "120 GiB"
         cpu: 16
     }

-    command {
+    command<<<
         set -euo pipefail
         if file --mime -b ~{input_files[0]} | grep gzip; then
             cat ~{sep=" " input_files} > infile.fastq.gz
@@ -478,7 +484,8 @@ task bbcms {

         rm $bbcms_input

-    }
+    >>>
+
     output {
         File out = filename_outfile
         File out1 = if paired then filename_outfile1 else filename_outfile
@@ -533,5 +540,4 @@ task bbcms {
 #         File? outcovstats = "~{outdir}/~{covstats_name}"
 #         File? outasmstats = "~{outdir}/~{asmstats_name}"
 #     }
-# }
-
+# }
\ No newline at end of file
diff --git a/version.txt b/version.txt
index 7969702..2148fba 100644
--- a/version.txt
+++ b/version.txt
@@ -1,3 +1,3 @@
-v1.0.6
+v1.0.7