From 3eab77927a93d8444996a8b017af6d19f1086aaf Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Fri, 2 Dec 2022 13:07:42 -0800 Subject: [PATCH 01/13] remove assemdir artifact --- jgi_assembly.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index 4fbaf65..3382d1b 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -135,10 +135,10 @@ task finish_asm { # ln ${agp} ${prefix}_assembly.agp ##RE-ID - cat ${fasta} | sed ${sed} > ${assemdir}/${prefix}_contigs.fna - cat ${scaffold} | sed ${sed} > ${assemdir}/${prefix}_scaffolds.fna - cat ${covstats} | sed ${sed} > ${assemdir}/${prefix}_covstats.txt - cat ${agp} | sed ${sed} > ${assemdir}/${prefix}_assembly.agp + cat ${fasta} | sed ${sed} > {prefix}_contigs.fna + cat ${scaffold} | sed ${sed} > ${prefix}_scaffolds.fna + cat ${covstats} | sed ${sed} > ${prefix}_covstats.txt + cat ${agp} | sed ${sed} > ${prefix}_assembly.agp ## Bam file samtools view -h ${bam} | sed ${sed} | \ From 85ca52b918eb2868d4724f84122b5be16dd40ed3 Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Wed, 7 Dec 2022 12:21:26 -0800 Subject: [PATCH 02/13] change origin of input for finish_asm --- jgi_assembly.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index 3382d1b..3184709 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -43,7 +43,7 @@ workflow jgi_metaASM { container="microbiomedata/workflowmeta:1.1.0", informed_by=informed_by, resource=resource, - input_file=stage.assembly_input, + input_file=input_file, fasta=create_agp.outcontigs, scaffold=create_agp.outscaffolds, agp=create_agp.outagp, From cfec1c4d5f6e849f2cad4255982e01673e9cf17b Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Wed, 7 Dec 2022 12:54:14 -0800 Subject: [PATCH 03/13] add url inline for finish_asm --- jgi_assembly.wdl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index 
3184709..c3d01de 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -161,15 +161,15 @@ task finish_asm { ended_at_time=$end \ execution_resource=${resource} \ git_url=${git_url} \ - --url ${url_root}${proj}/assembly/ \ - --extra stats.json \ - --inputs ${input_file} \ - --outputs \ - ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs"\ - ${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds"\ - ${prefix}_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats"\ - ${prefix}_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP"\ - ${prefix}_pairedMapped_sorted.bam "Sorted bam file of reads mapping back to the final assembly" "Assembly Coverage BAM" + --url ${url_root}${proj}/assembly/ \ + --extra stats.json \ + --inputs ${input_file} \ + --outputs \ + ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs"\ + ${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds"\ + ${prefix}_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats"\ + ${prefix}_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP"\ + ${prefix}_pairedMapped_sorted.bam "Sorted bam file of reads mapping back to the final assembly" "Assembly Coverage BAM" >>> output { From 2240c3c805ce913079e0be86eb106c3d778d96a7 Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Wed, 7 Dec 2022 13:16:02 -0800 Subject: [PATCH 04/13] fix indentations --- jgi_assembly.wdl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index c3d01de..4cd9764 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -152,19 +152,20 @@ task finish_asm { /scripts/generate_object_json.py \ - --type "nmdc:MetagenomeAssembly" \ - --set metagenome_assembly_set \ - --part ${proj} \ + --type "nmdc:MetagenomeAssembly" \ + --set metagenome_assembly_set \ + --part ${proj} \ -p "name=Metagenome Assembly Activity 
for ${proj}" \ was_informed_by=${informed_by} \ started_at_time=${start} \ ended_at_time=$end \ execution_resource=${resource} \ git_url=${git_url} \ - --url ${url_root}${proj}/assembly/ \ - --extra stats.json \ - --inputs ${input_file} \ - --outputs \ + + --url ${url_root}${proj}/qa/ \ + --extra stats.json \ + --inputs ${input_file} \ + --outputs \ ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs"\ ${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds"\ ${prefix}_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats"\ From 3237049809183e60890fe85eae7497718afb8d9e Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Wed, 7 Dec 2022 13:17:28 -0800 Subject: [PATCH 05/13] fix indentation for outputs --- jgi_assembly.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index 4cd9764..58f8cca 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -166,11 +166,11 @@ task finish_asm { --extra stats.json \ --inputs ${input_file} \ --outputs \ - ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs"\ - ${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds"\ - ${prefix}_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats"\ - ${prefix}_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP"\ - ${prefix}_pairedMapped_sorted.bam "Sorted bam file of reads mapping back to the final assembly" "Assembly Coverage BAM" + ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs"\ + ${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds"\ + ${prefix}_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats"\ + ${prefix}_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP"\ + ${prefix}_pairedMapped_sorted.bam "Sorted bam file of reads mapping back to the final assembly" "Assembly Coverage 
BAM" >>> output { From 53003d70f65a2a44a98aa157653fc9d1651c5f64 Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Thu, 8 Dec 2022 06:48:25 -0800 Subject: [PATCH 06/13] add version to workflow --- jgi_assembly.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index 58f8cca..fec2d8d 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -161,7 +161,7 @@ task finish_asm { ended_at_time=$end \ execution_resource=${resource} \ git_url=${git_url} \ - + version="v1.0.3-beta" \ --url ${url_root}${proj}/qa/ \ --extra stats.json \ --inputs ${input_file} \ From e3b00e5a8a08830d43d746f81e3c5a6e25e1767b Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Thu, 8 Dec 2022 07:13:37 -0800 Subject: [PATCH 07/13] fix prefix reference --- jgi_assembly.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index fec2d8d..aa8d07c 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -135,7 +135,7 @@ task finish_asm { # ln ${agp} ${prefix}_assembly.agp ##RE-ID - cat ${fasta} | sed ${sed} > {prefix}_contigs.fna + cat ${fasta} | sed ${sed} > ${prefix}_contigs.fna cat ${scaffold} | sed ${sed} > ${prefix}_scaffolds.fna cat ${covstats} | sed ${sed} > ${prefix}_covstats.txt cat ${agp} | sed ${sed} > ${prefix}_assembly.agp @@ -162,14 +162,14 @@ task finish_asm { execution_resource=${resource} \ git_url=${git_url} \ version="v1.0.3-beta" \ - --url ${url_root}${proj}/qa/ \ + --url ${url_root}${proj}/assembly/ \ --extra stats.json \ --inputs ${input_file} \ --outputs \ - ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs"\ - ${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds"\ - ${prefix}_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats"\ - ${prefix}_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP"\ + ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs" \ +
${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds" \ + ${prefix}_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats" \ + ${prefix}_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP" \ ${prefix}_pairedMapped_sorted.bam "Sorted bam file of reads mapping back to the final assembly" "Assembly Coverage BAM" >>> From 2e2db28992ee465cf229352f306031ebe0652738 Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Thu, 8 Dec 2022 07:58:06 -0800 Subject: [PATCH 08/13] update metaworkflow version --- jgi_assembly.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index aa8d07c..d737724 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -40,10 +40,10 @@ workflow jgi_metaASM { start=stage.start, git_url=git_url, url_root=url_root, - container="microbiomedata/workflowmeta:1.1.0", + container="microbiomedata/workflowmeta:1.1.1", informed_by=informed_by, resource=resource, - input_file=input_file, + input_file=stage.assembly_input, fasta=create_agp.outcontigs, scaffold=create_agp.outscaffolds, agp=create_agp.outagp, @@ -106,7 +106,7 @@ task stage { } task finish_asm { - String input_file + Array[File] input_file File fasta File scaffold File? 
agp @@ -153,7 +153,7 @@ task finish_asm { /scripts/generate_object_json.py \ --type "nmdc:MetagenomeAssembly" \ - --set metagenome_assembly_set \ + --set metagenome_assembly_set \ --part ${proj} \ -p "name=Metagenome Assembly Activity for ${proj}" \ was_informed_by=${informed_by} \ @@ -164,7 +164,7 @@ task finish_asm { version="v1.0.3-beta" \ --url ${url_root}${proj}/assembly/ \ --extra stats.json \ - --inputs ${input_file} \ + --inputs ${input_file[0]} ${input_file[1]} \ --outputs \ ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs" \ ${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds" \ From 4997dcef3c1ca88b781c93cf13bf0d29fa17c8ee Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Thu, 8 Dec 2022 08:15:21 -0800 Subject: [PATCH 09/13] add project description --- jgi_assembly.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index d737724..7ac3636 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -166,11 +166,11 @@ task finish_asm { --extra stats.json \ --inputs ${input_file[0]} ${input_file[1]} \ --outputs \ - ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs" \ - ${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds" \ - ${prefix}_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats" \ - ${prefix}_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP" \ - ${prefix}_pairedMapped_sorted.bam "Sorted bam file of reads mapping back to the final assembly" "Assembly Coverage BAM" + nmdc_mgasm0xxxx.1_contigs.fna "Final assembly contigs fasta" "Assembly Contigs" "Assembly contigs for ${proj}" \ + nmdc_mgasm0xxxx.1_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds" "Assembly scaffolds for ${proj}" \ + nmdc_mgasm0xxxx.1_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats" "Coverage Stats for ${proj}" \ + 
nmdc_mgasm0xxxx.1_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP" "AGP for ${proj}" \ + nmdc_mgasm0xxxx.1_pairedMapped_sorted.bam "Sorted bam file of reads mapping back to the final assembly" "Assembly Coverage BAM" "Sorted Bam for ${proj}" >>> output { From 0b97f24193fdd30836d92d98d0b8af52f2fbdb38 Mon Sep 17 00:00:00 2001 From: Shane Canon Date: Thu, 8 Dec 2022 08:17:39 -0800 Subject: [PATCH 10/13] add prefix back --- jgi_assembly.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index 7ac3636..94d80fa 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -166,11 +166,11 @@ task finish_asm { --extra stats.json \ --inputs ${input_file[0]} ${input_file[1]} \ --outputs \ - nmdc_mgasm0xxxx.1_contigs.fna "Final assembly contigs fasta" "Assembly Contigs" "Assembly contigs for ${proj}" \ - nmdc_mgasm0xxxx.1_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds" "Assembly scaffolds for ${proj}" \ - nmdc_mgasm0xxxx.1_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats" "Coverage Stats for ${proj}" \ - nmdc_mgasm0xxxx.1_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP" "AGP for ${proj}" \ - nmdc_mgasm0xxxx.1_pairedMapped_sorted.bam "Sorted bam file of reads mapping back to the final assembly" "Assembly Coverage BAM" "Sorted Bam for ${proj}" + ${prefix}_contigs.fna "Final assembly contigs fasta" "Assembly Contigs" "Assembly contigs for ${proj}" \ + ${prefix}_scaffolds.fna "Final assembly scaffolds fasta" "Assembly Scaffolds" "Assembly scaffolds for ${proj}" \ + ${prefix}_covstats.txt "Assembled contigs coverage information" "Assembly Coverage Stats" "Coverage Stats for ${proj}" \ + ${prefix}_assembly.agp "An AGP format file that describes the assembly" "Assembly AGP" "AGP for ${proj}" \ + ${prefix}_pairedMapped_sorted.bam "Sorted bam file of reads mapping back to the final assembly" "Assembly Coverage BAM" 
"Sorted Bam for ${proj}" >>> output { From 95ae1f9ce5347908c5e89ac0ad84d795642ff698 Mon Sep 17 00:00:00 2001 From: Chienchi Lo Date: Fri, 27 Jan 2023 17:03:31 -0700 Subject: [PATCH 11/13] add make_info_file task --- jgi_assembly.wdl | 112 +++++++++++++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 38 deletions(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index 94d80fa..0abd4b3 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -34,6 +34,9 @@ workflow jgi_metaASM { input: reads=stage.assembly_input, ref=create_agp.outcontigs, container=bbtools_container, memory=memory, threads=threads, paired = paired } + call make_info_file { + input: bbcms_info= bbcms.outcounts, assy_info = assy.outlog, container=bbtools_container, proj=proj + } call finish_asm { input: proj=proj, @@ -62,6 +65,7 @@ workflow jgi_metaASM { File covstats=finish_asm.outcovstats File asmstats=finish_asm.outasmstats File objects=finish_asm.objects + File asminfo=make_info_file.asminfo } meta { @@ -105,6 +109,36 @@ task stage { } } +task make_info_file { + File assy_info + File bbcms_info + String proj + String prefix=sub(proj, ":", "_") + String container + + command<<< + bbtools_version=`grep BBToolsVer ${bbcms_info}| awk '{print $2}' | sed -e 's/"//g' -e 's/,//' ` + spades_version=`grep 'SPAdes version' ${assy_info} | awk '{print $3}'` + echo -e "The workflow takes paired-end reads runs error correction by bbcms.sh (BBTools(1) version $bbtools_version)." 
> ${prefix}_metaAsm.info + echo -e "The clean reads are assembled by metaSpades(2) version $spades_version with parameters, --only-assembler -k 33,55,77,99,127 --meta" >> ${prefix}_metaAsm.info + echo -e "After assembly, Contigs and Scaffolds are consumed by the *create_agp* task to rename the FASTA header and generate an AGP format (https://www.ncbi.nlm.nih.gov/assembly/agp/AGP_Specification/) file which describes the assembly" + echo -e "In the end, the reads are mapped back to contigs by bbmap (BBTools(1) version $bbtools_version) for coverage information." >> ${prefix}_metaAsm.info + + echo -e "\n(1) B. Bushnell: BBTools software package, http://bbtools.jgi.doe.gov/" >> ${prefix}_metaAsm.info + echo -e "(2) Nurk S, Meleshko D, Korobeynikov A, Pevzner PA. metaSPAdes: a new versatile metagenomic assembler. Genome Res. 2017 May;27(5):824-834." >> ${prefix}_metaAsm.info + >>> + + output { + File asminfo = "${prefix}_metaAsm.info" + } + runtime { + memory: "1 GiB" + cpu: 1 + maxRetries: 1 + docker: container + } +} + task finish_asm { Array[File] input_file File fasta @@ -174,13 +208,13 @@ task finish_asm { >>> output { - File outcontigs = "${prefix}_contigs.fna" - File outscaffolds = "${prefix}_scaffolds.fna" + File outcontigs = "${prefix}_contigs.fna" + File outscaffolds = "${prefix}_scaffolds.fna" File outagp = "${prefix}_assembly.agp" - File outbam = "${prefix}_pairedMapped_sorted.bam" - File outsamgz = "${prefix}_pairedMapped.sam.gz" - File outcovstats = "${prefix}_covstats.txt" - File outasmstats = "stats.json" + File outbam = "${prefix}_pairedMapped_sorted.bam" + File outsamgz = "${prefix}_pairedMapped.sam.gz" + File outcovstats = "${prefix}_covstats.txt" + File outasmstats = "stats.json" File objects = "objects.json" } @@ -209,28 +243,28 @@ task make_output{ String asmstats_name=basename(contigs) String container - command{ - if [ ! 
-z ${outdir} ]; then - mkdir -p ${outdir} - cp ${contigs} ${scaffolds} ${agp} ${bam} \ - ${samgz} ${covstats} ${asmstats} ${outdir} - chmod 764 -R ${outdir} - fi - } - runtime { + command{ + if [ ! -z ${outdir} ]; then + mkdir -p ${outdir} + cp ${contigs} ${scaffolds} ${agp} ${bam} \ + ${samgz} ${covstats} ${asmstats} ${outdir} + chmod 764 -R ${outdir} + fi + } + runtime { docker: container - memory: "1 GiB" - cpu: 1 - } - output{ - File? outcontigs = "${outdir}/${contigs_name}" - File? outscaffolds = "${outdir}/${scaffolds_name}" - File? outagp = "${outdir}/${agp_name}" - File? outbam = "${outdir}/${bam_name}" - File? outsamgz = "${outdir}/${samgz_name}" - File? outcovstats = "${outdir}/${covstats_name}" - File? outasmstats = "${outdir}/${asmstats_name}" - } + memory: "1 GiB" + cpu: 1 + } + output{ + File? outcontigs = "${outdir}/${contigs_name}" + File? outscaffolds = "${outdir}/${scaffolds_name}" + File? outagp = "${outdir}/${agp_name}" + File? outbam = "${outdir}/${bam_name}" + File? outsamgz = "${outdir}/${samgz_name}" + File? outcovstats = "${outdir}/${covstats_name}" + File? 
outasmstats = "${outdir}/${asmstats_name}" + } } task read_mapping_pairs{ @@ -253,8 +287,8 @@ task read_mapping_pairs{ runtime { docker: container memory: "120 GiB" - cpu: 16 - maxRetries: 1 + cpu: 16 + maxRetries: 1 } command{ set -eo pipefail @@ -270,7 +304,7 @@ task read_mapping_pairs{ samtools sort -m100M -@ ${jvm_threads} ${filename_unsorted} -o ${filename_sorted} samtools index ${filename_sorted} reformat.sh -Xmx${default="105G" memory} in=${filename_unsorted} out=${filename_outsam} overwrite=true - ln ${filename_cov} mapping_stats.txt + ln ${filename_cov} mapping_stats.txt rm $mapping_input } output{ @@ -294,7 +328,7 @@ task create_agp { runtime { docker: container memory: "120 GiB" - cpu: 16 + cpu: 16 } command{ fungalrelease.sh -Xmx${default="105G" memory} in=${scaffolds_in} out=${filename_scaffolds} outc=${filename_contigs} agp=${filename_agp} legend=${filename_legend} mincontig=200 minscaf=200 sortscaffolds=t sortcontigs=t overwrite=t @@ -305,10 +339,10 @@ task create_agp { sed -i 's/l_gt50k/l_gt50K/g' stats.json } output{ - File outcontigs = filename_contigs - File outscaffolds = filename_scaffolds - File outagp = filename_agp - File outstats = "stats.json" + File outcontigs = filename_contigs + File outscaffolds = filename_scaffolds + File outagp = filename_agp + File outstats = "stats.json" File outlegend = filename_legend } } @@ -327,7 +361,7 @@ task assy { runtime { docker: container memory: "120 GiB" - cpu: 16 + cpu: 16 } command{ set -eo pipefail @@ -360,7 +394,7 @@ task bbcms { runtime { docker: container memory: "120 GiB" - cpu: 16 + cpu: 16 } command { @@ -369,7 +403,7 @@ task bbcms { cat ${sep=" " input_files} > infile.fastq.gz export bbcms_input="infile.fastq.gz" fi - if file --mime -b ${input_files[0]} | grep plain; then + if file --mime -b ${input_files[0]} | grep plain; then cat ${sep=" " input_files} > infile.fastq export bbcms_input="infile.fastq" fi @@ -379,6 +413,7 @@ task bbcms { fi readlength.sh -Xmx${default="105G" memory} 
in=${filename_outfile} out=${filename_readlen} rm $bbcms_input + } output { File out = filename_outfile @@ -389,6 +424,7 @@ task bbcms { File stderr = filename_errlog File outcounts = filename_counts File outkmer = filename_kmerfile + } } From 2e334d28a3ba432ec153bd4e879a45828e792ef4 Mon Sep 17 00:00:00 2001 From: Chienchi Lo Date: Fri, 27 Jan 2023 17:19:22 -0700 Subject: [PATCH 12/13] update README --- README.md | 23 ++++++++++------------- input.json | 9 +++++---- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 8c605a4..f575a0d 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Description of the files: ## The Docker image and Dockerfile can be found here -[microbiomedata/bbtools:38.94](https://hub.docker.com/r/microbiomedata/bbtools) +[microbiomedata/bbtools:38.96](https://hub.docker.com/r/microbiomedata/bbtools) [microbiomedata/spades:3.15.0](https://hub.docker.com/r/microbiomedata/spades) @@ -24,26 +24,23 @@ Description of the files: 2. contig prefix for fasta header -3. output path +3. project name -4. input_interleaved (boolean) +4. resource where run the workflow -5. forwards reads fastq file (required value when input_interleaved is false, otherwise use [] ) +5. informed_by -6. reverse reads fastq file (required value when input_interleaved is false, otherwise use [] ) +6. memory (optional) ex: "jgi_metaASM.memory": "105G" -7. memory (optional) ex: "jgi_metaASM.memory": "105G" - -8. threads (optional) ex: "jgi_metaASM.threads": "16" +7. 
threads (optional) ex: "jgi_metaASM.threads": "16" ``` { - "jgi_metaASM.input_file":["/global/cfs/projectdirs/m3408/ficus/11809.7.220839.TCCTGAG-ACTGCAT.fastq.gz"], + "jgi_metaASM.input_file":"/global/cfs/projectdirs/m3408/ficus/11809.7.220839.TCCTGAG-ACTGCAT.fastq.gz", "jgi_metaASM.rename_contig_prefix":"503125_160870", - "jgi_metaASM.outdir":"/global/cfs/projectdirs/m3408/aim2/metagenome/assembly/ficus/503125_160870", - "jgi_metaASM.input_interleaved":true, - "jgi_metaASM.input_fq1":[], - "jgi_metaASM.input_fq2":[], + "jgi_metaASM.proj":"nmdc:503125_160870", + "jgi_metaASM.resource": "NERSC -- perlmutter", + "jgi_metaASM.informed_by": "nmdc:xxxxxx", "jgi_metaASM.memory": "105G", "jgi_metaASM.threads": "16" } diff --git a/input.json b/input.json index 8ea0673..f291ba3 100755 --- a/input.json +++ b/input.json @@ -1,8 +1,9 @@ { "jgi_metaASM.input_file":["/global/cfs/projectdirs/m3408/ficus/11809.7.220839.TCCTGAG-ACTGCAT.fastq.gz"], "jgi_metaASM.rename_contig_prefix":"503125_160870", - "jgi_metaASM.input_interleaved":true, - "jgi_metaASM.input_fq1":[], - "jgi_metaASM.input_fq2":[], - "jgi_metaASM.outdir":"/global/cfs/projectdirs/m3408/aim2/metagenome/assembly/ficus/503125_160870" + "jgi_metaASM.proj":"nmdc:503125_160870", + "jgi_metaASM.resource": "NERSC -- perlmutter", + "jgi_metaASM.informed_by": "nmdc:xxxxxx", + "jgi_metaASM.memory": "105G", + "jgi_metaASM.threads": "16" } From 272967486ad1ac78d496a98c9d65d20a554fa4a7 Mon Sep 17 00:00:00 2001 From: Chienchi Lo Date: Fri, 27 Jan 2023 17:28:59 -0700 Subject: [PATCH 13/13] fix missing make_info file direct --- jgi_assembly.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jgi_assembly.wdl b/jgi_assembly.wdl index 0abd4b3..ac14ce9 100755 --- a/jgi_assembly.wdl +++ b/jgi_assembly.wdl @@ -121,7 +121,7 @@ task make_info_file { spades_version=`grep 'SPAdes version' ${assy_info} | awk '{print $3}'` echo -e "The workflow takes paired-end reads runs error correction by bbcms.sh (BBTools(1) version 
$bbtools_version)." > ${prefix}_metaAsm.info echo -e "The clean reads are assembled by metaSpades(2) version $spades_version with parameters, --only-assembler -k 33,55,77,99,127 --meta" >> ${prefix}_metaAsm.info - echo -e "After assembly, Contigs and Scaffolds are consumed by the *create_agp* task to rename the FASTA header and generate an AGP format (https://www.ncbi.nlm.nih.gov/assembly/agp/AGP_Specification/) file which describes the assembly" + echo -e "After assembly, Contigs and Scaffolds are consumed by the *create_agp* task to rename the FASTA header and generate an AGP format (https://www.ncbi.nlm.nih.gov/assembly/agp/AGP_Specification/) file which describes the assembly" >> ${prefix}_metaAsm.info echo -e "In the end, the reads are mapped back to contigs by bbmap (BBTools(1) version $bbtools_version) for coverage information." >> ${prefix}_metaAsm.info echo -e "\n(1) B. Bushnell: BBTools software package, http://bbtools.jgi.doe.gov/" >> ${prefix}_metaAsm.info