diff --git a/abra2_2.17/README.md b/abra2_2.17/README.md index 5e60a43c..89818e32 100644 --- a/abra2_2.17/README.md +++ b/abra2_2.17/README.md @@ -18,4 +18,71 @@ ```bash > toil-cwl-runner abra2_2.17.cwl example_inputs.yaml ``` - + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/abra2_2.17.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir abra2_toil_log +> toil-cwl-runner --singularity --logFile /path/to/abra2_toil_log/cwltoil.log --jobStore /path/to/abra2_jobStore --batchSystem lsf --workDir /path/to/abra2_toil_log --outdir . --writeLogs /path/to/abra2_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/abra2_2.17.cwl /path/to/inputs.yaml > abra2_toil.stdout 2> abra2_toil.stderr & +``` + +### Usage + +``` +usage: abra2_2.17.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --working_directory WORKING_DIRECTORY + Set the temp directory (overrides java.io.tmpdir) + --reference_fasta REFERENCE_FASTA + Genome reference location + --targets TARGETS + --kmer_size KMER_SIZE + Optional assembly kmer size(delimit with commas if + multiple sizes specified) + --maximum_average_depth MAXIMUM_AVERAGE_DEPTH + Regions with average depth exceeding this value will + be downsampled (default: 1000) + --soft_clip_contig SOFT_CLIP_CONTIG + Soft clip contig args [max_contigs,min_base_qual,frac_ + high_qual_bases,min_soft_clip_len] + (default:16,13,80,15) + --maximum_mixmatch_rate 
MAXIMUM_MIXMATCH_RATE + Max allowed mismatch rate when mapping reads back to + contigs (default: 0.05) + --scoring_gap_alignments SCORING_GAP_ALIGNMENTS + Scoring used for contig alignments(match, + mismatch_penalty,gap_open_penalty,gap_extend_penalty) + (default:8,32,48,1) + --contig_anchor CONTIG_ANCHOR + Contig anchor + [M_bases_at_contig_edge,max_mismatches_near_edge] + (default:10,2) + --window_size WINDOW_SIZE + Processing window size and overlap (size,overlap) + (default: 400,200) + --consensus_sequence Use positional consensus sequence when aligning high + quality soft clipping + --ignore_bad_assembly + Use this option to avoid parsing errors for corrupted + assemblies + --bam_index Enable BAM index generation when outputting sorted + alignments (may require additonal memory) + --input_vcf INPUT_VCF + VCF containing known (or suspected) variant sites. + Very large files should be avoided. + --no_sort Do not attempt to sort final output + ``` \ No newline at end of file diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl index 3fb8976b..1f39fceb 100644 --- a/abra2_2.17/abra2_2.17.cwl +++ b/abra2_2.17/abra2_2.17.cwl @@ -170,8 +170,10 @@ arguments: valueFrom: /usr/local/bin/abra2.jar requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" + ramMin: 48000 + coresMin: 4 + #ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && 
!inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" + #coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" - class: DockerRequirement dockerPull: 'mskcc/abra2:0.1.0' - class: InlineJavascriptRequirement @@ -193,6 +195,3 @@ requirements: - class: 'doap:Version' 'doap:name': abra2 'doap:revision': 2.17 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/alfred_0.1.17/alfred_0.1.17.cwl b/alfred_0.1.17/alfred_0.1.17.cwl new file mode 100644 index 00000000..7f4251ab --- /dev/null +++ b/alfred_0.1.17/alfred_0.1.17.cwl @@ -0,0 +1,109 @@ +class: CommandLineTool +cwlVersion: v1.0 +id: alfred_0_1_17 +baseCommand: + - alfred + - qc +inputs: + - id: reference + type: File + inputBinding: + position: 0 + prefix: '--reference' + - id: ignore_rg + type: boolean? + inputBinding: + position: 0 + prefix: '--ignore' + - id: bed + type: File? + inputBinding: + position: 0 + prefix: '--bed' + - id: bam + type: File + inputBinding: + position: 3 + secondaryFiles: + - .bai + - id: read_group + type: string? + inputBinding: + position: 0 + prefix: '--rg' + - id: supplementary + type: boolean? + inputBinding: + position: 0 + prefix: '--supplementary' + - id: secondary + type: boolean? + inputBinding: + position: 0 + prefix: '--secondary' + - id: jsonout + type: string? + inputBinding: + position: 0 + prefix: '--jsonout' + - id: sample_name + type: string? + inputBinding: + position: 0 + prefix: '--name' + - id: outfile + type: string? +outputs: + - id: output + type: File? + outputBinding: + glob: '*.pdf' + - id: output_pdf + type: File? 
+ outputBinding: + glob: '*.tsv.gz' +label: alfred_0.1.17 +arguments: + - position: 4 + prefix: '' + shellQuote: false + valueFrom: '&&' + - position: 5 + prefix: '' + shellQuote: false + valueFrom: Rscript + - position: 6 + prefix: '' + shellQuote: false + valueFrom: /opt/alfred/scripts/stats.R + - position: 7 + prefix: '' + shellQuote: false + valueFrom: |- + ${ + if (inputs.outfile) { + return inputs.outfile + '.tsv.gz'; + } + if (inputs.ignore_rg) { + return inputs.bam.basename.replace('.bam', '') + '.alfred.tsv.gz' + } + return inputs.bam.basename.replace('.bam', '') + 'RG.alfred.tsv.gz' + } + - position: 2 + prefix: '--outfile' + shellQuote: false + valueFrom: |- + ${ + if (inputs.outfile) { + return inputs.outfile + '.tsv.gz'; + } + if (inputs.ignore_rg) { + return inputs.bam.basename.replace('.bam', '') + '.alfred.tsv.gz'; + } + return inputs.bam.basename.replace('.bam', '') + 'RG.alfred.tsv.gz'; + } +requirements: + - class: ShellCommandRequirement + - class: DockerRequirement + dockerPull: 'cmopipeline/alfred:v0.1.17' + - class: InlineJavascriptRequirement diff --git a/bedtools_genomecov_v2.28.0_cv2/README.md b/bedtools_genomecov_v2.28.0_cv2/README.md new file mode 100644 index 00000000..75de8a57 --- /dev/null +++ b/bedtools_genomecov_v2.28.0_cv2/README.md @@ -0,0 +1,56 @@ +# CWL and Dockerfile for running Bedtools GenomeCov + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +|--- |--- |--- | +| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having 
installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help + +usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT + --output_file_name OUTPUT_FILE_NAME + [--memory_overhead MEMORY_OVERHEAD] + [--memory_per_job MEMORY_PER_JOB] + [--number_of_threads NUMBER_OF_THREADS] + [--option_bedgraph] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT The input file can be in BAM format (Note: BAM must be + sorted by position) + --output_file_name OUTPUT_FILE_NAME + --memory_overhead MEMORY_OVERHEAD + --memory_per_job MEMORY_PER_JOB + --number_of_threads NUMBER_OF_THREADS + --option_bedgraph option flag parameter to choose output file format. 
+ -bg refers to bedgraph format \ No newline at end of file diff --git a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov.cwl b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl similarity index 76% rename from bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov.cwl rename to bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl index fd811d9d..978e017f 100644 --- a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov.cwl +++ b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + edam: http://edamontology.org/ id: bedtools_genomecov baseCommand: - bedtools @@ -51,8 +52,10 @@ label: bedtools_genomecov requirements: - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" + ramMin: 20000 + coresMin: 1 + #ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" + #coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement dockerPull: 'biocontainers/bedtools:v2.28.0_cv2' - class: 
InlineJavascriptRequirement @@ -80,6 +83,3 @@ stdout: |- - class: 'doap:Version' 'doap:name': bedtools 'doap:revision': v2.28.0_cv2 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/bedtools_merge_v2.28.0_cv2/README.md b/bedtools_merge_v2.28.0_cv2/README.md new file mode 100644 index 00000000..960664db --- /dev/null +++ b/bedtools_merge_v2.28.0_cv2/README.md @@ -0,0 +1,57 @@ +# CWL and Dockerfile for running Bedtools Merge + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +|--- |--- |--- | +| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help + +usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input 
INPUT --output_file_name + OUTPUT_FILE_NAME + [--memory_overhead MEMORY_OVERHEAD] + [--memory_per_job MEMORY_PER_JOB] + [--number_of_threads NUMBER_OF_THREADS] + [--distance_between_features DISTANCE_BETWEEN_FEATURES] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT BEDgraph format file generated from Bedtools Genomecov + module + --output_file_name OUTPUT_FILE_NAME + --memory_overhead MEMORY_OVERHEAD + --memory_per_job MEMORY_PER_JOB + --number_of_threads NUMBER_OF_THREADS + --distance_between_features DISTANCE_BETWEEN_FEATURES + Maximum distance between features allowed for features + to be merged. \ No newline at end of file diff --git a/bedtools_merge_v2.28.0_cv2/bedtools_merge.cwl b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl similarity index 75% rename from bedtools_merge_v2.28.0_cv2/bedtools_merge.cwl rename to bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl index 8300cd64..836a38bc 100644 --- a/bedtools_merge_v2.28.0_cv2/bedtools_merge.cwl +++ b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl @@ -46,8 +46,10 @@ label: bedtools_merge requirements: - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" + ramMin: 20000 + coresMin: 1 +# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && 
!inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" +# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement dockerPull: 'biocontainers/bedtools:v2.28.0_cv2' - class: InlineJavascriptRequirement @@ -75,6 +77,3 @@ stdout: |- - class: 'doap:Version' 'doap:name': bedtools 'doap:revision': v2.28.0_cv2 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/disambiguate_1.0.0/README.md b/disambiguate_1.0.0/README.md new file mode 100644 index 00000000..b1cd50f0 --- /dev/null +++ b/disambiguate_1.0.0/README.md @@ -0,0 +1,52 @@ + # CWL and Dockerfile for running Disambiguate + +## Version of tools in docker image (/container/Dockerfile) + +Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. 
+ +| Tool | Version | Location | Notes | +|--- |--- |--- | - | +| biocontainers | latest | https://hub.docker.com/r/biocontainers/biocontainers/ | base image; "latest" not actually latest version, just tag name on docker hub| +| bamtools | 2.4.0 | https://bioconda.github.io/recipes/bamtools/README.html | - | +| ngs-disambiguate | 2016.11.10 | https://bioconda.github.io/recipes/ngs-disambiguate/README.html | - | + +[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own image badge on microbadger.com") + + +## CWL + +- CWL specification 1.0 +- Use `example_inputs.yaml` to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml +``` + +## Command +``` +USAGE: + + cwltool disambiguate_1.0.0.cwl \ + --prefix \ + --output_dir \ + [--aligner ] \ + + +Where: + + --prefix + (required) Sample ID or name used as prefix. 
Do not include .bam + + --output_dir + (required) Output directory + + --aligner + Aligner option {bwa(default),tophat,hisat2,star} + + + (required) Species A BAM file + + + (required) Species B BAM file +``` diff --git a/disambiguate_1.0.0/container/Dockerfile b/disambiguate_1.0.0/container/Dockerfile new file mode 100644 index 00000000..1c386b09 --- /dev/null +++ b/disambiguate_1.0.0/container/Dockerfile @@ -0,0 +1,7 @@ +FROM biocontainers/biocontainers:latest + +LABEL software="Disambiguate" +LABEL software.version="2016.11.10" + +RUN conda install -c bioconda bamtools=2.4.0 +RUN conda install -c bioconda ngs-disambiguate=2016.11.10 diff --git a/disambiguate_1.0.0/disambiguate_1.0.0.cwl b/disambiguate_1.0.0/disambiguate_1.0.0.cwl new file mode 100644 index 00000000..55e37c2f --- /dev/null +++ b/disambiguate_1.0.0/disambiguate_1.0.0.cwl @@ -0,0 +1,70 @@ +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:bolipatc@mskcc.org' + 'foaf:name': C. Allan Bolipata + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:bolipatc@mskcc.org' + 'foaf:name': C. Allan Bolipata + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': disambiguate + 'doap:revision': 1.0.0 + +class: CommandLineTool +cwlVersion: v1.0 +baseCommand: + - ngs_disambiguate +inputs: + - id: no_sort + type: string? + inputBinding: + position: 0 + prefix: '--no-sort' + - id: prefix + type: string + inputBinding: + position: 0 + prefix: '--prefix' + - id: output_dir + type: string + inputBinding: + position: 0 + prefix: '--output-dir' + - id: aligner + type: string? 
+ inputBinding: + position: 0 + prefix: '--aligner' + default: 'bwa' + - id: species_a_bam + type: File + inputBinding: + position: 1 + - id: species_b_bam + type: File + inputBinding: + position: 2 + +outputs: + - id: output + type: Directory + outputBinding: + glob: '$(inputs.output_dir)' +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 4 + - class: DockerRequirement + dockerPull: 'mskcc/disambiguate:1.0.0' + - class: InlineJavascriptRequirement diff --git a/disambiguate_1.0.0/example_inputs.yaml b/disambiguate_1.0.0/example_inputs.yaml new file mode 100644 index 00000000..ece43797 --- /dev/null +++ b/disambiguate_1.0.0/example_inputs.yaml @@ -0,0 +1,7 @@ +{ + prefix: "this_is_my_sample_id", + output_dir: "this_is_my_output_dir", + species_a_bam: { class: File, path: '/path/to/human.bam' }, + species_b_bam: { class: File, path: '/path/to/mouse.bam' }, + aligner: 'bwa' +} diff --git a/gatk_ApplyBQSR_4.1.2.0/README.md b/gatk_ApplyBQSR_4.1.2.0/README.md new file mode 100644 index 00000000..6dd376f7 --- /dev/null +++ b/gatk_ApplyBQSR_4.1.2.0/README.md @@ -0,0 +1,128 @@ +# CWL and Dockerfile for running GATK4 - Apply BQSR + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +|--- |--- |--- | +| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following 
command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl --help + +usage: gatk_ApplyBQSR_4.1.2.0.cwl [-h] --reference REFERENCE + [--create_output_bam_index] + --bqsr_recal_file BQSR_RECAL_FILE --input + INPUT [--output_file_name OUTPUT_FILE_NAME] + [--add_output_sam_program_record] + [--add_output_vcf_command_line] + [--arguments_file ARGUMENTS_FILE] + [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] + [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] + [--create_output_bam_md5] + [--create_output_variant_index] + [--create_output_variant_md5] + [--disable_bam_index_caching] + [--disable_read_filter DISABLE_READ_FILTER] + [--disable_sequence_dictionary_validation] + [--emit_original_quals] + [--exclude_intervals EXCLUDE_INTERVALS] + [--gatk_config_file GATK_CONFIG_FILE] + [--gcs_max_retries GCS_MAX_RETRIES] + [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] + [--global_qscore_prior GLOBAL_QSCORE_PRIOR] + [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] + [--interval_merging_rule INTERVAL_MERGING_RULE] + [--interval_padding INTERVAL_PADDING] + [--interval_set_rule INTERVAL_SET_RULE] + [--intervals INTERVALS] [--lenient] + [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] + [--quantize_quals QUANTIZE_QUALS] [--quiet] + [--read_filter READ_FILTER] + [--read_index READ_INDEX] + [--read_validation_stringency READ_VALIDATION_STRINGENCY] + [--seconds_between_progress_updates 
SECONDS_BETWEEN_PROGRESS_UPDATES] + [--sequence_dictionary SEQUENCE_DICTIONARY] + [--sites_only_vcf_output] + [--use_jdk_deflater] [--use_jdk_inflater] + [--use_original_qualities] + [--memory_overhead MEMORY_OVERHEAD] + [--memory_per_job MEMORY_PER_JOB] + [--number_of_threads NUMBER_OF_THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reference REFERENCE + Reference sequence + --create_output_bam_index + --bqsr_recal_file BQSR_RECAL_FILE + Input recalibration table for BQSR. Only run ApplyBQSR + with the covariates table created from the input BAM + --input INPUT A BAM file containing input read data + --output_file_name OUTPUT_FILE_NAME + Output file name. Not Required + --add_output_sam_program_record + --add_output_vcf_command_line + --arguments_file ARGUMENTS_FILE + --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER + --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER + --create_output_bam_md5 + --create_output_variant_index + --create_output_variant_md5 + --disable_bam_index_caching + --disable_read_filter DISABLE_READ_FILTER + --disable_sequence_dictionary_validation + --emit_original_quals + --exclude_intervals EXCLUDE_INTERVALS + --gatk_config_file GATK_CONFIG_FILE + --gcs_max_retries GCS_MAX_RETRIES + --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS + --global_qscore_prior GLOBAL_QSCORE_PRIOR + --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING + --interval_merging_rule INTERVAL_MERGING_RULE + --interval_padding INTERVAL_PADDING + --interval_set_rule INTERVAL_SET_RULE + --intervals INTERVALS + --lenient + --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN + --quantize_quals QUANTIZE_QUALS + --quiet + --read_filter READ_FILTER + --read_index READ_INDEX + --read_validation_stringency READ_VALIDATION_STRINGENCY + --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES + --sequence_dictionary SEQUENCE_DICTIONARY + 
--sites_only_vcf_output + --use_jdk_deflater + --use_jdk_inflater + --use_original_qualities + --memory_overhead MEMORY_OVERHEAD + --memory_per_job MEMORY_PER_JOB + --number_of_threads NUMBER_OF_THREADS \ No newline at end of file diff --git a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR.cwl b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl similarity index 85% rename from gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR.cwl rename to gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl index 67e5df9b..5fda71e8 100644 --- a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR.cwl +++ b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + edam: http://edamontology.org/ id: gatk_apply_bqsr_4_1_2_0 baseCommand: - gatk @@ -38,6 +39,9 @@ inputs: doc: A BAM file containing input read data secondaryFiles: - ^.bai + - id: output_file_name + type: string? + doc: Output file name. Not Required - id: add_output_sam_program_record type: boolean? inputBinding: @@ -224,7 +228,14 @@ outputs: - id: output type: File? 
outputBinding: - glob: '$(inputs.input.basename.replace(''.bam'', ''''))_bqsr.bam' + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_bqsr.bam') + } + } secondaryFiles: - ^.bai label: gatk_apply_bqsr_4.1.2.0 @@ -234,7 +245,14 @@ arguments: valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx4G\"\n }\n else {\n \n \treturn \"-Xmx4G\"\n }\n}" - position: 2 prefix: '--output' - valueFrom: '$(inputs.input.basename.replace(''.bam'', '''') + ''_bqsr.bam'')' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_bqsr.bam') + } + } - position: 2 prefix: '--tmp-dir' valueFrom: . 
@@ -244,8 +262,10 @@ arguments: valueFrom: ApplyBQSR requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" + ramMin: 10000 + coresMin: 8 +# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" +# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.2.0' - class: InlineJavascriptRequirement @@ -266,7 +286,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': gatk4 - 'doap:revision': 4.1.2.0 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 \ No newline at end of file + 'doap:revision': 4.1.2.0 \ No newline at end of file diff --git a/gatk_BaseRecalibrator_4.1.2.0/README.md b/gatk_BaseRecalibrator_4.1.2.0/README.md new file mode 100644 index 00000000..005acb15 --- /dev/null +++ b/gatk_BaseRecalibrator_4.1.2.0/README.md @@ -0,0 +1,142 @@ +# CWL and Dockerfile for running GATK4 - Base Recalibrator + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +|--- |--- |--- | +| GATK | 4.1.2.0 | 
https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl --help + +usage: gatk_baserecalibrator_4.1.2.0.cwl [-h] --input INPUT --known_sites_1 + KNOWN_SITES_1 --reference REFERENCE + [--output_file_name OUTPUT_FILE_NAME] + [--add_output_sam_program_record] + [--add_output_vcf_command_line] + [--arguments_file ARGUMENTS_FILE] + [--binary_tag_name BINARY_TAG_NAME] + [--bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY] + [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER] + [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] + [--create_output_bam_index] + [--create_output_bam_md5] + [--create_output_variant_index] + [--create_output_variant_md5] + [--default_base_qualities DEFAULT_BASE_QUALITIES] + 
[--deletions_default_quality DELETIONS_DEFAULT_QUALITY] + [--disable_bam_index_caching] + [--disable_read_filter DISABLE_READ_FILTER] + [--disable_sequence_dictionary_validation] + [--exclude_intervals EXCLUDE_INTERVALS] + [--gatk_config_file GATK_CONFIG_FILE] + [--gcs_max_retries GCS_MAX_RETRIES] + [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] + [--indels_context_size INDELS_CONTEXT_SIZE] + [--insertions_default_quality INSERTIONS_DEFAULT_QUALITY] + [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] + [--interval_merging_rule INTERVAL_MERGING_RULE] + [--interval_padding INTERVAL_PADDING] + [--interval_set_rule INTERVAL_SET_RULE] + [--intervals INTERVALS] [--lenient] + [--low_quality_tail LOW_QUALITY_TAIL] + [--maximum_cycle_value MAXIMUM_CYCLE_VALUE] + [--mismatches_context_size MISMATCHES_CONTEXT_SIZE] + [--mismatches_default_quality MISMATCHES_DEFAULT_QUALITY] + [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] + [--quantizing_levels QUANTIZING_LEVELS] + [--QUIET] [--read_filter READ_FILTER] + [--read_index READ_INDEX] + [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] + [--sequence_dictionary SEQUENCE_DICTIONARY] + [--sites_only_vcf_output] + [--use_original_qualities] + [--number_of_threads NUMBER_OF_THREADS] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--known_sites_2 KNOWN_SITES_2] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT BAM/SAM file containing reads + --known_sites_1 KNOWN_SITES_1 + One or more databases of known polymorphic sites used + to exclude regions around known polymorphisms from + analysis + --reference REFERENCE + Reference sequence file + --output_file_name OUTPUT_FILE_NAME + Output file name. 
Not Required + --add_output_sam_program_record + --add_output_vcf_command_line + --arguments_file ARGUMENTS_FILE + --binary_tag_name BINARY_TAG_NAME + --bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY + --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER + --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER + --create_output_bam_index + --create_output_bam_md5 + --create_output_variant_index + --create_output_variant_md5 + --default_base_qualities DEFAULT_BASE_QUALITIES + --deletions_default_quality DELETIONS_DEFAULT_QUALITY + --disable_bam_index_caching + --disable_read_filter DISABLE_READ_FILTER + --disable_sequence_dictionary_validation + --exclude_intervals EXCLUDE_INTERVALS + --gatk_config_file GATK_CONFIG_FILE + --gcs_max_retries GCS_MAX_RETRIES + --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS + --indels_context_size INDELS_CONTEXT_SIZE + --insertions_default_quality INSERTIONS_DEFAULT_QUALITY + --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING + --interval_merging_rule INTERVAL_MERGING_RULE + --interval_padding INTERVAL_PADDING + --interval_set_rule INTERVAL_SET_RULE + --intervals INTERVALS + --lenient + --low_quality_tail LOW_QUALITY_TAIL + --maximum_cycle_value MAXIMUM_CYCLE_VALUE + --mismatches_context_size MISMATCHES_CONTEXT_SIZE + --mismatches_default_quality MISMATCHES_DEFAULT_QUALITY + --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN + --quantizing_levels QUANTIZING_LEVELS + --QUIET + --read_filter READ_FILTER + --read_index READ_INDEX + --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES + --sequence_dictionary SEQUENCE_DICTIONARY + --sites_only_vcf_output + --use_original_qualities + --number_of_threads NUMBER_OF_THREADS + --memory_per_job MEMORY_PER_JOB + --memory_overhead MEMORY_OVERHEAD + --known_sites_2 KNOWN_SITES_2 \ No newline at end of file diff --git a/gatk_BaseRecalibrator_4.1.2.0/example_inputs.yml b/gatk_BaseRecalibrator_4.1.2.0/example_inputs.yml index 580413e6..0e7e657b 100644 --- 
a/gatk_BaseRecalibrator_4.1.2.0/example_inputs.yml +++ b/gatk_BaseRecalibrator_4.1.2.0/example_inputs.yml @@ -1,15 +1,11 @@ input: class: File metadata: {} - path: SeraCare_0-5.bam - secondaryFiles: - - class: File - path: SeraCare_0-5.bai + path: input.bam reference: class: File metadata: {} - path: chr14_chr16.fasta - secondaryFiles: [] + path: ref.fasta known_sites_1: class: File path: dbsnp_137_14_16.b37.vcf diff --git a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator.cwl b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl similarity index 90% rename from gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator.cwl rename to gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl index 884e1b58..5e9e5df1 100644 --- a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator.cwl +++ b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl @@ -3,6 +3,7 @@ cwlVersion: v1.0 $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' id: gatk_base_recalibrator_4_1_2_0 baseCommand: @@ -35,6 +36,9 @@ inputs: secondaryFiles: - .fai - ^.dict + - id: output_file_name + type: string? + doc: Output file name. Not Required - id: add_output_sam_program_record type: boolean? inputBinding: @@ -260,7 +264,14 @@ outputs: - id: output type: File outputBinding: - glob: '$(inputs.input.basename.replace(''.bam'', ''''))_bqsr.table' + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.table') + } + } label: gatk_base_recalibrator_4.1.2.0 arguments: - position: 0 @@ -275,14 +286,21 @@ arguments: valueFrom: . 
- position: 2 prefix: '--output' - valueFrom: '$(inputs.input.basename.replace(''.bam'', ''''))_bqsr.table' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.table') + } + } - position: 2 prefix: '--verbosity' valueFrom: INFO requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" + ramMin: 32000 + coresMin: 8 - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.2.0' - class: InlineJavascriptRequirement @@ -300,10 +318,3 @@ requirements: 'foaf:mbox': 'mailto:sumans@mskcc.org' 'foaf:name': Shalabh Suman 'foaf:name': Memorial Sloan Kettering Cancer Center -'doap:release': - - class: 'doap:Version' - 'doap:name': gatk4 - 'doap:revision': 4.1.2.0 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/marianas_collapsing_first_pass_1.8.1/README.md b/marianas_collapsing_first_pass_1.8.1/README.md new file mode 100644 index 00000000..eba9e12c --- /dev/null +++ b/marianas_collapsing_first_pass_1.8.1/README.md @@ -0,0 +1,19 @@ +# CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to 
the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` \ No newline at end of file diff --git a/marianas_collapsing_second_pass_1.8.1/README.md b/marianas_collapsing_second_pass_1.8.1/README.md new file mode 100644 index 00000000..b3cdf8d7 --- /dev/null +++ b/marianas_collapsing_second_pass_1.8.1/README.md @@ -0,0 +1,19 @@ +# CWL and Dockerfile for running Marianas - DuplexUMIToCollapsedFastqSecondPass + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_second_pass.cwl test_inputs_second_pass.yaml +``` \ No newline at end of file diff --git a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl index feaf68ff..7975c3c2 100644 --- a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl +++ b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl @@ -126,7 +126,7 @@ arguments: valueFrom: org.mskcc.marianas.umi.duplex.DuplexUMIBamToCollapsedFastqSecondPass requirements: - class: ResourceRequirement - ramMain: 20000 + ramMin: 20000 # ramMin: |- # ${ # if (inputs.memory_per_job && inputs.memory_overhead) { diff --git a/marianas_process_loop_umi_1.8.1/README.md b/marianas_process_loop_umi_1.8.1/README.md new file mode 100644 index 00000000..a5b4e900 --- /dev/null +++ b/marianas_process_loop_umi_1.8.1/README.md @@ -0,0 +1,19 @@ +# CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- 
| +| java base image | 8 | - | +| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_process_loop_umi.cwl example_inputs.yaml +``` \ No newline at end of file diff --git a/marianas_process_loop_umi_1.8.1/example_inputs.yaml b/marianas_process_loop_umi_1.8.1/example_inputs.yaml new file mode 100644 index 00000000..e02c29c6 --- /dev/null +++ b/marianas_process_loop_umi_1.8.1/example_inputs.yaml @@ -0,0 +1,3 @@ +fastq1: {class: File, path: test_fastq_read_1.fastq.gz} +fastq2: {class: File, path: test_fastq_read_2.fastq.gz} +umi_length: 3 \ No newline at end of file diff --git a/picard_add_or_replace_read_groups_1.96/README.md b/picard_add_or_replace_read_groups_1.96/README.md index b1f5fe91..b07355a4 100644 --- a/picard_add_or_replace_read_groups_1.96/README.md +++ b/picard_add_or_replace_read_groups_1.96/README.md @@ -8,6 +8,7 @@ | picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | | R | 3.3.3 | r-base for opnejdk:8 | +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") ## CWL @@ -18,3 +19,76 @@ ```bash > toil-cwl-runner picard_add_or_replace_read_groups_1.96.cwl example_inputs.yaml ``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying 
[lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardAddOrReplaceReadGroup_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir . --writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner picard_add_or_replace_read_groups_1.96.cwl --help +usage: picard_add_or_replace_read_groups_1.96.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --read_group_identifier READ_GROUP_IDENTIFIER + Read Group ID Default value: 1. 
This option can be set + to 'null' to clear the default value Required + --read_group_sequnecing_center READ_GROUP_SEQUNECING_CENTER + Read Group sequencing center name Default value: null. + Required + --read_group_library READ_GROUP_LIBRARY + Read Group Library. Required + --read_group_platform_unit READ_GROUP_PLATFORM_UNIT + Read Group platform unit (eg. run barcode) Required. + --read_group_sample_name READ_GROUP_SAMPLE_NAME + Read Group sample name. Required + --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM + Read Group platform (e.g. illumina, solid) Required. + --read_group_description READ_GROUP_DESCRIPTION + Read Group description Default value: null. + --read_group_run_date READ_GROUP_RUN_DATE + Read Group run date Default value: null. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +``` + diff --git a/picard_add_or_replace_read_groups_1.96/example_inputs.yaml b/picard_add_or_replace_read_groups_1.96/example_inputs.yaml index 787b75e2..9c25bd7d 100644 --- a/picard_add_or_replace_read_groups_1.96/example_inputs.yaml +++ b/picard_add_or_replace_read_groups_1.96/example_inputs.yaml @@ -6,7 +6,7 @@ input: memory_overhead: memory_per_job: number_of_threads: -output: somename_srt.bam +output_file_name: somename_srt.bam read_group_description: read_group_identifier: test read_group_library: 1 diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 1b213c32..9bd74860 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -23,14 +23,11 @@ inputs: prefix: I= separate: false doc: Input file (bam or sam). Required. - - id: output - type: string - inputBinding: - position: 0 - prefix: O= - separate: false - valueFrom: '$(inputs.input.basename.replace(/.sam |.bam/, ''_srt.bam''))' - doc: Output file (bam or sam). + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required - id: sort_order type: string? 
inputBinding: @@ -142,20 +139,62 @@ outputs: - id: bam type: File outputBinding: - glob: '$(inputs.input.basename.replace(/.sam |.bam/, ''_srt.bam''))' + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.sam | .bam/,'_srt.bam') + } + } secondaryFiles: - ^.bai label: picard_add_or_replace_read_groups_1.96 arguments: - position: 0 - valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/AddOrReplaceReadGroups.jar + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return 
inputs.output_file_name + } else { + return inputs.input.basename.replace(/.sam | .bam/,'_srt.bam') + } + } requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" + ramMin: 16000 + coresMin: 2 - class: DockerRequirement dockerPull: 'mskcc/picard_1.96:0.1.0' - class: InlineJavascriptRequirement @@ -177,6 +216,3 @@ requirements: - class: 'doap:Version' 'doap:name': picard 'doap:revision': 1.96 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/picard_fix_mate_information_1.96/README.md b/picard_fix_mate_information_1.96/README.md index 4eecbd86..567a78e3 100644 --- a/picard_fix_mate_information_1.96/README.md +++ b/picard_fix_mate_information_1.96/README.md @@ -1,6 +1,6 @@ # CWL and Dockerfile for running Picard - FixMateInformation -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image (../picard_add_or_replace_read_groups_1.96/container/Dockerfile) | Tool | Version | Location | |--- |--- |--- | @@ -8,6 +8,7 @@ | picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | | R | 3.3.3 | r-base for opnejdk:8 | +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on 
microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") ## CWL @@ -18,3 +19,57 @@ ```bash > toil-cwl-runner picard_fix_mate_information_1.96.cwl example_inputs.yaml ``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardFixMate_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +``` +usage: picard_fix_mate_information_1.96.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix. This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. 
Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} +``` \ No newline at end of file diff --git a/picard_fix_mate_information_1.96/example_inputs.yaml b/picard_fix_mate_information_1.96/example_inputs.yaml index 0a2d651e..1d9e4ee2 100644 --- a/picard_fix_mate_information_1.96/example_inputs.yaml +++ b/picard_fix_mate_information_1.96/example_inputs.yaml @@ -6,7 +6,7 @@ input: memory_overhead: memory_per_job: number_of_threads: -output: somename_fm.bam +output_file_name: somename_fm.bam sort_order: tmp_dir: validation_stringency: diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 743603a9..cfc74168 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -23,16 +23,12 @@ inputs: prefix: I= separate: false doc: The input file to fix. 
This option may be specified 0 or more times - - id: output - type: string - inputBinding: - position: 0 - prefix: O= - separate: false - valueFrom: '$(inputs.input.basename.replace(''.bam'', ''_fm.bam''))' + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? doc: >- - The output file to write to. If no output file is supplied, the input file - is overwritten. Default value: null. + Output file name (bam or sam). Not Required - id: sort_order type: string? inputBinding: @@ -86,20 +82,62 @@ outputs: - id: bam type: File outputBinding: - glob: '$(inputs.input.basename.replace(''.bam'', ''_fm.bam''))' + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } secondaryFiles: - ^.bai label: picard_fix_mate_information_1.96 arguments: - position: 0 - valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + 
(inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/FixMateInformation.jar + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" + ramMin: 16000 + coresMin: 2 - class: DockerRequirement dockerPull: 'mskcc/picard_1.96:0.1.0' - class: InlineJavascriptRequirement @@ -121,6 +159,3 @@ requirements: - class: 'doap:Version' 'doap:name': picard 'doap:revision': 1.96 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/trim_galore_0.6.2/README.md b/trim_galore_0.6.2/README.md index 142cd474..3727924e 100644 --- a/trim_galore_0.6.2/README.md +++ b/trim_galore_0.6.2/README.md @@ -18,4 +18,78 @@ ```bash > toil-cwl-runner trim_galore_0.6.2.cwl example_inputs.yaml ``` - \ No newline at end of file + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL 
+> cwltool --singularity --non-strict /path/to/trim_galore_0.6.2.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir trimgalore_toil_log +> toil-cwl-runner --singularity --logFile /path/to/trimgalore_toil_log/cwltoil.log --jobStore /path/to/trimgalore_jobStore --batchSystem lsf --workDir /path/to/trimgalore_toil_log --outdir . --writeLogs /path/to/trimgalore_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/trim_galore_0.6.2.cwl /path/to/inputs.yaml > trimgalore_toil.stdout 2> trimgalore_toil.stderr & +``` + +### Usage + +``` +usage: trim_galore_0.6.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --path_to_trim_galore PATH_TO_TRIM_GALORE + Path to trim_galore executable file + --adapter ADAPTER Adapter sequence to be trimmed. If not specified + explicitely, the first 13bp of the Illumina adapter + 'AGATCGGAAGAGC' will be used by default. + --adapter2 ADAPTER2 Optional adapter sequence to be trimmed off read 2 of + paired-end files. This option requires '--paired' to + be specified as well + --fastq1 FASTQ1 READ1 of the paired-end run + --fastq2 FASTQ2 READ2 of the pair-end run + --length LENGTH Discard reads that became shorter than length INT + because of either quality or adapter trimming. A value + of '0' effectively disables this behaviour. Default: + 20 bp. + --paired This option performs length trimming of + quality/adapter/RRBS trimmed reads for paired-end + files. To pass the validation test, both sequences of + a sequence pair are required to have a certain minimum + length which is governed by the option --length (see + above). 
If only one read passes this length threshold + the other read can be rescued (see option + --retain_unpaired). Using this option lets you discard + too short read pairs without disturbing the sequence- + by-sequence order of FastQ files which is required by + many aligners. + --gzip Compress the output file with gzip. If the input files + are gzip-compressed the output files will be + automatically gzip compressed as well. + --quality QUALITY Trim low-quality ends from reads in addition to + adapter removal. For RRBS samples, quality trimming + will be performed first, and adapter trimming is + carried in a second round. Other files are quality and + adapter trimmed in a single pass. The algorithm is the + same as the one used by BWA (Subtract INT from all + qualities; compute partial sums from all indices to + the end of the sequence; cut sequence at the index at + which the sum is minimal). Default Phred score: 20. + --stringency STRINGENCY + "Overlap with adapter sequence required to trim a + sequence. Defaults to a very stringent setting of '1', + i.e. even a single bp of overlapping sequence will be + trimmed of the 3' end of any read." + --suppress_warn If specified any output to STDOUT or STDERR will be + suppressed. + --error_rate ERROR_RATE + Maximum allowed error rate (no. 
of errors divided by + the length of the matching region) (default: 0.1) +``` \ No newline at end of file diff --git a/trim_galore_0.6.2/trim_galore_0.6.2.cwl b/trim_galore_0.6.2/trim_galore_0.6.2.cwl index e19d0041..e9f946ad 100644 --- a/trim_galore_0.6.2/trim_galore_0.6.2.cwl +++ b/trim_galore_0.6.2/trim_galore_0.6.2.cwl @@ -4,8 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' - sbg: 'https://www.sevenbridges.com/' -id: trim_galore_0_6_2 +id: trim_galore_0.6.2 baseCommand: - trim_galore inputs: @@ -13,7 +12,6 @@ inputs: type: int inputBinding: position: 0 - positon: 0 doc: Memory per job in megabytes - id: memory_overhead type: int @@ -145,8 +143,10 @@ outputs: label: trim_galore_0.6.2 requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" + ramMin: 8000 + coresMin: 1 + #ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" + #coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" - class: DockerRequirement dockerPull: 'mskcc/trim_galore:0.1.0' - class: InlineJavascriptRequirement @@ -168,6 +168,3 @@ 
requirements: - class: 'doap:Version' 'doap:name': trim_galore 'doap:revision': 0.6.2 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/waltz_count_reads_3.1.1/README.md b/waltz_count_reads_3.1.1/README.md new file mode 100644 index 00000000..8ad83443 --- /dev/null +++ b/waltz_count_reads_3.1.1/README.md @@ -0,0 +1,57 @@ +# CWL and Dockerfile for running Waltz - Count Reads + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | + +[![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner waltz_count_reads_3.1.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict waltz_count_reads_3.1.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 waltz_count_reads_3.1.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner waltz_count_reads_3.1.1.cwl --help + +usage: waltz_count_reads_3.1.1.cwl [-h] --bam BAM --gene_list GENE_LIST + [--coverage_threshold 
COVERAGE_THRESHOLD] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --bed_file BED_FILE + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --bam BAM + --gene_list GENE_LIST + --coverage_threshold COVERAGE_THRESHOLD + --memory_per_job MEMORY_PER_JOB + --memory_overhead MEMORY_OVERHEAD + --number_of_threads NUMBER_OF_THREADS + --bed_file BED_FILE +``` diff --git a/waltz_count_reads_3.1.1/example_inputs.yml b/waltz_count_reads_3.1.1/example_inputs.yml index 51503cd8..e57e3e67 100644 --- a/waltz_count_reads_3.1.1/example_inputs.yml +++ b/waltz_count_reads_3.1.1/example_inputs.yml @@ -2,9 +2,6 @@ bam: class: File metadata: {} path: "input.bam" - secondaryFiles: - - class: File - path: "input.bai" bed_file: class: File path: "intervals.bed" diff --git a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl index 8f6c82a0..b06321c7 100644 --- a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl +++ b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + edam: http://edamontology.org/ id: waltz_count_reads baseCommand: - java @@ -72,8 +73,10 @@ arguments: valueFrom: org.mskcc.juber.waltz.countreads.CountReads requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else 
{\r \r return 1\r }\r}" + ramMin: 8000 + coresMin: 1 +# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" +# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement dockerPull: 'mskcc/waltz:1.0.0' - class: InlineJavascriptRequirement @@ -95,6 +98,3 @@ requirements: - class: 'doap:Version' 'doap:name': waltz 'doap:revision': 3.1.1 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/waltz_pileupmatrices_3.1.1/README.md b/waltz_pileupmatrices_3.1.1/README.md new file mode 100644 index 00000000..b5aed666 --- /dev/null +++ b/waltz_pileupmatrices_3.1.1/README.md @@ -0,0 +1,58 @@ +# CWL and Dockerfile for running Waltz - PileupMetrics + +## Version of tools in docker image (../waltz_count_reads_3.1.1/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | + +[![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner waltz_pileupmatrices_3.1.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash 
+#Using CWLTOOL +> cwltool --singularity --non-strict waltz_pileupmatrices_3.1.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 waltz_pileupmatrices_3.1.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner wwaltz_pileupmatrices_3.1.1.cwl --help + +usage: waltz_pileupmatrices_3.1.1.cwl [-h] --bam BAM --referece_fasta + REFERECE_FASTA + [--min_map_quality MIN_MAP_QUALITY] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --bed_file BED_FILE + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --bam BAM + --referece_fasta REFERECE_FASTA + --min_map_quality MIN_MAP_QUALITY + --memory_per_job MEMORY_PER_JOB + --memory_overhead MEMORY_OVERHEAD + --number_of_threads NUMBER_OF_THREADS + --bed_file BED_FILE +``` diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index 4a826edb..65a73de9 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + edam: http://edamontology.org/ id: waltz_pileupmetrics baseCommand: - java @@ -84,8 +85,10 @@ arguments: valueFrom: PileupMetrics requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if 
(inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" + ramMin: 8000 + coresMin: 1 +# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" +# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement dockerPull: 'mskcc/waltz:1.0.0' - class: InlineJavascriptRequirement @@ -107,6 +110,4 @@ requirements: - class: 'doap:Version' 'doap:name': waltz 'doap:revision': 3.1.1 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 +