Skip to content

Commit

Permalink
Merge pull request #58 from mskcc/release/0.3.0
Browse files Browse the repository at this point in the history
Release 0.3.0
  • Loading branch information
sivkovic authored Jul 31, 2019
2 parents fa8351b + a8a3eb5 commit 5f4a0b5
Show file tree
Hide file tree
Showing 34 changed files with 1,259 additions and 98 deletions.
69 changes: 68 additions & 1 deletion abra2_2.17/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,71 @@
```bash
> toil-cwl-runner abra2_2.17.cwl example_inputs.yaml
```


**If you are at MSK using the JUNO cluster, have installed toil version 3.19, and have manually modified [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&`, you can use the following commands:**

```bash
#Using CWLTOOL
> cwltool --singularity --non-strict /path/to/abra2_2.17.cwl /path/to/inputs.yaml

#Using toil-cwl-runner
> mkdir abra2_toil_log
> toil-cwl-runner --singularity --logFile /path/to/abra2_toil_log/cwltoil.log --jobStore /path/to/abra2_jobStore --batchSystem lsf --workDir /path/to/abra2_toil_log --outdir . --writeLogs /path/to/abra2_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/abra2_2.17.cwl /path/to/inputs.yaml > abra2_toil.stdout 2> abra2_toil.stderr &
```

### Usage

```
usage: abra2_2.17.cwl [-h]
positional arguments:
job_order Job input json file
optional arguments:
-h, --help show this help message and exit
--memory_per_job MEMORY_PER_JOB
Memory per job in megabytes
--memory_overhead MEMORY_OVERHEAD
Memory overhead per job in megabytes
--number_of_threads NUMBER_OF_THREADS
--working_directory WORKING_DIRECTORY
Set the temp directory (overrides java.io.tmpdir)
--reference_fasta REFERENCE_FASTA
Genome reference location
--targets TARGETS
--kmer_size KMER_SIZE
Optional assembly kmer size(delimit with commas if
multiple sizes specified)
--maximum_average_depth MAXIMUM_AVERAGE_DEPTH
Regions with average depth exceeding this value will
be downsampled (default: 1000)
--soft_clip_contig SOFT_CLIP_CONTIG
Soft clip contig args [max_contigs,min_base_qual,frac_
high_qual_bases,min_soft_clip_len]
(default:16,13,80,15)
--maximum_mixmatch_rate MAXIMUM_MIXMATCH_RATE
Max allowed mismatch rate when mapping reads back to
contigs (default: 0.05)
--scoring_gap_alignments SCORING_GAP_ALIGNMENTS
Scoring used for contig alignments(match,
mismatch_penalty,gap_open_penalty,gap_extend_penalty)
(default:8,32,48,1)
--contig_anchor CONTIG_ANCHOR
Contig anchor
[M_bases_at_contig_edge,max_mismatches_near_edge]
(default:10,2)
--window_size WINDOW_SIZE
Processing window size and overlap (size,overlap)
(default: 400,200)
--consensus_sequence Use positional consensus sequence when aligning high
quality soft clipping
--ignore_bad_assembly
Use this option to avoid parsing errors for corrupted
assemblies
--bam_index Enable BAM index generation when outputting sorted
alignments (may require additional memory)
--input_vcf INPUT_VCF
VCF containing known (or suspected) variant sites.
Very large files should be avoided.
--no_sort Do not attempt to sort final output
```
9 changes: 4 additions & 5 deletions abra2_2.17/abra2_2.17.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,10 @@ arguments:
valueFrom: /usr/local/bin/abra2.jar
requirements:
- class: ResourceRequirement
ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}"
coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}"
ramMin: 48000
coresMin: 4
#ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}"
#coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}"
- class: DockerRequirement
dockerPull: 'mskcc/abra2:0.1.0'
- class: InlineJavascriptRequirement
Expand All @@ -193,6 +195,3 @@ requirements:
- class: 'doap:Version'
'doap:name': abra2
'doap:revision': 2.17
- class: 'doap:Version'
'doap:name': cwl-wrapper
'doap:revision': 1.0.0
109 changes: 109 additions & 0 deletions alfred_0.1.17/alfred_0.1.17.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# CWL wrapper for `alfred qc`, followed by the Alfred `stats.R` plotting script.
#
# The generated shell command has the shape:
#   alfred qc [options] --outfile <name>.tsv.gz <bam> && Rscript /opt/alfred/scripts/stats.R <name>.tsv.gz
# ShellCommandRequirement plus `shellQuote: false` are needed so that `&&` is
# passed to the shell as an operator rather than as a quoted literal.
class: CommandLineTool
cwlVersion: v1.0
id: alfred_0_1_17
baseCommand:
  - alfred
  - qc
inputs:
  - id: reference
    type: File
    inputBinding:
      position: 0
      prefix: '--reference'
  - id: ignore_rg
    type: boolean?
    inputBinding:
      position: 0
      prefix: '--ignore'
  - id: bed
    type: File?
    inputBinding:
      position: 0
      prefix: '--bed'
  # Positional BAM argument; placed after `--outfile` (position 2) and before
  # the `&& Rscript ...` tail (positions 4-7).
  - id: bam
    type: File
    inputBinding:
      position: 3
    secondaryFiles:
      - .bai
  - id: read_group
    type: string?
    inputBinding:
      position: 0
      prefix: '--rg'
  - id: supplementary
    type: boolean?
    inputBinding:
      position: 0
      prefix: '--supplementary'
  - id: secondary
    type: boolean?
    inputBinding:
      position: 0
      prefix: '--secondary'
  - id: jsonout
    type: string?
    inputBinding:
      position: 0
      prefix: '--jsonout'
  - id: sample_name
    type: string?
    inputBinding:
      position: 0
      prefix: '--name'
  # Optional base name for the statistics file. It has no inputBinding of its
  # own; the expressions under `arguments` append '.tsv.gz' to it.
  - id: outfile
    type: string?
outputs:
  # NOTE(review): `output` collects '*.pdf' while `output_pdf` collects
  # '*.tsv.gz' - the ids look swapped relative to their globs. Left unchanged
  # here because downstream callers may depend on the current mapping; confirm.
  - id: output
    type: File?
    outputBinding:
      glob: '*.pdf'
  - id: output_pdf
    type: File?
    outputBinding:
      glob: '*.tsv.gz'
label: alfred_0.1.17
arguments:
  - position: 4
    prefix: ''
    shellQuote: false
    valueFrom: '&&'
  - position: 5
    prefix: ''
    shellQuote: false
    valueFrom: Rscript
  - position: 6
    prefix: ''
    shellQuote: false
    valueFrom: /opt/alfred/scripts/stats.R
  # Statistics file handed to stats.R. This must evaluate to the same name as
  # the `--outfile` expression below, so keep the two expressions in sync.
  - position: 7
    prefix: ''
    shellQuote: false
    valueFrom: |-
      ${
          // Use the declared `outfile` input; the previous `inputs.outname`
          // was never declared, so a user-supplied name was ignored.
          if (inputs.outfile) {
              return inputs.outfile + '.tsv.gz';
          }
          if (inputs.ignore_rg) {
              return inputs.bam.basename.replace('.bam', '') + '.alfred.tsv.gz';
          }
          return inputs.bam.basename.replace('.bam', '') + 'RG.alfred.tsv.gz';
      }
  # Name of the statistics file written by `alfred qc`.
  - position: 2
    prefix: '--outfile'
    shellQuote: false
    valueFrom: |-
      ${
          // Use the declared `outfile` input; the previous `inputs.outname`
          // was never declared, so a user-supplied name was ignored.
          if (inputs.outfile) {
              return inputs.outfile + '.tsv.gz';
          }
          if (inputs.ignore_rg) {
              return inputs.bam.basename.replace('.bam', '') + '.alfred.tsv.gz';
          }
          return inputs.bam.basename.replace('.bam', '') + 'RG.alfred.tsv.gz';
      }
requirements:
  - class: ShellCommandRequirement
  - class: DockerRequirement
    dockerPull: 'cmopipeline/alfred:v0.1.17'
  - class: InlineJavascriptRequirement
56 changes: 56 additions & 0 deletions bedtools_genomecov_v2.28.0_cv2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# CWL and Dockerfile for running Bedtools GenomeCov

## Version of tools in [docker image](https://hub.docker.com/r/biocontainers/bedtools)

| Tool | Version | Location |
|--- |--- |--- |
| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 |

[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
## CWL

- CWL specification 1.0
- Use example_inputs.yml to see the inputs to the cwl
- Example Command using [toil](https://toil.readthedocs.io):

```bash
> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml
```

**If you are at MSK using the JUNO cluster, have installed toil version 3.19, and have manually modified [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&`, you can use the following commands:**

```bash
#Using CWLTOOL
> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml

#Using toil-cwl-runner
> mkdir run_directory
> toil-cwl-runner --singularity --logFile /path/to/run_directory/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
```

## Usage

```bash
> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help

usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT
--output_file_name OUTPUT_FILE_NAME
[--memory_overhead MEMORY_OVERHEAD]
[--memory_per_job MEMORY_PER_JOB]
[--number_of_threads NUMBER_OF_THREADS]
[--option_bedgraph]
[job_order]

positional arguments:
job_order Job input json file

optional arguments:
-h, --help show this help message and exit
--input INPUT The input file can be in BAM format (Note: BAM must be
sorted by position)
--output_file_name OUTPUT_FILE_NAME
--memory_overhead MEMORY_OVERHEAD
--memory_per_job MEMORY_PER_JOB
--number_of_threads NUMBER_OF_THREADS
--option_bedgraph option flag parameter to choose output file format.
-bg refers to bedgraph format
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ $namespaces:
dct: 'http://purl.org/dc/terms/'
doap: 'http://usefulinc.com/ns/doap#'
foaf: 'http://xmlns.com/foaf/0.1/'
edam: http://edamontology.org/
id: bedtools_genomecov
baseCommand:
- bedtools
Expand Down Expand Up @@ -51,8 +52,10 @@ label: bedtools_genomecov
requirements:
- class: ShellCommandRequirement
- class: ResourceRequirement
ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}"
coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}"
ramMin: 20000
coresMin: 1
#ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}"
#coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}"
- class: DockerRequirement
dockerPull: 'biocontainers/bedtools:v2.28.0_cv2'
- class: InlineJavascriptRequirement
Expand Down Expand Up @@ -80,6 +83,3 @@ stdout: |-
- class: 'doap:Version'
'doap:name': bedtools
'doap:revision': v2.28.0_cv2
- class: 'doap:Version'
'doap:name': cwl-wrapper
'doap:revision': 1.0.0
57 changes: 57 additions & 0 deletions bedtools_merge_v2.28.0_cv2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# CWL and Dockerfile for running Bedtools Merge

## Version of tools in [docker image](https://hub.docker.com/r/biocontainers/bedtools)

| Tool | Version | Location |
|--- |--- |--- |
| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 |

[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
## CWL

- CWL specification 1.0
- Use example_inputs.yml to see the inputs to the cwl
- Example Command using [toil](https://toil.readthedocs.io):

```bash
> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml
```

**If you are at MSK using the JUNO cluster, have installed toil version 3.19, and have manually modified [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&`, you can use the following commands:**

```bash
#Using CWLTOOL
> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml

#Using toil-cwl-runner
> mkdir run_directory
> toil-cwl-runner --singularity --logFile /path/to/run_directory/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
```

## Usage

```bash
> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help

usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name
OUTPUT_FILE_NAME
[--memory_overhead MEMORY_OVERHEAD]
[--memory_per_job MEMORY_PER_JOB]
[--number_of_threads NUMBER_OF_THREADS]
[--distance_between_features DISTANCE_BETWEEN_FEATURES]
[job_order]

positional arguments:
job_order Job input json file

optional arguments:
-h, --help show this help message and exit
--input INPUT BEDgraph format file generated from Bedtools Genomecov
module
--output_file_name OUTPUT_FILE_NAME
--memory_overhead MEMORY_OVERHEAD
--memory_per_job MEMORY_PER_JOB
--number_of_threads NUMBER_OF_THREADS
--distance_between_features DISTANCE_BETWEEN_FEATURES
Maximum distance between features allowed for features
to be merged.
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@ label: bedtools_merge
requirements:
- class: ShellCommandRequirement
- class: ResourceRequirement
ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}"
coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}"
ramMin: 20000
coresMin: 1
# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}"
# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}"
- class: DockerRequirement
dockerPull: 'biocontainers/bedtools:v2.28.0_cv2'
- class: InlineJavascriptRequirement
Expand Down Expand Up @@ -75,6 +77,3 @@ stdout: |-
- class: 'doap:Version'
'doap:name': bedtools
'doap:revision': v2.28.0_cv2
- class: 'doap:Version'
'doap:name': cwl-wrapper
'doap:revision': 1.0.0
Loading

0 comments on commit 5f4a0b5

Please sign in to comment.