-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25 from icgc-argo/[email protected].…
…1-1.3.1 [release]
- Loading branch information
Showing
67 changed files
with
22,611 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
{ | ||
"name": "demo-dna-seq-alignment-wf", | ||
"version": "1.7.0-1.3.0", | ||
"version": "1.7.1-1.3.1", | ||
"description": "DNA Seq Alignment Workflow with QC Metrics Collection", | ||
"main": "main.nf", | ||
"scripts": { | ||
|
@@ -21,7 +21,7 @@ | |
}, | ||
"dependencies": [ | ||
"github.com/icgc-argo/demo-wfpkgs/[email protected]", | ||
"github.com/icgc-argo/demo-wfpkgs/[email protected].0", | ||
"github.com/icgc-argo/demo-wfpkgs/demo-dna-seq-alignment-wf@1.7.1", | ||
"github.com/icgc-argo/demo-wfpkgs/[email protected]" | ||
], | ||
"devDependencies": [], | ||
|
5 changes: 5 additions & 0 deletions
5
wfpr_modules/github.com/icgc-argo/demo-wfpkgs/demo-aligned-seq-qc@1.1.0/.dockerignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
.nextflow* | ||
.gitignore | ||
work | ||
outdir | ||
tests |
11 changes: 11 additions & 0 deletions
11
wfpr_modules/github.com/icgc-argo/demo-wfpkgs/demo-aligned-seq-qc@1.1.0/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Image for the demo-aligned-seq-qc package: the base tools image plus the
# package's Python scripts installed under /tools.
FROM quay.io/icgc-argo/dna-seq-processing-tools:base-docker.0.1.1

# Link the published image to its source repository. Written in key="value"
# form; the space-separated `LABEL key value` form is legacy syntax.
LABEL org.opencontainers.image.source="https://github.com/icgc-argo/demo-wfpkgs"

# Put /tools first on PATH so the scripts copied below are found by name.
ENV PATH="/tools:${PATH}"

WORKDIR /tools

# Copy every Python script from the root of the build context.
COPY *.py /tools/

CMD ["/bin/bash"]
73 changes: 73 additions & 0 deletions
73
wfpr_modules/github.com/icgc-argo/demo-wfpkgs/demo-aligned-seq-qc@1.1.0/aligned-seq-qc.nf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
/* | ||
* Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR). | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License as published | ||
* by the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
/* | ||
* Contributors | ||
* Junjun Zhang <[email protected]> | ||
* Linda Xiang <[email protected]> | ||
*/ | ||
|
||
/********************************************************************/ | ||
/* this block is auto-generated based on info from pkg.json where */ | ||
/* changes can be made if needed, do NOT modify this block manually */ | ||
nextflow.enable.dsl = 2 | ||
name = 'demo-aligned-seq-qc' | ||
version = '1.1.0' | ||
container = [ | ||
'ghcr.io': 'ghcr.io/icgc-argo/demo-wfpkgs.demo-aligned-seq-qc', | ||
'quay.io': 'quay.io/icgc-argo/demo-wfpkgs.demo-aligned-seq-qc' | ||
] | ||
default_container_registry = 'ghcr.io' | ||
/********************************************************************/ | ||
|
||
params.seq = "" | ||
params.container_version = "" | ||
params.ref_genome_gz = "" | ||
|
||
params.container_registry = default_container_registry | ||
params.publish_dir = "" | ||
params.cpus = 1 | ||
params.mem = 2 // in GB | ||
|
||
|
||
/*
 * Runs aligned-seq-qc.py on one aligned sequence file and emits the
 * resulting *.qc_metrics.tgz archive on the `metrics` output.
 *
 * Inputs:
 *   seq                          - aligned sequence file passed to the script with -s
 *   ref_genome_gz                - reference file passed to the script with -r
 *   ref_genome_gz_secondary_file - staged alongside the reference; not referenced
 *                                  on the command line
 *   dependencies                 - value input that is not used by the script
 *                                  (presumably only orders this step after upstream
 *                                  steps; confirm with callers)
 */
process alignedSeqQC {
  // Use the module's default registry when params.container_registry is empty
  // or unset; otherwise the map lookup yields null and the image name
  // becomes "null:<version>".
  container "${container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"

  // Publish only when a publish_dir was supplied. `enabled` is given a real
  // boolean rather than an interpolated string.
  publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}",
    mode: "copy",
    enabled: params.publish_dir ? true : false

  cpus params.cpus
  memory "${params.mem} GB"

  input:
    path seq
    path ref_genome_gz
    path ref_genome_gz_secondary_file
    val dependencies

  output:
    path "*.qc_metrics.tgz", emit: metrics

  script:
    // task.cpus is the CPU count actually granted to this task, so the thread
    // count stays correct even if a config file overrides the `cpus` directive.
    """
    aligned-seq-qc.py -s ${seq} \
                      -r ${ref_genome_gz} \
                      -n ${task.cpus}
    """
}
103 changes: 103 additions & 0 deletions
103
wfpr_modules/github.com/icgc-argo/demo-wfpkgs/demo-aligned-seq-qc@1.1.0/aligned-seq-qc.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR). | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU Affero General Public License as published | ||
by the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU Affero General Public License for more details. | ||
You should have received a copy of the GNU Affero General Public License | ||
along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
Contributors: | ||
Junjun Zhang <[email protected]> | ||
Linda Xiang <[email protected]> | ||
""" | ||
|
||
import os | ||
import sys | ||
from argparse import ArgumentParser | ||
import subprocess | ||
from multiprocessing import cpu_count | ||
import tarfile | ||
import glob | ||
import json | ||
|
||
|
||
def _parse_number(text):
    """Return ``text`` as an int when it is integral, otherwise as a float."""
    try:
        return int(text)
    except ValueError:
        # Covers decimals and either exponent case (e.g. '1.5e-03', '1E5').
        return float(text)


def _summary_numbers(stats_text, wanted):
    """Pick selected 'SN' rows out of ``samtools stats`` text output.

    ``wanted`` maps the samtools field label (colon removed) to the key used
    in the returned dict. Rows whose label is not in ``wanted`` are ignored.
    """
    found = {}
    for row in stats_text.splitlines():
        if not row.startswith('SN\t'):
            continue
        # Layout: SN <tab> label: <tab> value [<tab> # comment]
        cols = row.replace(':', '').strip().split('\t')
        if len(cols) < 3 or cols[1] not in wanted:
            continue
        found[wanted[cols[1]]] = _parse_number(cols[2])
    return found


def _samtools_version():
    """Return the version token from the first 'samtools ...' line of ``samtools --version``."""
    p = subprocess.run(['samtools', '--version'],
                       stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)
    for line in p.stdout.decode('utf-8').splitlines():
        # Only the first matching line is used: later lines of the output
        # (for example build flags) may also contain the word 'samtools'.
        if 'samtools' in line:
            return line.strip().split(' ')[-1]
    sys.exit("Error: unable to determine samtools version\n")


def collect_metrics(args):
    """Run ``samtools stats`` on ``args.seq`` and package the QC metrics.

    Reads ``args.seq`` (aligned sequence file), ``args.reference`` (reference
    fasta) and ``args.cpus`` (thread count).

    Files written to the current working directory, all prefixed with the
    base name of ``args.seq``:
      * ``<name>.bamstat``          - full ``samtools stats`` output
      * ``<name>.extra_info.json``  - selected summary numbers plus the tool version
      * ``<name>.qc_metrics.tgz``   - every ``*.bamstat`` and ``*.extra_info.json``
                                      file found in the working directory

    Exits the program via ``sys.exit`` with an error message if
    ``samtools stats`` cannot be run or returns a non-zero status.
    """
    seq_name = os.path.basename(args.seq)
    work_dir = os.getcwd()
    bamstat_path = os.path.join(work_dir, seq_name + ".bamstat")

    stats_args = [
        '--reference', args.reference,
        '-@', str(args.cpus),
        '-r', args.reference,
        '--split', 'RG',
        # Prefix for the additional per-RG statistics files samtools writes.
        '-P', os.path.join(work_dir, seq_name)
    ]

    try:
        cmd = ['samtools', 'stats'] + stats_args + [args.seq]
        p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)
    except Exception as e:
        message = "Error: %s. 'samtools stats' failed: %s\n" % (e, args.seq)
        # stderr is merged into stdout, so the captured output is the only
        # place the tool's own diagnostics end up; include it when present.
        captured = getattr(e, 'output', None)
        if captured:
            message += captured.decode('utf-8', errors='replace')
        sys.exit(message)

    stats_text = p.stdout.decode('utf-8')
    with open(bamstat_path, 'w') as f:
        f.write(stats_text)

    collected_sum_fields = {
        'raw total sequences': 'total_reads',
        'reads mapped': 'mapped_reads',
        'reads paired': 'paired_reads',
        'reads properly paired': 'properly_paired_reads',
        'pairs on different chromosomes': 'pairs_on_different_chromosomes',
        'total length': 'total_bases',
        'bases mapped (cigar)': 'mapped_bases_cigar',
        'mismatches': 'mismatch_bases',
        'error rate': 'error_rate',
        'bases duplicated': 'duplicated_bases',
        'insert size average': 'average_insert_size',
        'average length': 'average_length'
    }
    # Parse the text already in memory instead of re-reading the file just written.
    extra_info = _summary_numbers(stats_text, collected_sum_fields)
    extra_info.update({"tool": "samtools:stats@%s" % _samtools_version()})

    with open(seq_name + '.extra_info.json', "w") as j:
        j.write(json.dumps(extra_info, indent=2))

    # make tar gzip ball of the *.bamstat and *.extra_info.json files;
    # sorted so the member order does not depend on directory listing order
    tarfile_name = seq_name + '.qc_metrics.tgz'
    members = (sorted(glob.glob(os.path.join(work_dir, "*.bamstat")))
               + sorted(glob.glob(os.path.join(work_dir, "*.extra_info.json"))))
    with tarfile.open(tarfile_name, "w:gz") as tar:
        for statsfile in members:
            tar.add(statsfile, arcname=os.path.basename(statsfile))
|
||
|
||
def main():
    """Parse the command-line options and run the QC metrics collection."""
    cli = ArgumentParser()
    cli.add_argument(
        "-s", "--seq",
        dest="seq", type=str, required=True,
        help="Aligned sequence file",
    )
    cli.add_argument(
        '-r', '--reference',
        dest='reference', type=str, required=True,
        help='reference fasta',
    )
    # Defaults to the number of CPUs reported for the machine when -n is omitted.
    cli.add_argument(
        '-n', '--cpus',
        dest='cpus', type=int, default=cpu_count(),
        help='number of cpu cores',
    )

    collect_metrics(cli.parse_args())
|
||
|
||
if __name__ == '__main__': | ||
main() |
4 changes: 4 additions & 0 deletions
4
wfpr_modules/github.com/icgc-argo/demo-wfpkgs/demo-aligned-seq-qc@1.1.0/nextflow.config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
docker { | ||
enabled = true | ||
runOptions = '-u \$(id -u):\$(id -g)' | ||
} |
53 changes: 53 additions & 0 deletions
53
wfpr_modules/github.com/icgc-argo/demo-wfpkgs/demo-aligned-seq-qc@1.1.0/pkg.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
{ | ||
"name": "demo-aligned-seq-qc", | ||
"version": "1.1.0", | ||
"description": "Collect QC metrics from aligned sequencing reads", | ||
"main": "aligned-seq-qc.nf", | ||
"scripts": { | ||
"test": "pushd tests && ./run_tests.sh; popd" | ||
}, | ||
"deprecated": false, | ||
"keywords": [ | ||
"bioinformatics", | ||
"alignment", | ||
"qc", | ||
"metrics" | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/icgc-argo/demo-wfpkgs.git" | ||
}, | ||
"container": { | ||
"registries": [ | ||
{ | ||
"registry": "quay.io", | ||
"type": "docker", | ||
"org": "icgc-argo", | ||
"default": true | ||
}, | ||
{ | ||
"registry": "ghcr.io", | ||
"type": "docker", | ||
"org": "icgc-argo", | ||
"default": false | ||
} | ||
] | ||
}, | ||
"dependencies": [], | ||
"devDependencies": [ | ||
"github.com/icgc-argo/demo-wfpkgs/[email protected]" | ||
], | ||
"contributors": [ | ||
{ | ||
"name": "Junjun Zhang", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Linda Xiang", | ||
"email": "[email protected]" | ||
} | ||
], | ||
"license": "AGPL-3.0", | ||
"bugReport": "https://github.com/icgc-argo/demo-wfpkgs/issues", | ||
"homepage": "https://github.com/icgc-argo/demo-wfpkgs#readme" | ||
} |
55 changes: 55 additions & 0 deletions
55
wfpr_modules/github.com/icgc-argo/demo-wfpkgs/demo-aligned-seq-qc@1.1.0/tests/checker.nf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
/* | ||
* Copyright (c) 2019-2020, Ontario Institute for Cancer Research (OICR). | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License as published | ||
* by the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
/* | ||
* Contributors: | ||
* Junjun Zhang <[email protected]> | ||
* Linda Xiang <[email protected]> | ||
*/ | ||
|
||
// Test driver for the alignedSeqQC process.
// DSL2 is enabled with the stable `nextflow.enable.dsl` flag, the same one the
// module under test uses, instead of the deprecated `nextflow.preview.dsl`.
nextflow.enable.dsl = 2

// Inputs expected from the test job file / command line
params.seq = ""
params.container_version = ""
params.ref_genome_gz = ""

// Execution settings forwarded to the included process
params.container_registry = ""
params.cpus = 1
params.mem = 2  // in GB


include { alignedSeqQC } from '../aligned-seq-qc.nf' params(params)
include { getSecondaryFiles } from './wfpr_modules/github.com/icgc-argo/demo-wfpkgs/[email protected]/main.nf'

// Aligned sequence file(s) to run QC on; fails early if the path does not exist
Channel
  .fromPath(params.seq, checkIfExists: true)
  .set { aligned_seq }

// Companion files of the reference for the 'fai' and 'gzi' extensions.
// NOTE(review): getSecondaryFiles is defined in another module; it presumably
// returns the paths of those companion files - confirm against that module.
Channel
  .fromPath(getSecondaryFiles(params.ref_genome_gz, ['fai', 'gzi']), checkIfExists: true)
  .set { ref_genome_gz_idx }

workflow {
  alignedSeqQC(
    aligned_seq.flatten(),
    file(params.ref_genome_gz),
    ref_genome_gz_idx.collect(),
    true  // value for the process's `dependencies` input
  )
}
Binary file added
BIN
+9.79 KB
...rgo/demo-wfpkgs/[email protected]/tests/input/SA610149.0.20200122.wgs.grch38.cram
Binary file not shown.
Binary file added
BIN
+4.75 KB
...gc-argo/demo-wfpkgs/[email protected]/tests/input/grch38-aligned.C0HVY_2.lane.bam
Binary file not shown.
Binary file added
BIN
+5.31 KB
...gc-argo/demo-wfpkgs/[email protected]/tests/input/grch38-aligned.D0RE2_1.lane.bam
Binary file not shown.
Binary file added
BIN
+4.4 KB
...gc-argo/demo-wfpkgs/[email protected]/tests/input/grch38-aligned.D0RH0_2.lane.bam
Binary file not shown.
1 change: 1 addition & 0 deletions
1
..._modules/github.com/icgc-argo/demo-wfpkgs/[email protected]/tests/nextflow.config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
includeConfig '../nextflow.config' |
Oops, something went wrong.