Skip to content

Commit

Permalink
Merge pull request #27 from icgc-argo/[email protected]
Browse files Browse the repository at this point in the history
[release]
  • Loading branch information
junjun-zhang authored Feb 16, 2021
2 parents 496d722 + 2f24a0e commit 077af8f
Show file tree
Hide file tree
Showing 35 changed files with 11,155 additions and 4 deletions.
4 changes: 2 additions & 2 deletions demo-dna-seq-alignment-wf/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

nextflow.enable.dsl = 2
name = 'demo-dna-seq-alignment-wf'
version = '1.7.1'
version = '1.7.2'


params.ref_genome_fa = ""
Expand Down Expand Up @@ -71,7 +71,7 @@ include {
} from './wfpr_modules/github.com/icgc-argo/demo-wfpkgs/[email protected]/main.nf'

include { bwaMemAligner as bwaMem } from './wfpr_modules/github.com/icgc-argo/demo-wfpkgs/[email protected]/bwa-mem-aligner.nf' params(bwaMemAligner_params)
include { bamMergeSortMarkdup as merSorMkdup } from './wfpr_modules/github.com/icgc-argo/demo-wfpkgs/[email protected].0/bam-merge-sort-markdup.nf' params(bamMergeSortMarkdup_params)
include { bamMergeSortMarkdup as merSorMkdup } from './wfpr_modules/github.com/icgc-argo/demo-wfpkgs/[email protected].1/bam-merge-sort-markdup.nf' params(bamMergeSortMarkdup_params)


workflow DnaAln {
Expand Down
4 changes: 2 additions & 2 deletions demo-dna-seq-alignment-wf/pkg.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "demo-dna-seq-alignment-wf",
"version": "1.7.1",
"version": "1.7.2",
"description": "DNA Seq Alignment Workflow",
"main": "main.nf",
"scripts": {
Expand All @@ -20,7 +20,7 @@
},
"dependencies": [
"github.com/icgc-argo/demo-wfpkgs/[email protected]",
"github.com/icgc-argo/demo-wfpkgs/[email protected].0",
"github.com/icgc-argo/demo-wfpkgs/[email protected].1",
"github.com/icgc-argo/demo-wfpkgs/[email protected]"
],
"devDependencies": [],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.nextflow*
.gitignore
tests
work
outdir
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Tool image for this package: extends the shared dna-seq-processing-tools
# base image and adds the package's Python entry-point script(s).
FROM quay.io/icgc-argo/dna-seq-processing-tools:base-docker.0.2.0

# Use the key="value" form; the space-separated "LABEL key value" form is the
# legacy syntax and is flagged by current Docker build checks.
LABEL org.opencontainers.image.source="https://github.com/icgc-argo/demo-wfpkgs"

# Put /tools first on PATH so the copied scripts can be invoked by name.
ENV PATH="/tools:${PATH}"

# Every *.py file in the build context is copied into /tools.
COPY *.py /tools/

CMD ["/bin/bash"]


Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env nextflow

/*
* Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

/*
* Contributors:
* Junjun Zhang <[email protected]>
* Linda Xiang <[email protected]>
*/

/********************************************************************/
/* this block is auto-generated based on info from pkg.json where */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
name = 'demo-bam-merge-sort-markdup'
version = '1.12.1'
container = [
'ghcr.io': 'ghcr.io/icgc-argo/demo-wfpkgs.demo-bam-merge-sort-markdup',
'quay.io': 'quay.io/icgc-argo/demo-wfpkgs.demo-bam-merge-sort-markdup'
]
default_container_registry = 'quay.io'
/********************************************************************/

// Default parameter values for this module; callers may override any of them.
params.aligned_lane_bams = ""
params.ref_genome_gz = ""
// Basename used for the merged output, its index and the duplicates-metrics archive.
params.aligned_basename = "grch38-aligned.merged"
// When true, the process passes `-d` to bam-merge-sort-markdup.py.
params.markdup = true
// Passed to the script as `-o`.
params.output_format = "cram"
// When true, the process passes `-l` to bam-merge-sort-markdup.py.
params.lossy = false
// Image tag override; when empty the package `version` declared above is used as the tag.
params.container_version = ""
// Key into the `container` map declared above.
params.container_registry = default_container_registry
params.cpus = 1
params.mem = 2 // in GB
// Results are published only when this is non-empty.
params.publish_dir = ""
params.tempdir = ""


/*
 * Runs bam-merge-sort-markdup.py on a set of aligned lane-level BAMs.
 *
 * Inputs:
 *   aligned_lane_bams            - BAM file(s) passed to the script via `-i`
 *   ref_genome_gz                - reference passed to the script via `-r`
 *   ref_genome_gz_secondary_file - staged alongside the reference; not referenced
 *                                  in the command line itself
 *   tempdir                      - temp directory; a path named 'NO_DIR' means
 *                                  "not provided" and `-t` is then omitted
 *
 * Outputs:
 *   merged_seq         - <aligned_basename>.bam or .cram
 *   merged_seq_idx     - <aligned_basename>.bam.bai or .cram.crai
 *   duplicates_metrics - <aligned_basename>.duplicates_metrics.tgz (optional)
 */
process bamMergeSortMarkdup {
  // Image tag falls back to the package version when no explicit version is given.
  container "${container[params.container_registry]}:${params.container_version ?: version}"
  // Publishing is enabled only when params.publish_dir is non-empty.
  publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}",
    mode: "copy",
    enabled: "${params.publish_dir ? true : ''}"

  cpus params.cpus
  memory "${params.mem} GB"


  input:
    path aligned_lane_bams
    path ref_genome_gz
    path ref_genome_gz_secondary_file
    path tempdir

  output:
    path "${params.aligned_basename}.{bam,cram}", emit: merged_seq
    path "${params.aligned_basename}.{bam.bai,cram.crai}", emit: merged_seq_idx
    path "${params.aligned_basename}.duplicates_metrics.tgz", optional: true, emit: duplicates_metrics

  script:
    // `def` keeps these variables local to each task; without it they are
    // shared script-level variables.
    def arg_markdup = params.markdup ? "-d" : ""
    def arg_lossy = params.lossy ? "-l" : ""
    def arg_tempdir = tempdir.name != 'NO_DIR' ? "-t ${tempdir}" : ""
    """
    bam-merge-sort-markdup.py \
      -i ${aligned_lane_bams} \
      -r ${ref_genome_gz} \
      -n ${params.cpus} \
      -b ${params.aligned_basename} ${arg_markdup} \
      -o ${params.output_format} ${arg_lossy} ${arg_tempdir}
    """
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#!/usr/bin/env python3

"""
Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR).
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Contributors:
Junjun Zhang <[email protected]>
Linda Xiang <[email protected]>
"""


import sys
import subprocess
import argparse
from multiprocessing import cpu_count
import json
import os

def run_cmd(cmd):
    """Run ``cmd`` through the shell, echo its output, and exit on failure.

    Args:
        cmd: Shell command line (a single string; pipes are allowed).

    Returns:
        A ``(stdout, stderr)`` tuple of the raw bytes captured from the command.

    Raises:
        SystemExit: if the command could not be started or returned a non-zero
            code. The exit status is the command's return code when one is
            available and non-zero, otherwise 1.
    """
    # Start with bytes so the .decode() calls below also work when Popen
    # itself fails and nothing was captured.
    stdout, stderr, p, success = b'', b'', None, True
    try:
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             shell=True)
        stdout, stderr = p.communicate()
    except Exception as e:
        print('Execution failed: %s' % e, file=sys.stderr)
        success = False

    if p and p.returncode != 0:
        print('Execution failed, none zero code returned.', file=sys.stderr)
        success = False

    # errors="replace": undecodable bytes in tool output must not raise here
    # and hide the real failure status.
    print(stdout.decode("utf-8", errors="replace"))
    print(stderr.decode("utf-8", errors="replace"), file=sys.stderr)

    if not success:
        # p is None when Popen raised; fall back to a generic failure code.
        sys.exit(p.returncode if p is not None and p.returncode else 1)

    return stdout, stderr

def main():
    """Merge input BAMs (optionally marking duplicates) and write BAM or CRAM.

    Parses the command line, builds a shell pipeline of the form
    ``<merge step> | <output step>`` and runs it with ``run_cmd``. When the
    merge step produced ``<output_base>.duplicates_metrics.txt`` in the current
    directory, the biobambam2 version is recorded in a JSON side file and all
    ``<output_base>.duplicates_metrics.*`` files are packed into a ``.tgz``.

    Exits with an error message when the temp directory does not exist; command
    failures terminate the program inside ``run_cmd``.
    """
    parser = argparse.ArgumentParser(description='Merge and markdup')
    parser.add_argument('-i','--input-bams', dest='input_bams',
                        type=str, help='Input bam file', nargs='+', required=True)
    parser.add_argument('-b','--output-base', dest='output_base',
                        type=str, help='Output merged file basename', required=True)
    parser.add_argument('-r', '--reference', dest='reference',
                        type=str, help='reference fasta', required=True)
    parser.add_argument('-t', '--tempdir', dest='tempdir', type=str, default=".",
                        help='Specify directory for temporary files')
    parser.add_argument("-n", "--cpus", dest='cpus', type=int, default=cpu_count())
    parser.add_argument("-d", "--mdup", dest='mdup', action='store_true')
    parser.add_argument("-l", "--lossy", dest='lossy', action='store_true')
    parser.add_argument("-o", "--output-format", dest='output_format', default='cram', choices=['bam', 'cram'])

    args = parser.parse_args()

    cmd = []

    if not os.path.isdir(args.tempdir):
        sys.exit('Error: specified tempdir %s does not exist!' % args.tempdir)

    # Merge step: both variants stream their result to /dev/stdout so it can be
    # piped straight into the output step.
    if args.mdup:
        merge = 'bammarkduplicates2 markthreads=%s tmpfile=%s/tmp level=0 O=/dev/stdout M=%s I=%s ' % \
                (str(args.cpus), args.tempdir, args.output_base + ".duplicates_metrics.txt", ' I='.join(args.input_bams))
    else:
        merge = 'samtools merge --no-PG -uf -@ %s /dev/stdout %s ' % (str(args.cpus), ' '.join(args.input_bams))

    # CRAM output step. The backslash is doubled so the command still contains
    # the two characters `\*` (a shell-escaped asterisk) without relying on an
    # invalid Python escape sequence.
    if args.lossy:
        cram = 'java -jar /tools/cramtools.jar cram -R %s --capture-all-tags --lossy-quality-score-spec \\*8 --preserve-read-names -O %s' % (args.reference, args.output_base + ".cram")
    else:
        cram = 'samtools view -C -T %s -@ %s --write-index /dev/stdin -o %s ' % (args.reference, args.cpus, args.output_base + ".cram")

    bam = 'samtools view -b -h -@ %s --write-index /dev/stdin -o %s##idx##%s ' % (args.cpus, args.output_base + ".bam", args.output_base + ".bam.bai")
    crai1 = 'samtools index -@ %s %s %s ' % (args.cpus, args.output_base + ".cram", args.output_base + ".cram.crai")

    # build command
    if args.output_format == 'bam':
        cmd.append('|'.join([merge, bam]))

    elif args.output_format == 'cram':
        cmd.append('|'.join([merge, cram]))
        # The lossy (cramtools) path has no --write-index, so index separately.
        if args.lossy: cmd.append(crai1)
    else:
        # Defensive only: argparse `choices` already restricts the value.
        sys.exit("Unsupported sequence format!")

    for c in cmd:
        run_cmd(c)

    # The metrics file is only expected when duplicates were marked.
    if os.path.isfile(os.path.join(os.getcwd(), args.output_base + ".duplicates_metrics.txt")):
        stdout, _ = run_cmd('bammarkduplicates2 -v 2>&1 | grep "biobambam2 version"')
        # Last whitespace-separated token of the matched line, minus a trailing '.'.
        version = stdout.decode("utf-8").split(' ')[-1].strip().rstrip('.')
        with open("%s.duplicates_metrics.extra_info.json" % args.output_base, "w") as j:
            j.write(json.dumps({ "tool": "biobambam2:bammarkduplicates2@%s" % version }, indent=2))

        tgz = 'tar czf %s.duplicates_metrics.tgz %s.duplicates_metrics.*' % (args.output_base, args.output_base)
        run_cmd(tgz)


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Docker settings applied when this config is loaded.
docker {
    enabled = true
    // Passes `-u <uid>:<gid>` to the container runtime. The `\$` escapes keep
    // `$(id -u)` / `$(id -g)` literal in this string.
    // NOTE(review): presumably they are expanded by the shell at container
    // launch so outputs are owned by the invoking user; confirm.
    runOptions = '-u \$(id -u):\$(id -g)'
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"name": "demo-bam-merge-sort-markdup",
"version": "1.12.1",
"description": "Merge multiple aligned BAMs, sort by coordinate and perform markduplicate",
"main": "bam-merge-sort-markdup.nf",
"scripts": {
"test": "pushd tests && ./run_tests.sh; popd"
},
"deprecated": false,
"keywords": [
"bioinformatics",
"alignment",
"markduplicate"
],
"repository": {
"type": "git",
"url": "https://github.com/icgc-argo/demo-wfpkgs.git"
},
"container": {
"registries": [
{
"registry": "quay.io",
"type": "docker",
"org": "icgc-argo",
"default": true
},
{
"registry": "ghcr.io",
"type": "docker",
"org": "icgc-argo",
"default": false
}
]
},
"dependencies": [],
"devDependencies": [
"github.com/icgc-argo/demo-wfpkgs/[email protected]"
],
"contributors": [
{
"name": "Junjun Zhang",
"email": "[email protected]"
},
{
"name": "Linda Xiang",
"email": "[email protected]"
}
],
"license": "AGPL-3.0",
"bugReport": "https://github.com/icgc-argo/demo-wfpkgs/issues",
"homepage": "https://github.com/icgc-argo/demo-wfpkgs#readme"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env nextflow

/*
* Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

/*
* Contributors:
* Junjun Zhang <[email protected]>
*/


// Test entry workflow: wires file inputs into the bamMergeSortMarkdup process.
nextflow.enable.dsl=2

params.aligned_lane_bams = ""
params.ref_genome_gz = ""
// Sentinel value: the process omits its `-t` option when the tempdir path is named 'NO_DIR'.
params.tempdir = "NO_DIR"
params.container_registry = "ghcr.io"


// The process under test receives this script's params.
include { bamMergeSortMarkdup } from '../bam-merge-sort-markdup.nf' params(params)
include { getSecondaryFiles } from './wfpr_modules/github.com/icgc-argo/demo-wfpkgs/[email protected]/main.nf'


// Lane-level BAMs; fails early if the given path/glob matches nothing.
Channel
    .fromPath(params.aligned_lane_bams, checkIfExists: true)
    .set { aligned_lane_bams_ch }

// Secondary files of the reference for the 'fai' and 'gzi' extensions.
// NOTE(review): presumably getSecondaryFiles returns "<ref>.fai" / "<ref>.gzi" paths; confirm in the helper package.
Channel
    .fromPath(getSecondaryFiles(params.ref_genome_gz, ['fai', 'gzi']), checkIfExists: true)
    .set { ref_genome_gz_idx_ch }


// will not run when imported as a module
workflow {
    main:
        bamMergeSortMarkdup(
            aligned_lane_bams_ch.collect(), // all lane bams to be merged
            file(params.ref_genome_gz),
            ref_genome_gz_idx_ch.collect(),
            file(params.tempdir)
        )
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
includeConfig '../nextflow.config'
Loading

0 comments on commit 077af8f

Please sign in to comment.