Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enable gzip output for bedtools wrappers #3642

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9c5f406
updated for use of gzip
gtuckerkellogg Feb 7, 2025
2e0bfdc
gzip support for bedtools bamtobed and complement (with tests)
gtuckerkellogg Feb 8, 2025
4f493c0
updated test_wrappers and tests for all current bedtools wrappers for…
gtuckerkellogg Feb 8, 2025
e313300
Merge branch 'snakemake:master' into bedtools-gzip-output
gtuckerkellogg Feb 8, 2025
cd57a18
addressed CodeRabbit review
gtuckerkellogg Feb 8, 2025
4c1265b
from gzip to bgzip
gtuckerkellogg Feb 8, 2025
40fe0c9
merged to bgzip
gtuckerkellogg Feb 8, 2025
0665f1a
all bgzip, no gzip
gtuckerkellogg Feb 8, 2025
8056dc6
all bgzip, no gzip
gtuckerkellogg Feb 8, 2025
760a76f
Merge remote-tracking branch 'refs/remotes/origin/bedtools-gzip-outpu…
gtuckerkellogg Feb 8, 2025
d2028aa
Update bio/bedtools/complement/wrapper.py
gtuckerkellogg Feb 8, 2025
bf9a66b
added htslib dependency for bgzip in environment.yaml across bedtools…
gtuckerkellogg Feb 9, 2025
10eebb0
black check
gtuckerkellogg Feb 18, 2025
7be0d52
removed vscode cruft
gtuckerkellogg Feb 18, 2025
b926c85
Merge branch 'master' into bedtools-gzip-output
gtuckerkellogg Feb 18, 2025
76245e7
nitpicking resolved
gtuckerkellogg Feb 18, 2025
dda0a28
When coderabbit says nitpick, it means nitpick!
gtuckerkellogg Feb 18, 2025
d891935
Update bio/bedtools/bamtobed/wrapper.py
gtuckerkellogg Feb 18, 2025
db85a32
Remove unused import
fgvieira Feb 18, 2025
2c98855
Merge branch 'snakemake:master' into bedtools-gzip-output
gtuckerkellogg Feb 19, 2025
33aaf57
pinned environments
gtuckerkellogg Feb 19, 2025
4e00419
fixed mistakenly compressed bed file
gtuckerkellogg Feb 19, 2025
1599f1c
also for complement
gtuckerkellogg Feb 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bio/bedtools/bamtobed/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:
- nodefaults
dependencies:
- bedtools =2.31.1
- htslib =1.21
13 changes: 13 additions & 0 deletions bio/bedtools/bamtobed/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

rule bamtobed:
input:
"{sample}.bam",
Expand All @@ -9,3 +10,15 @@ rule bamtobed:
extra="-bedpe", # optional parameters
wrapper:
"master/bio/bedtools/bamtobed"

rule bamtobed_gz:
input:
"{sample}.bam",
output:
"{sample}.bed.gz",
log:
"logs/bamtobed/{sample}.gz.log",
params:
extra="-bedpe", # optional parameters
wrapper:
"master/bio/bedtools/bamtobed"
7 changes: 4 additions & 3 deletions bio/bedtools/bamtobed/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@
__copyright__ = "Copyright 2022, Filipe G. Vieira"
__license__ = "MIT"


# NOTE: `snakemake` is deliberately NOT imported. This script reads
# `snakemake.params`, `snakemake.output`, ... from a name it expects the
# Snakemake script runner to provide; `import snakemake` would rebind that
# name to the package module and break every attribute access below.
from snakemake.shell import shell  # type: ignore

# Log redirection fragment: stderr only, because stdout carries the BED data.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)
# Optional extra command-line arguments; defaults to an empty string.
extra = snakemake.params.get("extra", "")

# Append a bgzip pipe stage only when the first output path ends in ".gz".
compress = "| bgzip" if snakemake.output[0].endswith(".gz") else ""

# The output redirect sits inside the parentheses, {log} outside them, so the
# log fragment applies to the whole (bamToBed [| bgzip]) group.
shell(
    "(bamToBed"
    " {extra}"
    " -i {snakemake.input[0]}"
    " {compress}"
    " > {snakemake.output[0]}"
    ") {log}"
)
1 change: 1 addition & 0 deletions bio/bedtools/complement/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:
- nodefaults
dependencies:
- bedtools =2.31.1
- htslib =1.21
14 changes: 14 additions & 0 deletions bio/bedtools/complement/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,20 @@ rule bedtools_complement_bed:
wrapper:
"master/bio/bedtools/complement"

rule bedtools_complement_bed_gz:
input:
in_file="a.bed",
genome="dummy.genome"
output:
"results/bed-complement/a.complement.bed.gz"
params:
## Add optional parameters
extra="-L"
log:
"logs/a.complement.bed.gz.log"
wrapper:
"master/bio/bedtools/complement"

rule bedtools_complement_vcf:
input:
in_file="a.vcf",
Expand Down
Binary file not shown.
3 changes: 3 additions & 0 deletions bio/bedtools/complement/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@

# Optional extra command-line arguments; defaults to an empty string.
extra = snakemake.params.get("extra", "")

# Append a bgzip pipe stage only when the first output path ends in ".gz".
compress = "| bgzip" if snakemake.output[0].endswith(".gz") else ""

# Log redirection fragment, requested for both stdout and stderr.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# The output redirect sits inside the parentheses and {log} outside them,
# so the log fragment is applied to the parenthesised group as a whole.
shell(
"(bedtools complement"
" {extra}"
" -i {snakemake.input.in_file}"
" -g {snakemake.input.genome}"
" {compress}"
" > {snakemake.output[0]})"
" {log}"
)
1 change: 1 addition & 0 deletions bio/bedtools/coveragebed/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:
- nodefaults
dependencies:
- bedtools =2.31.1
- htslib =1.21
4 changes: 3 additions & 1 deletion bio/bedtools/coveragebed/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
__email__ = "[email protected]"
__license__ = "MIT"


from snakemake.shell import shell

shell.executable("bash")
Expand All @@ -17,6 +16,8 @@

output_file = snakemake.output[0]

compress = "| bgzip" if snakemake.output[0].endswith(".gz") else ""

if not isinstance(output_file, str) and len(snakemake.output) != 1:
raise ValueError("Output should be one file: " + str(output_file) + "!")

Expand All @@ -25,6 +26,7 @@
" -a {input_a}"
" -b {input_b}"
" {extra_params}"
" {compress}"
" > {output_file}"
" {log}"
)
1 change: 1 addition & 0 deletions bio/bedtools/genomecov/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:
- nodefaults
dependencies:
- bedtools =2.31.1
- htslib =1.21
3 changes: 3 additions & 0 deletions bio/bedtools/genomecov/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@
input_file = "-i " + snakemake.input.get("bed")
genome = "-g " + snakemake.input.get("ref")

# Append a bgzip pipe stage only when the first output path ends in ".gz".
compress = "| bgzip" if snakemake.output[0].endswith(".gz") else ""

# {input_file}, {genome} and {log} are set earlier in the script (partly
# outside this view). The output redirect is inside the parentheses and
# {log} follows the closing parenthesis.
shell(
"(genomeCoverageBed"
" {snakemake.params}"
" {input_file}"
" {genome}"
" {compress}"
" > {snakemake.output[0]}) {log}"
)
1 change: 1 addition & 0 deletions bio/bedtools/intersect/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:
- nodefaults
dependencies:
- bedtools =2.31.1
- htslib =1.21
17 changes: 16 additions & 1 deletion bio/bedtools/intersect/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
rule bedtools_merge:
rule bedtools_intersect:
input:
left="A.bed",
right="B.bed"
Expand All @@ -11,3 +11,18 @@ rule bedtools_merge:
"logs/intersect/A_B.log"
wrapper:
"master/bio/bedtools/intersect"


rule bedtools_intersect_gz:
input:
left="A.bed",
right="B.bed"
output:
"A_B.intersected.bed.gz"
params:
## Add optional parameters
extra="-wa -wb" ## In this example, we want to write original entries in A and B for each overlap.
log:
"logs/intersect/A_B_gz.log"
wrapper:
"master/bio/bedtools/intersect"
Binary file not shown.
Binary file not shown.
6 changes: 5 additions & 1 deletion bio/bedtools/intersect/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,21 @@
__email__ = "[email protected]"
__license__ = "MIT"

# NOTE: `snakemake` is deliberately NOT imported. This script reads
# `snakemake.params`, `snakemake.input`, ... from a name it expects the
# Snakemake script runner to provide; `import snakemake` would rebind that
# name to the package module and break every attribute access below.
from snakemake.shell import shell  # type: ignore

## Extract arguments
# Optional extra command-line arguments; defaults to an empty string.
extra = snakemake.params.get("extra", "")
# Log redirection fragment, requested for both stdout and stderr.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Append a bgzip pipe stage only when the first output path ends in ".gz".
compress = "| bgzip" if snakemake.output[0].endswith(".gz") else ""

# Redirect to output[0] explicitly so the file that is written is the same
# one whose suffix decided whether to compress.
shell(
    "(bedtools intersect"
    " {extra}"
    " -a {snakemake.input.left}"
    " -b {snakemake.input.right}"
    " {compress}"
    " > {snakemake.output[0]})"
    " {log}"
)
1 change: 1 addition & 0 deletions bio/bedtools/merge/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:
- nodefaults
dependencies:
- bedtools =2.31.1
- htslib =1.21
10 changes: 8 additions & 2 deletions bio/bedtools/merge/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
extra = snakemake.params.get("extra", "")

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

compress = "| bgzip" if snakemake.output[0].endswith(".gz") else ""

if len(snakemake.input) > 1:
if all(f.endswith(".gz") for f in snakemake.input):
cat = "zcat"
Expand All @@ -20,14 +23,17 @@
"({cat} {snakemake.input} | "
"sort -k1,1 -k2,2n | "
"bedtools merge {extra} "
"-i stdin > {snakemake.output}) "
" -i stdin "
" {compress}"
"> {snakemake.output[0]}) "
" {log}"
)
else:
shell(
"( bedtools merge"
" {extra}"
" -i {snakemake.input}"
" > {snakemake.output})"
" {compress}"
" > {snakemake.output[0]})"
" {log}"
)
1 change: 1 addition & 0 deletions bio/bedtools/slop/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:
- nodefaults
dependencies:
- bedtools =2.31.1
- htslib =1.21
3 changes: 3 additions & 0 deletions bio/bedtools/slop/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@

# Log redirection fragment, requested for both stdout and stderr.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Append a bgzip pipe stage only when the first output path ends in ".gz".
compress = "| bgzip" if snakemake.output[0].endswith(".gz") else ""

# {extra} is defined earlier in the script (outside this view).
# Redirect to output[0] explicitly so the file that is written is the same
# one whose suffix decided whether to compress.
shell(
    "(bedtools slop"
    " {extra}"
    " -i {snakemake.input[0]}"
    " -g {snakemake.params.genome}"
    " {compress}"
    " > {snakemake.output[0]})"
    " {log}"
)
1 change: 1 addition & 0 deletions bio/bedtools/sort/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:
- nodefaults
dependencies:
- bedtools =2.31.1
- htslib =1.21
3 changes: 3 additions & 0 deletions bio/bedtools/sort/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

compress = "| bgzip" if snakemake.output[0].endswith(".gz") else ""

if genome:
extra += " -g {}".format(genome)
elif faidx:
Expand All @@ -20,6 +22,7 @@
"(bedtools sort"
" {extra}"
" -i {snakemake.input.in_file}"
" {compress}"
" > {snakemake.output[0]})"
" {log}"
)
1 change: 1 addition & 0 deletions bio/bedtools/split/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:
- nodefaults
dependencies:
- bedtools =2.31.1
- htslib =1.21
4 changes: 3 additions & 1 deletion bio/bedtools/split/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

n_subfiles = len(snakemake.output)

compress = "| bgzip" if snakemake.output[0].endswith(".gz") else ""

with tempfile.TemporaryDirectory() as tmpdir:
shell(
"bedtools split"
Expand All @@ -23,4 +25,4 @@
# Copy each numbered chunk (out.00001.bed, out.00002.bed, ...) from the
# temporary directory to its declared output path.
# NOTE(review): `tmpdir` comes from the enclosing TemporaryDirectory context,
# so this loop presumably has to stay inside that `with` block — confirm the
# indentation when applying.
for idx, out in enumerate(snakemake.output, start=1):
    out_tmp = f"{tmpdir}/out.{idx:05d}.bed"
    # Decide compression per output file rather than from the first output
    # only, so a mix of ".bed" and ".bed.gz" targets is written correctly.
    compress = "| bgzip" if out.endswith(".gz") else ""
    shell("cat {out_tmp} {compress} > {out}")
23 changes: 21 additions & 2 deletions test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1909,7 +1909,7 @@ def test_bcftools_view_uncompressed_bcf(run):
def test_bedtools_bamtobed(run):
    """Run the bamtobed wrapper for both the plain and the ".gz" BED target."""
    run(
        "bio/bedtools/bamtobed",
        # Single argv list: the pre-change list without "a.bed.gz" is dropped.
        ["snakemake", "--cores", "1", "a.bed", "a.bed.gz", "--use-conda", "-F"],
    )


Expand All @@ -1936,10 +1936,16 @@ def test_bedtools_complement(run):
"--cores",
"1",
"results/bed-complement/a.complement.bed",
"results/bed-complement/a.complement.bed.gz",
"results/vcf-complement/a.complement.vcf",
"--use-conda",
"-F",
],
compare_results_with_expected={
"results/bed-complement/a.complement.bed": "expected/bed-complement/a.complement.bed",
"results/bed-complement/a.complement.bed.gz": "expected/bed-complement/a.complement.bed.gz",
"results/vcf-complement/a.complement.vcf": "expected/vcf-complement/a.complement.vcf"
}
)


Expand All @@ -1954,6 +1960,7 @@ def test_bedtools_sort(run):
"results/bed-sorted/a.sorted_by_file.bed",
"results/vcf-sorted/a.sorted_by_file.vcf",
"--use-conda",

"-F",
],
)
Expand All @@ -1977,7 +1984,19 @@ def test_bedtools_split(run):
def test_bedtools_intersect(run):
    """Run the intersect wrapper for the plain and ".gz" targets.

    Both produced files are compared against the files under ``expected/``.
    """
    run(
        "bio/bedtools/intersect",
        # Single argv list: the pre-change list without the ".gz" target is
        # dropped.
        [
            "snakemake",
            "--cores",
            "1",
            "A_B.intersected.bed",
            "A_B.intersected.bed.gz",
            "--use-conda",
            "-F",
        ],
        compare_results_with_expected={
            "A_B.intersected.bed": "expected/A_B.intersected.bed",
            "A_B.intersected.bed.gz": "expected/A_B.intersected.bed.gz",
        },
    )


Expand Down