Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #700

Merged
merged 13 commits into from
Aug 10, 2024
Merged

Dev #700

4 changes: 4 additions & 0 deletions jcvi/formats/bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ def format(args):
Re-format BED file, e.g. switch sequence ids.
"""
p = OptionParser(format.__doc__)
p.add_argument("--chrprefix", help="Add prefix to seqid")
p.add_argument("--prefix", help="Add prefix to name column (4th)")
p.add_argument("--switch", help="Switch seqids based on two-column file")
p.set_outfile()
Expand All @@ -604,11 +605,14 @@ def format(args):
(bedfile,) = args
switch = DictFile(opts.switch, delimiter="\t") if opts.switch else None
prefix = opts.prefix
chrprefix = opts.chrprefix
bed = Bed(bedfile)
with must_open(opts.outfile, "w") as fw:
for b in bed:
if prefix:
b.accn = prefix + b.accn
if chrprefix:
b.seqid = chrprefix + b.seqid
if switch and b.seqid in switch:
b.seqid = switch[b.seqid]
print(b, file=fw)
Expand Down
160 changes: 125 additions & 35 deletions jcvi/graphics/landscape.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@
import sys

from collections import Counter, OrderedDict, defaultdict
from typing import List, Optional
from typing import Dict, List, Tuple, Optional

import numpy as np
import seaborn as sns

from ..algorithms.matrix import moving_sum
from ..apps.base import ActionDispatcher, OptionParser, logger
from ..formats.base import BaseFile, DictFile, LineFile, must_open
from ..formats.bed import Bed, bins, get_nbins
from ..formats.sizes import Sizes
from ..utils.cbook import autoscale, human_size
from ..utils.cbook import autoscale, human_size, percentage

from .base import (
CirclePolygon,
Expand Down Expand Up @@ -53,6 +54,9 @@
"Exons": "Genes (exons)",
}

# Consider a depth of 5 as minimum covered depth
MIN_COVERED_DEPTH = 5


class BinLine:
def __init__(self, row):
Expand Down Expand Up @@ -200,8 +204,6 @@ def mosdepth(args):
Plot depth vs. coverage per chromosome. Inspired by mosdepth plot. See also:
https://github.com/brentp/mosdepth
"""
import seaborn as sns

sns.set_style("darkgrid")

p = OptionParser(mosdepth.__doc__)
Expand Down Expand Up @@ -280,6 +282,10 @@ def draw_depth(
logscale: bool = False,
title: Optional[str] = None,
subtitle: Optional[str] = None,
median_line: bool = True,
draw_seqids: bool = True,
calculate_coverage: bool = False,
roi: Optional[List[Tuple[str, int]]] = None,
):
"""Draw depth plot on the given axes, using data from bed

Expand All @@ -302,6 +308,7 @@ def draw_depth(
ends = {}
label_positions = []
start = 0
end = 0
for seqid in seqids:
if seqid not in sizes:
continue
Expand All @@ -315,6 +322,8 @@ def draw_depth(
# Extract plotting data
data = []
data_by_seqid = defaultdict(list)
total_bp = 0
covered_bp = 0
for b in bed:
seqid = b.seqid
if seqid not in starts:
Expand All @@ -325,6 +334,10 @@ def draw_depth(
c = chrinfo[seqid].color if seqid in chrinfo else "k"
data.append((x, y, c))
data_by_seqid[seqid].append(y)
if y >= MIN_COVERED_DEPTH:
covered_bp += b.end - b.start
total_bp += b.end - b.start
logger.debug("cov: %s", percentage(covered_bp, total_bp, precision=0))

x, y, c = zip(*data)
ax.scatter(
Expand All @@ -345,14 +358,15 @@ def draw_depth(
seqid_end = ends[seqid]
seqid_median = np.median(values)
medians[seqid] = seqid_median
ax.plot(
(seqid_start, seqid_end),
(seqid_median, seqid_median),
"-",
lw=4,
color=c,
alpha=0.5,
)
if median_line:
ax.plot(
(seqid_start, seqid_end),
(seqid_median, seqid_median),
"-",
lw=4,
color=c,
alpha=0.5,
)

# Vertical lines for all the breaks
for pos in starts.values():
Expand All @@ -364,32 +378,52 @@ def draw_depth(

median_depth_y = 0.88
chr_label_y = 0.08
rotation = 20 if len(label_positions) > 10 else 0
for seqid, position in label_positions:
xpos = 0.1 + position * 0.8 / xsize
c = chrinfo[seqid].color if seqid in chrinfo else defaultcolor
newseqid = chrinfo[seqid].new_name if seqid in chrinfo else seqid
root.text(
xpos, chr_label_y, newseqid, color=c, ha="center", va="center", rotation=20
)
if draw_seqids:
root.text(
xpos,
chr_label_y,
newseqid,
color=c,
ha="center",
va="center",
rotation=rotation,
)
seqid_median = medians[seqid]
if median_line:
root.text(
xpos,
median_depth_y,
str(int(seqid_median)),
color=c,
ha="center",
va="center",
)

# Plot the regions of interest
if roi:
for chrom, pos, name in roi:
if chrom not in starts:
continue
x = starts[chrom] + pos
# TODO: Remove this special case
color = {"II": "tomato", "low qual": "g"}.get(name, "gray")
ax.plot((x, x), (0, maxdepth), "-", lw=2, color=color)

# Add an arrow to the right of the plot, indicating these are median depths
if median_line:
root.text(
xpos,
median_depth_y,
str(int(seqid_median)),
color=c,
ha="center",
0.91,
0.88,
r"$\leftarrow$median",
color="lightslategray",
va="center",
)

# Add an arrow to the right of the plot, indicating these are median depths
root.text(
0.91,
0.88,
r"$\leftarrow$median",
color="lightslategray",
va="center",
)

if title:
root.text(
0.95,
Expand All @@ -410,6 +444,17 @@ def draw_depth(
va="center",
size=15,
)
if calculate_coverage:
cov_pct = percentage(covered_bp, total_bp, precision=0, mode=None)
root.text(
0.95,
0.25,
latex(f"cov: {cov_pct}"),
color="darkslategray",
ha="center",
va="center",
size=15,
)

ax.set_xticks([])
ax.set_xlim(0, xsize)
Expand All @@ -423,6 +468,22 @@ def draw_depth(
normalize_axes(root)


def read_roi(roi_file: str) -> Dict[str, List[str]]:
"""
Read the regions of interest file, and return a dict of filename => regions.
"""
roi = defaultdict(list)
with open(roi_file, encoding="utf-8") as fp:
for row in fp:
filename, region, name = row.strip().split(",")[:3]
chrom, start_end = region.split(":", 1)
start, end = start_end.split("-")
region = (chrom, (int(start) + int(end)) // 2, name)
roi[filename].append(region)
logger.info("Read %d regions of interest", len(roi))
return roi


def draw_multi_depth(
root,
panel_roots,
Expand All @@ -432,6 +493,9 @@ def draw_multi_depth(
titleinfo_file: str,
maxdepth: int,
logscale: bool,
median_line: bool = True,
calculate_coverage: bool = False,
roi: Optional[str] = None,
):
"""
Draw multiple depth plots on the same canvas.
Expand All @@ -441,19 +505,23 @@ def draw_multi_depth(
npanels = len(bedfiles)
yinterval = 1.0 / npanels
ypos = 1 - yinterval
for bedfile, panel_root, panel_ax in zip(bedfiles, panel_roots, panel_axes):
roi = read_roi(roi) if roi else {}
for i, (bedfile, panel_root, panel_ax) in enumerate(
zip(bedfiles, panel_roots, panel_axes)
):
pf = op.basename(bedfile).split(".", 1)[0]
bed = Bed(bedfile)

if ypos > 0.001:
root.plot((0.02, 0.98), (ypos, ypos), "-", lw=2, color="lightslategray")
root.plot((0.02, 0.98), (ypos, ypos), "-", lw=2, color="lightgray")

title = titleinfo.get(bedfile, pf.split("_", 1)[0])
subtitle = None
if isinstance(title, TitleInfoLine):
subtitle = title.subtitle
title = title.title

draw_seqids = i in (0, npanels - 1)
draw_depth(
panel_root,
panel_ax,
Expand All @@ -463,6 +531,10 @@ def draw_multi_depth(
logscale=logscale,
title=title,
subtitle=subtitle,
median_line=median_line,
draw_seqids=draw_seqids,
calculate_coverage=calculate_coverage,
roi=roi.get(bedfile),
)
ypos -= yinterval

Expand Down Expand Up @@ -505,6 +577,23 @@ def depth(args):
p.add_argument(
"--logscale", default=False, action="store_true", help="Use log-scale on depth"
)
p.add_argument(
"--no-median-line",
default=False,
action="store_true",
help="Do not plot median depth line",
)
p.add_argument(
"--calculate-coverage",
default=False,
action="store_true",
help="Calculate genome coverage",
)
p.add_argument(
"--roi",
help="File that contains regions of interest, format: filename, chr:start-end",
)
p.set_outfile("depth.pdf")
opts, args, iopts = p.set_image_options(args, style="dark", figsize="14x4")

if len(args) < 1:
Expand Down Expand Up @@ -535,13 +624,14 @@ def depth(args):
opts.titleinfo,
opts.maxdepth,
opts.logscale,
median_line=not opts.no_median_line,
calculate_coverage=opts.calculate_coverage,
roi=opts.roi,
)

if npanels > 1:
pf = op.commonprefix(bedfiles)
pf = pf or "depth"
image_name = pf + "." + iopts.format
image_name = opts.outfile
savefig(image_name, dpi=iopts.dpi, iopts=iopts)
return image_name


def add_window_options(p):
Expand Down
3 changes: 2 additions & 1 deletion jcvi/utils/cbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import sys

from collections import defaultdict
from typing import Optional

from ..apps.base import logger

Expand Down Expand Up @@ -181,7 +182,7 @@ def enumerate_reversed(sequence):
yield index, sequence[index]


def percentage(a, b, precision=1, mode=0):
def percentage(a, b, precision=1, mode: Optional[int] = 0):
"""
>>> percentage(100, 200)
'100 of 200 (50.0%)'
Expand Down
Binary file added tests/graphics/data/VAR0_srtd.wgs.regions.bed.gz
Binary file not shown.
Binary file added tests/graphics/data/VAR1_srtd.wgs.regions.bed.gz
Binary file not shown.
Binary file added tests/graphics/data/VAR2_srtd.wgs.regions.bed.gz
Binary file not shown.
12 changes: 12 additions & 0 deletions tests/graphics/data/chrinfo.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
chr01A, #c51b7d, 1A
chr01B, #4d9221, 1B
chr02A, #c51b7d, 2A
chr02B, #4d9221, 2B
chr03A, #c51b7d, 3A
chr03B, #4d9221, 3B
chr04A, #c51b7d, 4A
chr04B, #4d9221, 4B
chr05A, #c51b7d, 5A
chr05B, #4d9221, 5B
chr06A, #c51b7d, 6A
chr06B, #4d9221, 6B
3 changes: 3 additions & 0 deletions tests/graphics/data/titleinfo.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
VAR0_srtd.wgs.regions.bed.gz, *S. species*, ‘Variety 1’
VAR1_srtd.wgs.regions.bed.gz, *S. species*, ‘Variety 2’
VAR2_srtd.wgs.regions.bed.gz, *S. species*, ‘Variety 3’
20 changes: 19 additions & 1 deletion tests/graphics/test_landscape.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,25 @@
import os.path as op

from jcvi.apps.base import cleanup
from jcvi.graphics.landscape import stack
from jcvi.graphics.landscape import depth, stack


def test_depth():
cwd = os.getcwd()
os.chdir(op.join(op.dirname(__file__), "data"))
cleanup("depth.pdf")
image_name = depth(
[
"VAR0_srtd.wgs.regions.bed.gz",
"VAR1_srtd.wgs.regions.bed.gz",
"VAR2_srtd.wgs.regions.bed.gz",
"--chrinfo=chrinfo.txt",
"--titleinfo=titleinfo.txt",
"--figsize=11x7",
]
)
assert op.exists(image_name)
os.chdir(cwd)


def test_stack():
Expand Down
Loading