diff --git a/jcvi/formats/bed.py b/jcvi/formats/bed.py index 05ca8ddb..ac656a58 100755 --- a/jcvi/formats/bed.py +++ b/jcvi/formats/bed.py @@ -594,6 +594,7 @@ def format(args): Re-format BED file, e.g. switch sequence ids. """ p = OptionParser(format.__doc__) + p.add_argument("--chrprefix", help="Add prefix to seqid") p.add_argument("--prefix", help="Add prefix to name column (4th)") p.add_argument("--switch", help="Switch seqids based on two-column file") p.set_outfile() @@ -605,11 +606,14 @@ def format(args): (bedfile,) = args switch = DictFile(opts.switch, delimiter="\t") if opts.switch else None prefix = opts.prefix + chrprefix = opts.chrprefix bed = Bed(bedfile) with must_open(opts.outfile, "w") as fw: for b in bed: if prefix: b.accn = prefix + b.accn + if chrprefix: + b.seqid = chrprefix + b.seqid if switch and b.seqid in switch: b.seqid = switch[b.seqid] print(b, file=fw) diff --git a/jcvi/graphics/landscape.py b/jcvi/graphics/landscape.py index 0b9f34cf..ad09c86d 100644 --- a/jcvi/graphics/landscape.py +++ b/jcvi/graphics/landscape.py @@ -11,16 +11,17 @@ import sys from collections import Counter, OrderedDict, defaultdict -from typing import List, Optional +from typing import Dict, List, Tuple, Optional import numpy as np +import seaborn as sns from ..algorithms.matrix import moving_sum from ..apps.base import ActionDispatcher, OptionParser, logger from ..formats.base import BaseFile, DictFile, LineFile, must_open from ..formats.bed import Bed, bins, get_nbins from ..formats.sizes import Sizes -from ..utils.cbook import autoscale, human_size +from ..utils.cbook import autoscale, human_size, percentage from .base import ( CirclePolygon, @@ -53,6 +54,9 @@ "Exons": "Genes (exons)", } +# Consider a depth of 5 as minimum covered depth +MIN_COVERED_DEPTH = 5 + class BinLine: def __init__(self, row): @@ -200,8 +204,6 @@ def mosdepth(args): Plot depth vs. coverage per chromosome. Inspired by mosdepth plot. See also: https://github.com/brentp/mosdepth """ - import seaborn as sns - sns.set_style("darkgrid") p = OptionParser(mosdepth.__doc__) @@ -280,6 +282,10 @@ def draw_depth( logscale: bool = False, title: Optional[str] = None, subtitle: Optional[str] = None, + median_line: bool = True, + draw_seqids: bool = True, + calculate_coverage: bool = False, + roi: Optional[List[Tuple[str, int]]] = None, ): """Draw depth plot on the given axes, using data from bed @@ -302,6 +308,7 @@ def draw_depth( ends = {} label_positions = [] start = 0 + end = 0 for seqid in seqids: if seqid not in sizes: continue @@ -315,6 +322,8 @@ def draw_depth( # Extract plotting data data = [] data_by_seqid = defaultdict(list) + total_bp = 0 + covered_bp = 0 for b in bed: seqid = b.seqid if seqid not in starts: @@ -325,6 +334,10 @@ def draw_depth( c = chrinfo[seqid].color if seqid in chrinfo else "k" data.append((x, y, c)) data_by_seqid[seqid].append(y) + if y >= MIN_COVERED_DEPTH: + covered_bp += b.end - b.start + total_bp += b.end - b.start + logger.debug("cov: %s", percentage(covered_bp, total_bp, precision=0)) x, y, c = zip(*data) ax.scatter( @@ -345,14 +358,15 @@ def draw_depth( seqid_end = ends[seqid] seqid_median = np.median(values) medians[seqid] = seqid_median - ax.plot( - (seqid_start, seqid_end), - (seqid_median, seqid_median), - "-", - lw=4, - color=c, - alpha=0.5, - ) + if median_line: + ax.plot( + (seqid_start, seqid_end), + (seqid_median, seqid_median), + "-", + lw=4, + color=c, + alpha=0.5, + ) # Vertical lines for all the breaks for pos in starts.values(): @@ -364,32 +378,52 @@ def draw_depth( median_depth_y = 0.88 chr_label_y = 0.08 + rotation = 20 if len(label_positions) > 10 else 0 for seqid, position in label_positions: xpos = 0.1 + position * 0.8 / xsize c = chrinfo[seqid].color if seqid in chrinfo else defaultcolor newseqid = chrinfo[seqid].new_name if seqid in chrinfo else seqid - root.text( - xpos, chr_label_y, newseqid, color=c, ha="center", va="center", rotation=20 - ) + if draw_seqids: + root.text( + xpos, + chr_label_y, + newseqid, + color=c, + ha="center", + va="center", + rotation=rotation, + ) seqid_median = medians[seqid] + if median_line: + root.text( + xpos, + median_depth_y, + str(int(seqid_median)), + color=c, + ha="center", + va="center", + ) + + # Plot the regions of interest + if roi: + for chrom, pos, name in roi: + if chrom not in starts: + continue + x = starts[chrom] + pos + # TODO: Remove this special case + color = {"II": "tomato", "low qual": "g"}.get(name, "gray") + ax.plot((x, x), (0, maxdepth), "-", lw=2, color=color) + + # Add an arrow to the right of the plot, indicating these are median depths + if median_line: root.text( - xpos, - median_depth_y, - str(int(seqid_median)), - color=c, - ha="center", + 0.91, + 0.88, + r"$\leftarrow$median", + color="lightslategray", va="center", ) - # Add an arrow to the right of the plot, indicating these are median depths - root.text( - 0.91, - 0.88, - r"$\leftarrow$median", - color="lightslategray", - va="center", - ) - if title: root.text( 0.95, @@ -410,6 +444,17 @@ def draw_depth( va="center", size=15, ) + if calculate_coverage: + cov_pct = percentage(covered_bp, total_bp, precision=0, mode=None) + root.text( + 0.95, + 0.25, + latex(f"cov: {cov_pct}"), + color="darkslategray", + ha="center", + va="center", + size=15, + ) ax.set_xticks([]) ax.set_xlim(0, xsize) @@ -423,6 +468,22 @@ def draw_depth( normalize_axes(root) +def read_roi(roi_file: str) -> Dict[str, List[str]]: + """ + Read the regions of interest file, and return a dict of filename => regions. + """ + roi = defaultdict(list) + with open(roi_file, encoding="utf-8") as fp: + for row in fp: + filename, region, name = row.strip().split(",")[:3] + chrom, start_end = region.split(":", 1) + start, end = start_end.split("-") + region = (chrom, (int(start) + int(end)) // 2, name) + roi[filename].append(region) + logger.info("Read %d regions of interest", len(roi)) + return roi + + def draw_multi_depth( root, panel_roots, @@ -432,6 +493,9 @@ def draw_multi_depth( titleinfo_file: str, maxdepth: int, logscale: bool, + median_line: bool = True, + calculate_coverage: bool = False, + roi: Optional[str] = None, ): """ Draw multiple depth plots on the same canvas. @@ -441,12 +505,15 @@ def draw_multi_depth( npanels = len(bedfiles) yinterval = 1.0 / npanels ypos = 1 - yinterval - for bedfile, panel_root, panel_ax in zip(bedfiles, panel_roots, panel_axes): + roi = read_roi(roi) if roi else {} + for i, (bedfile, panel_root, panel_ax) in enumerate( + zip(bedfiles, panel_roots, panel_axes) + ): pf = op.basename(bedfile).split(".", 1)[0] bed = Bed(bedfile) if ypos > 0.001: - root.plot((0.02, 0.98), (ypos, ypos), "-", lw=2, color="lightslategray") + root.plot((0.02, 0.98), (ypos, ypos), "-", lw=2, color="lightgray") title = titleinfo.get(bedfile, pf.split("_", 1)[0]) subtitle = None @@ -454,6 +521,7 @@ def draw_multi_depth( subtitle = title.subtitle title = title.title + draw_seqids = i in (0, npanels - 1) draw_depth( panel_root, panel_ax, @@ -463,6 +531,10 @@ def draw_multi_depth( logscale=logscale, title=title, subtitle=subtitle, + median_line=median_line, + draw_seqids=draw_seqids, + calculate_coverage=calculate_coverage, + roi=roi.get(bedfile), ) ypos -= yinterval @@ -505,6 +577,23 @@ def depth(args): p.add_argument( "--logscale", default=False, action="store_true", help="Use log-scale on depth" ) + p.add_argument( + "--no-median-line", + default=False, + action="store_true", + help="Do not plot median depth line", + ) + p.add_argument( + "--calculate-coverage", + default=False, + action="store_true", + help="Calculate genome coverage", + ) + p.add_argument( + "--roi", + help="File that contains regions of interest, format: filename, chr:start-end", + ) + p.set_outfile("depth.pdf") opts, args, iopts = p.set_image_options(args, style="dark", figsize="14x4") if len(args) < 1: @@ -535,13 +624,14 @@ def depth(args): opts.titleinfo, opts.maxdepth, opts.logscale, + median_line=not opts.no_median_line, + calculate_coverage=opts.calculate_coverage, + roi=opts.roi, ) - if npanels > 1: - pf = op.commonprefix(bedfiles) - pf = pf or "depth" - image_name = pf + "." + iopts.format + image_name = opts.outfile savefig(image_name, dpi=iopts.dpi, iopts=iopts) + return image_name def add_window_options(p): diff --git a/jcvi/utils/cbook.py b/jcvi/utils/cbook.py index e11b1591..20c7c2c5 100644 --- a/jcvi/utils/cbook.py +++ b/jcvi/utils/cbook.py @@ -8,6 +8,7 @@ import sys from collections import defaultdict +from typing import Optional from ..apps.base import logger @@ -181,7 +182,7 @@ def enumerate_reversed(sequence): yield index, sequence[index] -def percentage(a, b, precision=1, mode=0): +def percentage(a, b, precision=1, mode: Optional[int] = 0): """ >>> percentage(100, 200) '100 of 200 (50.0%)' diff --git a/tests/graphics/data/VAR0_srtd.wgs.regions.bed.gz b/tests/graphics/data/VAR0_srtd.wgs.regions.bed.gz new file mode 100644 index 00000000..4a65373b Binary files /dev/null and b/tests/graphics/data/VAR0_srtd.wgs.regions.bed.gz differ diff --git a/tests/graphics/data/VAR1_srtd.wgs.regions.bed.gz b/tests/graphics/data/VAR1_srtd.wgs.regions.bed.gz new file mode 100644 index 00000000..db5f09af Binary files /dev/null and b/tests/graphics/data/VAR1_srtd.wgs.regions.bed.gz differ diff --git a/tests/graphics/data/VAR2_srtd.wgs.regions.bed.gz b/tests/graphics/data/VAR2_srtd.wgs.regions.bed.gz new file mode 100644 index 00000000..9cad7849 Binary files /dev/null and b/tests/graphics/data/VAR2_srtd.wgs.regions.bed.gz differ diff --git a/tests/graphics/data/chrinfo.txt b/tests/graphics/data/chrinfo.txt new file mode 100644 index 00000000..4278f393 --- /dev/null +++ b/tests/graphics/data/chrinfo.txt @@ -0,0 +1,12 @@ +chr01A, #c51b7d, 1A +chr01B, #4d9221, 1B +chr02A, #c51b7d, 2A +chr02B, #4d9221, 2B +chr03A, #c51b7d, 3A +chr03B, #4d9221, 3B +chr04A, #c51b7d, 4A +chr04B, #4d9221, 4B +chr05A, #c51b7d, 5A +chr05B, #4d9221, 5B +chr06A, #c51b7d, 6A +chr06B, #4d9221, 6B diff --git a/tests/graphics/data/titleinfo.txt b/tests/graphics/data/titleinfo.txt new file mode 100644 index 00000000..3b39dadf --- /dev/null +++ b/tests/graphics/data/titleinfo.txt @@ -0,0 +1,3 @@ +VAR0_srtd.wgs.regions.bed.gz, *S. species*, ‘Variety 1’ +VAR1_srtd.wgs.regions.bed.gz, *S. species*, ‘Variety 2’ +VAR2_srtd.wgs.regions.bed.gz, *S. species*, ‘Variety 3’ diff --git a/tests/graphics/test_landscape.py b/tests/graphics/test_landscape.py index ac537051..a758e02c 100644 --- a/tests/graphics/test_landscape.py +++ b/tests/graphics/test_landscape.py @@ -2,7 +2,25 @@ import os.path as op from jcvi.apps.base import cleanup -from jcvi.graphics.landscape import stack +from jcvi.graphics.landscape import depth, stack + + +def test_depth(): + cwd = os.getcwd() + os.chdir(op.join(op.dirname(__file__), "data")) + cleanup("depth.pdf") + image_name = depth( + [ + "VAR0_srtd.wgs.regions.bed.gz", + "VAR1_srtd.wgs.regions.bed.gz", + "VAR2_srtd.wgs.regions.bed.gz", + "--chrinfo=chrinfo.txt", + "--titleinfo=titleinfo.txt", + "--figsize=11x7", + ] + ) + assert op.exists(image_name) + os.chdir(cwd) def test_stack():