Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Apr 29, 2024
1 parent f279ccf commit 3d9dd23
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 39 deletions.
100 changes: 69 additions & 31 deletions jcvi/assembly/hic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from collections import defaultdict
from functools import partial
from multiprocessing import Pool
from typing import List, Tuple
from typing import List, Optional, Tuple

import numpy as np

Expand Down Expand Up @@ -686,7 +686,13 @@ def generate_groups(groupsfile):


def read_matrix(
npyfile: str, header: dict, contig: str, groups: List[Tuple[str, str]], opts
npyfile: str,
header: dict,
contig: str,
groups: List[Tuple[str, str]],
vmin: int,
vmax: int,
breaks: bool,
):
"""
Read the matrix from the npy file and apply log transformation and thresholding.
Expand Down Expand Up @@ -729,7 +735,6 @@ def read_matrix(
B = A.astype("float64")
B += 1.0
B = np.log(B)
vmin, vmax = opts.vmin, opts.vmax
B[B < vmin] = vmin
B[B > vmax] = vmax
print(B)
Expand All @@ -738,12 +743,60 @@ def read_matrix(
breaks = list(header["starts"].values())
breaks += [total_bins] # This is actually discarded
breaks = sorted(breaks)[1:]
if contig or opts.nobreaks:
if contig or not breaks:
breaks = []

return B, new_groups, breaks


def draw_hic_heatmap(
root,
ax,
npyfile: str,
jsonfile: str,
contig: Optional[str],
groups_file: str,
title: str,
vmin: int,
vmax: int,
breaks: bool,
):
"""
Draw heatmap based on .npy file. The .npy file stores a square matrix with
bins of genome, and cells inside the matrix represent number of links
between bin i and bin j. The `genome.json` contains the offsets of each
contig/chr so that we know where to draw boundary lines, or extract per
contig/chromosome heatmap.
"""
groups = list(generate_groups(groups_file)) if groups_file else []

# Load contig/chromosome starts and sizes
header = json.loads(open(jsonfile, encoding="utf-8").read())
resolution = header.get("resolution")
assert resolution is not None, "`resolution` not found in `{}`".format(jsonfile)
logger.debug("Resolution set to %d", resolution)

B, new_groups, breaks = read_matrix(
npyfile, header, contig, groups, vmin, vmax, breaks
)
plot_heatmap(ax, B, breaks, groups=new_groups, binsize=resolution)

# Title
if contig:
title += "-{}".format(contig)
root.text(
0.5,
0.98,
markup(title),
color="darkslategray",
size=18,
ha="center",
va="center",
)

normalize_axes(root)


def heatmap(args):
"""
%prog heatmap input.npy genome.json
Expand Down Expand Up @@ -784,40 +837,25 @@ def heatmap(args):
sys.exit(not p.print_help())

npyfile, jsonfile = args
contig = opts.chr
groups = list(generate_groups(opts.groups)) if opts.groups else []

# Load contig/chromosome starts and sizes
header = json.loads(open(jsonfile, encoding="utf-8").read())
resolution = header.get("resolution")
assert resolution is not None, "`resolution` not found in `{}`".format(jsonfile)
logger.debug("Resolution set to %d", resolution)

B, new_groups, breaks = read_matrix(npyfile, header, contig, groups, opts)

# Canvas
fig = plt.figure(1, (iopts.w, iopts.h))
root = fig.add_axes((0, 0, 1, 1)) # whole canvas
ax = fig.add_axes((0.05, 0.05, 0.9, 0.9)) # just the heatmap

plot_heatmap(ax, B, breaks, groups=new_groups, binsize=resolution)

# Title
pf = npyfile.rsplit(".", 1)[0]
title = opts.title
if contig:
title += "-{}".format(contig)
root.text(
0.5,
0.98,
markup(title),
color="darkslategray",
size=18,
ha="center",
va="center",
draw_hic_heatmap(
root,
ax,
npyfile,
jsonfile,
contig=opts.chr,
groups_file=opts.groups,
title=opts.title,
vmin=opts.vmin,
vmax=opts.vmax,
breaks=not opts.nobreaks,
)

normalize_axes(root)
pf = npyfile.rsplit(".", 1)[0]
image_name = pf + "." + iopts.format
# macOS sometimes has way too verbose output
savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Expand Down
33 changes: 25 additions & 8 deletions jcvi/projects/jcvi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from ..apps.base import ActionDispatcher, OptionParser, logger
from ..assembly.geneticmap import draw_geneticmap_heatmap
from ..assembly.hic import draw_hic_heatmap
from ..graphics.base import normalize_axes, panel_labels, plt, savefig


Expand All @@ -31,12 +32,12 @@ def genomebuild(args):

fig = plt.figure(1, (iopts.w, iopts.h))
root = fig.add_axes((0, 0, 1, 1))
ax1_root = fig.add_axes((0, 0, 0.32, 1))
ax2_root = fig.add_axes((0.32, 0, 0.34, 1))
ax3_root = fig.add_axes((0.66, 0, 0.34, 1))
ax1 = fig.add_axes((0.03, 0.1, 0.23, 0.8))
ax2 = fig.add_axes((0.35, 0.1, 0.27, 0.8))
ax3 = fig.add_axes((0.69, 0.1, 0.27, 0.8))
ax1_root = fig.add_axes((0, 0, 1 / 3, 1))
ax2_root = fig.add_axes((1 / 3, 0, 1 / 3, 1))
ax3_root = fig.add_axes((2 / 3, 0, 1 / 3, 1))
ax1 = fig.add_axes((1 / 3 * 0.1, 0.1, 1 / 3 * 0.8, 0.8))
ax2 = fig.add_axes((1 / 3 * 1.1, 0.1, 1 / 3 * 0.8, 0.8))
ax3 = fig.add_axes((1 / 3 * 2.1, 0.1, 1 / 3 * 0.8, 0.8))

# Panel A
logger.info("Plotting read kmer histogram")
Expand All @@ -47,10 +48,26 @@ def genomebuild(args):

# Panel C
logger.info("Plotting Hi-C contact map concordance")
draw_hic_heatmap(
ax3_root,
ax3,
hic_matrix,
hic_json,
contig=None,
groups_file="groups",
title="*S. species* Hi-C contact map",
vmin=1,
vmax=6,
breaks=True,
)

labels = ((0.05, 0.95, "A"), (0.35, 0.95, "B"), (0.7, 0.95, "C"))
labels = (
(1 / 3 * 0.1, 0.95, "A"),
(1 / 3 * 1.1, 0.95, "B"),
(1 / 3 * 2.1, 0.95, "C"),
)
panel_labels(root, labels)
normalize_axes([root, ax1_root, ax2_root, ax3_root, ax1, ax2, ax3])
normalize_axes([root, ax1_root, ax2_root, ax3_root])

image_name = "genomebuild.pdf"
savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Expand Down

0 comments on commit 3d9dd23

Please sign in to comment.