From 64ab819e705bd63cd1f08ef268d3c8f42c091cac Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Mon, 29 Apr 2024 23:14:34 -0700 Subject: [PATCH 01/15] cleanup catalog --- jcvi/compara/catalog.py | 57 +++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/jcvi/compara/catalog.py b/jcvi/compara/catalog.py index 1e7935f5..1220a6f0 100644 --- a/jcvi/compara/catalog.py +++ b/jcvi/compara/catalog.py @@ -3,26 +3,27 @@ import os.path as op import sys -import logging import string from collections import defaultdict from itertools import product, combinations -from jcvi.formats.blast import BlastLine -from jcvi.formats.fasta import Fasta -from jcvi.formats.bed import Bed -from jcvi.formats.base import must_open, BaseFile -from jcvi.utils.grouper import Grouper -from jcvi.utils.cbook import gene_name -from jcvi.apps.base import ( +from ..apps.base import ( + ActionDispatcher, OptionParser, glob, - ActionDispatcher, + logger, + mkdir, need_update, sh, - mkdir, ) +from ..formats.base import must_open, BaseFile +from ..formats.bed import Bed +from ..formats.blast import BlastLine +from ..formats.fasta import Fasta +from ..utils.cbook import gene_name +from ..utils.grouper import Grouper + from .base import AnchorFile from .synteny import check_beds @@ -146,10 +147,8 @@ def enrich(args): members = row.strip().split(",") groups.join(*members) - logging.debug( - "Imported {0} families with {1} members.".format( - len(groups), groups.num_members - ) + logger.debug( + "Imported %d families with %d members.", len(groups), groups.num_members ) seen = set() @@ -162,9 +161,7 @@ def enrich(args): omggroups.join(*genes) nmembers = omggroups.num_members - logging.debug( - "Imported {0} OMG families with {1} members.".format(len(omggroups), nmembers) - ) + logger.debug("Imported %d OMG families with %d members.", len(omggroups), nmembers) assert nmembers == len(seen) alltaxa = set(str(x) for x in range(ntaxa)) @@ -230,7 +227,7 @@ def enrich(args): if not ghost: seen.update(best_addition) - logging.debug("Recruited {0} new genes.".format(len(recruited))) + logger.debug("Recruited %d new genes.", len(recruited)) def pairwise_distance(a, b, threadorder): @@ -259,7 +256,7 @@ def insert_into_threaded(atoms, threaded, threadorder): i = min_idx t = threaded[i] threaded.insert(i, atoms) - logging.debug("Insert {0} before {1} (d={2})".format(atoms, t, min_d)) + logger.debug("Insert %s before %s (d=%d)", atoms, t, min_d) def sort_layout(thread, listfile, column=0): @@ -288,7 +285,7 @@ def sort_layout(thread, listfile, column=0): assert len(threaded) == len(imported) total = sum(1 for x in open(listfile)) - logging.debug("Total: {0}, currently threaded: {1}".format(total, len(threaded))) + logger.debug("Total: %d, currently threaded: %d", total, len(threaded)) fp = open(listfile) for row in fp: atoms = row.split() @@ -301,7 +298,7 @@ def sort_layout(thread, listfile, column=0): print("\t".join(atoms), file=fw) fw.close() - logging.debug("File `{0}` sorted to `{1}`.".format(outfile, thread.filename)) + logger.debug("File `%s` sorted to `%s`.", outfile, thread.filename) def layout(args): @@ -355,7 +352,7 @@ def layout(args): cmd = "sort -k{0},{0} {1} -o {1}".format(lastcolumn, listfile) sh(cmd) - logging.debug("List file written to `{0}`.".format(listfile)) + logger.debug("List file written to `%s`.", listfile) sort = opts.sort if sort: thread = Bed(sort) @@ -405,9 +402,7 @@ def group(args): for a, b, idx in ac.iter_pairs(): groups.join(a, b) - logging.debug( - "Created {0} groups with {1} members.".format(len(groups), groups.num_members) - ) + logger.debug("Created %d groups with %d members.", len(groups), groups.num_members) outfile = opts.outfile fw = must_open(outfile, "w") @@ -498,7 +493,7 @@ def geneinfo(bed, genomeidx, ploidy): ) fwinfo.close() - logging.debug("Update info file `{0}`.".format(infofile)) + logger.debug("Update info file `%s`.", infofile) return infofile @@ -546,7 +541,7 @@ def omgprepare(args): cscore([blastfile, "-o", cscorefile, "--cutoff=0", "--pct"]) ac = AnchorFile(anchorfile) pairs = set((a, b) for a, b, i in ac.iter_pairs()) - logging.debug("Imported {0} pairs from `{1}`.".format(len(pairs), anchorfile)) + logger.debug("Imported %d pairs from `%s`.", len(pairs), anchorfile) weightsfile = pf + ".weights" fp = open(cscorefile) @@ -569,7 +564,7 @@ def omgprepare(args): npairs += 1 fw.close() - logging.debug("Write {0} pairs to `{1}`.".format(npairs, weightsfile)) + logger.debug("Write %d pairs to `%s`.", npairs, weightsfile) def make_ortholog(blocksfile, rbhfile, orthofile): @@ -592,7 +587,7 @@ def make_ortholog(blocksfile, rbhfile, orthofile): b += "'" print("\t".join((a, b)), file=fw) - logging.debug("Recruited {0} pairs from RBH.".format(nrecruited)) + logger.debug("Recruited %d pairs from RBH.", nrecruited) fp.close() fw.close() @@ -753,8 +748,8 @@ def ortholog(args): scan(dargs) except ValueError as e: if ignore_zero_anchor: - logging.debug(f"{e}") - logging.debug("Ignoring this error and continuing...") + logger.debug(str(e)) + logger.debug("Ignoring this error and continuing...") return else: raise ValueError(e) From 512b26dcfbb4834e36335e969447d6b2560f6bd5 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Mon, 29 Apr 2024 23:36:37 -0700 Subject: [PATCH 02/15] cleanup allmaps.py --- jcvi/assembly/allmaps.py | 125 ++++++++++++++++++--------------------- 1 file changed, 57 insertions(+), 68 deletions(-) diff --git a/jcvi/assembly/allmaps.py b/jcvi/assembly/allmaps.py index 7559c915..a367d438 100644 --- a/jcvi/assembly/allmaps.py +++ b/jcvi/assembly/allmaps.py @@ -12,33 +12,24 @@ import numpy as np import networkx as nx -from cmmodule.utils import read_chain_file -from cmmodule.mapbed import crossmap_bed_file from collections import Counter, defaultdict from functools import partial from itertools import combinations, product -from more_itertools import pairwise from typing import Optional +from cmmodule.utils import read_chain_file +from cmmodule.mapbed import crossmap_bed_file +from more_itertools import pairwise + from jcvi import __version__ as version -from jcvi.algorithms.formula import reject_outliers, spearmanr -from jcvi.algorithms.lis import ( +from ..algorithms.ec import GA_setup, GA_run +from ..algorithms.formula import reject_outliers, spearmanr +from ..algorithms.lis import ( longest_monotonic_subseq_length_loose as lms, longest_monotonic_subsequence_loose as lmseq, ) -from jcvi.algorithms.tsp import hamiltonian -from jcvi.algorithms.matrix import determine_signs -from jcvi.algorithms.ec import GA_setup, GA_run -from jcvi.formats.agp import AGP, order_to_agp, build as agp_build, reindex -from jcvi.formats.base import DictFile, FileMerger, must_open, read_block -from jcvi.formats.bed import Bed, BedLine, natsorted, sort -from jcvi.formats.chain import fromagp -from jcvi.formats.sizes import Sizes -from jcvi.graphics.landscape import draw_gauge -from jcvi.utils.cbook import human_size, percentage -from jcvi.utils.grouper import Grouper -from jcvi.utils.table import tabulate -from jcvi.apps.base import ( +from ..algorithms.matrix import determine_signs +from ..apps.base import ( ActionDispatcher, OptionGroup, OptionParser, @@ -46,10 +37,20 @@ cleanup, flatten, get_today, + logger, mkdir, need_update, sh, ) +from ..formats.agp import AGP, order_to_agp, build as agp_build, reindex +from ..formats.base import DictFile, FileMerger, must_open, read_block +from ..formats.bed import Bed, BedLine, natsorted, sort +from ..formats.chain import fromagp +from ..formats.sizes import Sizes +from ..graphics.landscape import draw_gauge +from ..utils.cbook import human_size, percentage +from ..utils.grouper import Grouper +from ..utils.table import tabulate START, END = "START", "END" @@ -193,9 +194,9 @@ def callback(tour, gen, i=0): return tour i = 0 - best_tour, best_fitness = None, None + _, best_fitness = None, None while True: # Multiple EC rounds due to orientation fixes - logging.debug("Start EC round {0}".format(i)) + logger.debug("Start EC round %d", i) scaffolds_oo = dict(tour) scfs, tour, ww = self.prepare_ec(scaffolds, tour, weights) callbacki = partial(callback, i=i) @@ -206,16 +207,14 @@ def callback(tour, gen, i=0): ) tour = callbacki(tour, "FIN") if best_fitness and fitness <= best_fitness: - logging.debug( - "No fitness improvement: {0}. Exit EC.".format(best_fitness) - ) + logger.debug("No fitness improvement: %s. Exit EC.", best_fitness) break tour = self.fix_orientation(tour) best_tour, best_fitness = tour, fitness print_tour( fwtour, self.object, tag, "GA{0}-FIXORI".format(i), tour, recode=True ) - logging.debug("Current best fitness: {0}".format(best_fitness)) + logger.debug("Current best fitness: %s", best_fitness) i += 1 tour = self.fix_tour(tour) @@ -285,7 +284,7 @@ def distances_to_tour(self): continue G.add_edge(b, a, weight=d) - logging.debug("Graph size: |V|=%d, |E|=%d", len(G), G.size()) + logger.debug("Graph size: |V|=%d, |E|=%d", len(G), G.size()) L = dict(nx.all_pairs_dijkstra_path_length(G)) for a, b in combinations(scaffolds, 2): @@ -421,7 +420,7 @@ def fix_tour(self, tour): if score_with > score_without: keep.add(s) dropped = len(tour) - len(keep) - logging.debug("Dropped {0} minor scaffolds".format(dropped)) + logger.debug("Dropped %d minor scaffolds", dropped) return [(s, o) for (s, o) in tour if s in keep] def fix_orientation(self, tour): @@ -458,7 +457,7 @@ def fix_orientation(self, tour): fixed += 1 tour = [(x, orientations[x]) for x in scaffolds] - logging.debug("Fixed orientations for {0} scaffolds.".format(fixed)) + logger.debug("Fixed orientations for %d scaffolds.", fixed) return tour @@ -489,7 +488,7 @@ def __init__(self, b): try: self.mapname, self.lg = b.accn.split("-", 1) except ValueError: - logging.error("Malformed marker name: {}".format(b.accn)) + logger.error("Malformed marker name: %s", b.accn) sys.exit(1) self.cm = float(cm) self.accn = b.accn @@ -535,10 +534,10 @@ def report(self): self.seqids = sorted(set(x.seqid for x in self)) self.mapnames = sorted(set(x.mapname for x in self)) self.mlgs = sorted(set(x.mlg for x in self)) - logging.debug( - "Map contains {0} markers in {1} linkage groups.".format( - self.nmarkers, len(self.mlgs) - ) + logger.debug( + "Map contains %d markers in %d linkage groups.", + self.nmarkers, + len(self.mlgs), ) def extract(self, seqid): @@ -582,9 +581,7 @@ def get_bins(self, function, remove_outliers): s[pair] = cm original += len(markers) clean += len(cm) - logging.debug( - "Retained {0} clean markers.".format(percentage(clean, original)) - ) + logger.debug("Retained %s clean markers.", percentage(clean, original)) return s def remove_outliers(self, markers, function): @@ -648,7 +645,7 @@ def __init__(self, filename, mapnames, cast=int): self.pivot = pivot self.ref = ref - logging.debug("Map weights: {0}".format(self.items())) + logger.debug("Map weights: %s", self.items()) def update_maps(self, mapnames, default=1): keys = list(self.keys()) @@ -659,15 +656,13 @@ def update_maps(self, mapnames, default=1): if m in self: continue self[m] = default - logging.debug("Weight for `{0}` set to {1}.".format(m, default)) + logger.debug("Weight for `%s` set to %d.", m, default) def get_pivot(self, mapnames): # Break ties by occurence in file common_mapnames = set(self.maps) & set(mapnames) if not common_mapnames: - logging.error( - "No common names found between %s and %s", self.maps, mapnames - ) + logger.error("No common names found between %s and %s", self.maps, mapnames) sys.exit(1) return max( (w, -self.maps.index(m), m) for m, w in self.items() if m in common_mapnames @@ -719,7 +714,7 @@ def calculate_coords(self, r=0.8, gapsize=0.1): class GapEstimator(object): def __init__(self, mapc, agp, seqid, mlg, function=lambda x: x.cm): mm = mapc.extract_mlg(mlg) - logging.debug("Extracted {0} markers for {1}-{2}".format(len(mm), seqid, mlg)) + logger.debug("Extracted %d markers for %s-%s", len(mm), seqid, mlg) self.mlgsize = max(function(x) for x in mm) self.agp = [x for x in agp if x.object == seqid] @@ -1054,9 +1049,7 @@ def split(args): start = end = (start + end) / 2 print("\t".join(str(x) for x in (mi.seqid, start - 1, end))) nbreaks += 1 - logging.debug( - "A total of {} breakpoints inferred (--chunk={})".format(nbreaks, nchunk) - ) + logger.debug("A total of %d breakpoints inferred (--chunk=%d)", nbreaks, nchunk) def movie(args): @@ -1112,7 +1105,7 @@ def movie(args): fwagp = must_open(agpfile, "w") order_to_agp(seqid, tour, sizes, fwagp, gapsize=gapsize, evidence="map") fwagp.close() - logging.debug("%s written to `%s`.", header, agpfile) + logger.debug("%s written to `%s`.", header, agpfile) build([inputbed, scaffoldsfasta, "--cleanup"]) pdf_name = plot([inputbed, seqid, "--title={0}".format(label)]) sh("mv {0} {1}".format(pdf_name, image_name)) @@ -1262,7 +1255,7 @@ def merge(args): try: m = CSVMapLine(row, mapname=mapname) if m.cm < 0: - logging.error("Ignore marker with negative genetic distance") + logger.error("Ignore marker with negative genetic distance") print(row.strip(), file=sys.stderr) else: b.append(BedLine(m.bedline)) @@ -1270,7 +1263,7 @@ def merge(args): continue b.print_to_file(filename=outfile, sorted=True) - logging.debug("A total of %d markers written to `%s`.", len(b), outfile) + logger.debug("A total of %d markers written to `%s`.", len(b), outfile) assert len(maps) == len(mapnames), "You have a collision in map names" write_weightsfile(mapnames, weightsfile=opts.weightsfile) @@ -1309,7 +1302,7 @@ def mergebed(args): continue b.print_to_file(filename=outfile, sorted=True) - logging.debug("A total of %d markers written to `%s`.", len(b), outfile) + logger.debug("A total of %d markers written to `%s`.", len(b), outfile) assert len(maps) == len(mapnames), "You have a collision in map names" write_weightsfile(mapnames, weightsfile=opts.weightsfile) @@ -1317,16 +1310,14 @@ def mergebed(args): def write_weightsfile(mapnames, weightsfile="weights.txt"): if op.exists(weightsfile): - logging.debug( - "Weights file `{0}` found. Will not overwrite.".format(weightsfile) - ) + logger.debug("Weights file `%s` found. Will not overwrite.", weightsfile) return fw = open(weightsfile, "w") for mapname in sorted(mapnames): weight = 1 print(mapname, weight, file=fw) - logging.debug("Weights file written to `%s`.", weightsfile) + logger.debug("Weights file written to `%s`.", weightsfile) def best_no_ambiguous(d, label): @@ -1463,10 +1454,8 @@ def path(args): cpus = opts.cpus seed = opts.seed if sys.version_info[:2] < (2, 7): - logging.debug( - "Python version: {0}. CPUs set to 1.".format( - sys.version.splitlines()[0].strip() - ) + logger.debug( + "Python version: %s. CPUs set to 1.", sys.version.splitlines()[0].strip() ) cpus = 1 @@ -1484,7 +1473,7 @@ def path(args): ref = weights.ref linkage = opts.linkage oseqid = opts.seqid - logging.debug("Linkage function: {0}-linkage".format(linkage)) + logger.debug("Linkage function: %s-linkage", linkage) linkage = { "single": min, "double": double_linkage, @@ -1500,12 +1489,12 @@ def path(args): C.join(mlg) if partitionsfile: - logging.debug("Partition LGs based on `{}`".format(partitionsfile)) + logger.debug("Partition LGs based on `%s`", partitionsfile) fp = open(partitionsfile) for row in fp: C.join(*row.strip().split(",")) else: - logging.debug("Partition LGs based on {0}".format(ref)) + logger.debug("Partition LGs based on %s", ref) for mapname in mapnames: if mapname == ref: continue @@ -1561,9 +1550,9 @@ def path(args): tag = "|".join(lgs) lgs_maps = set(x.split("-")[0] for x in lgs) if pivot not in lgs_maps: - logging.debug("Skipping {0} ...".format(tag)) + logger.debug("Skipping %s ...", tag) continue - logging.debug("Working on {0} ...".format(tag)) + logger.debug("Working on %s ...", tag) s = ScaffoldOO( lgs, scaffolds, @@ -1593,7 +1582,7 @@ def path(args): ) for i, (c, size) in enumerate(sorted(chrsizes.items(), key=lambda x: -x[1])): newc = "chr{0}".format(i + 1) - logging.debug("{0}: {1} => {2}".format(c, size, newc)) + logger.debug("%s: %d => %d", c, size, newc) conversion[c] = newc for s in solutions: s.object = conversion[s.object] @@ -1609,8 +1598,8 @@ def path(args): order_to_agp(s.object, s.tour, sizes, fwagp, gapsize=gapsize, evidence="map") fwagp.close() - logging.debug("AGP file written to `%s`.", agpfile) - logging.debug("Tour file written to `%s`.", tourfile) + logger.debug("AGP file written to `%s`.", agpfile) + logger.debug("Tour file written to `%s`.", tourfile) build([inputbed, fastafile]) @@ -1638,7 +1627,7 @@ def write_unplaced_agp(agpfile, scaffolds, unplaced_agp): if s in scaffolds_seen: continue order_to_agp(s, [(s, "?")], sizes, fwagp) - logging.debug("Write unplaced AGP to `%s`", unplaced_agp) + logger.debug("Write unplaced AGP to `%s`", unplaced_agp) def summary(args): @@ -1764,7 +1753,7 @@ def build(args): liftedbed = mapbed.rsplit(".", 1)[0] + ".lifted.bed" if need_update((mapbed, chainfile), liftedbed): - logging.debug( + logger.debug( "Lifting markers from positions in `%s` to new positions in `%s`", mapbed, liftedbed, @@ -1848,8 +1837,8 @@ def plot(args): s = Scaffold(seqid, cc) mlgs = [k for k, v in s.mlg_counts.items() if v >= links] while not mlgs: - links /= 2 - logging.error("No markers to plot, --links reset to {0}".format(links)) + links //= 2 + logger.error("No markers to plot, --links reset to %d", links) mlgs = [k for k, v in s.mlg_counts.items() if v >= links] mlgsizes = {} From b970e9ab2ffa0c5ff371973542de4c3ebfcec39c Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Mon, 29 Apr 2024 23:41:18 -0700 Subject: [PATCH 03/15] remove unused --- jcvi/assembly/allmaps.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jcvi/assembly/allmaps.py b/jcvi/assembly/allmaps.py index a367d438..61343d70 100644 --- a/jcvi/assembly/allmaps.py +++ b/jcvi/assembly/allmaps.py @@ -194,7 +194,7 @@ def callback(tour, gen, i=0): return tour i = 0 - _, best_fitness = None, None + best_fitness = None while True: # Multiple EC rounds due to orientation fixes logger.debug("Start EC round %d", i) scaffolds_oo = dict(tour) @@ -210,7 +210,7 @@ def callback(tour, gen, i=0): logger.debug("No fitness improvement: %s. Exit EC.", best_fitness) break tour = self.fix_orientation(tour) - best_tour, best_fitness = tour, fitness + best_fitness = fitness print_tour( fwtour, self.object, tag, "GA{0}-FIXORI".format(i), tour, recode=True ) From c865d6b26dc5a37ec155eb0d15596dca39a94e46 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Mon, 29 Apr 2024 23:55:47 -0700 Subject: [PATCH 04/15] minor --- jcvi/graphics/landscape.py | 27 ++++++++++++++------------- jcvi/projects/jcvi.py | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/jcvi/graphics/landscape.py b/jcvi/graphics/landscape.py index 91be96ff..e8c3290e 100644 --- a/jcvi/graphics/landscape.py +++ b/jcvi/graphics/landscape.py @@ -51,7 +51,7 @@ } -class BinLine(object): +class BinLine: def __init__(self, row): args = row.split() self.chr = args[0] @@ -70,7 +70,7 @@ def __init__(self, filename): super(BinFile, self).__init__(filename) self.mapping = defaultdict(list) - fp = open(filename) + fp = open(filename, encoding="utf-8") for row in fp: b = BinLine(row) self.append(b) @@ -125,13 +125,12 @@ def __init__(self, filename, delimiter=","): def main(): actions = ( - ("stack", "create landscape plot with genic/te composition"), - ("heatmap", "similar to stack but adding heatmap"), ("composite", "combine line plots, feature bars and alt-bars"), - ("multilineplot", "combine multiple line plots in one vertical stack"), - # Related to chromosomal depth ("depth", "show per chromosome depth plot across genome"), + ("heatmap", "similar to stack but adding heatmap"), ("mosdepth", "plot depth vs. coverage per chromosome"), + ("multilineplot", "combine multiple line plots in one vertical stack"), + ("stack", "create landscape plot with genic/te composition"), ) p = ActionDispatcher(actions) p.dispatch(globals()) @@ -152,7 +151,7 @@ def parse_distfile(filename): dists = defaultdict(Counter) with must_open(filename) as fp: for row in fp: - chromosome, start, end, depth = row.split() + chromosome, _, _, depth = row.split() depth = int(float(depth)) dists[chromosome][depth] += 1 logger.debug("Loaded %d seqids", len(dists)) @@ -171,7 +170,7 @@ def parse_groupsfile(filename): filename (str): Path to the groups file. """ groups = [] - with open(filename) as fp: + with open(filename, encoding="utf-8") as fp: for row in fp: chrs, colors = row.split() groups.append((chrs.split(","), colors.split(","))) @@ -579,7 +578,7 @@ def get_beds(s, binned=False): def linearray(binfile, chr, window, shift): mn = binfile.mapping[chr] - m, n = zip(*mn) + m, _ = zip(*mn) m = np.array(m, dtype="float") w = window // shift @@ -718,7 +717,7 @@ def composite(args): for bb in altbarbeds: root.text(xend + 0.01, yy, bb.split(".")[0], va="center") bb = Bed(bb) - for i, b in enumerate(bb): + for b in bb: start, end = xs(b.start), xs(b.end) span = end - start if span < 0.0001: @@ -1074,6 +1073,10 @@ def stack(args): switch = DictFile(opts.switch) stacks = opts.stacks.split(",") + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + bedfiles = get_beds(stacks) binfiles = get_binfiles(bedfiles, fastafile, shift, subtract=subtract, merge=merge) @@ -1084,8 +1087,6 @@ def stack(args): inner = 0.02 # y distance between tracks pf = fastafile.rsplit(".", 1)[0] - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) # Gauge ratio = draw_gauge(root, margin, maxl) @@ -1103,7 +1104,7 @@ def stack(args): cc = ca[0].upper() + cb if switch and cc in switch: - cc = "\n".join((cc, "({0})".format(switch[cc]))) + cc = "\n".join((cc, f"({switch[cc]})")) root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray)) ax = fig.add_axes((xx, yy, xlen, yinterval - inner)) diff --git a/jcvi/projects/jcvi.py b/jcvi/projects/jcvi.py index dc38dd02..419ce70b 100644 --- a/jcvi/projects/jcvi.py +++ b/jcvi/projects/jcvi.py @@ -96,7 +96,7 @@ def diversity(args): def landscape(args): """ - %prog landscape features.bed athaliana.sizes Fig5.png + %prog landscape features.bed athaliana.sizes TAIR10_chr_all.fas Chr2 Plot landscape composite figure, including: A. Example genomic features painted on Arabidopsis genome From 9a12d12190eef8f036f069c7ab05e12bc2c64913 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 00:14:23 -0700 Subject: [PATCH 05/15] minor formatting --- jcvi/graphics/landscape.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/jcvi/graphics/landscape.py b/jcvi/graphics/landscape.py index e8c3290e..1d1d3c42 100644 --- a/jcvi/graphics/landscape.py +++ b/jcvi/graphics/landscape.py @@ -16,11 +16,11 @@ import numpy as np from ..algorithms.matrix import moving_sum -from ..apps.base import OptionParser, ActionDispatcher, logger +from ..apps.base import ActionDispatcher, OptionParser, logger from ..formats.base import BaseFile, DictFile, LineFile, must_open from ..formats.bed import Bed, bins, get_nbins from ..formats.sizes import Sizes -from ..utils.cbook import human_size, autoscale +from ..utils.cbook import autoscale, human_size from .base import ( CirclePolygon, @@ -951,8 +951,10 @@ def heatmap(args): savefig(image_name, dpi=iopts.dpi, iopts=iopts) -def draw_gauge(ax, margin, maxl, rightmargin=None): - # Draw a gauge on the top of the canvas +def draw_gauge(ax, margin: float, maxl: int, rightmargin: Optional[float] = None): + """ + Draw a gauge on the top of the canvas, showing the scale of the chromosome. + """ rightmargin = rightmargin or margin ax.plot([margin, 1 - rightmargin], [1 - margin, 1 - margin], "k-", lw=2) @@ -981,7 +983,13 @@ def draw_gauge(ax, margin, maxl, rightmargin=None): def get_binfiles( - inputfiles, fastafile, shift, mode="span", subtract=None, binned=False, merge=True + inputfiles: List[str], + fastafile: str, + shift: int, + mode: str = "span", + subtract: Optional[int] = None, + binned: bool = False, + merge: bool=True, ): """ Get binfiles from input files. If not binned, then bin them first. From 9c1449101c1c4d92c5183b72184932a8cccb0090 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 06:58:13 -0700 Subject: [PATCH 06/15] cleanup apps.base --- jcvi/apps/base.py | 113 ++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 70 deletions(-) diff --git a/jcvi/apps/base.py b/jcvi/apps/base.py index 5c96eec8..09a58454 100644 --- a/jcvi/apps/base.py +++ b/jcvi/apps/base.py @@ -3,18 +3,16 @@ """ import errno +import fnmatch +import logging import os -import time import os.path as op import shutil import signal import sys -import logging -import fnmatch +import time from collections.abc import Iterable -from http.client import HTTPSConnection -from urllib.parse import urlencode from configparser import ( ConfigParser, RawConfigParser, @@ -22,23 +20,25 @@ NoSectionError, ParsingError, ) + +from http.client import HTTPSConnection +from optparse import OptionGroup, OptionParser as OptionP, SUPPRESS_HELP from socket import gethostname -from subprocess import PIPE, call, check_output -from optparse import OptionParser as OptionP, OptionGroup, SUPPRESS_HELP +from subprocess import CalledProcessError, PIPE, call, check_output +from time import ctime from typing import Any, Collection, List, Optional, Tuple, Union - +from urllib.parse import urlencode from natsort import natsorted from rich.console import Console from rich.logging import RichHandler from jcvi import __copyright__, __version__ -# http://newbebweb.blogspot.com/2012/02/python-head-ioerror-errno-32-broken.html -nobreakbuffer = lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL) -nobreakbuffer() -os.environ["LC_ALL"] = "C" -JCVIHELP = "JCVI utility libraries {} [{}]\n".format(__version__, __copyright__) +os.environ["LC_ALL"] = "C" +# http://newbebweb.blogspot.com/2012/02/python-head-ioerror-errno-32-broken.html +signal.signal(signal.SIGPIPE, signal.SIG_DFL) +JCVIHELP = f"JCVI utility libraries {__version__} [{__copyright__}]\n" TextCollection = Union[str, List[str], Tuple[str, ...]] @@ -58,7 +58,9 @@ def debug(level=logging.DEBUG): def get_logger(name: str, level: int = logging.DEBUG): - """Return a logger with a default ColoredFormatter.""" + """ + Return a logger with a default ColoredFormatter. + """ logger = logging.getLogger(name) if logger.hasHandlers(): logger.handlers.clear() @@ -628,7 +630,9 @@ def set_image_options( return opts, args, iopts def set_dotplot_opts(self, theme: int = 2) -> OptionGroup: - """Used in compara.catalog and graphics.dotplot""" + """ + Used in compara.catalog and graphics.dotplot + """ from jcvi.graphics.base import set1 group = OptionGroup(self, "Dot plot parameters") @@ -1002,34 +1006,7 @@ def set_annot_reformat_opts(self): ) def set_home(self, prog, default=None): - tag = "--{0}_home".format(prog) - default = default or { - "amos": "~/code/amos-code", - "trinity": "~/export/trinityrnaseq-2.0.6", - "hpcgridrunner": "~/export/hpcgridrunner-1.0.2", - "cdhit": "~/export/cd-hit-v4.6.1-2012-08-27", - "maker": "~/export/maker", - "augustus": "~/export/maker/exe/augustus", - "pasa": "~/export/PASApipeline-2.0.2", - "gatk": "~/export", - "gmes": "~/export/gmes", - "gt": "~/export/genometools", - "sspace": "~/export/SSPACE-STANDARD-3.0_linux-x86_64", - "gapfiller": "~/export/GapFiller_v1-11_linux-x86_64", - "pbjelly": "~/export/PBSuite_15.2.20", - "picard": "~/export/picard-tools-1.138", - "khmer": "~/export/khmer", - "tassel": "/usr/local/projects/MTG4/packages/tassel", - "tgi": "~/export/seqclean-x86_64", - "eddyyeh": "/home/shared/scripts/eddyyeh", - "fiona": "~/export/fiona-0.2.0-Linux-x86_64", - "fermi": "~/export/fermi", - "lobstr": "/mnt/software/lobSTR", - "shapeit": "/mnt/software/shapeit", - "impute": "/mnt/software/impute", - "beagle": "java -jar /mnt/software/beagle.14Jan16.841.jar", - "minimac": "/mnt/software/Minimac3/bin", - }.get(prog, None) + tag = f"--{prog}_home" if default is None: # Last attempt at guessing the path try: default = op.dirname(which(prog)) @@ -1037,7 +1014,7 @@ def set_home(self, prog, default=None): default = None else: default = op.expanduser(default) - help = "Home directory for {0}".format(prog.upper()) + help = f"Home directory for {prog.upper()}" self.add_option(tag, default=default, help=help) def set_aligner(self, aligner="bowtie"): @@ -1053,7 +1030,7 @@ def set_verbose(self, help="Print detailed reports"): def ConfigSectionMap(Config, section): """ Read a specific section from a ConfigParser() object and return - a dict() of all key-value pairs in that section + a dict of all key-value pairs in that section """ cfg = {} options = Config.options(section) @@ -1061,9 +1038,9 @@ def ConfigSectionMap(Config, section): try: cfg[option] = Config.get(section, option) if cfg[option] == -1: - logger.debug("skip: %s", option) + logger.debug("Skip: %s", option) except: - logger.debug("exception on %s!", option) + logger.error("Exception on %s", option) cfg[option] = None return cfg @@ -1085,7 +1062,13 @@ def get_abs_path(link_name): datadir = get_abs_path(op.join(op.dirname(__file__), "../utils/data")) -datafile = lambda x: op.join(datadir, x) + + +def datafile(x: str, datadir: str = datadir): + """ + Return the full path to the data file in the data directory. + """ + return op.join(datadir, x) def splitall(path): @@ -1466,9 +1449,8 @@ def download( str: Local file name. """ from urllib.parse import urlsplit - from subprocess import CalledProcessError - scheme, netloc, path, query, fragment = urlsplit(url) + _, _, path, _, _ = urlsplit(url) basepath = op.basename(path) if basepath: url_gzipped = basepath.endswith(".gz") @@ -1546,14 +1528,14 @@ def getfilesize(filename, ratio=None): def main(): actions = ( + ("expand", "move files in subfolders into the current folder"), ("less", "enhance the unix `less` command"), + ("mdownload", "multiple download a list of files"), + ("mergecsv", "merge a set of tsv files"), + ("notify", "send an email/push notification"), ("timestamp", "record timestamps for all files in the current folder"), - ("expand", "move files in subfolders into the current folder"), ("touch", "recover timestamps for files in the current folder"), - ("mdownload", "multiple download a list of files"), ("waitpid", "wait for a PID to finish and then perform desired action"), - ("notify", "send an email/push notification"), - ("mergecsv", "merge a set of tsv files"), ) p = ActionDispatcher(actions) p.dispatch(globals()) @@ -1569,7 +1551,7 @@ def mdownload(args): from jcvi.apps.grid import Jobs p = OptionParser(mdownload.__doc__) - opts, args = p.parse_args(args) + _, args = p.parse_args(args) if len(args) != 1: sys.exit(not p.print_help()) @@ -1630,13 +1612,13 @@ def timestamp(args): This file can be used later to recover previous timestamps through touch(). """ p = OptionParser(timestamp.__doc__) - opts, args = p.parse_args(args) + _, args = p.parse_args(args) if len(args) != 1: sys.exit(not p.print_help()) (path,) = args - for root, dirs, files in os.walk(path): + for root, _, files in os.walk(path): for f in files: filename = op.join(root, f) atime, mtime = get_times(filename) @@ -1650,10 +1632,8 @@ def touch(args): Recover timestamps for files in the current folder. CAUTION: you must execute this in the same directory as timestamp(). """ - from time import ctime - p = OptionParser(touch.__doc__) - opts, args = p.parse_args(args) + _, args = p.parse_args(args) if len(args) != 1: sys.exit(not p.print_help()) @@ -1707,7 +1687,7 @@ def less(args): from jcvi.formats.base import must_open p = OptionParser(less.__doc__) - opts, args = p.parse_args(args) + _, args = p.parse_args(args) if len(args) != 2: sys.exit(not p.print_help()) @@ -2019,7 +1999,6 @@ def pid_exists(pid): """Check whether pid exists in the current process table.""" if pid < 0: return False - import errno try: os.kill(pid, 0) @@ -2137,13 +2116,6 @@ def waitpid(args): if len(args) == 0: sys.exit(not p.print_help()) - if not opts.message: - """ - If notification message not specified by user, just get - the name of the running command and use it as the message - """ - from subprocess import check_output - sep = ":::" cmd = None if sep in args: @@ -2269,7 +2241,8 @@ def inspect(object): def sample_N(a: Collection[Any], N: int, seed: Optional[int] = None) -> List[Any]: - """When size of N is > size of a, random.sample() will emit an error: + """ + When size of N is > size of a, random.sample() will emit an error: ValueError: sample larger than population This method handles such restrictions by repeatedly sampling when that From e78a5e0381dd5702532b0176984d8c47ec74aabe Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 07:05:56 -0700 Subject: [PATCH 07/15] Move trinity.py from assembly to annnotation --- jcvi/{assembly => annotation}/trinity.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename jcvi/{assembly => annotation}/trinity.py (100%) diff --git a/jcvi/assembly/trinity.py b/jcvi/annotation/trinity.py similarity index 100% rename from jcvi/assembly/trinity.py rename to jcvi/annotation/trinity.py From 84243cedc72d93c401cc63b748769826a8c569b2 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 07:20:53 -0700 Subject: [PATCH 08/15] cleanup vanilla --- jcvi/graphics/base.py | 5 ++- jcvi/projects/jcvi.py | 6 ++-- jcvi/projects/vanilla.py | 74 ++++++++++++++++++---------------------- 3 files changed, 38 insertions(+), 47 deletions(-) diff --git a/jcvi/graphics/base.py b/jcvi/graphics/base.py index 11f82031..29d3a494 100644 --- a/jcvi/graphics/base.py +++ b/jcvi/graphics/base.py @@ -38,7 +38,7 @@ FancyBboxPatch, ) -from ..apps.base import datadir, glob, listify, logger, sample_N, which +from ..apps.base import datadir, glob, logger, sample_N, which from ..formats.base import LineFile from ..utils.cbook import human_size @@ -275,11 +275,10 @@ def prettyplot(): blues_r, reds, blue_red, green_purple, red_purple = prettyplot() -def normalize_axes(axes): +def normalize_axes(*axes): """ Normalize the axes to have the same scale. """ - axes = listify(axes) for ax in axes: ax.set_xlim(0, 1) ax.set_ylim(0, 1) diff --git a/jcvi/projects/jcvi.py b/jcvi/projects/jcvi.py index 419ce70b..4eaed4e6 100644 --- a/jcvi/projects/jcvi.py +++ b/jcvi/projects/jcvi.py @@ -88,7 +88,7 @@ def diversity(args): (0.25 + 0.25 * 0.1, 0.95, "B"), ) panel_labels(root, labels) - normalize_axes([root, ax2_root]) + normalize_axes(root, ax2_root) image_name = "diversity.pdf" savefig(image_name, dpi=iopts.dpi, iopts=iopts) @@ -156,7 +156,7 @@ def landscape(args): (0.42, 0.95, "B"), ) panel_labels(root, labels) - normalize_axes([root, ax1_root]) + normalize_axes(root, ax1_root) image_name = "landscape.pdf" savefig(image_name, dpi=iopts.dpi, iopts=iopts) @@ -228,7 +228,7 @@ def genomebuild(args): (1 / 3 * 2.1, 0.95, "C"), ) panel_labels(root, labels) - normalize_axes([root, ax1_root, ax2_root, ax3_root]) + normalize_axes(root, ax1_root, ax2_root, ax3_root) image_name = "genomebuild.pdf" savefig(image_name, dpi=iopts.dpi, iopts=iopts) diff --git a/jcvi/projects/vanilla.py b/jcvi/projects/vanilla.py index 4d6d8829..7ca951c8 100644 --- a/jcvi/projects/vanilla.py +++ b/jcvi/projects/vanilla.py @@ -4,17 +4,18 @@ """ Plotting scripts for the vanilla genome paper. """ -import logging import sys -from jcvi.apps.base import ActionDispatcher, OptionParser -from jcvi.compara.synteny import check_beds -from jcvi.formats.base import get_number -from jcvi.formats.bed import Bed -from jcvi.graphics.base import normalize_axes, panel_labels, plt, savefig -from jcvi.graphics.glyph import TextCircle -from jcvi.graphics.synteny import Synteny, draw_gene_legend +from ..apps.base import ActionDispatcher, OptionParser, logger from ..compara.base import AnchorFile +from ..compara.synteny import check_beds +from ..formats.base import get_number +from ..formats.bed import Bed +from ..graphics.base import normalize_axes, panel_labels, plt, savefig +from ..graphics.chromosome import draw_chromosomes +from ..graphics.glyph import TextCircle +from ..graphics.synteny import Synteny, draw_gene_legend +from ..graphics.tree import parse_tree, LeafInfoFile, WGDInfoFile, draw_tree def main(): @@ -42,20 +43,20 @@ def phylogeny(args): Create a composite figure with (A) tree and (B) ks. """ - from jcvi.graphics.tree import parse_tree, LeafInfoFile, WGDInfoFile, draw_tree + from ..compara.ks import Layout, KsPlot, KsFile p = OptionParser(phylogeny.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="10x12") + _, args, iopts = p.set_image_options(args, figsize="10x12") (datafile, layoutfile) = args - logging.debug("Load tree file `{0}`".format(datafile)) + logger.debug("Load tree file `%s`", datafile) t, hpd = parse_tree(datafile) fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - ax1 = fig.add_axes([0, 0.4, 1, 0.6]) - ax2 = fig.add_axes([0.12, 0.065, 0.8, 0.3]) + root = fig.add_axes((0, 0, 1, 1)) + ax1 = fig.add_axes((0, 0.4, 1, 0.6)) + ax2 = fig.add_axes((0.12, 0.065, 0.8, 0.3)) margin, rmargin = 0.1, 0.2 # Left and right margin leafinfo = LeafInfoFile("leafinfo.csv").cache @@ -77,9 +78,6 @@ def phylogeny(args): wgdinfo=wgdinfo, geoscale=True, ) - - from jcvi.apps.ks import Layout, KsPlot, KsFile - # Panel B ks_min = 0.0 ks_max = 3.0 @@ -106,7 +104,7 @@ def phylogeny(args): kp.draw(filename=None) - normalize_axes([root, ax1]) + normalize_axes(root, ax1) labels = ((0.05, 0.95, "A"), (0.05, 0.4, "B")) panel_labels(root, labels) @@ -120,17 +118,15 @@ def tree(args): Create a tree figure. """ - from jcvi.graphics.tree import parse_tree, LeafInfoFile, WGDInfoFile, draw_tree - p = OptionParser(tree.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="10x8") + _, args, iopts = p.set_image_options(args, figsize="10x8") (datafile,) = args - logging.debug("Load tree file `{0}`".format(datafile)) + logger.debug("Load tree file `%s`", datafile) t, hpd = parse_tree(datafile) fig = plt.figure(1, (iopts.w, iopts.h)) - ax1 = fig.add_axes([0, 0, 1, 1]) + ax1 = fig.add_axes((0, 0, 1, 1)) margin, rmargin = 0.1, 0.2 # Left and right margin leafinfo = LeafInfoFile("leafinfo.csv").cache @@ -153,7 +149,7 @@ def tree(args): geoscale=True, ) - normalize_axes([ax1]) + normalize_axes(ax1) image_name = "tree.pdf" savefig(image_name, dpi=iopts.dpi, iopts=iopts) @@ -164,15 +160,15 @@ def ks(args): Create a ks figure. """ + from ..compara.ks import Layout, KsPlot, KsFile + p = OptionParser(ks.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="10x4") + _, args, iopts = p.set_image_options(args, figsize="10x4") (layoutfile,) = args - from jcvi.apps.ks import Layout, KsPlot, KsFile - fig = plt.figure(1, (iopts.w, iopts.h)) - ax2 = fig.add_axes([0.12, 0.12, 0.8, 0.8]) + ax2 = fig.add_axes((0.12, 0.12, 0.8, 0.8)) # Panel B ks_min = 0.0 @@ -211,10 +207,8 @@ def synteny(args): Create a composite figure with (A) wgd and (B) microsynteny. """ - from jcvi.graphics.chromosome import draw_chromosomes - p = OptionParser(synteny.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="12x12") + _, args, iopts = p.set_image_options(args, figsize="12x12") (bedfile, sizesfile, blocksfile, allbedfile, blockslayout) = args @@ -241,7 +235,7 @@ def synteny(args): # Panel B draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout) - normalize_axes([root, ax1, ax2]) + normalize_axes(root, ax1, ax2) labels = ((0.05, 0.95, "A"), (0.05, 0.5, "B")) panel_labels(root, labels) @@ -255,15 +249,13 @@ def wgd(args): Create a wgd figure. """ - from jcvi.graphics.chromosome import draw_chromosomes - p = OptionParser(synteny.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="8x5") + _, args, iopts = p.set_image_options(args, figsize="8x5") (bedfile, sizesfile) = args fig = plt.figure(1, (iopts.w, iopts.h)) - ax1 = fig.add_axes([0, 0, 1, 1]) + ax1 = fig.add_axes((0, 0, 1, 1)) title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$" draw_chromosomes( @@ -279,7 +271,7 @@ def wgd(args): title=title, ) - normalize_axes([ax1]) + normalize_axes(ax1) image_name = "wgd.pdf" savefig(image_name, dpi=iopts.dpi, iopts=iopts) @@ -297,11 +289,11 @@ def microsynteny(args): (blocksfile, allbedfile, blockslayout) = args fig = plt.figure(1, (iopts.w, iopts.h)) - ax2 = fig.add_axes([0, 0, 1, 1]) + ax2 = fig.add_axes((0, 0, 1, 1)) draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout) - normalize_axes([ax2]) + normalize_axes(ax2) image_name = "microsynteny.pdf" savefig(image_name, dpi=iopts.dpi, iopts=iopts) @@ -362,8 +354,8 @@ def build_bedline(astart, aend, target_pair): ac = AnchorFile(anchorsfile) blocks = ac.blocks outbed = Bed() - for i, block in enumerate(blocks): - a, b, scores = zip(*block) + for block in blocks: + a, b, _ = zip(*block) a = [qorder[x] for x in a] b = [sorder[x] for x in b] astart, aend = min(a)[1], max(a)[1] From e38f7d1d39857c28b6f12f6cd156683008b74275 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 07:28:38 -0700 Subject: [PATCH 09/15] cleanup karyotype and synteny --- jcvi/assembly/allmaps.py | 10 +++++----- jcvi/graphics/karyotype.py | 18 +++++++++--------- jcvi/graphics/synteny.py | 18 +++++++++--------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/jcvi/assembly/allmaps.py b/jcvi/assembly/allmaps.py index 61343d70..7d4c3d84 100644 --- a/jcvi/assembly/allmaps.py +++ b/jcvi/assembly/allmaps.py @@ -876,10 +876,10 @@ def plotratio(args): Illustrate physical vs map distance ratio, that were used in the gap estimation algorithm. """ - from jcvi.graphics.base import plt, savefig, normalize_axes, panel_labels, set2 + from ..graphics.base import plt, savefig, normalize_axes, panel_labels, set2 p = OptionParser(estimategaps.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300) + _, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300) if len(args) != 3: sys.exit(not p.print_help()) @@ -1802,7 +1802,7 @@ def plot(args): 1. Parallel axes, and matching markers are shown in connecting lines; 2. Scatter plot. """ - from jcvi.graphics.base import ( + from ..graphics.base import ( plt, savefig, normalize_axes, @@ -1810,7 +1810,7 @@ def plot(args): panel_labels, shorten, ) - from jcvi.graphics.chromosome import Chromosome, GeneticMap, HorizontalChromosome + from ..graphics.chromosome import Chromosome, GeneticMap, HorizontalChromosome p = OptionParser(plot.__doc__) p.add_option("--title", help="Title of the plot") @@ -1987,7 +1987,7 @@ def plot(args): labels = ((0.04, 0.96, "A"), (0.48, 0.96, "B")) panel_labels(root, labels) - normalize_axes((ax1, ax2, root)) + normalize_axes(ax1, ax2, root) image_name = seqid + "." + iopts.format savefig(image_name, dpi=iopts.dpi, iopts=iopts) plt.close(fig) diff --git a/jcvi/graphics/karyotype.py b/jcvi/graphics/karyotype.py index 59cf71f5..a7957580 100644 --- a/jcvi/graphics/karyotype.py +++ b/jcvi/graphics/karyotype.py @@ -21,17 +21,17 @@ import sys -import logging from typing import Optional -from jcvi.apps.base import OptionParser -from jcvi.compara.synteny import SimpleFile -from jcvi.formats.bed import Bed -from jcvi.graphics.chromosome import Chromosome, HorizontalChromosome -from jcvi.graphics.glyph import TextCircle -from jcvi.graphics.synteny import Shade, ymid_offset -from jcvi.graphics.base import AbstractLayout, markup, mpl, plt, savefig, update_figname +from ..apps.base import OptionParser, logger +from ..compara.synteny import SimpleFile +from ..formats.bed import Bed + +from .base import AbstractLayout, markup, mpl, plt, savefig, update_figname +from .chromosome import Chromosome, HorizontalChromosome +from .glyph import TextCircle +from .synteny import Shade, ymid_offset class LayoutLine(object): @@ -377,7 +377,7 @@ def __init__( # validate if all seqids are non-empty for k, v in sz.items(): if v == 0: - logging.error("Size of `%s` is empty. Please check", k) + logger.error("Size of `%s` is empty. Please check", k) t.sizes = sz tracks = [] diff --git a/jcvi/graphics/synteny.py b/jcvi/graphics/synteny.py index 649db70f..d8f2dbd3 100644 --- a/jcvi/graphics/synteny.py +++ b/jcvi/graphics/synteny.py @@ -21,31 +21,31 @@ from typing import Optional import numpy as np -import matplotlib.transforms as transforms +from matplotlib import transforms from matplotlib.path import Path from ..apps.base import OptionParser, logger from ..compara.synteny import BlockFile from ..formats.base import DictFile from ..formats.bed import Bed -from ..graphics.base import ( +from ..utils.cbook import human_size +from ..utils.validator import validate_in_choices, validate_in_range + +from .base import ( + AbstractLayout, + PathPatch, markup, plt, savefig, - PathPatch, - AbstractLayout, ) -from ..graphics.glyph import ( +from .glyph import ( BasePalette, Glyph, OrientationPalette, OrthoGroupPalette, RoundLabel, ) -from ..graphics.tree import draw_tree, read_trees - -from ..utils.cbook import human_size -from ..utils.validator import validate_in_choices, validate_in_range +from .tree import draw_tree, read_trees HorizontalAlignments = ("left", "right", "leftalign", "rightalign", "center", "") From 987cb6dd36d0da4f03fcb3629f8425aa04bace24 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 07:43:19 -0700 Subject: [PATCH 10/15] Add Won as an author --- README.md | 2 +- jcvi/__init__.py | 13 +++++++++---- jcvi/apps/base.py | 4 ++-- jcvi/assembly/allmaps.py | 9 ++++----- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index d4c02269..78ec90f2 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,8 @@ computation related to assembly, annotation, and comparative genomics. | ------- | ---------------------------------------------------------------- | | Authors | Haibao Tang ([tanghaibao](http://github.com/tanghaibao)) | | | Vivek Krishnakumar ([vivekkrish](https://github.com/vivekkrish)) | -| | Jingping Li ([Jingping](https://github.com/Jingping)) | | | Xingtan Zhang ([tangerzhang](https://github.com/tangerzhang)) | +| | Won Cheol Yim ([wyim-pgl](https://github.com/wyim-pgl)) | | Email | | | License | [BSD](http://creativecommons.org/licenses/BSD/) | diff --git a/jcvi/__init__.py b/jcvi/__init__.py index df5f218d..75633151 100644 --- a/jcvi/__init__.py +++ b/jcvi/__init__.py @@ -2,8 +2,13 @@ from pkg_resources import get_distribution, DistributionNotFound -__author__ = ("Haibao Tang", "Vivek Krishnakumar", "Jingping Li") -__copyright__ = "Copyright (c) 2010-{}, Haibao Tang".format(datetime.now().year) +__author__ = ( + "Haibao Tang", + "Vivek Krishnakumar", + "Xingtan Zhang", + "Won Cheol Yim", +) +__copyright__ = f"Copyright (c) 2010-{datetime.now().year}, Haibao Tang" __email__ = "tanghaibao@gmail.com" __license__ = "BSD" __status__ = "Development" @@ -14,10 +19,10 @@ except DistributionNotFound: # pragma: no cover try: from .version import version as VERSION # noqa - except ImportError: # pragma: no cover + except ImportError as exc: # pragma: no cover raise ImportError( "Failed to find (autogenerated) version.py. " "This might be because you are installing from GitHub's tarballs, " "use the PyPI ones." - ) + ) from exc __version__ = VERSION diff --git a/jcvi/apps/base.py b/jcvi/apps/base.py index 09a58454..bbc63a27 100644 --- a/jcvi/apps/base.py +++ b/jcvi/apps/base.py @@ -32,13 +32,13 @@ from rich.console import Console from rich.logging import RichHandler -from jcvi import __copyright__, __version__ +from .. import __copyright__, __version__ as version os.environ["LC_ALL"] = "C" # http://newbebweb.blogspot.com/2012/02/python-head-ioerror-errno-32-broken.html signal.signal(signal.SIGPIPE, signal.SIG_DFL) -JCVIHELP = f"JCVI utility libraries {__version__} [{__copyright__}]\n" +JCVIHELP = f"JCVI utility libraries {version} [{__copyright__}]\n" TextCollection = Union[str, List[str], Tuple[str, ...]] diff --git a/jcvi/assembly/allmaps.py b/jcvi/assembly/allmaps.py index 7d4c3d84..1ab3b233 100644 --- a/jcvi/assembly/allmaps.py +++ b/jcvi/assembly/allmaps.py @@ -7,21 +7,19 @@ import os.path as op import os import sys -import logging - -import numpy as np -import networkx as nx from collections import Counter, defaultdict from functools import partial from itertools import combinations, product from typing import Optional +import numpy as np +import networkx as nx + from cmmodule.utils import read_chain_file from cmmodule.mapbed import crossmap_bed_file from more_itertools import pairwise -from jcvi import __version__ as version from ..algorithms.ec import GA_setup, GA_run from ..algorithms.formula import reject_outliers, spearmanr from ..algorithms.lis import ( @@ -41,6 +39,7 @@ mkdir, need_update, sh, + version, ) from ..formats.agp import AGP, order_to_agp, build as agp_build, reindex from ..formats.base import DictFile, FileMerger, must_open, read_block From 07945cc05bb7389383dce82151aea5964e8e6997 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 07:49:06 -0700 Subject: [PATCH 11/15] use load_image --- jcvi/graphics/base.py | 5 ++++- jcvi/projects/jcvi.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/jcvi/graphics/base.py b/jcvi/graphics/base.py index 29d3a494..9b274dff 100644 --- a/jcvi/graphics/base.py +++ b/jcvi/graphics/base.py @@ -194,7 +194,10 @@ def linear_shade(from_color, fraction=0.5): return linear_blend(from_color, "w", fraction) -def load_image(filename): +def load_image(filename: str) -> np.ndarray: + """ + Load an image file and return as numpy array. + """ img = plt.imread(filename) if len(img.shape) == 2: # Gray-scale image, convert to RGB # http://www.socouldanyone.com/2013/03/converting-grayscale-to-rgb-with-numpy.html diff --git a/jcvi/projects/jcvi.py b/jcvi/projects/jcvi.py index 4eaed4e6..54d19b45 100644 --- a/jcvi/projects/jcvi.py +++ b/jcvi/projects/jcvi.py @@ -14,7 +14,7 @@ from ..assembly.hic import draw_hic_heatmap from ..assembly.kmer import draw_ks_histogram from ..compara.pedigree import Pedigree, calculate_inbreeding -from ..graphics.base import normalize_axes, panel_labels, plt, savefig +from ..graphics.base import load_image, normalize_axes, panel_labels, plt, savefig from ..graphics.chromosome import draw_chromosomes from ..graphics.landscape import draw_multi_depth @@ -54,7 +54,7 @@ def diversity(args): logger.info("Pedigree graph written to `%s`", pngfile) # Show the image as is - ax1_root.imshow(plt.imread(pngfile)) + ax1_root.imshow(load_image(pngfile)) ax1_root.set_axis_off() # Panel B From 06a6a2036db4aea091530cd1b7d7c562fc2c4af2 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 08:02:43 -0700 Subject: [PATCH 12/15] Refactor landscape.stack() --- jcvi/graphics/landscape.py | 96 +++++++++++++++++++++++--------------- jcvi/projects/jcvi.py | 19 ++------ 2 files changed, 61 insertions(+), 54 deletions(-) diff --git a/jcvi/graphics/landscape.py b/jcvi/graphics/landscape.py index 1d1d3c42..f4470786 100644 --- a/jcvi/graphics/landscape.py +++ b/jcvi/graphics/landscape.py @@ -572,7 +572,10 @@ def check_window_options(opts): return window, shift, subtract, merge -def get_beds(s, binned=False): +def get_beds(s: List[str], binned: bool = False) -> List[str]: + """ + Get the bed files for each feature, and return them as a list. + """ return [x + ".bed" for x in s] if not binned else [x for x in s] @@ -989,7 +992,7 @@ def get_binfiles( mode: str = "span", subtract: Optional[int] = None, binned: bool = False, - merge: bool=True, + merge: bool = True, ): """ Get binfiles from input files. If not binned, then bin them first. @@ -1052,39 +1055,21 @@ def stackplot( ax.set_axis_off() -def stack(args): +def draw_stack( + fig, + root, + stacks: List[str], + fastafile: str, + window: int, + shift: int, + top: int, + merge: bool, + subtract: Optional[int] = None, + switch: Optional[DictFile] = None, +): """ - %prog stack fastafile - - Create landscape plots that show the amounts of genic sequences, and repetitive - sequences along the chromosomes. + Draw stack plot. """ - p = OptionParser(stack.__doc__) - p.add_option("--top", default=10, type="int", help="Draw the first N chromosomes") - p.add_option( - "--stacks", - default="Exons,Introns,DNA_transposons,Retrotransposons", - help="Features to plot in stackplot", - ) - p.add_option("--switch", help="Change chr names based on two-column file") - add_window_options(p) - opts, args, iopts = p.set_image_options(args, figsize="8x8") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - top = opts.top - window, shift, subtract, merge = check_window_options(opts) - switch = opts.switch - if switch: - switch = DictFile(opts.switch) - - stacks = opts.stacks.split(",") - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - bedfiles = get_beds(stacks) binfiles = get_binfiles(bedfiles, fastafile, shift, subtract=subtract, merge=merge) @@ -1094,8 +1079,6 @@ def stack(args): margin = 0.08 inner = 0.02 # y distance between tracks - pf = fastafile.rsplit(".", 1)[0] - # Gauge ratio = draw_gauge(root, margin, maxl) @@ -1140,10 +1123,47 @@ def stack(args): root.text(xx, yy, b, size=13) xx += len(b) * 0.012 + inner - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() + normalize_axes(root) + +def stack(args): + """ + %prog stack fastafile + + Create landscape plots that show the amounts of genic sequences, and repetitive + sequences along the chromosomes. + """ + p = OptionParser(stack.__doc__) + p.add_option("--top", default=10, type="int", help="Draw the first N chromosomes") + p.add_option( + "--stacks", + default="Exons,Introns,DNA_transposons,Retrotransposons", + help="Features to plot in stackplot", + ) + p.add_option("--switch", help="Change chr names based on two-column file") + add_window_options(p) + opts, args, iopts = p.set_image_options(args, figsize="8x8") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + top = opts.top + window, shift, subtract, merge = check_window_options(opts) + switch = opts.switch + if switch: + switch = DictFile(opts.switch) + + stacks = opts.stacks.split(",") + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + draw_stack( + fig, root, stacks, fastafile, window, shift, top, merge, subtract, switch + ) + + pf = fastafile.rsplit(".", 1)[0] image_name = pf + "." + iopts.format savefig(image_name, dpi=iopts.dpi, iopts=iopts) diff --git a/jcvi/projects/jcvi.py b/jcvi/projects/jcvi.py index 54d19b45..3fc52b4d 100644 --- a/jcvi/projects/jcvi.py +++ b/jcvi/projects/jcvi.py @@ -105,10 +105,10 @@ def landscape(args): p = OptionParser(landscape.__doc__) _, args, iopts = p.set_image_options(args, figsize="10x7") - if len(args) != 3: + if len(args) != 4: sys.exit(not p.print_help()) - bedfile, sizesfile, pngfile = args + bedfile, sizesfile, fasta, ch = args fig = plt.figure(1, (iopts.w, iopts.h)) root = fig.add_axes((0, 0, 1, 1)) @@ -134,20 +134,7 @@ def landscape(args): # Panel B logger.info("Plotting landscape of genomic features across the genome") - M = plt.imread(pngfile) - width, height = M.shape[1], M.shape[0] - # Split the image into left and right parts - mid = width // 2 - 10 - left_cut = right_cut = 900 - logger.info("Image size: %dx%d", width, height) - logger.info("Splitting image at %d", mid) - - LM, RM = M[:left_cut, :mid], M[:right_cut, mid:] - logger.info("Left image size: %dx%d", LM.shape[1], LM.shape[0]) - logger.info("Right image size: %dx%d", RM.shape[1], RM.shape[0]) - - ax2_root.imshow(LM, extent=(0.4, 1, 0.5, 1), aspect="auto") - ax3_root.imshow(RM, extent=(0.4, 1, 0, 0.5), aspect="auto") + ax2_root.set_axis_off() ax3_root.set_axis_off() From 210a5a84db402c8bd01a1f8f2a1c44ab1dc0f505 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 08:35:26 -0700 Subject: [PATCH 13/15] refactor --- jcvi/graphics/base.py | 10 ++++++++++ jcvi/graphics/landscape.py | 29 ++++++++++++++++++++--------- jcvi/projects/jcvi.py | 22 +++++++++++++++++----- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/jcvi/graphics/base.py b/jcvi/graphics/base.py index 9b274dff..c2f9c4c7 100644 --- a/jcvi/graphics/base.py +++ b/jcvi/graphics/base.py @@ -42,6 +42,7 @@ from ..formats.base import LineFile from ..utils.cbook import human_size +Extent = Tuple[float, float, float, float] CHARS = { "&": r"\&", @@ -157,6 +158,15 @@ def __str__(self): return "\n".join(str(x) for x in self) +def adjust_extent(extent: Extent, root_extent: Extent) -> Extent: + """ + Adjust the extent of the root axes. + """ + rx, ry, rw, rh = root_extent + ex, ey, ew, eh = extent + return rx + ex * rw, ry + ey * rh, ew * rw, eh * rh + + def linear_blend(from_color, to_color, fraction=0.5): """Interpolate a new color between two colors. diff --git a/jcvi/graphics/landscape.py b/jcvi/graphics/landscape.py index f4470786..f3811f40 100644 --- a/jcvi/graphics/landscape.py +++ b/jcvi/graphics/landscape.py @@ -25,6 +25,7 @@ from .base import ( CirclePolygon, Rectangle, + adjust_extent, adjust_spines, human_readable_base, latex, @@ -1058,12 +1059,13 @@ def stackplot( def draw_stack( fig, root, + root_extent, stacks: List[str], fastafile: str, window: int, shift: int, top: int, - merge: bool, + merge: bool = True, subtract: Optional[int] = None, switch: Optional[DictFile] = None, ): @@ -1097,8 +1099,10 @@ def draw_stack( if switch and cc in switch: cc = "\n".join((cc, f"({switch[cc]})")) + extent = (xx, yy, xlen, yinterval - inner) + adjusted = adjust_extent(extent, root_extent) root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray)) - ax = fig.add_axes((xx, yy, xlen, yinterval - inner)) + ax = fig.add_axes(adjusted) nbins, _ = get_nbins(clen, shift) @@ -1107,10 +1111,6 @@ def draw_stack( xx - 0.04, yy + 0.5 * (yinterval - inner), cc, ha="center", va="center" ) - ax.set_xlim(0, nbins) - ax.set_ylim(0, 1) - ax.set_axis_off() - # Legends yy -= yinterval xx = margin @@ -1121,7 +1121,7 @@ def draw_stack( root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0)) xx += 2 * inner root.text(xx, yy, b, size=13) - xx += len(b) * 0.012 + inner + xx += len(b) * 0.015 + inner normalize_axes(root) @@ -1157,10 +1157,21 @@ def stack(args): stacks = opts.stacks.split(",") fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) + root_extent = (0, 0, 1, 1) + root = fig.add_axes(root_extent) draw_stack( - fig, root, stacks, fastafile, window, shift, top, merge, subtract, switch + fig, + root, + root_extent, + stacks, + fastafile, + window, + shift, + top, + merge, + subtract, + switch, ) pf = fastafile.rsplit(".", 1)[0] diff --git a/jcvi/projects/jcvi.py b/jcvi/projects/jcvi.py index 3fc52b4d..80003a4e 100644 --- a/jcvi/projects/jcvi.py +++ b/jcvi/projects/jcvi.py @@ -16,7 +16,7 @@ from ..compara.pedigree import Pedigree, calculate_inbreeding from ..graphics.base import load_image, normalize_axes, panel_labels, plt, savefig from ..graphics.chromosome import draw_chromosomes -from ..graphics.landscape import draw_multi_depth +from ..graphics.landscape import draw_multi_depth, draw_stack def diversity(args): @@ -103,19 +103,21 @@ def landscape(args): B. Landscape of genomic features across the genome """ p = OptionParser(landscape.__doc__) - _, args, iopts = p.set_image_options(args, figsize="10x7") + _, args, iopts = p.set_image_options(args, figsize="12x8") if len(args) != 4: sys.exit(not p.print_help()) - bedfile, sizesfile, fasta, ch = args + bedfile, sizesfile, fastafile, ch = args fig = plt.figure(1, (iopts.w, iopts.h)) root = fig.add_axes((0, 0, 1, 1)) aspect_ratio = iopts.w / iopts.h ax1_root = fig.add_axes((0, 1 / 4, 0.4, 0.5 * aspect_ratio)) - ax2_root = fig.add_axes((0.4, 0.43, 0.54, 0.57)) - ax3_root = fig.add_axes((0.43, -0.13, 0.54, 0.57)) + ax2_root_extent = (0.4, 0.5, 0.6, 0.47) + ax2_root = fig.add_axes(ax2_root_extent) + ax3_root_extent = (0.4, 0, 0.6, 0.47) + ax3_root = fig.add_axes(ax3_root_extent) # Panel A logger.info("Plotting example genomic features painted on Arabidopsis genome") @@ -134,6 +136,16 @@ def landscape(args): # Panel B logger.info("Plotting landscape of genomic features across the genome") + draw_stack( + fig, + ax2_root, + ax2_root_extent, + ["Repeats", "Exons"], + fastafile, + window=250000, + shift=50000, + top=5, + ) ax2_root.set_axis_off() ax3_root.set_axis_off() From f92c19bcc5a29b5f30e68513e784463ef47f8f05 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 08:45:50 -0700 Subject: [PATCH 14/15] refactor to draw_heatmap() --- jcvi/graphics/landscape.py | 104 ++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 36 deletions(-) diff --git a/jcvi/graphics/landscape.py b/jcvi/graphics/landscape.py index f3811f40..b4657a7f 100644 --- a/jcvi/graphics/landscape.py +++ b/jcvi/graphics/landscape.py @@ -24,6 +24,7 @@ from .base import ( CirclePolygon, + Colormap, Rectangle, adjust_extent, adjust_spines, @@ -821,36 +822,23 @@ def multilineplot(args): savefig(image_name, dpi=iopts.dpi, iopts=iopts) -def heatmap(args): +def draw_heatmap( + fig, + root, + fastafile: str, + chr: str, + stacks: List[str], + heatmaps: List[str], + window: int, + shift: int, + cmap: Colormap, + subtract: Optional[int] = None, + merge: bool = False, + meres: Optional[str] = None, +): """ - %prog heatmap fastafile chr1 - - Combine stack plot with heatmap to show abundance of various tracks along - given chromosome. Need to give multiple beds to --stacks and --heatmaps + Draw heatmap for the given chromosome. """ - p = OptionParser(heatmap.__doc__) - p.add_option( - "--stacks", - default="Exons,Introns,DNA_transposons,Retrotransposons", - help="Features to plot in stackplot", - ) - p.add_option( - "--heatmaps", - default="Copia,Gypsy,hAT,Helitron,Introns,Exons", - help="Features to plot in heatmaps", - ) - p.add_option("--meres", default=None, help="Extra centromere / telomere features") - add_window_options(p) - opts, args, iopts = p.set_image_options(args, figsize="8x5") - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, chr = args - window, shift, subtract, merge = check_window_options(opts) - - stacks = opts.stacks.split(",") - heatmaps = opts.heatmaps.split(",") stackbeds = get_beds(stacks) heatmapbeds = get_beds(heatmaps) stackbins = get_binfiles( @@ -864,9 +852,6 @@ def heatmap(args): inner = 0.015 clen = Sizes(fastafile).mapping[chr] - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - # Gauge ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin) yinterval = 0.3 @@ -928,13 +913,12 @@ def heatmap(args): extent=(xx, xx + xlen, yy, yy + yh - inner), interpolation="nearest", aspect="auto", - cmap=iopts.cmap, + cmap=cmap, ) root.text(xx + xlen + 0.01, yy, s, size=10) yy -= yh - meres = opts.meres if meres: bed = Bed(meres) for b in bed: @@ -947,9 +931,57 @@ def heatmap(args): root.add_patch(CirclePolygon((xx, yy), radius=0.01, fc="m", ec="m")) root.text(xx + 0.014, yy, accn, va="center", color="m") - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() + normalize_axes(root) + + +def heatmap(args): + """ + %prog heatmap fastafile chr1 + + Combine stack plot with heatmap to show abundance of various tracks along + given chromosome. Need to give multiple beds to --stacks and --heatmaps + """ + p = OptionParser(heatmap.__doc__) + p.add_option( + "--stacks", + default="Exons,Introns,DNA_transposons,Retrotransposons", + help="Features to plot in stackplot", + ) + p.add_option( + "--heatmaps", + default="Copia,Gypsy,hAT,Helitron,Introns,Exons", + help="Features to plot in heatmaps", + ) + p.add_option("--meres", default=None, help="Extra centromere / telomere features") + add_window_options(p) + opts, args, iopts = p.set_image_options(args, figsize="8x5") + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, chr = args + window, shift, subtract, merge = check_window_options(opts) + + stacks = opts.stacks.split(",") + heatmaps = opts.heatmaps.split(",") + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + draw_heatmap( + fig, + root, + fastafile, + chr, + stacks, + heatmaps, + window, + shift, + iopts.cmap, + subtract, + merge, + meres=opts.meres, + ) image_name = chr + "." + iopts.format savefig(image_name, dpi=iopts.dpi, iopts=iopts) From fdef81709612b80ace578fc205c0ddb1a5df3a9d Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 30 Apr 2024 09:03:51 -0700 Subject: [PATCH 15/15] revert autoscale from 8 to 6 --- jcvi/graphics/landscape.py | 22 ++++++++++++++-------- jcvi/projects/jcvi.py | 30 +++++++++++++++++++++++------- jcvi/utils/cbook.py | 2 +- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/jcvi/graphics/landscape.py b/jcvi/graphics/landscape.py index b4657a7f..e2bbc909 100644 --- a/jcvi/graphics/landscape.py +++ b/jcvi/graphics/landscape.py @@ -25,6 +25,7 @@ from .base import ( CirclePolygon, Colormap, + Extent, Rectangle, adjust_extent, adjust_spines, @@ -822,9 +823,10 @@ def multilineplot(args): savefig(image_name, dpi=iopts.dpi, iopts=iopts) -def draw_heatmap( +def draw_heatmaps( fig, root, + root_extent: Extent, fastafile: str, chr: str, stacks: List[str], @@ -865,7 +867,9 @@ def draw_heatmap( cc = ca[0].upper() + cb root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray)) - ax = fig.add_axes((xx, yy, xlen, yinterval - inner)) + extent = (xx, yy, xlen, yinterval - inner) + adjusted = adjust_extent(extent, root_extent) + ax = fig.add_axes(adjusted) nbins, _ = get_nbins(clen, shift) @@ -875,7 +879,7 @@ def draw_heatmap( stackplot(ax, stackbins, nbins, palette, chr, window, shift) ax.text( - 0.1, + 0.05, 0.9, cc, va="top", @@ -966,11 +970,13 @@ def heatmap(args): heatmaps = opts.heatmaps.split(",") fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) + root_extent = (0, 0, 1, 1) + root = fig.add_axes(root_extent) - draw_heatmap( + draw_heatmaps( fig, root, + root_extent, fastafile, chr, stacks, @@ -1088,10 +1094,10 @@ def stackplot( ax.set_axis_off() -def draw_stack( +def draw_stacks( fig, root, - root_extent, + root_extent: Extent, stacks: List[str], fastafile: str, window: int, @@ -1192,7 +1198,7 @@ def stack(args): root_extent = (0, 0, 1, 1) root = fig.add_axes(root_extent) - draw_stack( + draw_stacks( fig, root, root_extent, diff --git a/jcvi/projects/jcvi.py b/jcvi/projects/jcvi.py index 80003a4e..d9360700 100644 --- a/jcvi/projects/jcvi.py +++ b/jcvi/projects/jcvi.py @@ -14,9 +14,9 @@ from ..assembly.hic import draw_hic_heatmap from ..assembly.kmer import draw_ks_histogram from ..compara.pedigree import Pedigree, calculate_inbreeding -from ..graphics.base import load_image, normalize_axes, panel_labels, plt, savefig +from ..graphics.base import cm, load_image, normalize_axes, panel_labels, plt, savefig from ..graphics.chromosome import draw_chromosomes -from ..graphics.landscape import draw_multi_depth, draw_stack +from ..graphics.landscape import draw_multi_depth, draw_heatmaps, draw_stacks def diversity(args): @@ -116,7 +116,7 @@ def landscape(args): ax1_root = fig.add_axes((0, 1 / 4, 0.4, 0.5 * aspect_ratio)) ax2_root_extent = (0.4, 0.5, 0.6, 0.47) ax2_root = fig.add_axes(ax2_root_extent) - ax3_root_extent = (0.4, 0, 0.6, 0.47) + ax3_root_extent = (0.41, 0, 0.6, 0.47) ax3_root = fig.add_axes(ax3_root_extent) # Panel A @@ -136,16 +136,32 @@ def landscape(args): # Panel B logger.info("Plotting landscape of genomic features across the genome") - draw_stack( + stacks = ["Repeats", "Exons"] + heatmaps = ["Copia", "Gypsy", "Helitron", "hAT", "Exons"] + window = 250000 + shift = 50000 + draw_stacks( fig, ax2_root, ax2_root_extent, - ["Repeats", "Exons"], + stacks, fastafile, - window=250000, - shift=50000, + window, + shift, top=5, ) + draw_heatmaps( + fig, + ax3_root, + ax3_root_extent, + fastafile, + "Chr2", + stacks, + heatmaps, + window, + shift, + cmap=cm.viridis, + ) ax2_root.set_axis_off() ax3_root.set_axis_off() diff --git a/jcvi/utils/cbook.py b/jcvi/utils/cbook.py index ecf16ab3..66a09817 100644 --- a/jcvi/utils/cbook.py +++ b/jcvi/utils/cbook.py @@ -260,7 +260,7 @@ def human_size(size, a_kilobyte_is_1024_bytes=False, precision=1, target=None): return "{0:.{1}f}{2}".format(size, precision, suffix) -def autoscale(bp: int, optimal: int = 8): +def autoscale(bp: int, optimal: int = 6): """ Autoscale the basepair length to a more human readable number. The optimal is the number of ticks we want to see on the axis.