Skip to content

Commit

Permalink
Dev (#660)
Browse files Browse the repository at this point in the history
* update formats.pdf

* Add tests/graphics/test_glyph.py

* Add projects.misc.grabseeds()

* minor edits
  • Loading branch information
tanghaibao authored May 7, 2024
1 parent b458986 commit 8db2bf4
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 31 deletions.
9 changes: 5 additions & 4 deletions jcvi/formats/base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import fileinput
import math
import os
import os.path as op
import math
import sys

from collections import OrderedDict
from itertools import cycle, groupby, islice
from typing import IO, Union


from Bio import SeqIO
from ..apps.base import (
Expand Down Expand Up @@ -343,7 +346,7 @@ def must_open(
checkexists: bool = False,
skipcheck: bool = False,
oappend: bool = False,
):
) -> Union[IO, fileinput.FileInput]:
"""
Accepts filename and returns filehandle.
Expand All @@ -355,8 +358,6 @@ def must_open(
if filename[0].endswith((".gz", ".bz2")):
filename = " ".join(filename) # allow opening multiple gz/bz2 files
else:
import fileinput

return fileinput.input(filename)

if filename.startswith("s3://"):
Expand Down
42 changes: 21 additions & 21 deletions jcvi/formats/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Manipulate PDF files, using PyPDF2 library.
"""
import sys
import traceback

from natsort import natsorted

Expand Down Expand Up @@ -57,7 +56,6 @@ def cat(args):
help="Remove individual pdfs after merging",
)
p.set_outfile()
p.set_verbose(help="Show page ranges as they are being read")
opts, args = p.parse_args(args)

if len(args) < 1:
Expand All @@ -67,31 +65,33 @@ def cat(args):
if outfile in args:
args.remove(outfile)

if not opts.nosort:
should_sort = not opts.nosort
if not all(x.endswith(".pdf") for x in args):
should_sort = False
logger.debug("Not sorting filenames because non-pdf args")

if should_sort:
args = natsorted(args)

filename_page_ranges = parse_filename_page_ranges(args)
verbose = opts.verbose
fw = must_open(outfile, "wb")

nfiles = len(filename_page_ranges)
merger = PdfFileMerger()
in_fs = {}
try:
for filename, page_range in filename_page_ranges:
if verbose:
print(filename, page_range, file=sys.stderr)
if filename not in in_fs:
in_fs[filename] = open(filename, "rb")
merger.append(in_fs[filename], pages=page_range)
except:
print(traceback.format_exc(), file=sys.stderr)
print("Error while reading " + filename, file=sys.stderr)
sys.exit(1)
merger.write(fw)
fw.close()
with must_open(outfile, "wb") as fw:
in_fs = {}
try:
for filename, page_range in filename_page_ranges:
logger.debug("%s: %s", filename, page_range)
if filename not in in_fs:
in_fs[filename] = open(filename, "rb")
merger.append(in_fs[filename], pages=page_range)
except Exception as e:
logger.error("Error while reading %s: %s", filename, e)
sys.exit(1)
merger.write(fw)
logger.info("Extracted %d files into `%s`", nfiles, outfile)

if opts.cleanup:
logger.debug("Cleaning up %d files", len(args))
logger.debug("Cleaning up %d files", nfiles)
cleanup(args)


Expand Down
78 changes: 72 additions & 6 deletions jcvi/projects/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,20 @@
import sys

import numpy as np
import pandas as pd

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from ..apps.base import ActionDispatcher, OptionParser, fname, logger
from ..graphics.base import Polygon, normalize_axes, panel_labels, plt, savefig
from ..graphics.base import (
Polygon,
normalize_axes,
panel_labels,
plt,
savefig,
set_helvetica_axis,
)
from ..graphics.glyph import DoubleSquare, GeneGlyph, RoundRect, TextCircle, plot_cap
from ..graphics.karyotype import Karyotype
from ..graphics.synteny import Synteny, draw_gene_legend
Expand Down Expand Up @@ -39,28 +50,83 @@ def main():
"waterlilyGOM",
"waterlily phylogeny and related infographics (requires data)",
),
("grabseeds", "GRABSEEDS PCA plot"),
)
p = ActionDispatcher(actions)
p.dispatch(globals())


def rgb_to_hex(r: float, g: float, b: float) -> str:
    """
    Convert RGB to hex.

    Each channel is rounded to the nearest integer and then clamped to the
    0-255 range, so the result is always a well-formed `#rrggbb` string even
    when an input is slightly out of range (e.g. 255.6 or a small negative).

    Args:
        r: Red channel, nominally in [0, 255].
        g: Green channel, nominally in [0, 255].
        b: Blue channel, nominally in [0, 255].

    Returns:
        Lowercase seven-character color string such as "#ff0080".
    """
    # Without clamping, 300 would format as three hex digits and -5 as "-5",
    # neither of which is a valid color specification.
    channels = (min(255, max(0, int(round(c)))) for c in (r, g, b))
    return "#" + "".join(f"{c:02x}" for c in channels)


def grabseeds(args):
    """
    %prog FINAL_DATA_FOR_ANOVA_HERITABILITY_ANALYSIS_SEED_COLOR_SHAPE_SIZE.csv
    Plot the PCA plot from GRABSEEDS.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is kept verbatim; implementation notes live in comments instead.
    #
    # Workflow: read the per-sample CSV, standardize every "Avg*" column except
    # the three excluded ones (AvgOfL / AvgOfa / AvgOfb), project onto the
    # first two principal components, and scatter-plot the samples colored by
    # their average RGB and sized by their average area. The figure is written
    # to "grabseeds_pca.<format>".
    p = OptionParser(grabseeds.__doc__)
    _, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (csvfile,) = args
    # Reset the index after dropping incomplete rows: the PCA output below gets
    # a fresh 0..n-1 index, and pd.concat(axis=1) aligns on index labels. With
    # the original gapped index, scores would be paired with the wrong rows and
    # some rows would be lost.
    df = pd.read_csv(csvfile).dropna().reset_index(drop=True)
    features = [
        x
        for x in df.columns
        if x.startswith("Avg") and x not in ("AvgOfL", "AvgOfa", "AvgOfb")
    ]
    scaled = StandardScaler().fit_transform(df.loc[:, features].values)
    pca = PCA(n_components=2)
    principal_components = pca.fit_transform(scaled)
    logger.info("Explained variance: %s", pca.explained_variance_ratio_)
    pc1_var, pc2_var = pca.explained_variance_ratio_

    pc_df = pd.DataFrame(data=principal_components, columns=["PC1", "PC2"])
    final_df = pd.concat([pc_df, df[features]], axis=1)
    # The blue channel previously reused "AvgOfGreen" (copy-paste slip).
    # NOTE(review): assumes the CSV carries an "AvgOfBlue" column alongside
    # AvgOfRed / AvgOfGreen - confirm against the input file.
    final_df["Color"] = final_df.apply(
        lambda row: rgb_to_hex(row["AvgOfRed"], row["AvgOfGreen"], row["AvgOfBlue"]),
        axis=1,
    )
    final_df["ScatterSize"] = final_df["AvgOfArea"] / 500

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_subplot(1, 1, 1)
    # "\\%" yields the same backslash-percent text as before, without relying
    # on the invalid "\%" escape sequence that newer Pythons warn about.
    ax.set_xlabel(f"Principal Component 1 ({pc1_var * 100:.0f}\\%)", fontsize=15)
    ax.set_ylabel(f"Principal Component 2 ({pc2_var * 100:.0f}\\%)", fontsize=15)
    ax.set_title("Sorghum kernels, PCA Plot", fontsize=20)
    ax.scatter("PC1", "PC2", s="ScatterSize", c="Color", data=final_df)
    set_helvetica_axis(ax)

    image_name = "grabseeds_pca." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)


def waterlilyGOM(args):
"""
%prog mcmctree.tre table.csv
Customized figure to plot phylogeny and related infographics.
"""
from jcvi.graphics.tree import (
from ..graphics.tree import (
LeafInfoFile,
WGDInfoFile,
draw_tree,
parse_tree,
draw_wgd_xy,
)
from jcvi.graphics.table import CsvTable, draw_table
from ..graphics.table import CsvTable, draw_table

p = OptionParser(waterlilyGOM.__doc__)
opts, args, iopts = p.set_image_options(args, figsize="12x9")
_, args, iopts = p.set_image_options(args, figsize="12x9")

if len(args) != 2:
sys.exit(not p.print_help())
Expand Down Expand Up @@ -176,7 +242,7 @@ def pomegranate(args):


def utricularia(args):
from jcvi.graphics.synteny import main as synteny_main
from ..graphics.synteny import main as synteny_main

p = OptionParser(synteny_main.__doc__)
p.add_argument("--switch", help="Rename the seqid with two-column file")
Expand Down Expand Up @@ -327,7 +393,7 @@ def mtdotplots(args):
Plot Mt3.5 and Mt4.0 side-by-side. This is essentially combined from two
graphics.dotplot() function calls as panel A and B.
"""
from jcvi.graphics.dotplot import check_beds, dotplot
from ..graphics.dotplot import check_beds, dotplot

p = OptionParser(mtdotplots.__doc__)
p.set_beds()
Expand Down
8 changes: 8 additions & 0 deletions tests/graphics/test_glyph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

from jcvi.graphics.glyph import demo


def test_demo():
    """Smoke test: call the glyph demo with an empty argument list."""
    no_args = []
    demo(no_args)

0 comments on commit 8db2bf4

Please sign in to comment.