Skip to content

Commit

Permalink
Merge pull request #300 from lilab-bcb/pseudo-deseq2
Browse files (browse the repository at this point in the history)
Reorganize pseudobulk and deseq2 functions
  • Loading branch information
bli25 authored Jun 2, 2024
2 parents 363dee9 + e368e45 commit 31c4e21
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 81 deletions.
8 changes: 4 additions & 4 deletions pegasus/plotting/plot_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def scatter(
fontsize=legend_fontsize[attr_id],
ncol=_get_legend_ncol(label_size, legend_ncol),
)
for handle in legend.legendHandles:
for handle in legend.legend_handles:
handle.set_sizes([300.0 if scale_factor is None else 100.0])
elif legend_loc[attr_id] == "on data":
texts = []
Expand Down Expand Up @@ -588,7 +588,7 @@ def scatter_groups(
fontsize=legend_fontsize,
ncol=legend_ncol,
)
for handle in legend.legendHandles:
for handle in legend.legend_handles:
handle.set_sizes([300.0])
elif legend_loc == "on data":
texts = []
Expand Down Expand Up @@ -2092,7 +2092,7 @@ def volcano(
fontsize=8,
ncol=4,
)
for handle in legend.legendHandles: # adjust legend size
for handle in legend.legend_handles: # adjust legend size
handle.set_sizes([50.0])

ax.axhline(y = yconst, c = 'k', lw = 0.5, ls = '--')
Expand Down Expand Up @@ -2355,7 +2355,7 @@ def wordcloud(
>>> fig = pg.wordcloud(data, factor=0)
"""
fig, ax = _get_subplot_layouts(panel_size=panel_size, dpi=dpi) # default nrows = 1 & ncols = 1

assert 'W' in data.uns
hvg = data.var_names[data.var[features]]
word_dict = {}
Expand Down
23 changes: 16 additions & 7 deletions pegasus/pseudo/convenient.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def markers(
if de_key not in pseudobulk.varm.keys():
raise ValueError("Please run DE analysis first")

res_dict = {}
res_dict = {}
df = pd.DataFrame(data=pseudobulk.varm[de_key], index=pseudobulk.var_names)
idx = df["padj"] <= alpha

Expand Down Expand Up @@ -206,11 +206,20 @@ def volcano(
logger.warning("Please conduct DE test first!")
return None

log2fc = de_res[fcstr]
pvals = de_res[pstr]
qvals = de_res[qstr]
# Ignore genes with q-value NaN
idx_select = np.where(~np.isnan(qvals))[0]
if idx_select.size == 0:
logger.warning("All genes have NaN adjusted p-values!")
return None
qvals = qvals[idx_select]
log2fc = de_res[fcstr][idx_select]
pvals = de_res[pstr][idx_select]
gene_names = pseudobulk.var_names[idx_select]

pvals[pvals == 0.0] = 1e-45 # very small pvalue to avoid log10 0
neglog10p = -np.log10(pvals)
yconst = min(neglog10p[de_res[qstr] <= qval_threshold])
yconst = min(neglog10p[qvals <= qval_threshold])

from pegasus.plotting.plot_utils import _get_subplot_layouts
fig, ax = _get_subplot_layouts(panel_size=panel_size, dpi=dpi)
Expand Down Expand Up @@ -239,7 +248,7 @@ def volcano(
fontsize=8,
ncol=4,
)
for handle in legend.legendHandles: # adjust legend size
for handle in legend.legend_handles: # adjust legend size
handle.set_sizes([50.0])

ax.axhline(y = yconst, c = 'k', lw = 0.5, ls = '--')
Expand All @@ -252,13 +261,13 @@ def volcano(
posvec = np.argsort(log2fc[idx])[::-1][0:top_n]
for pos in posvec:
gid = idx[pos]
texts.append(ax.text(log2fc[gid], neglog10p[gid], pseudobulk.var_names[gid], fontsize=5))
texts.append(ax.text(log2fc[gid], neglog10p[gid], gene_names[gid], fontsize=5))

idx = np.where(idxsig & (log2fc <= -log2fc_threshold))[0]
posvec = np.argsort(log2fc[idx])[0:top_n]
for pos in posvec:
gid = idx[pos]
texts.append(ax.text(log2fc[gid], neglog10p[gid], pseudobulk.var_names[gid], fontsize=5))
texts.append(ax.text(log2fc[gid], neglog10p[gid], gene_names[gid], fontsize=5))

from adjustText import adjust_text
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='k', lw=0.5))
Expand Down
10 changes: 4 additions & 6 deletions pegasus/tools/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sklearn.cluster import KMeans
from typing import List, Optional, Union

from pegasus.tools import eff_n_jobs, construct_graph, calc_stat_per_batch, X_from_rep, slicing
from pegasus.tools import eff_n_jobs, construct_graph, calc_stat_per_batch, update_rep, X_from_rep, slicing

import logging
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -780,10 +780,8 @@ def calc_dendrogram(
"""
# Set up embedding or count matrix to use
if genes is None:
if rep:
embed_df = pd.DataFrame(X_from_rep(data, rep))
else:
embed_df = pd.DataFrame(data.X.toarray() if issparse(data.X) else data.X)
rep = update_rep(rep)
embed_df = pd.DataFrame(X_from_rep(data, rep))
else:
embed_df = pd.DataFrame(slicing(data[:, genes].X))

Expand Down Expand Up @@ -825,4 +823,4 @@ def calc_dendrogram(
np.fill_diagonal(dissim_df.to_numpy(), 0) # Enforce main diagonal to be 0 to pass squareform requirement
Z = linkage(squareform(dissim_df), method=linkage_method, optimal_ordering=True)

data.uns[res_key] = (Z, csi_df)
data.uns[res_key] = (Z, csi_df)
Loading

0 comments on commit 31c4e21

Please sign in to comment.