forked from YeoLab/singlecell_pnm
-
Notifications
You must be signed in to change notification settings - Fork 1
/
common.py
117 lines (92 loc) · 4.22 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import matplotlib as mpl
import numpy as np
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go
import seaborn as sns
from sklearn.decomposition import PCA
from anchor import MODALITY_TO_COLOR
def kmer_clustermap(kmer_zscores, retain=('included', 'bimodal', 'excluded'),
                    row_filter=lambda x: x.var(axis=1) > (x.var(axis=1).mean() + 4*x.var(axis=1).std()),
                    **kwargs):
    """Draw a Ward-linkage clustermap of k-mer z-scores.

    Parameters
    ----------
    kmer_zscores : pandas.DataFrame
        Table of z-scores. Column labels are parsed by ``make_intron_colors``
        (split on ``'_'``), so they are presumably of the form
        ``<phenotype>_<modality>_<direction...>`` -- TODO confirm with caller.
    retain : iterable of str or None
        Substrings; only columns containing at least one of them are kept
        (see ``retain_cols``).
    row_filter : callable or None
        Called with the column-filtered table and must return a boolean row
        mask. The default keeps rows whose variance across columns exceeds
        the mean row variance by more than four standard deviations.
        Pass ``None`` to keep every row.
    **kwargs
        Forwarded unchanged to ``seaborn.clustermap``.

    Returns
    -------
    The grid object returned by ``seaborn.clustermap``.
    """
    data = retain_cols(kmer_zscores, retain)
    print(data.shape)
    if row_filter is not None:
        data = data.loc[row_filter(data)]
        print(data.shape)

    intron_colors = make_intron_colors(data.columns)
    # Missing z-scores are plotted as 0 so clustering does not fail on NaN.
    g = sns.clustermap(data.fillna(0), col_colors=intron_colors.values,
                       method='ward', **kwargs)

    # Keep row labels horizontal. The original called ``plt.setp`` but ``plt``
    # was never imported in this module; setting the rotation on each tick
    # label directly has the same effect without needing pyplot.
    for label in g.ax_heatmap.get_yticklabels():
        label.set_rotation(0)
    return g
def make_intron_colors(columns):
    """Build a table of annotation colors, one column per input label.

    Each label is split on ``'_'``: the first field is looked up in
    ``study.phenotype_to_color``, the second in ``MODALITY_TO_COLOR``, and the
    last field (truncated at the first ``'nt'`` and stripped of trailing
    digits) in ``direction_to_color``.

    NOTE(review): ``study`` and ``direction_to_color`` are not defined or
    imported in the visible part of this module; they must exist as module
    globals when this function is called.

    Parameters
    ----------
    columns : pandas.Index
        Column labels to colorize (must support ``.map``).

    Returns
    -------
    pandas.DataFrame
        Three rows, in order: modality, direction, phenotype colors.
    """
    def _phenotype_color(label):
        return study.phenotype_to_color[label.split('_')[0]]

    def _modality_color(label):
        return MODALITY_TO_COLOR[label.split('_')[1]]

    def _direction_color(label):
        last_field = label.split('_')[-1]
        # Drop everything from 'nt' onward, then any trailing digits.
        direction = last_field.split('nt')[0].rstrip('0123456789')
        return direction_to_color[direction]

    by_phenotype = columns.map(_phenotype_color)
    by_modality = columns.map(_modality_color)
    by_direction = columns.map(_direction_color)

    color_rows = [by_modality, by_direction, by_phenotype]
    return pd.DataFrame(color_rows, columns=columns)
def retain_cols(data, retain=('included', 'bimodal', 'excluded')):
    """Keep only the columns whose label contains one of the given substrings.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to filter; iterating it yields its column labels.
    retain : iterable of str
        Substrings to look for in each column label.

    Returns
    -------
    The column-filtered table, or ``data`` itself, unchanged, when the
    filtering raises ``TypeError`` (for example ``retain=None`` or
    non-string column labels).
    """
    def _wanted(label):
        # A list (not a generator) so every substring test is evaluated.
        return any([token in label for token in retain])

    try:
        kept = [label for label in data if _wanted(label)]
        return data[kept]
    except TypeError:
        # Best effort: fall back to the unfiltered table.
        return data
def _kmer_pca_loading_traces(components, metadata, scaling_factor):
    """Return one plotly line trace per column loading, drawn from the origin.

    ``components`` has one column per original data column and the two PCA
    components as rows; ``metadata`` is indexed by the same column labels and
    carries 'phenotype' and 'modality' fields. Dash style encodes phenotype,
    color encodes modality (shaded from dark to light within a group).
    """
    dash_by_phenotype = {'iPSC': 'solid', 'NPC': 'dash'}
    traces = []
    # Group the metadata rows rather than the component columns: this gives
    # the same groups as ``groupby(..., axis=1)`` without relying on the
    # deprecated ``axis`` argument.
    for phenotype, phenotype_meta in metadata.groupby('phenotype'):
        linestyle = dash_by_phenotype.get(phenotype, 'dot')
        for modality, modality_meta in phenotype_meta.groupby('modality'):
            # At least 3 shades (the original fixed count), more when the
            # group has more columns, so zip() never silently drops a column.
            n_colors = max(3, len(modality_meta))
            shades = sns.light_palette(MODALITY_TO_COLOR[modality],
                                       n_colors=n_colors)
            palette = [mpl.colors.rgb2hex(rgb) for rgb in reversed(shades)]
            for color, component in zip(palette, modality_meta.index):
                loading = components[component]
                traces.append(go.Scatter(
                    x=[0, loading.iloc[0]*scaling_factor],
                    y=[0, loading.iloc[1]*scaling_factor],
                    mode='lines', name=component, text=component,
                    line=dict(color=color, width=10, dash=linestyle)))
    return traces


def kmer_pcaplot(zscores, title, filename, retain=('included', 'excluded', 'bimodal'), transpose=False):
    """Plot a two-component PCA of z-scores with plotly.

    Rows of the (optionally transposed) table become scatter points. When
    ``transpose`` is false, each retained column is additionally drawn as a
    line from the origin along its PCA loading, scaled to the largest point
    norm.

    Parameters
    ----------
    zscores : pandas.DataFrame
        Table of z-scores; NaN is replaced by 0 before the PCA. When
        ``transpose`` is false the column labels must split on ``'_'`` into
        exactly three fields (phenotype, modality, location).
    title : str
        Figure title; ``'-transposed'`` is appended when ``transpose`` is true.
    filename : str
        Passed to ``plotly.plotly.iplot``.
    retain : iterable of str or None
        Column-label substrings to keep (see ``retain_cols``).
    transpose : bool
        If true, run the PCA on the transposed table and skip loading lines.

    Returns
    -------
    Whatever ``plotly.plotly.iplot`` returns for the figure.
    """
    data = retain_cols(zscores.fillna(0), retain=retain)
    if transpose:
        data = data.T
        title += '-transposed'
    print(data.shape)

    pca = PCA(n_components=2)
    reduced = pd.DataFrame(pca.fit_transform(data), index=data.index)
    reduced.columns = reduced.columns.map(lambda x: 'component_{}'.format(x))
    print('\t', reduced.shape)

    # Row labels have 'T' replaced by 'U' for display (presumably DNA -> RNA
    # k-mers). NOTE(review): this also applies when transposed, where the row
    # labels are the original column names -- confirm that is intended.
    reduced.index = reduced.index.map(lambda x: x.replace('T', 'U'))

    trace0 = go.Scatter(x=reduced.iloc[:, 0], y=reduced.iloc[:, 1], mode='markers', name='Motifs',
                        marker=dict(size=10, opacity=0.5, color='black'), text=reduced.index)

    lines = []
    if not transpose:
        # A plain list comprehension avoids building an Index of lists.
        metadata = pd.DataFrame([label.split('_') for label in data.columns],
                                index=data.columns,
                                columns=['phenotype', 'modality', 'location'])
        components = pd.DataFrame(pca.components_, columns=data.columns)
        print('\t', metadata.shape)

        # Stretch the unit-length loadings out to the farthest scatter point.
        scaling_factor = reduced.apply(np.linalg.norm, axis=1).max()
        lines = _kmer_pca_loading_traces(components, metadata, scaling_factor)

    plotly_data = [trace0] + lines
    explained = pca.explained_variance_ratio_
    layout = go.Layout(
        title=title,
        hovermode='closest',
        xaxis=dict(
            title='PC 1 explains {:d}% of variance'.format(int(explained[0] * 100)),
            ticklen=5,
            zeroline=False,
            gridwidth=0,
        ),
        yaxis=dict(
            title='PC 2 explains {:d}% of variance'.format(int(explained[1] * 100)),
            ticklen=5,
            zeroline=True,
            gridwidth=0,
        ),
    )
    fig = go.Figure(data=plotly_data, layout=layout)
    return py.iplot(fig, filename=filename)