forked from KexinNiu/EnzBuilder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval_plot.py
230 lines (190 loc) · 9.24 KB
/
eval_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# f='/ibex/user/niuk0a/funcarve/cobra/eval_clean_all_7_allec.csv'
#############################################
################ec uniprot###############
#############################################
# 读取数据
# file_path = '/ibex/user/niuk0a/funcarve/cobra/eval_7results.csv' # 替换为你的文件路径
# file_path ='/ibex/user/niuk0a/funcarve/cobra/eval_clean_all_7result_add.csv'
file_path ='/ibex/user/niuk0a/funcarve/cobra/eval_clean_all_7result_add1.csv'
# file_path ='/ibex/user/niuk0a/funcarve/cobra/eval_clean_7v3.csv'
data = pd.read_csv(file_path)
print (data.head())
## for each type with different iter_round, we only keeps the one with the highest roc
data = data.sort_values(by='roc', ascending=False).drop_duplicates(subset=['modelname', 'type','thr'], keep='first')
print (data.head())
# 确保数据类型正确
# data['thr'] = data['thr'].astype(float)
data['roc'] = data['roc'].astype(float)
# reorder by modelname then type and thr
data = data.sort_values(by=['modelname', 'type', 'thr'])
# 获取所有的模型名称
models = data['modelname'].unique()
# 设置大图网格
n_rows = (len(models) + 3) // 4 # 每行最多放4个小图
fig, axes = plt.subplots(n_rows, 4, figsize=(20, 5 * n_rows), squeeze=False)
# 遍历每个模型,生成对应的小图
for idx, model in enumerate(models):
ax = axes[idx // 4][idx % 4] # 选择子图
model_data = data[data['modelname'] == model]
# keep thres < 1
# model_data = model_data[model_data['thr'] < 1]
# 绘制每种 type 的曲线
for type_name, type_data in model_data.groupby('type'):
if type_name == 'CLEAN':
continue
ax.plot(type_data['thr'], type_data['roc'], label=type_name, marker='o')
# 添加水平参考线
clean_roc = model_data[model_data['type'] == 'CLEAN']['roc'].values
print('clean_roc=',clean_roc)
if clean_roc.size > 0:
clean_value = clean_roc[0]
ax.axhline(y=clean_value, color='gray', linestyle='--', label='CLEAN Reference')
# tiaozhaeng y轴范围
# ax.set_xlim(0, 5)
ax.set_ylim(clean_value-0.01, model_data['roc'].max() + 0.01)
# 设置图例和标题
ax.set_title(f'Model: {model}')
ax.set_xlabel('Penalty score')
ax.set_ylabel('ROC')
# ax.legend()
# 移除多余的空白子图
for j in range(len(models), n_rows * 4):
fig.delaxes(axes[j // 4][j % 4])
# 调整布局并保存
# add legend for all subplots by using the last one and put it outside,at right bottom
plt.legend(loc='best', bbox_to_anchor=(1, 1), ncol=1)
#'best', 'upper right', 'upper left', 'lower left', 'lower right', 'right', 'center left', 'center right', 'lower center', 'upper center', 'center'
plt.tight_layout()
# plt.savefig('eval_thr_plotv3.png', dpi=300)
plt.savefig('eval_thr_plot.png', dpi=300)
plt.show()
exit()
######################################################################################################################3
#############################################
################model coverage###############
#############################################
# 读取数据
# file_path = '/ibex/user/niuk0a/funcarve/cobra/model_bigg_overlap_i1.csv' # 替换为你的文件路径
# data = pd.read_csv(file_path)
# ## data add a new column precision
# data['precision'] = data['both']/(data['predmodel'])
# data['recall'] = data['both']/(data['biggmodel'])
# data['f1'] = 2*data['precision']*data['recall']/(data['precision']+data['recall'])
# file_path ='/ibex/user/niuk0a/funcarve/cobra/emodel_bigg_overlap_rerun.csv'
file_path ='/ibex/user/niuk0a/funcarve/cobra/emodel_bigg_overlap_v3.csv'
df = pd.read_csv(file_path)
recon_file_path = '/ibex/user/niuk0a/funcarve/cobra/model_bigg_overlap_i1_recon.csv'
recon_data = pd.read_csv(recon_file_path)
# data = pd.concat([df, data], ignore_index=True)
data = df
# remove thr = 0
data = data[data['thr'] != '0']
data = data[data['iter'] < 5]
data['title'] = data['modelname']
# genome ORGID ORGNASIM
# iLJ478 NC_000853.1 243274 Thermotoga maritima MSB8
# iJN1463 NC_002947.4 160488 Pseudomonas putida KT2440
# iCN900 AM180355.1 272563 Clostridioides difficile 630
# iAF1260 NC_000913.3 83333 Escherichia coli str. K-12 substr. MG1655
# iAF987 CP000148.1 269799 Geobacter metallireducens GS-15
# iNJ661 NC_000962.3 83332 Mycobacterium tuberculosis H37Rv
# iYO844 AL009126.3 224308 Bacillus subtilis subsp. subtilis str. 168
# rename the title to the organism name
for index, row in data.iterrows():
if row['modelname'] == 'iLJ478':
data.at[index, 'title'] = 'Thermotoga maritima MSB8'
if row['modelname'] == 'iJN1463':
data.at[index, 'title'] = 'Pseudomonas putida KT2440'
if row['modelname'] == 'iCN900':
data.at[index, 'title'] = 'Clostridioides difficile 630'
if row['modelname'] == 'iAF1260':
data.at[index, 'title'] = 'Escherichia coli str. K-12 substr. MG1655'
if row['modelname'] == 'iAF987':
data.at[index, 'title'] = 'Geobacter metallireducens GS-15'
if row['modelname'] == 'iNJ661':
data.at[index, 'title'] = 'Mycobacterium tuberculosis H37Rv'
if row['modelname'] == 'iYO844':
data.at[index, 'title'] = 'Bacillus subtilis subsp. subtilis str. 168'
# print (data.head())
## for each type with different iter_round, we only keeps the one with the highest roc
# data = data.sort_values(by='roc', ascending=False).drop_duplicates(subset=['modelname', 'evaltype','thr','iter'], keep='first')
# evalcol = 'f1'
# evalcol='tp'
evalcol='ratio_biggmodel'
# 设置绘图风格
sns.set(style="whitegrid")
# 创建 FacetGrid,每个 modelname 一个小图
g = sns.FacetGrid(
data,
# col="modelname", # 每个 modelname 一列小图
col="title", # 每个 modelname 一列小图
col_wrap=4, # 每行放 4 个小图,可以根据需要调整
height=4, # 每个小图的高度
sharey=False # 所有小图共享 y 轴
)
# 添加线条图到每个子图中
g.map_dataframe(
sns.lineplot,
x="iter",
# y="both",
# y="ratio_biggmodel",
# y="ratio_predmodel",
y=evalcol,
hue="thr", # 用 thr 区分线条
style="thr",
markers=True,
dashes=False
)
#给每个字图加一条水平参考线 数据来自recon_data
# g.map(plt.axhline, y=recon_data[evalcol].values[0], ls='--', c='gray')
for ax, modelname in zip(g.axes.flat, data["modelname"].unique()):
#添加参考线
# print('val=',recon_data[recon_data['modelname']==modelname][evalcol].values[0])
refval = recon_data[recon_data['modelname']==modelname][evalcol].values[0]
# ax.axhline(y=refval, color='grey', linestyle='--', label='Recon {refval}')
#set legend for recon reference
# ax.legend([f'Recon={refval}'], loc='lower left',color='grey')
ax.text(1, 0.05, f'Recon={refval:.3f}', horizontalalignment='right', verticalalignment='bottom', transform=ax.transAxes, color='black', fontsize=10)
subset = data[data["modelname"] == modelname] # 提取当前子图的对应数据
# ymin, ymax = subset["both"].min(), subset["both"].max() # 获取当前数据的 y 值范围
# ymin, ymax = subset["ratio_biggmodel"].min(), subset["ratio_biggmodel"].max() # 获取当前数据的 y 值范围
ymin, ymax = subset["ratio_predmodel"].min(), subset["ratio_predmodel"].max() # 获取当前数据的 y 值范围
ymin, ymax = subset[evalcol].min(), subset[evalcol].max() # 获取当前数据的 y 值范围
# ax.set_ylim(ymin - 5, ymax + 5) # 设置 y 轴范围,增加少量缓冲区以美观
# ax.set_ylim(ymin-0.01, ymax + 0.02) # 设置 y 轴范围,增加少量缓冲区以美观
ax.set_ylim(ymin-0.01, ymax + 0.01) # 设置 y 轴范围,增加少量缓冲区以美观
# ax.set_ylim(refval-0.001, ymax + 0.01) # 设置 y 轴范围,增加少量缓冲区以美观
# ax.set_ylim(refval-1, ymax + 50) # 设置 y 轴范围,增加少量缓冲区以美观
# 设置图例和标题
g.add_legend(title="Threshold")
## set legend for recon reference
# g.add_legend(title="Threshold", labels=['Recon Reference'], loc='best')
# g.set_titles("{col_name}") # 每个子图的标题为对应的 modelname
# 字图的标题为dataframe中的title
# print(data['title'].unique())
# print('col_name=',g.col_names)
g.set_titles("{col_name}") # 每个子图的标题为对应的 modelname
# g.set_axis_labels("Iteration", "Both")
# g.set_axis_labels("Iteration", "ratio_biggmodel")
# g.set_axis_labels("Iteration", "ratio_predmodel")
g.set_axis_labels("Iteration", evalcol)
# 调整整体布局
plt.subplots_adjust(top=0.9)
# g.fig.suptitle("Both vs Iter for Each Modelname", fontsize=16) # 总标题
# g.fig.suptitle("ratio_biggmodel vs Iter for Each Modelname", fontsize=16) # 总标题
# g.fig.suptitle("ratio_predmodel vs Iter for Each Modelname", fontsize=16) # 总标题
g.fig.suptitle(f"Reactions {evalcol} based on BiGG models ", fontsize=16) # 总标题
# 显示图表
plt.show()
# plt.savefig('eval_modelrxn_ratio_biggmodel.png', dpi=300)
# plt.savefig('eval_modelrxn_ratio_predmodel.png', dpi=300)
# plt.savefig(f'eval_modelrxn_{evalcol}_recon5.png', dpi=300)
plt.savefig(f'eval_modelrxn_{evalcol}_v3.png', dpi=300)
exit()
#############################################
################model memote ###############
#############################################
# def eval_memote()