-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalculate_aulc.py
182 lines (137 loc) · 7.29 KB
/
calculate_aulc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# %%
import seaborn as sns
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt
import matplotlib
import itertools
import numpy as np
import warnings
warnings.filterwarnings('ignore')
matplotlib.use('Agg')
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['figure.dpi'] = 300
sns.set(style="ticks")
# sns.set(style="whitegrid")
# %%
'''
In this file, we plan to plot the final version of the learning curves:
1. A combined figure of all the passive models (6 sub-figures in total).
2. All the dataset-model combinations in a single graph (4*5).
3. The results of the best strategy on each model (4 sub-figures).
4. On each dataset, the domain performances (6 sets of figures).
'''
# %%
class Analyser:
    '''
    Load pickled performance logs and summarise them as AULC statistics.

    For every model-strategy pair, the area under the learning curve (AULC)
    is computed once per repetition. The mean and standard deviation over the
    repetitions (both multiplied by 100) are stored in ``aulc_mean_dict`` and
    ``aulc_std_dict``, keyed by the pair name, and can be exported to Excel.
    '''

    def __init__(self, result_folder_address, model_strategy_pair_list, repetition_index_range):
        '''
        Store the configuration and immediately load all results.

        Args:
            result_folder_address: Prefix prepended verbatim to every result
                file name, so a folder path must end with a path separator.
            model_strategy_pair_list: Pair names; used both inside the result
                file names and as keys of the resulting dicts.
            repetition_index_range: Arguments unpacked into ``range(...)`` to
                enumerate the repetition indices, e.g. ``(0, 3)``.
        '''
        self.result_folder_address = result_folder_address
        self.repetition_index_range = repetition_index_range
        self.model_strategy_pair_list = model_strategy_pair_list
        self.domain_names = None
        # Both dicts map model_strategy_pair => scalar (already scaled by 100).
        self.aulc_mean_dict, self.aulc_std_dict = self._load_results()

    def _load_results(self):
        '''
        Read every ``result_<pair>_<i>.result`` file and aggregate the AULC.

        Returns:
            A tuple ``(aulc_mean_dict, aulc_std_dict)``; each maps a
            model-strategy pair to the mean / standard deviation (times 100)
            of the AULC over all repetitions.
        '''
        aulc_mean_dict = {}
        aulc_std_dict = {}
        for model_strategy_pair in self.model_strategy_pair_list:
            aulc_per_repetition = []
            for i in range(*self.repetition_index_range):
                result_address = f'{self.result_folder_address}result_{model_strategy_pair}_{i}.result'
                # NOTE(review): pickle can execute arbitrary code while loading;
                # only point this at result files produced by trusted runs.
                with open(result_address, "rb") as result_file:
                    performance_log = pickle.load(result_file)
                # Key -1 is taken to be the all-domain performance, stored as a list.
                performance_list = performance_log.performance_dict[-1]
                aulc_per_repetition.append(self._calculate_aulc(performance_list))
            aulc_mean_dict[model_strategy_pair] = np.mean(aulc_per_repetition) * 100
            aulc_std_dict[model_strategy_pair] = np.std(aulc_per_repetition) * 100
        return aulc_mean_dict, aulc_std_dict

    def _calculate_aulc(self, performance_list):
        '''
        Return the area under the learning curve for one run.

        The value is the mean of the midpoints of every pair of consecutive
        points (trapezoidal rule with unit spacing, normalised by the number
        of segments).  Points equal to 0 are valid measurements and are
        included like any other value.

        Args:
            performance_list: Sequence of numeric performance values.

        Returns:
            The AULC, or NaN when fewer than two points are available.
        '''
        points = list(performance_list)
        if len(points) < 2:
            # No segment exists, so the area is undefined.
            return np.nan
        segment_midpoints = [(left + right) / 2 for left, right in zip(points, points[1:])]
        return np.mean(segment_midpoints)

    def export_aulc(self, save_address=None):
        '''
        Write the AULC mean/std table (rounded to 2 decimals) to ``aulc.xlsx``.

        Args:
            save_address: Prefix prepended verbatim to ``aulc.xlsx`` (a folder
                path must end with a path separator).  A relative prefix is
                resolved against the current working directory.  ``None``
                means no prefix, i.e. the file is written to the current
                working directory.
        '''
        prefix = '' if save_address is None else save_address
        pairs = list(self.model_strategy_pair_list)
        data = {
            'Model-Strategy-Pair': pairs,
            'mean': [self.aulc_mean_dict[pair] for pair in pairs],
            'std': [self.aulc_std_dict[pair] for pair in pairs],
        }
        df = pd.DataFrame.from_dict(data).round(2)
        output_path = os.path.join(os.getcwd(), f'{prefix}aulc.xlsx')
        # The context manager saves and closes the workbook; ExcelWriter.save()
        # no longer exists in pandas >= 2.0.
        with pd.ExcelWriter(output_path) as writer:
            df.to_excel(writer)
#%%
#%%
def plot_curves(root_folder, dataset, model_list, strategy_list, repetition_index_range):
    '''
    Export the AULC table for every model/strategy combination of one task.

    Despite its name, this function currently draws no figures: it builds an
    ``Analyser`` over ``<root_folder>/folder_result/`` and writes ``aulc.xlsx``
    into ``<root_folder>/folder_image/2.active_parformances/``.  It also
    creates the (currently unused) ``4.domain_performance`` output folder.

    Args:
        root_folder: Folder of the task; results are read from and outputs
            written below it.
        dataset: Name of the dataset.  Not used at the moment; it is only
            needed by the disabled per-domain step at the end.
        model_list: Model names.
        strategy_list: Strategy names; every model is paired with every
            strategy as ``'<model>_<strategy>'``.
        repetition_index_range: Arguments for ``range(...)`` selecting the
            repetition indices to load.
    '''
    result_folder_address = f'{root_folder}/folder_result/'
    image_folder_address = f'{root_folder}/folder_image/'
    # makedirs(exist_ok=True) creates missing parents and avoids the
    # check-then-create race of isdir() followed by mkdir().
    os.makedirs(image_folder_address, exist_ok=True)

    # AULC of each model-strategy pair on the current task.
    save_address = f'{root_folder}/folder_image/2.active_parformances/'
    os.makedirs(save_address, exist_ok=True)
    # NOTE: every combination is assumed to have result files; combinations
    # that are not valid for a model are not filtered out here.
    model_strategy_pair_list = [
        f'{model}_{strategy}'
        for model in model_list
        for strategy in strategy_list
    ]
    analyser = Analyser(result_folder_address, model_strategy_pair_list, repetition_index_range)
    analyser.export_aulc(save_address=save_address)

    # Per-domain performance: only the output folder is prepared for now.
    save_address = f'{root_folder}/folder_image/4.domain_performance/'
    os.makedirs(save_address, exist_ok=True)
    # Model-strategy pairs per dataset, kept for the disabled per-domain step below.
    model_strategy_pair_list_dict = {
        'double_inter_twin_moon': ['DANN_Random', 'SDL_joint_Random', 'SDL_separate_Random', 'MDNet_Random', 'MAN_Random'],
        'triple_inter_twin_moon': ['DANN_Random', 'SDL_joint_Random', 'SDL_separate_Random', 'MDNet_Random', 'MAN_Random'],
        'digits': ['DANN_Uncertainty', 'SDL_joint_Uncertainty', 'SDL_separate_Uncertainty', 'MDNet_Uncertainty', 'MAN_Uncertainty', 'CAN_Uncertainty'],
        'amazon': ['DANN_Uncertainty', 'SDL_joint_Uncertainty', 'SDL_separate_Uncertainty', 'MDNet_Uncertainty', 'MAN_Uncertainty', 'CAN_Uncertainty'],
        'office_31': ['DANN_Uncertainty', 'SDL_joint_Uncertainty', 'SDL_separate_Uncertainty', 'MDNet_Uncertainty', 'MAN_Uncertainty', 'CAN_Uncertainty'],
        'imageCLEF': ['DANN_Random', 'SDL_joint_Random', 'SDL_separate_Uncertainty', 'MDNet_Uncertainty', 'MAN_Uncertainty'],
        'office_home': ['DANN_Uncertainty', 'SDL_joint_Uncertainty', 'SDL_separate_Uncertainty', 'MDNet_Uncertainty', 'MAN_Uncertainty', 'CAN_Uncertainty'],
        'PACs': ['DANN_Uncertainty', 'SDL_joint_Uncertainty', 'SDL_separate_Uncertainty', 'MDNet_Uncertainty', 'MAN_Uncertainty', 'CAN_Uncertainty'],
    }
    # Disabled step: look up model_strategy_pair_list_dict[dataset], build an
    # Analyser for those pairs and plot each pair's per-domain curves.
    # model_strategy_pair_list = model_strategy_pair_list_dict[dataset]
    # analyser = Analyser(result_folder_address,
    #                     model_strategy_pair_list, repetition_index_range)
    # analyser.plot_pairs_seperately(save_address=save_address, domain_performance = True, y_label = "Average Accuracy on the Domain")
# %%
if __name__ == '__main__':
    # Script entry point.
    #   * Give the name of the task folder (not just the dataset, because there
    #     might be amazon-1/amazon-2 for different hyper-parameters).
    #   * Give the range of repetition indices.
    #   * New folders for the outputs are created inside the task folder.
    task_result_folder_name = 'DONE-PACs'
    # Dataset choices: ['digits', 'amazon', 'office_31', 'imageCLEF', 'office_home', 'PACs']
    dataset = 'PACs'
    repetition_index_range = (0, 3)
    root_folder = f'all-results/{task_result_folder_name}'
    # Every model below is combined with every strategy below.
    model_list = [
        'DANN',
        'SDL_joint',
        'SDL_separate',
        'MDNet',
        'MAN',
        'CAN',
    ]
    strategy_list = [
        'Random',
        'Uncertainty',
        'BADGE',
        'EGL',
        'Coreset',
    ]
    plot_curves(
        root_folder=root_folder,
        dataset=dataset,
        model_list=model_list,
        strategy_list=strategy_list,
        repetition_index_range=repetition_index_range,
    )