-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtabularize_forward_transfer.py
executable file
·83 lines (67 loc) · 3.08 KB
/
tabularize_forward_transfer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import argparse
import pandas as pd
pd.set_option('display.max_rows', None)
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument("results_csv_file", help="Path to results file", nargs='+')
parser.add_argument("--latex", default=False, action='store_true', help="Produce latex output")
parser.add_argument("--save", help="Path to save resulting table")
args = parser.parse_args()

print("Loading data:", args.results_csv_file)
# Collect all frames first and concatenate ONCE at the end:
# calling pd.concat inside the loop copies the accumulated data on
# every iteration (quadratic in total size).
frames = [pd.read_csv(args.results_csv_file[0])]
print("N =", len(frames[0]))
for path in args.results_csv_file[1:]:
    print("Adding data:", path)
    frames.append(pd.read_csv(path))
    # Running total so the per-file progress output is preserved
    print("N =", sum(len(frame) for frame in frames))
df = pd.concat(frames, axis=0, ignore_index=True)
def SD(values):
    """Sample standard deviation with the ddof=1 made explicit."""
    sample_std = values.std(ddof=1)
    return sample_std
def SE(values):
    """Standard error of the mean: sample std (ddof=1) over sqrt(n)."""
    n_observed = values.count()
    sample_std = values.std(ddof=1)
    return sample_std / np.sqrt(n_observed)
def forward_transfer(df):
    """Compute forward transfer (FWT) per (dataset, model, history).

    FWT is the warm-start minus cold-start accuracy on each task (year),
    averaged over all tasks except the first task of each dataset (there
    is no prior task to transfer from on the first one).

    Expects columns: dataset, model, history, year, start, accuracy,
    where 'start' takes the values 'warm' and 'cold'.

    Returns the per-group mean FWT indexed by (dataset, model, history).
    """
    print("Computing forward transfer")
    first_task_per_dataset = df.groupby('dataset', as_index=False)['year'].min()
    print("Dropping first task per dataset:", first_task_per_dataset, sep='\n')
    # Vectorized filter instead of dropping index ranges one dataset at a
    # time: keep only rows whose year is NOT the dataset's first task.
    first_year = df.groupby('dataset')['year'].transform('min')
    df = df[df['year'] != first_year]
    # Accuracy per task, spread into 'warm'/'cold' columns (duplicates
    # within a cell are averaged by aggfunc='mean').
    df = df.pivot_table(index=['dataset', 'model', 'history', 'year'], columns=['start'], aggfunc='mean')
    df['FWT'] = df['accuracy']['warm'] - df['accuracy']['cold']
    df.drop('accuracy', axis=1, inplace=True)
    # Average out the tasks (years)
    fwt = df.groupby(['dataset', 'model', 'history']).FWT.mean()
    return fwt
# Expected CSV columns (for reference):
# dataset,seed,model,variant,n_params,n_hidden,n_layers,dropout,history,sampling,batch_size,saint_coverage,limited_pretraining,initial_epochs,initial_lr,initial_wd,annual_epochs,annual_lr,annual_wd,
# start,decay,year,epoch,f1_macro,accuracy
for col in ['annual_lr', 'sampling', 'batch_size', 'variant']:
    # Warn on mixed hyperparameter settings: they would be silently
    # averaged together by the aggregation below.
    if col in df and df[col].nunique() > 1:
        print(f"[warn] Don't apply this to hyperparameter optimization results! Will not group by column '{col}'")
# TODO include 'seed'?
groupby_cols = ['dataset', 'model', 'history']
# Select subset of interesting columns
df = df[groupby_cols + ['start', 'accuracy', 'year']]
fwt = forward_transfer(df)
# 'year' was only needed by forward_transfer; drop it before aggregating
df = df.drop('year', axis=1)
# This aggregates accuracy grouped by dataset, model, history (index) and start (column)
df = df.pivot_table(index=groupby_cols, columns='start', aggfunc=['mean', SE])
df['FWT'] = fwt
# Scale the standard error to a 95% confidence half-width
df['SE'] *= 1.96
df.rename({"SE": "1.96SE"}, inplace=True, axis=1)
print(df * 100)
if args.save:
    print("Saving aggregated results to:", args.save)
    # NOTE(review): --latex only takes effect together with --save;
    # without --save the flag is silently ignored.
    if args.latex:
        with open(args.save, 'w') as fhandle:
            print(df.to_latex(), file=fhandle)
    else:
        df.to_csv(args.save)