This repository has been archived by the owner on Mar 2, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
pca_fuse.py
117 lines (101 loc) · 3.95 KB
/
pca_fuse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import sklearn
import pickle
from sklearn.decomposition import PCA
import numpy as np
from numpy import dot
from numpy.linalg import norm
import os
class InputFeature():
    """Descriptor for one stored feature file.

    The constructor records its arguments as attributes and derives the path
    of the feature file from them.

    Attributes:
        net: network name; becomes part of the file name.
        step: step number; its ``str()`` form becomes part of the file name.
        feature_name: feature/layer name; becomes part of the file name.
        num_features: stored as given (presumably the length of one feature
            vector -- confirm against the code that consumes it).
        n_components: stored as given; not used inside this class.
        file: ``<dir>/lfw_<net>_step<step>_<feature_name>.txt`` where ``<dir>``
            is ``features_wrong`` when ``wrong`` is truthy, else ``features``.
        features: placeholder for the loaded data; starts out as ``None``.
    """

    def __init__(self, net, step, feature_name, num_features, n_components, wrong=False):
        # Only the directory depends on the ``wrong`` flag; the file name is
        # the same in both cases.
        directory = 'features_wrong/' if wrong else 'features/'
        self.file = ''.join(
            [directory, 'lfw_', net, '_step', str(step), '_', feature_name, '.txt']
        )

        self.net = net
        self.step = step
        self.feature_name = feature_name
        self.num_features = num_features
        self.n_components = n_components

        # Filled in later by whoever loads ``self.file``.
        self.features = None
# Catalogue of the available feature files.  Arguments are
# (net, step, feature_name, num_features, n_components); ``wrong=True`` makes
# the file resolve under ``features_wrong/`` instead of ``features/``.
f1 = InputFeature('vgg_16', 67000, 'fc7', 4096, 256)
f2 = InputFeature('vgg_16', 78000, 'fc7', 4096, 256)
f3 = InputFeature('vgg_16', 91000, 'fc7', 4096, 256)
f4 = InputFeature('vgg_16', 113000, 'fc7', 4096, 256)
f5 = InputFeature('vgg_16', 113000, 'fc6', 4096, 256)
f6 = InputFeature('vgg_16', 113000, 'fc8', 10575, 256)
f7 = InputFeature('resnet_v1_50', 123000, 'res_block', 7 * 7 * 2048, 256)
f8 = InputFeature('vgg_16', 131500, 'fc7', 4096, 256, wrong=True)
f9 = InputFeature('vgg_16', 135000, 'fc7', 4096, 256, wrong=True)

# The feature sets that are actually concatenated and reduced below.
features = [f3, f4, f8, f9]
# Number of principal components kept for the fused descriptor.
pca_components = 512
# Number of image pairs; every pair contributes two rows to the feature map.
size = 6000

# Fail fast, before any loading starts, if an input file is missing.
for f in features:
    if not os.path.isfile(f.file):
        print('File not found: %s' % f.file)
        # A missing input is an error, so exit with a non-zero status.
        raise SystemExit(1)

for f in features:
    print('Loading %s ...' % f.file)
    # Pickle streams are binary data: read them in 'rb' mode and let the
    # context manager close the handle.
    # NOTE(review): unpickling can execute arbitrary code -- only load feature
    # files that come from a trusted source.
    with open(f.file, 'rb') as feature_file:
        f.features = pickle.load(feature_file)

num_features = sum(f.num_features for f in features)
print('Sum of input features is %d' % num_features)

# Rows 2*i and 2*i + 1 hold the two images of pair i; the columns are the
# concatenation of all selected feature sets.
feature_map = np.zeros((2 * size, num_features))
gts = [0 for _ in range(size)]

print('Fusing features...')
# Column offset of the current feature set.  It must be initialised once,
# outside the loop: resetting it per feature set would make every set
# overwrite the first columns and leave the rest of the map at zero.
offset = 0
for f in features:
    columns = slice(offset, offset + f.num_features)
    for i, t in enumerate(f.features):
        # Each entry carries the pair's label and its two feature arrays.
        gts[i] = t['ground_truth']
        feature_map[2 * i, columns] = t['features'][0].reshape(-1)
        feature_map[2 * i + 1, columns] = t['features'][1].reshape(-1)
    offset += f.num_features

print('Shape of feature map before PCA: %s' % str(feature_map.shape))
print('Calculating PCA...')
pca = PCA(n_components=pca_components)
new_feature_map = pca.fit_transform(feature_map)
print('Shape of feature map after PCA: %s' % str(new_feature_map.shape))
assert len(gts) == size

# NOTE: persisting the result is disabled; `f_out` is not defined in this
# section of the file.
# print('Storing data at %s' % f_out)
# output = open(f_out, 'w')
# pickle.dump(new_feature_map, output)
# pickle.dump(gts, output)
def search_threshold(sorted_pairs, size=6000):
    """Find the similarity threshold that classifies the most pairs correctly.

    The search starts with a threshold below every pair, i.e. every pair is
    guessed "True"; under the assumption that half of the ``size`` pairs are
    positive this gives ``size // 2`` correct guesses.  The pairs are then
    visited in the given order, and each visited pair flips to a "False"
    guess: a positive pair becomes wrong, a negative pair becomes right.  The
    best running score and the similarity at which it was reached are kept.

    Args:
        sorted_pairs: iterable of dicts with keys ``'ground_truth'`` and
            ``'similarity'``, expected in ascending order of similarity.
        size: total number of pairs; ``size // 2`` of them are assumed to be
            positive.

    Returns:
        Tuple ``(best_correct, best_threshold, best_t_t, best_f_f)``: the
        highest number of correct guesses, the similarity of the last pair
        guessed "False" at that point (``0.0`` if the initial state was never
        beaten), and the counts of positives guessed "True" and negatives
        guessed "False" at that point.
    """
    # Floor division keeps the counters integral on Python 3 as well.
    correct = size // 2
    t_t = size // 2
    f_f = 0
    best_correct = correct
    best_threshold = 0.0
    best_t_t = t_t
    best_f_f = f_f
    for image_pair in sorted_pairs:
        # Truthiness instead of ``is True``: labels stored as 1/0 or as
        # numpy.bool_ are not the ``True`` singleton and would otherwise all
        # be counted as negatives.
        if image_pair['ground_truth']:
            correct -= 1
            t_t -= 1
        else:
            correct += 1
            f_f += 1
        # Strict comparison: on equal scores the earliest threshold wins.
        if correct > best_correct:
            best_correct = correct
            best_threshold = image_pair['similarity']
            best_t_t, best_f_f = t_t, f_f
    return best_correct, best_threshold, best_t_t, best_f_f
# From here on work with the PCA-reduced descriptors.
feature_map = new_feature_map

# One record per image pair: its ground truth and the cosine similarity of
# the pair's two descriptors (rows 2*i and 2*i + 1 of the feature map).
pairs = []
for i in range(size):
    f_0 = feature_map[2 * i]
    f_1 = feature_map[2 * i + 1]
    # f_0 = np.concatenate((f_0,f_2))
    # f_1 = np.concatenate((f_1, f_3))
    denom = norm(f_0) * norm(f_1)
    # Cosine similarity is undefined for an all-zero vector; score such a
    # pair 0.0 instead of letting a NaN corrupt the sort below.
    sim = dot(f_0, f_1) / denom if denom > 0 else 0.0
    pairs.append({'ground_truth': gts[i], 'similarity': sim})

# search_threshold expects the pairs in ascending order of similarity.
sorted_pairs = sorted(pairs, key=lambda pair: pair['similarity'])
# Pass the pair count explicitly so the search does not silently rely on its
# own default when `size` is changed.
best_correct, best_threshold, best_t_t, best_f_f = search_threshold(sorted_pairs, size)

print('Choose threshold: %.4f' % best_threshold)
print('Size = %d, Correct = %4d, rate = %s' % (size, best_correct, format(best_correct / float(size), '6.2%')))
print('True, guess True = %4d, rate = %s' % (best_t_t, format(best_t_t / float(size), '6.2%')))
print('False, guess False = %4d, rate = %s' % (best_f_f, format(best_f_f / float(size), '6.2%')))