-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeatureExtraction.py
130 lines (102 loc) · 3.98 KB
/
featureExtraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import numpy as np
class FeatureExtraction:
    """Build a feature vector for a picture from its two axis sum profiles.

    ``distributionX[i]`` holds the sum of ``picture[i, :]`` and
    ``distributionY[i]`` the sum of ``picture[:, i]``.  Every feature is
    derived from these two profiles only.

    NOTE(review): the features assume non-negative pixel values, so that
    zero profile entries mean "nothing drawn here" -- confirm with callers.
    """

    def __init__(self, length=28):
        """Create an extractor for pictures with ``length`` rows and columns."""
        self.len = length
        self.distributionX = np.zeros(self.len)
        self.distributionY = np.zeros(self.len)
        # add your feature function here
        self.features = [self.calcVar, self.calcMinMax, self.normalizedDists]
        # add length of return vector of your feature function
        # (1 constant + 2 from calcVar + 4 from calcMinMax
        #  + 10 from normalizedDists)
        self.return_length = 1 + 2 + 4 + 10

    def init_pic(self, picture):
        """Recompute both sum profiles from ``picture``.

        Args:
            picture: array-like with at least two axes and at least
                ``self.len`` entries along each of the first two axes.
                Only the first ``self.len`` indices are profiled; all other
                axes are summed over.

        Raises:
            ValueError: if ``picture`` is too small to be profiled.
        """
        picture = np.asarray(picture)
        size = self.len
        if picture.ndim < 2 or min(picture.shape[:2]) < size:
            raise ValueError(
                "picture must have at least %d entries along its first two "
                "axes, got shape %r" % (size, picture.shape)
            )
        # Sum over every axis except the one being profiled.
        all_but_first = tuple(range(1, picture.ndim))
        all_but_second = (0,) + tuple(range(2, picture.ndim))
        self.distributionX[:] = picture[:size].sum(axis=all_but_first)
        self.distributionY[:] = picture[:, :size].sum(axis=all_but_second)

    def get_train_data(self, picture):
        """Return the feature vector of ``picture`` as a flat list.

        The list starts with the constant ``1`` (matching the constant term
        of the weight vector) followed by the output of every function in
        ``self.features``, in order.
        """
        self.init_pic(picture)
        # add constant value to data set to match constant in weight vector
        data = [1]
        # TODO optimize with multiprocessing?
        for feature in self.features:
            data.extend(feature())
        return data

    def _scaled_nonzero(self, distribution):
        """Return the non-zero entries of ``distribution`` divided by their maximum.

        Raises:
            ValueError: if ``distribution`` has no non-zero entry.
        """
        nonzero = distribution[np.nonzero(distribution)]
        if nonzero.size == 0:
            raise ValueError(
                "cannot extract features: the picture has no non-zero pixels"
            )
        return np.divide(nonzero, np.max(nonzero))

    def _part_shares(self, distribution, nr_parts=5):
        """Split the scaled non-zero profile into parts and return each part's share.

        The profile is cut into ``nr_parts`` consecutive chunks whose sizes
        differ by at most one (larger chunks first); chunks may be empty when
        there are fewer entries than parts.  Each share is the chunk sum
        divided by the total sum.
        """
        scaled = self._scaled_nonzero(distribution)
        total = np.sum(scaled)
        return [np.sum(part) / total
                for part in np.array_split(scaled, nr_parts)]

    def _relative_extrema(self, distribution, first, last):
        """Return (max position, min position) inside ``distribution[first:last + 1]``.

        Positions are measured from ``first`` and scaled by the extent
        ``last - first`` so they lie in [0, 1].  A one-element range yields
        ``(0.0, 0.0)`` instead of dividing by zero.
        """
        extent = last - first
        if extent == 0:
            return 0.0, 0.0
        # Searching only inside the range keeps the zero padding around the
        # drawn area from always winning the minimum search.
        window = distribution[first:last + 1]
        return np.argmax(window) / extent, np.argmin(window) / extent

    def calcVar(self):
        """Return ``[var_x, var_y]``: variance of each max-scaled non-zero profile.

        Raises:
            ValueError: if a profile has no non-zero entry.
        """
        return [np.var(self._scaled_nonzero(self.distributionX)),
                np.var(self._scaled_nonzero(self.distributionY))]

    def calcMinMax(self):
        """Return relative positions of the profile maxima and minima.

        Positions are expressed relative to the smallest rectangle around
        the drawn area (see ``calcRectangle``).

        Returns:
            ``[max_x, max_y, min_x, min_y]``, each in [0, 1].

        Raises:
            ValueError: if a profile has no non-zero entry.
        """
        cbl, cbr, cul, cur = self.calcRectangle()
        rpMaxX, rpMinX = self._relative_extrema(self.distributionX, cbl, cbr)
        rpMaxY, rpMinY = self._relative_extrema(self.distributionY, cul, cur)
        return [rpMaxX, rpMaxY, rpMinX, rpMinY]

    def normalizedDists(self):
        """Return the mass share of five consecutive parts of each profile.

        Returns:
            Ten values: five shares for ``distributionX`` followed by five
            for ``distributionY``.

        Raises:
            ValueError: if a profile has no non-zero entry.
        """
        normDists = []
        normDists.extend(self._part_shares(self.distributionX))
        normDists.extend(self._part_shares(self.distributionY))
        return normDists

    def calcRectangle(self):
        """Return the bounds of the smallest rectangle around the digit.

        Returns:
            ``[cbl, cbr, cul, cur]``: first and last non-zero index of
            ``distributionX``, then first and last non-zero index of
            ``distributionY``.

        Raises:
            ValueError: if a profile has no non-zero entry.
        """
        nzX = np.nonzero(self.distributionX)[0]
        nzY = np.nonzero(self.distributionY)[0]
        if nzX.size == 0 or nzY.size == 0:
            raise ValueError(
                "cannot compute bounding rectangle: "
                "the picture has no non-zero pixels"
            )
        return [nzX[0], nzX[-1], nzY[0], nzY[-1]]