-
Notifications
You must be signed in to change notification settings - Fork 25
/
transforms.py
executable file
·175 lines (121 loc) · 3.45 KB
/
transforms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""This module contains functions for transforming datasets"""
import numpy as np
def dropna_Y(Y, label):
    """Remove missing and outlier labels from ``Y``.

    Args:
        Y: array of labels; it is squeezed before filtering.
        label: application name, used to select dataset-specific outlier
            rules (only ``"nightlights"`` has one here).

    Returns:
        tuple: ``(Y_kept, valid)`` where ``valid`` is the boolean mask computed
        on the squeezed ``Y`` and ``Y_kept`` is ``Y[valid]``.
    """
    Y = Y.squeeze()

    # A label counts as present when it is neither NaN nor +/-inf and is not
    # the -999 missing-value code.
    valid = np.isfinite(Y) & (Y != -999)

    # Dataset-specific outlier rule: nightlights values above 629 are dropped.
    if label == "nightlights":
        valid &= Y <= 629

    return Y[valid], valid
def dropna(X, Y, latlon, c_app):
    """Drop observations whose label is missing, keeping all inputs aligned.

    Args:
        X: features, indexable by a boolean mask.
        Y: labels; squeezed before filtering.
        latlon: coordinates, indexable by the same boolean mask.
        c_app: mapping whose ``"application"`` entry names the task and is
            forwarded to ``dropna_Y`` to pick its outlier rule.

    Returns:
        tuple: ``(X, Y, latlon)`` restricted to the observations that
        ``dropna_Y`` kept.
    """
    # dropna_Y hands back the surviving labels and the mask it applied; reuse
    # that mask so features and coordinates lose exactly the same rows.
    labels, keep = dropna_Y(Y.squeeze(), c_app["application"])
    return X[keep], labels, latlon[keep]
def dropna_and_transform(X, Y, latlon, c_app):
    """Drop observations with missing labels, then apply the task transform.

    The transform is found by name: application ``"foo"`` dispatches to the
    module-level function ``transform_foo``.

    Args:
        X: features, indexable by a boolean mask.
        Y: labels.
        latlon: coordinates, indexable by the same boolean mask.
        c_app: mapping with an ``"application"`` entry (task name) and a
            ``"logged"`` entry (passed to the transform as its ``log`` flag).

    Returns:
        tuple: the ``(X, Y, latlon)`` triple returned by the task transform.

    Raises:
        KeyError: if ``c_app`` lacks a required entry, or if no
            ``transform_<application>`` function exists in this module.
    """
    name = c_app["application"]

    # Resolve the transform before touching the data, so an unknown
    # application fails fast with a message naming the missing function
    # instead of a bare KeyError after the filtering work is already done.
    func_name = "transform_" + name
    try:
        transform_func = globals()[func_name]
    except KeyError:
        raise KeyError(
            "no transform defined for application {!r}: expected a "
            "module-level function named {!r}".format(name, func_name)
        ) from None

    X, Y, latlon = dropna(X, Y, latlon, c_app)
    return transform_func(X, Y, latlon, c_app["logged"])
def transform_elevation(X, Y, latlon, log):
    """Return the elevation inputs unchanged.

    ``log`` is accepted but ignored: no log transform is applied for this
    task, whatever its value.
    """
    return X, Y, latlon
def transform_population(X, Y, latlon, log):
    """Optionally log-transform the labels of the population task.

    When ``log`` is truthy the labels become ``log(Y + 1)``; otherwise they
    are passed through. ``X`` and ``latlon`` are returned as given.
    """
    if not log:
        return X, Y, latlon
    return X, np.log(Y + 1), latlon
def transform_housing(X, Y, latlon, log):
    """Optionally log-transform the labels of the housing task.

    Unlike the sibling ``log(Y + 1)`` transforms, this one takes the plain
    natural log of ``Y`` (no offset) when ``log`` is truthy.
    """
    labels = np.log(Y) if log else Y
    return X, labels, latlon
def transform_income(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the labels of the income task.

    Returns ``(X, Y, latlon)``; only ``Y`` is ever altered, and only when
    ``log`` is truthy.
    """
    labels = np.log(Y + 1) if log else Y
    return X, labels, latlon
def transform_nightlights(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the labels of the nightlights task.

    ``X`` and ``latlon`` pass through untouched.
    """
    if not log:
        return X, Y, latlon
    logged = np.log(Y + 1)
    return X, logged, latlon
def transform_roads(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the labels of the roads task.

    Args:
        X: features, returned as given.
        Y: labels.
        latlon: coordinates, returned as given.
        log: when truthy, replace ``Y`` with ``log(Y + 1)``.

    Returns:
        tuple: ``(X, Y, latlon)``.
    """
    return X, (np.log(Y + 1) if log else Y), latlon
def transform_treecover(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the labels of the treecover task.

    Nothing is dropped or reordered; with a falsy ``log`` the inputs are
    returned exactly as received.
    """
    if log:
        return X, np.log(Y + 1), latlon
    return X, Y, latlon
def log_all(Y, c_app):
    """Return the log-transformed labels for the application in ``c_app``.

    Args:
        Y: labels to transform.
        c_app: mapping whose ``"application"`` entry names the task.

    Returns:
        ``log(Y)`` for the ``"housing"`` application, ``log(Y + 1)`` for every
        other application.
    """
    # housing is the one task logged without the +1 offset
    if c_app["application"] == "housing":
        return np.log(Y)
    return np.log(Y + 1)
##################################################
### ACS functions
##################################################
def transform_B08303(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the B08303 ACS labels.

    ``X`` and ``latlon`` are returned as given.
    """
    if not log:
        return X, Y, latlon
    return X, np.log(Y + 1), latlon
def transform_B15003(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the B15003 ACS labels.

    Returns the ``(X, Y, latlon)`` triple; only ``Y`` can change.
    """
    labels = np.log(Y + 1) if log else Y
    return X, labels, latlon
def transform_B19013(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the B19013 ACS labels.

    With a falsy ``log`` the three inputs are handed back untouched.
    """
    if log:
        return X, np.log(Y + 1), latlon
    return X, Y, latlon
def transform_B19301(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the B19301 ACS labels.

    Args:
        X: features, returned as given.
        Y: labels.
        latlon: coordinates, returned as given.
        log: when truthy, replace ``Y`` with ``log(Y + 1)``.

    Returns:
        tuple: ``(X, Y, latlon)``.
    """
    return X, (np.log(Y + 1) if log else Y), latlon
def transform_C17002(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the C17002 ACS labels.

    ``X`` and ``latlon`` pass through unchanged.
    """
    if not log:
        return X, Y, latlon
    logged = np.log(Y + 1)
    return X, logged, latlon
def transform_B22010(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the B22010 ACS labels.

    Only ``Y`` is ever altered, and only when ``log`` is truthy.
    """
    labels = np.log(Y + 1) if log else Y
    return X, labels, latlon
def transform_B25071(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the B25071 ACS labels.

    No observations are dropped; with a falsy ``log`` the inputs are returned
    exactly as received.
    """
    if log:
        return X, np.log(Y + 1), latlon
    return X, Y, latlon
def transform_B25001(X, Y, latlon, log):
    """Trim extreme B25001 ACS labels and optionally log-transform the rest.

    Observations whose label is at or above the 99.9th percentile of ``Y``
    are dropped. The same mask is applied to ``X``, ``Y`` and ``latlon`` so
    the three returned arrays stay row-aligned.

    Args:
        X: features, indexable by a boolean mask.
        Y: labels.
        latlon: coordinates, indexable by the same boolean mask.
        log: when truthy, replace the kept labels with ``log(Y + 1)``.

    Returns:
        tuple: the filtered ``(X, Y, latlon)``.
    """
    # Drop observations with extremely high (top 0.1%) values:
    upper_end = np.percentile(Y, 99.9)
    valid = ~(Y >= upper_end)
    X = X[valid]
    Y = Y[valid]
    # latlon has to lose the same rows; otherwise the returned coordinates no
    # longer line up with the returned X and Y.
    latlon = latlon[valid]
    if log:
        Y = np.log(Y + 1)
    return X, Y, latlon
def transform_B25002(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the B25002 ACS labels.

    Args:
        X: features, returned as given.
        Y: labels.
        latlon: coordinates, returned as given.
        log: when truthy, replace ``Y`` with ``log(Y + 1)``.

    Returns:
        tuple: ``(X, Y, latlon)``.
    """
    return X, (np.log(Y + 1) if log else Y), latlon
def transform_B25035(X, Y, latlon, log):
    """Convert B25035 ACS labels from year to age, trim, and optionally log.

    ``Y`` is first turned into an age as ``2015 - Y``. Observations whose age
    is at or above the 99.9th percentile are then dropped, using one mask for
    ``X``, ``Y`` and ``latlon`` so the three returned arrays stay row-aligned.

    Args:
        X: features, indexable by a boolean mask.
        Y: labels, given as years.
        latlon: coordinates, indexable by the same boolean mask.
        log: when truthy, replace the kept ages with ``log(age + 1)``.

    Returns:
        tuple: the filtered ``(X, Y, latlon)`` with ``Y`` expressed as age.
    """
    # Transform from year to age
    Y = 2015.00 - Y

    # Drop the top .1% of obs -- buildings in the world just aren't that old.
    # http://www.oldest.org/structures/buildings-america/
    upper_end = np.percentile(Y, 99.9)
    valid = Y < upper_end
    X = X[valid]
    Y = Y[valid]
    # latlon has to lose the same rows; otherwise the returned coordinates no
    # longer line up with the returned X and Y.
    latlon = latlon[valid]
    if log:
        Y = np.log(Y + 1)
    return X, Y, latlon
def transform_B25017(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the B25017 ACS labels.

    ``X`` and ``latlon`` pass through unchanged.
    """
    if not log:
        return X, Y, latlon
    return X, np.log(Y + 1), latlon
def transform_B25077(X, Y, latlon, log):
    """Optionally apply ``log(Y + 1)`` to the B25077 ACS labels.

    Returns the ``(X, Y, latlon)`` triple; only ``Y`` can change, and only
    when ``log`` is truthy.
    """
    labels = np.log(Y + 1) if log else Y
    return X, labels, latlon