-
Notifications
You must be signed in to change notification settings - Fork 9
/
config_emily.yml
141 lines (132 loc) · 4 KB
/
config_emily.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Spark session settings.
# NOTE(review): this copy of the file had lost all indentation, which made
# `spark`, `files` and `driver` null and hoisted their children to top level.
# The nesting below is reconstructed from the key names - confirm against the
# code that reads this config.
spark:
  app_name: "mm"
  files:
    max_partition_bytes: 67108864  # 64 * 1024 * 1024
  driver:
    memory: "50g"
    max_result_size: "2g"
  # Presumably a sibling of `files`/`driver` rather than a child of `driver`;
  # TODO confirm.
  loglevel: "ERROR"
# Input/output locations, one sub-section per pipeline module.
# NOTE(review): this copy of the file had lost all indentation, so `data`,
# `outputs`, `file_names`, `cdr`, `antennas`, `shapefiles` and `prefectures`
# were duplicate top-level keys (most parsers silently keep only the last one).
# The nesting below is reconstructed from the key names - confirm against the
# code that reads this config.
path:
  # Absolute, user-specific directory; must be adapted per machine.
  wd: "/home/em/cider/"
  survey:
    data: "../synthetic_data"
    outputs: "../tests/outputs/survey"
    file_names:
      survey: "/survey.csv"
  featurizer:
    data: "../synthetic_data"
    outputs: "../tests/outputs/featurizer"
    file_names:
      cdr: '/cdr.csv'
      antennas: '/antennas.csv'
      recharges: '/recharges.csv'
      mobiledata: '/mobiledata.csv'
      mobilemoney: '/mobilemoney.csv'
      # Assumed to live under `file_names`; TODO confirm.
      shapefiles:
        regions: '/regions.geojson'
        prefectures: '/prefectures.geojson'
  ml:
    # Unlike the other sections, these are absolute paths.
    outputs: "/home/em/cideroutputs/machinelearning"
    features: '/data/togo_anon/feats/survey_combos/survey2020_cdr2020_cider.csv'
    # NOTE(review): directory says survey2018 but the file says survey2020 -
    # verify this is the intended labels file.
    labels: "/data/togo_anon/surveys/survey2018/survey2020_labels_cider.csv"
  home_location:
    data: "../synthetic_data"
    outputs: "../tests/outputs/homelocation"
    file_names:
      cdr: '/cdr.csv'
      antennas: '/antennas.csv'
      shapefiles:
        prefectures: '/prefectures.geojson'
      # Assumed to be siblings of `shapefiles` under `file_names`; TODO confirm.
      poverty_scores: null
      groundtruth: '/home_locations.csv'
  targeting:
    # NOTE(review): these use "../../" while the sections above use "../".
    data: "../../synthetic_data"
    outputs: "../../tests/outputs/targeting"
    file_names:
      data: '/targeting.csv'
  fairness:
    data: "../../synthetic_data"
    outputs: "../../tests/outputs/fairness"
    file_names:
      data: '/fairness.csv'
# Column names per input dataset; each key maps to the column name given as
# its value.
# NOTE(review): this copy of the file had lost all indentation, so `txn_type`,
# `caller_id`, `recipient_id`, `timestamp` and `amount` were duplicate
# top-level keys (most parsers silently keep only the last one). The nesting
# below is reconstructed from the key names - confirm against the code that
# reads this config.
col_names:
  cdr:
    txn_type: "txn_type"
    caller_id: "caller_id"
    recipient_id: "recipient_id"
    timestamp: "timestamp"
    duration: "duration"
    caller_antenna: "caller_antenna"
    recipient_antenna: "recipient_antenna"
    international: "international"
  antennas:
    antenna_id: "antenna_id"
    tower_id: "tower_id"
    latitude: "latitude"
    longitude: "longitude"
  recharges:
    caller_id: "caller_id"
    amount: "amount"
    timestamp: "timestamp"
  mobiledata:
    caller_id: "caller_id"
    volume: "volume"
    timestamp: "timestamp"
  mobilemoney:
    txn_type: "txn_type"
    caller_id: "caller_id"
    recipient_id: "recipient_id"
    timestamp: "timestamp"
    amount: "amount"
    sender_balance_before: "sender_balance_before"
    sender_balance_after: "sender_balance_after"
    recipient_balance_before: "recipient_balance_before"
    recipient_balance_after: "recipient_balance_after"
  # Assumed to be a direct child of `col_names` (not of `mobilemoney`);
  # TODO confirm.
  geo: 'tower_id'
# Survey columns grouped by variable type.
# NOTE(review): this copy of the file had lost all indentation, which made
# `col_types` and `survey` null and hoisted the three lists to top level.
# The nesting below is reconstructed from the key names - confirm against the
# code that reads this config.
col_types:
  survey:
    continuous: [
      "con0", "con1", "con2", "con3", "con4",
      "con5", "con6", "con7", "con8", "con9",
    ]
    categorical: [
      "cat0", "cat1", "cat2", "cat3", "cat4",
      "cat5", "cat6", "cat7", "cat8", "cat9",
    ]
    binary: [
      "bin0", "bin1", "bin2", "bin3", "bin4",
      "bin5", "bin6", "bin7", "bin8", "bin9",
    ]
# Module parameters.
# NOTE(review): this copy of the file had lost all indentation, which made
# `params`, `home_location`, `automl`, `autosklearn` and `autogluon` null and
# hoisted their children to top level. The nesting below is reconstructed from
# the key names - confirm against the code that reads this config.
params:
  home_location:
    filter_hours: null
  automl:
    autosklearn:
      time_left: 3600
      # Was a bare `n_jobs:` (which also parses as null); written explicitly so
      # the missing value is visibly intentional.
      n_jobs: null
      memory_limit: 3072
    autogluon:
      time_limit: 3600
      eval_metric: 'r2'
      label: 'label'
      sample_weight: 'weight'
# Hyperparameter candidates per model; keys use `<step>__<parameter>` naming.
# NOTE(review): this copy of the file had lost all indentation, so the
# per-model grids collapsed into duplicate top-level keys (e.g. only the last
# `dropmissing__threshold` would survive in most parsers). The nesting below is
# reconstructed from the key names - confirm against the code that reads this
# config. Float spellings are normalised (`0.` -> `0.0`, `.995` -> `0.995`);
# the values are unchanged.
hyperparams:
  linear:
    dropmissing__threshold: [0.9, 1]
    droplowvariance__threshold: [0, 0.01]
    winsorizer__limits: [[0.0, 1.0], [0.005, 0.995]]
  lasso:
    dropmissing__threshold: [0.9, 1]
    droplowvariance__threshold: [0, 0.01]
    winsorizer__limits: [[0.0, 1.0], [0.005, 0.995]]
    model__alpha: [0.001, 0.01, 0.05, 0.03, 0.1]
  ridge:
    dropmissing__threshold: [0.9, 1]
    droplowvariance__threshold: [0, 0.01]
    winsorizer__limits: [[0.0, 1.0], [0.005, 0.995]]
    model__alpha: [0.001, 0.01, 0.05, 0.03, 0.1]
  randomforest:
    dropmissing__threshold: [0.9, 1]
    droplowvariance__threshold: [0, 0.01]
    winsorizer__limits: [[0.0, 1.0], [0.005, 0.995]]
    model__max_depth: [2, 4, 6, 8, 10]
    model__n_estimators: [50, 100, 200]
  gradientboosting:
    # Single-value lists here, unlike the two-value lists used above.
    dropmissing__threshold: [0.99]
    droplowvariance__threshold: [0.01]
    winsorizer__limits: [[0.0, 1.0], [0.005, 0.995]]
    model__min_data_in_leaf: [10, 20, 50]
    model__num_leaves: [5, 10, 20]
    model__learning_rate: [0.05, 0.075, 0.1]
    model__n_estimators: [50, 100, 200]