# config.yml
# Pipeline configuration: Spark session settings, input/output paths,
# column-name mappings, survey column types, feature parameters, and
# per-model hyperparameter grids.

# Spark settings. The nested keys under `spark` spell out dotted property
# names (app.name, sql.files.maxPartitionBytes, driver.memory,
# driver.maxResultSize).
spark:
  app:
    name: "mm"
  sql:
    files:
      # 67108864 bytes = 64 MiB.
      maxPartitionBytes: 67108864
  driver:
    memory: "8g"
    maxResultSize: "2g"
  # NOTE(review): presumably the Spark log level, nested under `spark`;
  # confirm against the code that reads this key.
  loglevel: "ERROR"

path:
  input_data:
    # TODO: replace with absolute paths, here and under `working`.
    directory_path: "synthetic_data/"
    # File names for each input dataset; `null` means no file is configured.
    # Presumably resolved relative to `directory_path` - verify against the
    # loader.
    file_paths:
      antennas: 'antennas.csv'
      cdr: 'cdr.csv'
      fairness: 'fairness.csv'
      home_ground_truth: 'home_locations.csv'
      labels: 'labels.csv'
      mobiledata: 'mobiledata.csv'
      mobilemoney: 'mobilemoney.csv'
      population: 'population_tgo_2019-07-01.tif'
      poverty_scores: null
      recharges: 'recharges.csv'
      # NOTE(review): this is the only entry with a leading '/'. If the
      # loader joins paths (rather than concatenating strings) this would be
      # treated as an absolute path - confirm this is intended.
      rwi: '/TGO_relative_wealth_index.csv'
      shapefiles:
        regions: 'regions.geojson'
        cantons: 'cantons.geojson'
        prefectures: 'prefectures.geojson'
      survey: 'survey.csv'
      targeting: 'targeting.csv'
      user_consent: null
  working:
    directory_path: "../outputs/"

# Maps each logical column name (key) to the column name used in the
# corresponding input dataset (value).
col_names:
  cdr:
    txn_type: "txn_type"
    caller_id: "caller_id"
    recipient_id: "recipient_id"
    timestamp: "timestamp"
    duration: "duration"
    caller_antenna: "caller_antenna"
    recipient_antenna: "recipient_antenna"
    international: "international"
  antennas:
    antenna_id: "antenna_id"
    tower_id: "tower_id"
    latitude: "latitude"
    longitude: "longitude"
  recharges:
    caller_id: "caller_id"
    amount: "amount"
    timestamp: "timestamp"
  mobiledata:
    caller_id: "caller_id"
    volume: "volume"
    timestamp: "timestamp"
  mobilemoney:
    txn_type: "txn_type"
    caller_id: "caller_id"
    recipient_id: "recipient_id"
    timestamp: "timestamp"
    amount: "amount"
    sender_balance_before: "sender_balance_before"
    sender_balance_after: "sender_balance_after"
    recipient_balance_before: "recipient_balance_before"
    recipient_balance_after: "recipient_balance_after"
  # The value matches one of the `shapefiles` keys above.
  # NOTE(review): assumed to belong under `col_names`; confirm placement.
  geo: 'cantons'

# Survey columns grouped by variable type.
col_types:
  survey:
    continuous: ["con0", "con1", "con2", "con3", "con4", "con5", "con6", "con7", "con8", "con9"]
    categorical: ["cat0", "cat1", "cat2", "cat3", "cat4", "cat5", "cat6", "cat7", "cat8", "cat9"]
    binary: ["bin0", "bin1", "bin2", "bin3", "bin4", "bin5", "bin6", "bin7", "bin8", "bin9"]

params:
  cdr:
    # NOTE(review): presumably day-of-week numbers counted as weekend, and
    # hours of the day bounding "daytime" - confirm the numbering
    # convention with the consumer.
    weekend: [1, 7]
    start_of_day: 7
    end_of_day: 19
  home_location:
    filter_hours: null
  automl:
    autogluon:
      time_limit: 300
      eval_metric: 'r2'
      label: 'label'
      sample_weight: 'weight'
  # NOTE(review): assumed to belong under `params`; confirm placement.
  opt_in_default: false

# Hyperparameter grids, one mapping per model. Keys use the
# `<step>__<parameter>` form.
# NOTE: the `!!python/tuple` tags are Python-specific and are rejected by
# safe YAML loaders; this file must be read with a loader that supports them.
hyperparams:
  linear:
    dropmissing__threshold: [0.9, 1]
    droplowvariance__threshold: [0, 0.01]
    winsorizer__limits: [!!python/tuple [0.0, 1.0], !!python/tuple [0.005, 0.995]]
  lasso:
    dropmissing__threshold: [0.9, 1]
    droplowvariance__threshold: [0, 0.01]
    winsorizer__limits: [!!python/tuple [0.0, 1.0], !!python/tuple [0.005, 0.995]]
    model__alpha: [0.001, 0.01, 0.05, 0.03, 0.1]
  ridge:
    dropmissing__threshold: [0.9, 1]
    droplowvariance__threshold: [0, 0.01]
    winsorizer__limits: [!!python/tuple [0.0, 1.0], !!python/tuple [0.005, 0.995]]
    model__alpha: [0.001, 0.01, 0.05, 0.03, 0.1]
  randomforest:
    dropmissing__threshold: [0.9, 1]
    droplowvariance__threshold: [0, 0.01]
    winsorizer__limits: [!!python/tuple [0.0, 1.0], !!python/tuple [0.005, 0.995]]
    model__max_depth: [2, 4, 6, 8, 10]
    model__n_estimators: [50, 100, 200]
  gradientboosting:
    dropmissing__threshold: [0.99]
    droplowvariance__threshold: [0.01]
    winsorizer__limits: [!!python/tuple [0.0, 1.0], !!python/tuple [0.005, 0.995]]
    model__min_data_in_leaf: [10, 20, 50]
    model__num_leaves: [5, 10, 20]
    model__learning_rate: [0.05, 0.075, 0.1]
    model__n_estimators: [50, 100, 200]