forked from dssg/police-eis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_officer_config.yaml
279 lines (250 loc) · 13 KB
/
example_officer_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
################################################
# UChicago DSaPP Police EIS #
# Model experiments #
################################################
# Department this experiment configuration is for.
department_unit: 'department_name'

# Officer table names.
# NOTE(review): schema_feature_blocks presumably names the schema that holds
# the feature blocks - confirm with the consumer.
officer_feature_table_name: 'example_officer_feature_table'
officer_label_table_name: 'example_officer_labels_table'
schema_feature_blocks: 'example_schema_block'

# Whether model objects get stored in a pickle in
# root_path/department_unit/directory.
store_model_object: false
directory: ''
########################
# Comment fields #
########################
# Free-text comment fields; both are empty strings in this example.
model_comment: ''
batch_comment: ''
# NOTE(review): presumably marks the run as a test run - confirm with the consumer.
test_flag: true
########################
# Type of Experiment #
########################
# Type of experiment. Other option: 'dispatch'.
unit: 'officer'
########################
# Temporal parameters #
########################
# First and last day of the data. Kept quoted so the values stay strings
# rather than being resolved to YAML dates.
start_date: '2008-01-01'
end_date: '2016-04-01'

# Prediction window used for officer-level prediction,
# e.g. ['1d', '1w', '1m', '6m', '1y'].
prediction_window:
  - '1y'

# Update window used for updating the model,
# e.g. ['1d', '1w', '1m', '6m', '1y'].
update_window:
  - '1y'

# Window used for training the model.
# NOTE(review): this was documented as "months", but the example value uses
# the same '1y'-style string as the windows above - confirm the expected unit.
training_window:
  - '1y'

# Include officers that have activity in the window preceding the start time
# of test/train.
officer_past_activity_window:
  - '1y'

# Aggregation time for features,
# e.g. ["1 day", "10 days", "1 week", "1 month", "1 year", "5 years"].
timegated_feature_lookback_duration:
  - '1w'
  - '1m'
  - '1y'
  - '5y'
########################
# Labelling Details #
########################
# Label definitions for the experiment.
# Note: the labels below are combined with a logical OR.
# The entries must be indented under officer_labels; without the indentation
# officer_labels parses as null and each label becomes a top-level key.
officer_labels:
  # All allegations.
  AllAllegations: false
  # All sustained allegations.
  SustainedAllegations: false
  # All sustained and unknown outcome allegations.
  SustainedandUnknownOutcomeAllegations: false
  # All allegations of type: {accident, bias, insubordination, gift policy,
  # handling of civs, substance abuse, harassment, force}.
  MajorAllegations: false
  # All allegations of major type that were sustained.
  SustainedMajorAllegations: true
  # All allegations of major type that were sustained or had an unknown outcome.
  SustainedUnknownMajorAllegations: false
  # All allegations of type: {appearance, standard procedures, traffic laws,
  # tardiness, equipment, quality, conditions of employment}.
  MinorAllegations: false
  # All allegations of minor type that were sustained.
  SustainedMinorAllegations: false
  # All allegations of minor type that were sustained or had an unknown outcome.
  # NOTE(review): key is spelled "Unkown"; left unchanged because the consumer
  # may look it up by this exact name - confirm before renaming.
  SustainedUnkownMinorAllegations: false
  # All force allegations.
  ForceAllegations: false
  # All sustained force allegations.
  SustainedForceAllegations: false
  # All sustained and unknown outcome force allegations.
  SustainedandUnknownForceAllegations: false
  # Allegations categorized as "unknown".
  UnknownAllegations: false
  # Allegations categorized as "unknown" and sustained.
  SustainedUnknownAllegations: false
  # Allegations categorized as "unknown" with a final outcome that is
  # sustained or unknown.
  SustainedUnknownUnknownAllegations: false
########################
# Feature selection #
########################
# Feature blocks selected for this experiment; each name matches a block
# header under feature_blocks.
officer_features:
  - 'IncidentsReported'
  - 'IncidentsCompleted'
  - 'OfficerShifts'
  - 'OfficerArrests'
  - 'TrafficStops'
  - 'FieldInterviews'
  - 'Dispatches'
  - 'OfficerCharacteristics'
  - 'DemographicNpaArrests'
  - 'OfficerEmployment'
# Per-block feature switches: block name -> feature name -> boolean.
# The nesting is required: without it the block headers parse as null and
# names such as TrafficStops, FieldInterviews and AllAllegations collide as
# duplicate top-level keys.
# NOTE(review): presumably true enables a feature and false disables it -
# confirm with the consumer.
feature_blocks:
  IncidentsReported:
    SuspensionsOfType: true  # time-gated, the number of suspensions an officer has had
    HoursSuspensionsOfType: true  # time-gated, the number of hours of suspension an officer has had
    InterventionsOfType: true  # time-gated, the number of interventions of each type an officer has had
    AllAllegations: true  # time-gated, number of allegations made against an officer
    IncidentsOfType: true  # time-gated, the number of reported incidents of a certain type
    IncidentsOfSeverity: true  # time-gated, the number of all reported allegations, major or minor
    IncidentsSeverityUnknown: true  # time-gated, the number of all reported allegations, major or minor, with unknown outcome
    ComplaintsTypeSource: true  # time-gated, number of complaints by source an officer had
    Complaints: true  # time-gated, number of complaints an officer had
    DaysSinceLastAllegation: true  # the number of days since the last allegation was made against the officer
  IncidentsCompleted:
    IncidentsByOutcome: true  # time-gated, the number of incidents by type of outcome
    MajorIncidentsByOutcome: true  # time-gated, the number of major incidents by type of outcome
    MinorIncidentsByOutcome: true  # time-gated, the number of minor incidents by type of outcome
    DaysSinceLastSustainedAllegation: true  # time-gated, the number of days since the last sustained allegation was made against the officer
  OfficerShifts:
    ShiftsOfType: true  # time-gated, the number of shifts by categorical type
    HoursPerShift: true  # time-gated, the average number of hours per shift
  OfficerArrests:
    ArrestMonthlyVariance: true  # time-gated, month-by-month variance of arrest counts
    ArrestMonthlyCOV: false  # time-gated, month-by-month coefficient of variation in arrest counts. TODO: in a collate branch
    Arrests: true  # time-gated, number of arrests made by an officer
    ArrestsOfType: true  # number of time-gated arrests by categorical type
    ArrestsON: true  # number of time-gated arrests by day of week
    SuspectsArrestedOfRace: true  # number of suspects arrested by race type, time-gated periods
    SuspectsArrestedOfEthnicity: true  # number of suspects arrested by ethnicity type, time-gated periods
  TrafficStops:
    TrafficStopsWithSearch: true  # time-gated
    TrafficStopsWithUseOfForce: true  # time-gated
    TrafficStops: true  # time-gated
    TrafficStopsWithArrest: true  # time-gated
    TrafficStopsWithInjury: true  # time-gated
    TrafficStopsWithOfficerInjury: true  # time-gated
    TrafficStopsWithSearchRequest: true  # time-gated
    TrafficStopsByRace: true  # time-gated, categorical by stopped person's race
    TrafficStopsByStopType: true  # time-gated, categorical
    TrafficStopsByStopResult: true  # time-gated, categorical
    TrafficStopsBySearchReason: true
    TrafficStopsByInterestingSearch: false  # TODO: create text features
  FieldInterviews:
    FieldInterviews: true
    HourOfFieldInterviews: true
    ModeHourOfFieldInterviews: true
    FieldInterviewsByRace: true
    FieldInterviewsByOutcome: true
    FieldInterviewsWithFlag: true
    InterviewsType: true
  # TODO block; not listed in officer_features.
  UseOfForce:
    UsesOfForceOfType: true  # the number of uses of force by type of force over time-gated periods
    UnjustifiedUsesOfForceOfType: true  # the number of unjustified uses of force by type over time-gated periods
    UnjustUOFInterventionsOfType: true  # number of interventions of type X following an unjustified force, time-gated
    UOFwithSuspectInjury: false  # number of uses of force by whether the suspect was injured, time-gated. TODO
    SuspectInjuryToUOFRatio: false  # ratio of suspect injuries to uses of force that an officer has, time-gated. TODO
  Dispatches:
    DispatchType: true  # number of dispatches of different type aggregated over time
    # NOTE(review): key is spelled "Initiatiation"; left unchanged because the
    # consumer may look it up by this exact name - confirm before renaming.
    DispatchInitiatiationType: true
  # TODO block; not listed in officer_features.
  EISAlerts:
    EISInterventionsOfType: true
    FractionEISFlagsWithIntervention: false  # TODO
    EISFlagsOfType: true
  OfficerCharacteristics:
    # The marital status of the officer. The problem here is that it comes
    # without a date, e.g. training in the past as if an officer was always
    # married or divorced.
    DummyOfficerMarital: false
    DummyOfficerGender: true  # officer gender code
    DummyOfficerRace: true  # officer race code
    DummyOfficerEthnicity: true  # officer ethnicity code
    OfficerAge: true  # age of officer in years
    DummyOfficerEducation: true  # officer education level
    MilesFromPost: false  # number of miles to post. TODO
    DummyOfficerMilitary: true  # whether or not the officer has had military experience
    AcademyScore: true  # performance score at the police academy
    DummyOfficerRank: true  # officer rank
  OfficerEmployment:
    OutsideEmploymentHours: true  # extra duty
  # Not listed in officer_features.
  Other:
    CountUOFwithResistingArrest: true  # number of uses of force by whether the suspect resisted arrest, time-gated
    ResistingArrestToUOFRatio: false  # ratio of resisting arrest to uses of force that an officer has, time-gated
    ComplaintToArrestRatio: false  # the ratio of complaints per arrest
    ComplaintsPerHourWorked: false  # the rate of complaints per hour worked
    UOFtoArrestRatio: true  # ratio of uses of force per arrest, time-gated
    ComplimentsToComplaintsRatio: false  # ratio of internal compliments to complaints an officer has
  # Not listed in officer_features.
  Demographic_Features:
    OfficerAvgArrestDemographics_1: false
    OfficerAvgArrestDemographics_2: false
    OfficerAvgDispatchDemographics_1: false
    OfficerAvgDispatchDemographics_2: false
    OfficerAvgStopsDemographics_1: false
    OfficerAvgStopsDemographics_2: false
  DemographicNpaArrests:
    Arrests311Call: true
    Arrests311Requests: true
    PopulationDensity: true
    AgeOfResidents: true
    BlackPopulation: true
    HouseholdIncome: true
    EmploymentRate: true
    VacantLandArea: true
    VoterParticipation: true
    AgeOfDeath: true
    HousingDensity: true
    NuisanceViolations: true
    ViolentCrimeRate: true
    PropertyCrimeRate: true
    SidewalkAvailability: true
    Foreclosures: true
    DisorderCallRate: true
########################
# Model selection #
########################
# All model types:
#   'RandomForest', 'RandomForestBagging', 'RandomForestBoosting', 'ExtraTrees',
#   'AdaBoost', 'LogisticRegression', 'SVM', 'GradientBoostingClassifier',
#   'DecisionTreeClassifier', 'SGDClassifier', 'KNeighborsClassifier'
# Model types selected for this experiment:
model:
  - 'RandomForest'
  - 'ExtraTrees'
  - 'LogisticRegression'
# Hyperparameter values per model type: model name -> parameter -> list of
# candidate values. Trailing comments list alternative values that are not
# currently active.
# The nesting is required: without it every model header parses as null and
# repeated names (n_estimators, max_depth, random_state, ...) become duplicate
# top-level keys, of which most parsers silently keep only the last.
# NOTE(review): presumably each list is expanded into a grid per model -
# confirm with the consumer.
parameters:
  RandomForest:
    n_estimators: [10000]  # [50, 100, 1000, 10000, 50000]
    max_depth: [2, 5]  # [100, 5, 10, 50]
    max_features: ['log2']  # ['log2', 2, 4, 8, 16, "auto"]
    criterion: ['gini']  # ['entropy']
    min_samples_split: [2, 5]  # [2, 5, 10]
    random_state: [2193]
  RandomForestBagging:
    n_estimators: [10]  # [25, 50, 100, 1000, 10000]
    max_depth: [5]  # [10, 20, 50, 100]
    max_features: ['sqrt']  # ['log2', 2, 4, 8, 16, "auto"]
    criterion: ['gini']  # ['entropy']
    min_samples_split: [2]  # [5, 10]
    max_samples: [0.5]  # [1.0]
    bootstrap: [true]
    bootstrap_features: [false]  # [True]
    n_estimators_bag: [10]  # [25, 50, 100, 1000, 10000]
    max_features_bag: [2]  # [4, 8, 16]
    random_state: [2193]
  RandomForestBoosting:
    n_estimators: [100]  # [25, 50, 100, 1000, 10000]
    max_depth: [20]  # [10, 20, 50, 100]
    max_features: [2]  # ['sqrt', 'log2', 2, 4, 8, 16, "auto"]
    criterion: ['gini']  # ['entropy']
    min_samples_split: [2]  # [5, 10]
    algorithm: ['SAMME']  # ['SAMME.R']
    learning_rate: [0.01]  # [0.1, 1, 10, 100]
    n_estimators_boost: [10]  # [25, 50, 100, 1000, 10000]
    random_state: [2193]
  ExtraTrees:
    n_estimators: [10000]  # [25, 50, 100, 1000, 10000, 50000, 100000]
    max_depth: [2, 5]  # [5, 10, 50, 100]
    max_features: ['log2']  # [4, 8, 16, "auto"]
    criterion: ['gini']  # ['entropy']
    min_samples_split: [2, 5]  # [2, 5, 10]
    random_state: [2193]
  AdaBoost:
    algorithm: ['SAMME', 'SAMME.R']
    n_estimators: [1, 10, 100]  # [1000, 10000]
    learning_rate: [0.01, 0.1, 1, 10, 100]
    random_state: [2193]
  LogisticRegression:
    C_reg: [0.001, 0.01, 1]  # [1, 10]
    penalty: ['l1', 'l2']
    random_state: [2193]
  SVM:
    C_reg: [0.00001, 0.0001, 0.001, 0.01, 0.1]  # [1, 10]
    kernel: ['linear']
    random_state: [2193]
  GradientBoostingClassifier:
    n_estimators: [1, 10, 100]  # [1000, 10000]
    learning_rate: [0.001, 0.01, 0.05, 0.1, 0.5]
    subsample: [0.1, 0.5, 1.0]
    max_depth: [1, 3, 5, 10, 20]  # [50, 100]
    random_state: [2193]
  DecisionTreeClassifier:
    criterion: ['gini', 'entropy']
    max_depth: [1, 5, 10, 20]  # [50, 100]
    max_features: ['sqrt', 'log2']
    min_samples_split: [2, 5, 10]
    random_state: [2193]
  SGDClassifier:
    loss: ['log', 'modified_huber']
    penalty: ['l1', 'l2', 'elasticnet']
    random_state: [2193]
  KNeighborsClassifier:
    n_neighbors: [1, 3, 5, 10, 25, 50, 100]
    weights: ['uniform', 'distance']
    algorithm: ['auto', 'kd_tree']
########################
# Parallelization #
########################
# Number of CPUs to use for parallelization.
n_cpus: 4

########################
#    Auditing hooks    #
########################
# Set to true if you want auditing performed.
auditing: false
# NOTE(review): presumably the location used for audits - confirm with the consumer.
audits: 'audits/'