diff --git a/eis/features/officers.py b/eis/features/officers.py index b29d18e3..bef9ef27 100644 --- a/eis/features/officers.py +++ b/eis/features/officers.py @@ -15,6 +15,364 @@ time_format = "%Y-%m-%d %X" +##################################################################### +##### ETL FEATURES ##### +##################################################################### + +class ETLdummyfeature1(abstract.OfficerFeature): + def __init__(self, **kwargs): + abstract.OfficerFeature.__init__(self, **kwargs) + self.description = ("Dummy feature for testing 2016 schema") + self.num_features = 1 + self.name_of_features = ["DummyFeature"] + self.query = ("SELECT officer_id, COUNT(event_type_code) " + "FROM events_hub " + "WHERE event_type_code = 4 " + "GROUP BY officer_id") + +class ETL_ArrestOnlyResist(abstract.TimeGatedOfficerFeature): + def __init__(self, **kwargs): + abstract.TimeGatedOfficerFeature.__init__(self, **kwargs) + self.description = ("Number of officer arrests where the only charge was resisting or evading, time-gated") + self.query = ("UPDATE features.{0} feature_table " + "SET {1} = staging_table.count " + "FROM (SELECT officers_hub.officer_id, count( distinct arrests.arr_nbr) " + "FROM etl.arrests " + "LEFT JOIN ( " + "SELECT arr_nbr, " + "true AS no_resist_evade " + "FROM etl.arrests resist_evade_charges " + "WHERE NOT lower(w_chgdesc) SIMILAR TO '%%(resist|evad)%%' " + " ) AS no_resist_evade_charges " + "ON arrests.arr_nbr = no_resist_evade_charges.arr_nbr " + "FULL JOIN staging.officers_hub " + "ON cast( arrests.anonid as text)=department_defined_officer_id " + "WHERE no_resist_evade is null " + "AND arr_date <= '{2}'::date " + "AND arr_date >= '{2}'::date - interval '{3}' " + "GROUP BY officer_id, arrests.arr_nbr " + " ) AS staging_table " + "WHERE feature_table.officer_id = staging_table.officer_id " + "AND feature_table.fake_today = '{2}'::date" + .format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION )) + self.set_null_counts_to_zero = True + +class ETL_YearsOfService(abstract.OfficerFeature): + def __init__(self, **kwargs): + abstract.OfficerFeature.__init__(self, **kwargs) + self.description = ("Officer's years of service") + self.num_features = 1 + self.name_of_features = ["ETLYearsOfService"] + self.query = ("UPDATE features.{} feature_table " + "SET {} = staging_table.years_service " + "FROM ( SELECT officer_id, years_service " + " FROM etl.officers " + " JOIN staging.officers_hub " + " ON cast( anonid as text)=department_defined_officer_id " + " ) AS staging_table " + "WHERE feature_table.officer_id = staging_table.officer_id " + .format( self.table_name, + self.feature_name ) ) + +class ETL_NumberTransfers(abstract.TimeGatedOfficerFeature): + def __init__(self, **kwargs): + abstract.TimeGatedOfficerFeature.__init__(self, **kwargs) + self.description = ("Number of officer transfers, time-gated") + self.query = ("UPDATE features.{0} feature_table " + "SET {1} = staging_table.count " + "FROM ( SELECT officer_id, count(officer_id) " + " FROM etl.transfers " + " FULL JOIN staging.officers_hub " + " ON cast( anonid as text)=department_defined_officer_id " + " WHERE startdate <= '{2}'::date " + " AND startdate >= '{2}'::date - interval '{3}' " + " AND officer_id is not null " + " GROUP BY officer_id " + " ) AS staging_table " + "WHERE feature_table.officer_id = staging_table.officer_id " + "AND feature_table.fake_today = '{2}'::date" + .format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION )) + self.set_null_counts_to_zero = True + +class ETL_NumberTransfersNotOnNewYears(abstract.TimeGatedOfficerFeature): + def __init__(self, **kwargs): + abstract.TimeGatedOfficerFeature.__init__(self, **kwargs) + self.description = ("Number of officer transfers not starting on New Years, YYYY-01-01, time-gated") + self.query = ("UPDATE features.{0} feature_table " + "SET {1} = staging_table.count " + "FROM ( SELECT officer_id, count(officer_id) " + " FROM etl.transfers " + " FULL JOIN staging.officers_hub " + " ON cast( anonid as text)=department_defined_officer_id " + " WHERE extract(doy from startdate) != 1 " + " AND startdate <= '{2}'::date " + " AND startdate >= '{2}'::date - interval '{3}' " + " AND officer_id is not null " + " GROUP BY officer_id " + " ) AS staging_table " + "WHERE feature_table.officer_id = staging_table.officer_id " + "AND feature_table.fake_today = '{2}'::date" + .format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION )) + self.set_null_counts_to_zero = True + +class ETL_NumberTransfersNotInJanuary(abstract.TimeGatedOfficerFeature): + def __init__(self, **kwargs): + abstract.TimeGatedOfficerFeature.__init__(self, **kwargs) + self.description = ("Number of officer transfers not starting in January, time-gated") + self.query = ("UPDATE features.{0} feature_table " + "SET {1} = staging_table.count " + "FROM ( SELECT officer_id, count(officer_id) " + " FROM etl.transfers " + " FULL JOIN staging.officers_hub " + " ON cast( anonid as text)=department_defined_officer_id " + " WHERE extract(month from startdate) != 01 " + " AND startdate <= '{2}'::date " + " AND startdate >= '{2}'::date - interval '{3}' " + " AND officer_id is not null " + " GROUP BY officer_id " + " ) AS staging_table " + "WHERE feature_table.officer_id = staging_table.officer_id " + "AND feature_table.fake_today = '{2}'::date" + .format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION )) + self.set_null_counts_to_zero = True + +class ETL_NumberTransfersLessThanOneYear(abstract.TimeGatedOfficerFeature): + def __init__(self, **kwargs): + abstract.TimeGatedOfficerFeature.__init__(self, **kwargs) + self.description = ("Number of officer transfers less than one year in length, time-gated") + self.query = ("UPDATE features.{0} feature_table " + "SET {1} = staging_table.count " + "FROM ( SELECT officer_id, count(officer_id) " + " FROM etl.transfers " + " FULL JOIN staging.officers_hub " + " ON cast( anonid as text)=department_defined_officer_id " + " WHERE extract(year from age(enddate, startdate )) < 1 " + " AND startdate <= '{2}'::date " + " AND startdate >= '{2}'::date - interval '{3}' " + " AND officer_id is not null " + " GROUP BY officer_id " + " ) AS staging_table " + "WHERE feature_table.officer_id = staging_table.officer_id " + "AND feature_table.fake_today = '{2}'::date" + .format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION )) + self.set_null_counts_to_zero = True + +class ETL_NumberTransfersLessThanOneMonth(abstract.TimeGatedOfficerFeature): + def __init__(self, **kwargs): + abstract.TimeGatedOfficerFeature.__init__(self, **kwargs) + self.description = ("Number of officer transfers less than one month in length, time-gated") + self.query = ("UPDATE features.{0} feature_table " + "SET {1} = staging_table.count " + "FROM ( SELECT officer_id, count(officer_id) " + " FROM etl.transfers " + " FULL JOIN staging.officers_hub " + " ON cast( anonid as text)=department_defined_officer_id " + " WHERE extract(year from age(enddate, startdate )) < 1 " + " AND extract(month from age(enddate, startdate )) < 1 " + " AND startdate <= '{2}'::date " + " AND startdate >= '{2}'::date - interval '{3}' " + " AND officer_id is not null " + " GROUP BY officer_id " + " ) AS staging_table " + "WHERE feature_table.officer_id = staging_table.officer_id " + "AND feature_table.fake_today = '{2}'::date" + .format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION )) + self.set_null_counts_to_zero = True + + + +##### charges that were dismissed. +### Any charges with the dispositions below are considered dismissed +### in other words, these are charges where no trial has or will take place. +### The selection of these categories should be varified with MNPD +### 'DISMISSED ROS', +### 'DISMISSED - COSTS TO PROSECUTOR', +### 'RETIRED ON COSTS', +### 'DISMISSED ON COST', +### 'RETIRED', +### 'NOT GUILTY - REASON OF INSANITY', +### 'NOLLE PROSEQUI', +### 'NO TRUE BILL' +#################################### +class ETL_ChargesDismissed(abstract.TimeGatedOfficerFeature): + def __init__(self, **kwargs): + abstract.TimeGatedOfficerFeature.__init__(self, **kwargs) + self.description = ("Proportion of arrested charges that were dismissed, time-gated") + self.query = """ + UPDATE features.{0} feature_table + SET {1} = staging_table.propdismissed + FROM (SELECT officers_hub.officer_id, + case when COUNT(disposition_desc) > 0 then + SUM(CASE WHEN disposition_desc IN ( + 'DISMISSED ROS', + 'DISMISSED - COSTS TO PROSECUTOR', + 'RETIRED ON COSTS', + 'DISMISSED ON COST', + 'RETIRED', + 'NOT GUILTY - REASON OF INSANITY', + 'NOLLE PROSEQUI', + 'NO TRUE BILL' + ) THEN 1 ELSE 0 END) / COUNT(disposition_desc)::float + when COUNT(disposition_desc) = 0 then 0 + end as propDismissed + FROM etl.arrests + FULL JOIN staging.officers_hub + ON cast( arrests.anonid as text)=department_defined_officer_id + WHERE arr_date <= '{2}'::date + AND arr_date >= '{2}'::date - interval '{3}' + group by officer_id + ) AS staging_table + WHERE feature_table.officer_id = staging_table.officer_id + AND feature_table.fake_today = '{2}'::date + """.format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION ) + self.set_null_counts_to_zero = True + +class ETL_NumberDispatchedInitiatedBy(abstract.TimeGatedCategoricalOfficerFeature): + def __init__(self, **kwargs): + self.categories = { "No": "None", + "Schedule": "Schedule", + "Phone": "Phone", + "Mobile": "Mobile", + "Field": "Field", + "911": "911" } + abstract.TimeGatedCategoricalOfficerFeature.__init__(self, **kwargs) + self.description = ("Number dispatches by initiation source over time gated periods") + self.query = ("UPDATE features.{0} feature_table " + "SET {1} = staging_table.count " + "FROM ( SELECT officer_id, count(officer_id) " + " FROM etl.dispatch " + " FULL JOIN staging.officers_hub " + " ON cast( anonid as text)=department_defined_officer_id " + " WHERE init_source like '%%{4}%%' " + " AND call_rec <= '{2}'::date " + " AND call_rec >= '{2}'::date - interval '{3}' " + " AND officer_id is not null " + " GROUP BY officer_id " + " ) AS staging_table " + "WHERE feature_table.officer_id = staging_table.officer_id " + "AND feature_table.fake_today = '{2}'::date" + .format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION, + self.LOOKUPCODE )) + self.set_null_counts_to_zero = True + +##### charges that resulted in some disposition like guilty +### Any charges with the dispositions below are guilty or similar to guilty +### The selection of these categories should be varified with MNPD +# 'GUILTY', +# 'PROBATION', +# 'GUILTY PLEA - LESSER CHARGE`', +# 'NOLO CONTENDERE', +# 'CONCLUDE PROBATION', +# 'NOLO CONTENDERE 40-35-313', +# 'PRE-TRIAL DIVERSION (40-15-105)', +# 'GUILTY AFTER TRIAL', +# 'PROBATION VIOLATION' +#################################### +class ETL_ChargesQuasiGuilty(abstract.TimeGatedOfficerFeature): + def __init__(self, **kwargs): + abstract.TimeGatedOfficerFeature.__init__(self, **kwargs) + self.description = ("Proportion of arrested charges that were found quasi-guilty, time-gated") + self.query = """ + UPDATE features.{0} feature_table + SET {1} = staging_table.propdismissed + FROM (SELECT officers_hub.officer_id, + case when COUNT(disposition_desc) > 0 then + SUM(CASE WHEN disposition_desc IN ( + 'GUILTY', + 'PROBATION', + 'GUILTY PLEA - LESSER CHARGE`', + 'NOLO CONTENDERE', + 'CONCLUDE PROBATION', + 'NOLO CONTENDERE 40-35-313', + 'PRE-TRIAL DIVERSION (40-15-105)', + 'GUILTY AFTER TRIAL', + 'PROBATION VIOLATION' + ) THEN 1 ELSE 0 END) / COUNT(disposition_desc)::float + when COUNT(disposition_desc) = 0 then 0 + end as propDismissed + FROM etl.arrests + FULL JOIN staging.officers_hub + ON cast( arrests.anonid as text)=department_defined_officer_id + WHERE arr_date <= '{2}'::date + AND arr_date >= '{2}'::date - interval '{3}' + group by officer_id + ) AS staging_table + WHERE feature_table.officer_id = staging_table.officer_id + AND feature_table.fake_today = '{2}'::date + """.format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION ) + self.set_null_counts_to_zero = True + +##### charges that resulted in some disposition not guilty +### Any charges with the dispositions below are not guilty +### The selection of these categories should be varified with MNPD +# 'NOT GUILTY', +# 'NOT GUILTY - REASON OF INSANITY' +#################################### +class ETL_ChargesNotGuilty(abstract.TimeGatedOfficerFeature): + def __init__(self, **kwargs): + abstract.TimeGatedOfficerFeature.__init__(self, **kwargs) + self.description = ("Proportion of arrested charges that were found not guilty, time-gated") + self.query = """ + UPDATE features.{0} feature_table + SET {1} = staging_table.propdismissed + FROM (SELECT officers_hub.officer_id, + case when COUNT(disposition_desc) > 0 then + SUM(CASE WHEN disposition_desc IN ( + 'NOT GUILTY', + 'NOT GUILTY - REASON OF INSANITY' + ) THEN 1 ELSE 0 END) / COUNT(disposition_desc)::float + when COUNT(disposition_desc) = 0 then 0 + end as propDismissed + FROM etl.arrests + FULL JOIN staging.officers_hub + ON cast( arrests.anonid as text)=department_defined_officer_id + WHERE arr_date <= '{2}'::date + AND arr_date >= '{2}'::date - interval '{3}' + group by officer_id + ) AS staging_table + WHERE feature_table.officer_id = staging_table.officer_id + AND feature_table.fake_today = '{2}'::date + """.format( self.table_name, + self.COLUMN, + self.fake_today.strftime(time_format), + self.DURATION ) + self.set_null_counts_to_zero = True + + + + +##################################################################### +##### STAGING FEATURES ##### +##################################################################### + + ### Officer labels. class LabelSustained(abstract.OfficerFeature): def __init__(self, **kwargs): diff --git a/example_officer_config.yaml b/example_officer_config.yaml index 00228f21..beb72542 100644 --- a/example_officer_config.yaml +++ b/example_officer_config.yaml @@ -103,6 +103,24 @@ officer_features: SuspectInjuryToUOFRatio: False #Ratio of suspect injuries to uses of force that an officer has, time gated. ResistingArrestToUOFRatio: False #Ratio of resisting arrest to uses of force that an officer has, time gated. + ############################ + # ETL Feature Selection # + ############################ + + ETL_dummyfeature1: False #Dummy etl feature 1 + ETL_dummyfeature2: False #Dummy etl feature 2 + ETL_ChargesDismissed: False # proportion of charges that are dismissed + ETL_YearsOfService: True #Officer's years of service. + ETL_NumberTransfers: True #Number of officer transfers, time-gated. + ETL_NumberTransfersNotOnNewYears: True #Number of officer transfers not starting on New Years, YYYY-01-01, time-gated. + ETL_NumberTransfersNotInJanuary: True #Number of officer transfers not starting in January, time-gated. + ETL_NumberTransfersLessThanOneYear: True #Number of officer transfers less than one year in length, time-gated. + ETL_NumberTransfersLessThanOneMonth: True #Number of officer transfers less than one month in length, time-gated. + ETL_ArrestOnlyResist: True # Number of officer arrests where the only charge was resisting or evading, time-gated. + ETL_ChargesQuasiGuilty: True # proportion of charges that are something like guilty (quasi-guilty) + ETL_ChargesNotGuilty: True # proportion of charges that are not guilty + ETL_NumberDispatchedInitiatedBy: True #Number dispatches by initiation source over time gated periods. + ################################################### # Threshold based EIS system Feature Selection # ###################################################