# example_officer_config.yaml

################################################
#          UChicago DSaPP Police EIS           #
#              Model experiments               #
################################################

# Name of the department this experiment is run for.
department_unit: 'department_name'

# Officer feature and label table names, plus the feature-block schema name
# (presumably a database schema; confirm against the consumer).
officer_feature_table_name: 'example_officer_feature_table'
officer_label_table_name: 'example_officer_labels_table'
schema_feature_blocks: 'example_schema_block'

# Determines whether model objects get stored in a pickle in
# root_path/department_unit/directory.
store_model_object: false
directory: ''
########################
# Comment fields       #
########################
# Free-text comments; both are empty in this example.
model_comment: ''
batch_comment: ''

# NOTE(review): presumably marks this run as a test run; confirm in the consumer.
test_flag: true

########################
# Type of Experiment   #
########################

# Unit of the experiment. The other option is 'dispatch'.
unit: 'officer'

########################
# Temporal parameters  #
########################
# First and last day of the data.
start_date: '2008-01-01'
end_date: '2016-04-01'

# Prediction window(s) used for officer-level prediction,
# e.g. ['1d', '1w', '1m', '6m', '1y'].
prediction_window:
  - '1y'

# Update window(s) used for updating the model,
# e.g. ['1d', '1w', '1m', '6m', '1y'].
update_window:
  - '1y'

# Window(s) used for training the model.
# NOTE(review): this was previously described as "months", but the value uses
# the same interval notation as the windows above; confirm the expected format.
training_window:
  - '1y'

# Include officers that have activity in this window preceding the start time
# of test/train.
officer_past_activity_window:
  - '1y'

# Aggregation time for time-gated features.
# NOTE(review): an earlier example listed long-form intervals ("1 day",
# "10 days", "1 week", "1 month", "1 year", "5 years"), while the active values
# are short-form; confirm which notation the consumer accepts.
timegated_feature_lookback_duration:
  - '1w'
  - '1m'
  - '1y'
  - '5y'

########################
# Labelling Details    #
########################

# Label selection. Note: the labels below are combined with a logical OR.
officer_labels:
  AllAllegations: false  # All allegations.
  SustainedAllegations: false  # All sustained allegations.
  # All sustained and unknown-outcome allegations.
  SustainedandUnknownOutcomeAllegations: false
  # All allegations of type: {accident, bias, insubordination, gift policy,
  # handling of civs, substance abuse, harassment, force}.
  MajorAllegations: false
  # All allegations of major type that were sustained.
  SustainedMajorAllegations: true
  # All allegations of major type that were sustained or had an unknown outcome.
  SustainedUnknownMajorAllegations: false
  # All allegations of type: {appearance, standard procedures, traffic laws,
  # tardiness, equipment, quality, conditions of employment}.
  MinorAllegations: false
  # All allegations of minor type that were sustained.
  SustainedMinorAllegations: false
  # All allegations of minor type that were sustained or had an unknown outcome.
  # NOTE(review): "Unkown" in this key is misspelled; it is left unchanged
  # because the consumer may look the label up by this exact name.
  SustainedUnkownMinorAllegations: false
  ForceAllegations: false  # All force allegations.
  SustainedForceAllegations: false  # All sustained force allegations.
  # All sustained and unknown-outcome force allegations.
  SustainedandUnknownForceAllegations: false
  # Allegations categorized as "unknown".
  UnknownAllegations: false
  # Allegations categorized as "unknown" and sustained.
  SustainedUnknownAllegations: false
  # Allegations categorized as "unknown" with a final outcome that is
  # sustained or unknown.
  SustainedUnknownUnknownAllegations: false

########################
# Feature selection    #
########################

# Feature blocks to use; every name listed here is a key under feature_blocks.
officer_features:
  - 'IncidentsReported'
  - 'IncidentsCompleted'
  - 'OfficerShifts'
  - 'OfficerArrests'
  - 'TrafficStops'
  - 'FieldInterviews'
  - 'Dispatches'
  - 'OfficerCharacteristics'
  - 'DemographicNpaArrests'
  - 'OfficerEmployment'

# Per-block feature switches: each block maps feature names to true/false.
feature_blocks:
  IncidentsReported:
    # Time-gated. Number of suspensions an officer has had.
    SuspensionsOfType: true
    # Time-gated. Number of hours of suspension an officer has had.
    HoursSuspensionsOfType: true
    # Time-gated. Number of interventions of each type an officer has had.
    InterventionsOfType: true
    # Time-gated. Number of allegations made against an officer.
    AllAllegations: true
    # Time-gated. Number of reported incidents of a certain type.
    IncidentsOfType: true
    # Time-gated. Number of all reported allegations, major or minor.
    IncidentsOfSeverity: true
    # Time-gated. Number of all reported allegations, major or minor, with
    # unknown outcome.
    IncidentsSeverityUnknown: true
    # Time-gated. Number of complaints by source an officer had.
    ComplaintsTypeSource: true
    # Time-gated. Number of complaints an officer had.
    Complaints: true
    # Number of days since the last allegation was made against the officer.
    DaysSinceLastAllegation: true

  IncidentsCompleted:
    # Time-gated. Number of incidents by type of outcome.
    IncidentsByOutcome: true
    # Time-gated. Number of major incidents by type of outcome.
    MajorIncidentsByOutcome: true
    # Time-gated. Number of minor incidents by type of outcome.
    MinorIncidentsByOutcome: true
    # Time-gated. Number of days since the last sustained allegation was made
    # against the officer.
    DaysSinceLastSustainedAllegation: true

  OfficerShifts:
    # Time-gated. Number of time-gated shifts by categorical type.
    ShiftsOfType: true
    # Time-gated. Average number of hours per shift.
    HoursPerShift: true

  OfficerArrests:
    # Time-gated. Month-by-month variance of arrest counts.
    ArrestMonthlyVariance: true
    # Time-gated. Month-by-month coefficient of variation in arrest counts.
    # TODO: in a collate branch.
    ArrestMonthlyCOV: false
    # Time-gated. Number of arrests made by an officer.
    Arrests: true
    # Number of time-gated arrests by categorical type.
    ArrestsOfType: true
    # Number of time-gated arrests by day of week.
    ArrestsON: true
    # Number of suspects arrested by race type, time-gated periods.
    SuspectsArrestedOfRace: true
    # Number of suspects arrested by ethnicity type, time-gated periods.
    SuspectsArrestedOfEthnicity: true

  TrafficStops:
    TrafficStopsWithSearch: true  # Time-gated.
    TrafficStopsWithUseOfForce: true  # Time-gated.
    TrafficStops: true  # Time-gated.
    TrafficStopsWithArrest: true  # Time-gated.
    TrafficStopsWithInjury: true  # Time-gated.
    TrafficStopsWithOfficerInjury: true  # Time-gated.
    TrafficStopsWithSearchRequest: true  # Time-gated.
    # Time-gated, categorical by stopped person's race.
    TrafficStopsByRace: true
    TrafficStopsByStopType: true  # Time-gated, categorical.
    TrafficStopsByStopResult: true  # Time-gated, categorical.
    TrafficStopsBySearchReason: true
    TrafficStopsByInterestingSearch: false  # TODO: create text features.

  FieldInterviews:
    FieldInterviews: true
    HourOfFieldInterviews: true
    ModeHourOfFieldInterviews: true
    FieldInterviewsByRace: true
    FieldInterviewsByOutcome: true
    FieldInterviewsWithFlag: true
    InterviewsType: true

  UseOfForce:  # TODO
    # Number of uses of force by type of force over time-gated periods.
    UsesOfForceOfType: true
    # Number of unjustified uses of force by time over time-gated periods.
    UnjustifiedUsesOfForceOfType: true
    # Number of interventions of type X following an unjustified force,
    # time-gated.
    UnjustUOFInterventionsOfType: true
    # Number of uses of force by whether the suspect was injured, time-gated.
    # TODO
    UOFwithSuspectInjury: false
    # Ratio of suspect injuries to uses of force that an officer has,
    # time-gated. TODO
    SuspectInjuryToUOFRatio: false

  Dispatches:
    # Number of dispatches of different type aggregated over time.
    DispatchType: true
    # NOTE(review): this key is spelled "Initiatiation" (sic); it is left
    # unchanged because the consumer may look it up by this exact name.
    DispatchInitiatiationType: true

  EISAlerts:  # TODO
    EISInterventionsOfType: true
    FractionEISFlagsWithIntervention: false  # TODO
    EISFlagsOfType: true

  OfficerCharacteristics:
    # Marital status of the officer. The problem here is that it comes without
    # a date, e.g. training in the past as if an officer was always married or
    # divorced.
    DummyOfficerMarital: false
    DummyOfficerGender: true  # Officer gender code.
    DummyOfficerRace: true  # Officer race code.
    DummyOfficerEthnicity: true  # Officer ethnicity code.
    OfficerAge: true  # Age of officer in years.
    DummyOfficerEducation: true  # Officer education level.
    MilesFromPost: false  # Number of miles to post. TODO
    # Whether or not the officer has had military experience.
    DummyOfficerMilitary: true
    AcademyScore: true  # Performance score at the police academy.
    DummyOfficerRank: true  # Officer rank.

  OfficerEmployment:
    OutsideEmploymentHours: true  # Extra duty.

  Other:
    # Number of uses of force by whether the suspect resisted arrest,
    # time-gated.
    CountUOFwithResistingArrest: true
    # Ratio of resisting arrest to uses of force that an officer has,
    # time-gated.
    ResistingArrestToUOFRatio: false
    # Ratio of complaints per arrest.
    ComplaintToArrestRatio: false
    # Rate of complaints per hour worked.
    ComplaintsPerHourWorked: false
    # Ratio of uses of force per arrest, time-gated.
    UOFtoArrestRatio: true
    # Ratio of internal compliments to complaints an officer has.
    ComplimentsToComplaintsRatio: false

  Demographic_Features:
    OfficerAvgArrestDemographics_1: false
    OfficerAvgArrestDemographics_2: false
    OfficerAvgDispatchDemographics_1: false
    OfficerAvgDispatchDemographics_2: false
    OfficerAvgStopsDemographics_1: false
    OfficerAvgStopsDemographics_2: false

  DemographicNpaArrests:
    Arrests311Call: true
    Arrests311Requests: true
    PopulationDensity: true
    AgeOfResidents: true
    BlackPopulation: true
    HouseholdIncome: true
    EmploymentRate: true
    VacantLandArea: true
    VoterParticipation: true
    AgeOfDeath: true
    HousingDensity: true
    NuisanceViolations: true
    ViolentCrimeRate: true
    PropertyCrimeRate: true
    SidewalkAvailability: true
    Foreclosures: true
    DisorderCallRate: true

########################
# Model selection      #
########################
# All model types, for reference:
# model: ['RandomForest', 'RandomForestBagging', 'RandomForestBoosting', 'ExtraTrees',
#         'AdaBoost', 'LogisticRegression', 'SVM', 'GradientBoostingClassifier',
#         'DecisionTreeClassifier', 'SGDClassifier', 'KNeighborsClassifier']

# Model types selected for this experiment.
model:
  - 'RandomForest'
  - 'ExtraTrees'
  - 'LogisticRegression'
# Hyperparameter value lists, keyed by model type.
# Trailing "alt:" comments record other values that were left commented out.
# NOTE(review): presumably each list is expanded into a parameter grid by the
# consumer; confirm.
parameters:
  RandomForest:
    n_estimators: [10000]  # alt: [50, 100, 1000, 10000, 50000]
    max_depth: [2, 5]  # alt: [100, 5, 10, 50]
    max_features: ['log2']  # alt: ['log2', 2, 4, 8, 16, 'auto']
    criterion: ['gini']  # alt: ['entropy']
    min_samples_split: [2, 5]  # alt: [2, 5, 10]
    random_state: [2193]
  RandomForestBagging:
    n_estimators: [10]  # alt: [25, 50, 100, 1000, 10000]
    max_depth: [5]  # alt: [10, 20, 50, 100]
    max_features: ['sqrt']  # alt: ['log2', 2, 4, 8, 16, 'auto']
    criterion: ['gini']  # alt: ['entropy']
    min_samples_split: [2]  # alt: [5, 10]
    max_samples: [0.5]  # alt: [1.0]
    bootstrap: [true]
    bootstrap_features: [false]  # alt: [true]
    n_estimators_bag: [10]  # alt: [25, 50, 100, 1000, 10000]
    max_features_bag: [2]  # alt: [4, 8, 16]
    random_state: [2193]
  RandomForestBoosting:
    n_estimators: [100]  # alt: [25, 50, 100, 1000, 10000]
    max_depth: [20]  # alt: [10, 20, 50, 100]
    max_features: [2]  # alt: ['sqrt', 'log2', 2, 4, 8, 16, 'auto']
    criterion: ['gini']  # alt: ['entropy']
    min_samples_split: [2]  # alt: [5, 10]
    algorithm: ['SAMME']  # alt: ['SAMME.R']
    learning_rate: [0.01]  # alt: [0.1, 1, 10, 100]
    n_estimators_boost: [10]  # alt: [25, 50, 100, 1000, 10000]
    random_state: [2193]
  ExtraTrees:
    n_estimators: [10000]  # alt: [25, 50, 100, 1000, 10000, 50000, 100000]
    max_depth: [2, 5]  # alt: [5, 10, 50, 100]
    max_features: ['log2']  # alt: [4, 8, 16, 'auto']
    criterion: ['gini']  # alt: ['entropy']
    min_samples_split: [2, 5]  # alt: [2, 5, 10]
    random_state: [2193]
  AdaBoost:
    algorithm: ['SAMME', 'SAMME.R']
    n_estimators: [1, 10, 100]  # alt: [1000, 10000]
    learning_rate: [0.01, 0.1, 1, 10, 100]
    random_state: [2193]
  LogisticRegression:
    C_reg: [0.001, 0.01, 1]  # alt: [1, 10]
    penalty: ['l1', 'l2']
    random_state: [2193]
  SVM:
    C_reg: [0.00001, 0.0001, 0.001, 0.01, 0.1]  # alt: [1, 10]
    kernel: ['linear']
    random_state: [2193]
  GradientBoostingClassifier:
    n_estimators: [1, 10, 100]  # alt: [1000, 10000]
    learning_rate: [0.001, 0.01, 0.05, 0.1, 0.5]
    subsample: [0.1, 0.5, 1.0]
    max_depth: [1, 3, 5, 10, 20]  # alt: [50, 100]
    random_state: [2193]
  DecisionTreeClassifier:
    criterion: ['gini', 'entropy']
    max_depth: [1, 5, 10, 20]  # alt: [50, 100]
    max_features: ['sqrt', 'log2']
    min_samples_split: [2, 5, 10]
    random_state: [2193]
  SGDClassifier:
    loss: ['log', 'modified_huber']
    penalty: ['l1', 'l2', 'elasticnet']
    random_state: [2193]
  KNeighborsClassifier:
    n_neighbors: [1, 3, 5, 10, 25, 50, 100]
    weights: ['uniform', 'distance']
    algorithm: ['auto', 'kd_tree']


########################
# Parallelization      #
########################
# NOTE(review): presumably the number of CPUs used for parallel runs; confirm.
n_cpus: 4

########################
# Auditing hooks       #
########################
# Turn on (set to true) if you want auditing performed.
auditing: false
# NOTE(review): looks like the location where audits are kept; confirm against
# the consumer.
audits: 'audits/'