philly_complaints.Rmd

---
title: "Complaints Against Philly Police"
author: "Yao Yu"
date: "6/2/2020"
output: html_document
---

```{r setup, include=FALSE}
# Default for every chunk: do not echo the R source into the knitted document,
# so only chunk output is rendered.
knitr::opts_chunk$set(echo = FALSE)

# Loading in necessary packages

library(tidyverse)
library(scales)
library(reactable)

# Complaint details (id, date received, district, classification, summary).
# Source: https://www.opendataphilly.org/dataset/police-complaints
# Column types use readr's compact codes: "c" = character, "D" = date.

complaints <- read_csv(
  file = "data/ppd_complaints.csv",
  col_types = cols(
    complaint_id = "c",
    date_received = "D",
    district_occurrence = "c",
    general_cap_classification = "c",
    summary = "c"
  )
)

# Complainant demographics (sex, race, age), keyed by complaint_id.
# Column types use readr's compact codes: "c" = character, "d" = double.

demographics <- read_csv(
  file = "data/ppd_complainant_demographics.csv",
  col_types = cols(
    complaint_id = "c",
    complainant_sex = "c",
    complainant_race = "c",
    complainant_age = "d"
  )
)

# Per-officer allegations and findings attached to each complaint_id.
# Column types use readr's compact codes: "c" = character, "d" = double.

disciplines <- read_csv(
  file = "data/ppd_complaint_disciplines.csv",
  col_types = cols(
    complaint_id = "c",
    officer_id = "d",
    po_race = "c",
    po_sex = "c",
    po_assigned_unit = "c",
    allegations_investigated = "c",
    investigative_findings = "c",
    disciplinary_findings = "c"
  )
)

# Combine the three tables on complaint_id. The inner joins are folded left to
# right (complaints, then demographics, then disciplines), so only complaint
# ids present in all three tables are kept.

philly_full <- list(complaints, demographics, disciplines) %>%
  reduce(inner_join, by = "complaint_id")

```

```{r cleaning}

# Tidy the joined data for plotting:
#   * year   - four-digit year taken from date_received
#   * race   - complainant race collapsed to Black / White / "Other/NA";
#              every other value, including missing, goes to "Other/NA"
#              because those groups are a small share of complaints and are
#              not the focus of the article
#   * po_race - officer race title-cased for the five known values, anything
#              else (including missing) becomes "Other/NA"
# Rows without a complaint classification are dropped at the end.

philly_clean <- philly_full %>%
  mutate(year = format(date_received, "%Y")) %>%
  select(
    complaint_id,
    year,
    district_occurrence,
    general_cap_classification,
    complainant_race,
    investigative_findings,
    disciplinary_findings,
    officer_id,
    po_race
  ) %>%
  mutate(
    race = if_else(
      complainant_race %in% c("black", "white"),
      str_to_title(complainant_race),
      "Other/NA"
    ),
    po_race = if_else(
      po_race %in% c("asian", "black", "indian", "latino", "white"),
      str_to_title(po_race),
      "Other/NA"
    )
  ) %>%
  filter(!is.na(general_cap_classification))

# Flag suspect complaint ids. A complaint_id should map to exactly one
# (race, classification) pair; ids with more than one distinct pair are treated
# as data-entry errors. The result holds one row per flagged id with the number
# of distinct pairs in n, largest first.

race_duplicates <- philly_clean %>%
  distinct(complaint_id, race, general_cap_classification) %>%
  group_by(complaint_id) %>%
  count() %>%
  filter(n != 1) %>%
  arrange(desc(n))

# Drop every row whose complaint_id was flagged in race_duplicates.

philly_clean <- philly_clean %>%
  anti_join(race_duplicates, by = "complaint_id")

```

```{r graph 1}

# Data for the first plot. Collapse to one row per complaint (the joins repeat
# a complaint once per complainant/officer record), then count complaints for
# each classification x race combination.

data_category_race <- philly_clean %>%
  distinct(complaint_id, race, general_cap_classification) %>%
  group_by(general_cap_classification, race) %>%
  count()
  

# First viz: stacked horizontal bars of complaint counts per classification,
# split by complainant race.
#
# The classification axis is ordered by the TOTAL number of complaints in each
# category (FUN = sum). reorder() defaults to the mean of n over the race rows
# of a category, which mis-sorts the stacked bars whenever a category is
# missing one of the race groups.

complaints_by_category_race <- data_category_race %>%
  ggplot(aes(
    x = reorder(general_cap_classification, n, FUN = sum),
    y = n,
    fill = reorder(race, -n)
  )) +
  geom_col() +
  coord_flip() +
  theme_classic() +
  labs(title = "Number of Complaints Made Against Philly Police by Race",
       subtitle = "from May 2015 to April 2020",
       x = "Type of Complaint",
       y = "",
       fill = "Race",
       caption = "Source: City of Philadelphia via OpenDataPhilly")

complaints_by_category_race

# Code to save image into png

# png("graphics/complaints_by_category_race.png", units="in", width=8, height=5, res=300)
# print(complaints_by_category_race)
# dev.off()

```

```{r graph 2}

# Data for the second plot. Collapse to one row per complaint, then count
# complaints for each year x race combination.

data_year_race <- philly_clean %>%
  distinct(complaint_id, race, year) %>%
  group_by(year, race) %>%
  count()

# Second viz: side-by-side (dodged) bars of complaint counts per year, one bar
# per complainant race.

complaints_by_year_race <- ggplot(
  data_year_race,
  aes(x = year, y = n, fill = reorder(race, -n))
) +
  geom_col(position = "dodge") +
  labs(
    x = "",
    y = "",
    fill = "Race",
    title = "Number of Complaints Made Against Philly Police by Race",
    subtitle = "from May 2015 to April 2020",
    caption = "Source: City of Philadelphia via OpenDataPhilly"
  ) +
  theme_classic()

complaints_by_year_race

# Code to save image into png

# png("graphics/complaints_by_year_race.png", units="in", width=6, height=4, res=300)
# print(complaints_by_year_race)
# dev.off()

```

```{r graph 3}

# Data for the third plot. Within a complaint, each officer contributes a given
# investigative finding at most once; rows with no officer_id are ignored.
# n is the count per finding and prop is its share of all counted findings.

investigate <- philly_clean %>%
  filter(!is.na(officer_id)) %>%
  distinct(complaint_id, officer_id, investigative_findings) %>%
  count(investigative_findings) %>%
  mutate(prop = n / sum(n))

# Third viz: a single stacked bar of investigative findings. Segments are
# ordered by share and labelled "count | percent".

investigative_findings <- ggplot(
  investigate,
  aes(
    x = "",
    y = prop,
    fill = reorder(investigative_findings, -prop),
    label = paste(n, percent(prop), sep = " | ")
  )
) +
  geom_col(width = 0.4) +
  # vjust = 0.5 centres each label inside its stacked segment
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  labs(
    x = "",
    y = "",
    fill = "",
    title = "Investigative Findings of Complaints",
    subtitle = "per officer from May 2015 to April 2020",
    caption = "Source: City of Philadelphia via OpenDataPhilly"
  ) +
  theme_void()

investigative_findings

# Code to save image into png

# png("graphics/investigative_findings.png", units="in", width=4.25, height=5, res=300)
# print(investigative_findings)
# dev.off()
```

```{r graph 4}

# Data for the fourth plot. Within a complaint, each officer contributes a
# given disciplinary finding at most once. Rows with no officer_id are ignored,
# as are findings that are "Not Applicable" or missing (the != comparison also
# discards NA). n is the count per finding and prop its share of the total.

discipline <- philly_clean %>%
  filter(
    !is.na(officer_id),
    disciplinary_findings != "Not Applicable"
  ) %>%
  distinct(complaint_id, officer_id, disciplinary_findings) %>%
  count(disciplinary_findings) %>%
  mutate(prop = n / sum(n))

# Fourth viz: a single stacked bar of disciplinary findings. Segments are
# ordered by share and labelled "count | percent".
# NOTE(review): the manual palette supplies exactly four colours, so this
# assumes `discipline` holds at most four distinct findings - confirm if the
# data changes.

disciplinary_findings <- ggplot(
  discipline,
  aes(
    x = "",
    y = prop,
    fill = reorder(disciplinary_findings, -prop),
    label = paste(n, percent(prop), sep = " | ")
  )
) +
  geom_col(width = 0.4) +
  # vjust = 0.5 centres each label inside its stacked segment
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(values = c("#00BFC4", "#C77CFF", "#7CAE00", "#F8766D")) +
  labs(
    x = "",
    y = "",
    fill = "",
    title = "Disciplinary Findings of Complaints",
    subtitle = "from May 2015 to April 2020",
    caption = "Source: City of Philadelphia via OpenDataPhilly"
  ) +
  theme_void()

disciplinary_findings

# Code to save image into png

# png("graphics/disciplinary_findings.png", units="in", width=4.25, height=5, res=300)
# print(disciplinary_findings)
# dev.off()
```

```{r table 1}

# Table data: complaints per officer, broken down by complainant race.
# Each (complaint, officer, officer race, complainant race) combination is
# counted once; rows with no officer_id are ignored. Rows are sorted by
# descending count and officer_id is turned into text for display.
# NOTE(review): as.character() on a double can produce scientific notation for
# some round values (e.g. 1e+05) - confirm no officer ids are affected.

most_complaints <- philly_clean %>%
  filter(!is.na(officer_id)) %>%
  distinct(complaint_id, officer_id, po_race, race) %>%
  group_by(officer_id) %>%
  count(po_race, race) %>%
  arrange(desc(n)) %>%
  mutate(officer_id = as.character(officer_id))

# One row per (complaint, officer) whose disciplinary finding is
# "Training/Counseling". Rows with no officer_id are ignored; officer_id is
# converted to text so it can be matched against most_complaints.

training_counseling <- philly_clean %>%
  filter(
    !is.na(officer_id),
    disciplinary_findings == "Training/Counseling"
  ) %>%
  # count() is used only to de-duplicate (and sort) the combinations
  count(complaint_id, officer_id, disciplinary_findings) %>%
  select(-n) %>%
  mutate(officer_id = as.character(officer_id))

# One row per (complaint, officer) whose disciplinary finding is
# "Guilty Finding". Rows with no officer_id are ignored; officer_id is
# converted to text so it can be matched against most_complaints.

guilty <- philly_clean %>%
  filter(
    !is.na(officer_id),
    disciplinary_findings == "Guilty Finding"
  ) %>%
  # count() is used only to de-duplicate (and sort) the combinations
  count(complaint_id, officer_id, disciplinary_findings) %>%
  select(-n) %>%
  mutate(officer_id = as.character(officer_id))

# Attach per-officer finding counts to the table:
#   training - number of "Training/Counseling" rows for the officer
#   guilty   - number of "Guilty Finding" rows for the officer
# Each lookup table is tallied once and joined on officer_id, instead of
# re-filtering it for every table row. Officers with no matching rows get NA
# from the join, which is replaced by 0 so the columns stay integer counts.
# The counts are per officer, so they repeat across that officer's race rows.

most_complaints <- most_complaints %>%
  left_join(
    count(training_counseling, officer_id, name = "training"),
    by = "officer_id"
  ) %>%
  left_join(
    count(guilty, officer_id, name = "guilty"),
    by = "officer_id"
  ) %>%
  mutate(
    training = replace_na(training, 0L),
    guilty = replace_na(guilty, 0L)
  )

# Render the officer table as an interactive widget. Per-column definitions
# supply the display names; the default definition centres cells, sets a
# minimum width and shades the header row.
most_complaints %>%
  reactable(
    bordered = TRUE,
    highlight = TRUE,
    defaultColDef = colDef(
      align = "center",
      minWidth = 70,
      headerStyle = list(background = "#f7f7f8"),
      # replace literal dots in header text with spaces
      header = function(value) gsub(".", " ", value, fixed = TRUE),
      cell = function(value) format(value, nsmall = 1)
    ),
    columns = list(
      officer_id = colDef(name = "Officer ID"),
      po_race = colDef(name = "Police Officer Race"),
      race = colDef(name = "Complaint Filer Race"),
      n = colDef(name = "Total Complaints by Race"),
      training = colDef(name = "Training/Counseling Findings"),
      guilty = colDef(name = "Guilty Findings")
    )
  )

```