From cf448caadc10ac344b5f3f67ab7129f47f5d548f Mon Sep 17 00:00:00 2001
From: Brad Cannell <brad.cannell@gmail.com>
Date: Tue, 16 Apr 2024 16:36:00 -0500
Subject: [PATCH] WIP #41

---
 .../preliminary_self_report_analysis.qmd      | 102 ++++++++++++++----
 1 file changed, 83 insertions(+), 19 deletions(-)

diff --git a/exploratory/preliminary_self_report_analysis.qmd b/exploratory/preliminary_self_report_analysis.qmd
index b4e9a99..a39c97d 100644
--- a/exploratory/preliminary_self_report_analysis.qmd
+++ b/exploratory/preliminary_self_report_analysis.qmd
@@ -2,52 +2,116 @@
 title: "Preliminary analysis of the self-reported EM section of the follow-up interviews"
 ---
 
+
 # Overview
 
 In this file, we conduct a preliminary analysis of the self-reported EM section of the DETECT follow-up interviews to understand what people self-reported in more detail.
 
-```{r, message= FALSE}
-# Load all necessary libraries
-library(readr)
-library(dplyr)
+## 🔴 Note for Ebie:
+
+I'm thinking that it might be best to keep all the follow-up data frames separate and just merge them as needed. If we do that, we will need to have a key linking MedStar IDs to unique person identifiers. For now, I'm just going to grab them from the `detect_fu_data_merged.rds` data.
+
+# Load packages
+
+```{r}
+#| message: false
+
+library(dplyr, warn.conflicts = FALSE)
 library(flextable)
 library(officer)
-source("../r/extract_df_from_merged_detect.R")
-source("../r/n_percent_ci.R")
+```
+
+
+# Load custom functions
+
+```{r}
+source(here::here("R", "extract_df_from_merged_detect.R"))
+source(here::here("R", "n_percent_ci.R"))
 ```
 
 
 # Load data
 
-Load the merged detect data frame into the environment and extract the self-report data.
+Load the cleaned self-report data frame into the environment. This data is created in `data_06_self_report_import.qmd`.
 
 ```{r}
-detect_fu_merge  <- readRDS("../data/cleaned_rds_files/detect_fu_data_merged.rds")
-sr <- filter_merged_df(detect_fu_merge, "_sfr")
+self_rep_path <- here::here("data", "cleaned_rds_files", "self_report_import.rds")
+detect_fu_data_merged_path <- here::here("data", "cleaned_rds_files", "detect_fu_data_merged.rds")
 ```
 
+```{r}
+sr <- readr::read_rds(self_rep_path) # Cleaned self-report data
+detect_fu_data_merged <- readr::read_rds(detect_fu_data_merged_path) # Merged follow-up data; source of the ID key
+```
+
+```{r}
+# Data check: row and column counts should match the trailing comment
+dim(sr) # 953 574
+```
+
+```{r}
+# Data check: row and column counts should match the trailing comment
+dim(detect_fu_data_merged) # 98200   799
+```
+
+
+# Data management 
+
+## Keep unique patient identifier
+
+🔴 In the future, I think I'd like to just load the MedStar IDs and unique patient identifiers as a separate RDS file. For now, I'm grabbing them from the `detect_fu_data_merged.rds` data.
+
+```{r}
+unique_id_par <- detect_fu_data_merged |> # Build a de-duplicated MedStar ID -> person ID key
+  distinct(medstar_id, unique_id_par) # Prevents join fan-out; assumes one unique_id_par per medstar_id - TODO confirm
+```
+
+Merge the unique patient ID into the self-report data.
+
+# 🔴 Left off here 2024-04-16
+
+Get this merge working. Right now, it's creating multiple rows per MedStar ID.
+
+```{r}
+# Preview only: the joined result is printed and is not saved back to `sr`.
+left_join(sr, unique_id_par, by = "medstar_id")
+```
+
+
 # Recode missing data
 
 Replace "Don't know" and "Refused" values with NA
+
+## 🔴 Note for Ebie:
+
+Ebie, this isn't necessary if you use the new variables I created in `data_06_self_report_import.qmd`. I haven't uploaded them to SharePoint yet, but if you just rerun `data_06_self_report_import.qmd`, you will have the updated data on your computer.
+
 ```{r}
-sr <- sr %>% mutate(
-  across(
-    .cols = matches("_[0-9]+cat_f"),
-    .fns = ~ case_when(
-      .x == "Refused" | .x == "Don't know" ~ NA,
-      TRUE                                 ~ .x
-    )
-  )
-)
+# sr <- sr %>% mutate(
+#   across(
+#     .cols = matches("_[0-9]+cat_f"),
+#     .fns = ~ case_when(
+#       .x == "Refused" | .x == "Don't know" ~ NA,
+#       TRUE                                 ~ .x
+#     )
+#   )
+# )
 ```
 
+
 # Create an aggregate variable of any self-reported abuse
-For each type of abuse, create a binary variable with yes/ no categories for: 
+
+## 🔴 Note for Ebie: 
+
+Ebie, please add two blank lines above, and one blank line below, level-1 headings. For all other headings, please add one blank line above and one blank line below. I'm changing them below.
+
+For each type of abuse, create a binary variable with yes/no categories for: 
 - Ever abuse
 - Abuse over age 65
 - Abuse in the past year
 
 ## Ever abuse
+
 The category will be "yes" if the value is "yes" for any of the variables related to ever abuse.
 
 ```{r}