revise naming conventions via #25 comments and refactor targets pipeline

sokolhessnerlab · May 25, 2021 · a491f92 · aridyckovsky · May 25, 2021 · a491f92
1 parent 58c0bb1
commit a491f92
Show file tree

Hide file tree

Showing 11 changed files with 249 additions and 93 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -6,8 +6,10 @@ export(extract_behavioral_data)
 export(extract_eyetracker_data)
 export(extract_recordings_data)
 export(false_alarm_by_response_time_model)
-export(get_all_hits_with_reaction_times)
+export(get_behavioral_metadata)
+export(get_false_alarms_given_responses)
 export(get_hit_times)
+export(get_hits_given_signals)
 export(hit_by_signal_time_model)
 export(reaction_time_by_signal_time_model)
 importFrom(magrittr,"%>%")
diff --git a/R/behavioral_data_models.R b/R/behavioral_data_models.R
@@ -4,13 +4,13 @@
 hit_by_signal_time_model <- function(df, random_effects = FALSE) {
   if (random_effects) {
     glmer(
-      is_hit ~ 1 + signal_time + (1 + signal_time | id),
+      is_hit_given_signal ~ 1 + signal_time + (1 + signal_time | id),
       data = df,
       family = "binomial"
     )
   } else {
     glmer(
-      is_hit ~ 1 + signal_time + (1 | id),
+      is_hit_given_signal ~ 1 + signal_time + (1 | id),
       data = df,
       family = "binomial"
     )
@@ -40,13 +40,13 @@ reaction_time_by_signal_time_model <- function(df, random_effects = FALSE) {
 false_alarm_by_response_time_model <- function(df, random_effects = FALSE) {
   if (random_effects) {
     glmer(
-      is_false_alarm ~ 1 + resp_time + (1 + resp_time | id),
+      is_false_alarm_given_response ~ 1 + resp_time + (1 + resp_time | id),
       data = df,
       family = "binomial"
     )
   } else {
     glmer(
-      is_false_alarm ~ 1 + resp_time + (1 | id),
+      is_false_alarm_given_response ~ 1 + resp_time + (1 | id),
       data = df,
       family = "binomial"
     )

diff --git a/R/behavioral_data_preprocessing.R b/R/behavioral_data_preprocessing.R
@@ -1,3 +1,13 @@
+#' @title Get behavioral metadata for each participant
+#' @description Creates a dataframe composed of each participant's id, signal
+#'   probability, clock side, task begin and end.
+#' @export
+get_behavioral_metadata <- function(behavioral_data) {
+  behavioral_data %>%
+    dplyr::select(id, p_signal, clock_side, task_begin, task_end) %>%
+    unique()
+}
+
 #' @title Get hit times vector from signals and responses per participant
 #' @description This function uses input of both signal times and response times
 #'   from a single participant's extracted behavioral data to determine the hit
@@ -11,12 +21,12 @@ get_hit_times <- function(signal_times, response_times, .interval = 8.0) {
   hit_indices <- c()
 
   signal_times %>%
-    map_dbl(function(signal_time) {
+    purrr::map_dbl(function(signal_time) {
 
       # Find indices for potential hits within the signal interval
       potential_hit_indices <- which(
         response_times %>%
-          map_lgl(~ between(.x, signal_time, signal_time + .interval)),
+          purrr::map_lgl(~ between(.x, signal_time, signal_time + .interval)),
         arr.ind = TRUE
       )
 
@@ -41,43 +51,59 @@ get_hit_times <- function(signal_times, response_times, .interval = 8.0) {
     })
 }
 
-#' @title Get all participants' hits with reaction times
+#' @title Get all participants' hits given signal presence
 #' @description Creates a dataframe composed of each participant's hit times and
 #'   reaction times for those hits, row-by-row with signal times.
 #' @export
-get_all_hits_with_reaction_times <- function(combined_df) {
+get_hits_given_signals <- function(behavioral_data) {
 
-  participants <- combined_df %>%
-    pull(id) %>%
+  participants <- behavioral_data %>%
+    dplyr::pull(id) %>%
     unique()
 
   # Extract only rows where a signal is present
-  all_signals_df <- combined_df %>%
-    filter(is_signal == 1) %>%
-    mutate(
+  only_signals_df <- behavioral_data %>%
+    dplyr::filter(is_signal == 1) %>%
+    dplyr::mutate(
       signal_time = step_time
     ) %>%
-    select(trial, id, image_index, signal_time)
+    dplyr::select(trial, id, image_index, signal_time)
 
   # Extract only rows where a response attempt is present
-  all_responses_df <- combined_df %>%
-    filter(is_response == 1) %>%
-    select(trial, id, image_index, resp_time)
+  only_responses_df <- behavioral_data %>%
+    dplyr::filter(is_response == 1) %>%
+    dplyr::select(trial, id, image_index, resp_time)
 
   # Map over the unlisted participants' ids to get the per-participant
   # signals and responses, then return a combined dataframe of all participant
   # including trial rows for signals, and if it exists, hit time and reaction time
   map_dfr(participants, function(participant) {
-    signals <- all_signals_df %>%
-      filter(id == participant)
+    signals <- only_signals_df %>%
+      dplyr::filter(id == participant)
 
-    responses <- all_responses_df %>%
-      filter(id == participant)
+    responses <- only_responses_df %>%
+      dplyr::filter(id == participant)
 
-    signals %>% mutate(
+    signals %>% dplyr::mutate(
       hit_time = get_hit_times(signals$signal_time, responses$resp_time),
       reaction_time = hit_time - signal_time,
-      is_hit = as.integer(!is.na(hit_time))
+      is_hit_given_signal = as.integer(!is.na(hit_time))
     )
   })
 }
+
+#'
+#'
+#'
+
+#' @title Get false alarms given responses for all participants
+#' @description TODO
+#' @export
+get_false_alarms_given_responses <- function(behavioral_data, hits_with_reaction_times) {
+  behavioral_data %>%
+    dplyr::filter(is_response == 1) %>%
+    dplyr::left_join(hits_with_reaction_times, by = c('trial', 'id', 'image_index')) %>%
+    tidyr::replace_na(list(is_hit_given_signal = 0)) %>%
+    dplyr::mutate(is_false_alarm_given_response = as.integer(!is_hit_given_signal)) %>%
+    dplyr::select(trial, id, image_index, resp_time, is_false_alarm_given_response)
+}
diff --git a/_targets.R b/_targets.R
@@ -34,7 +34,7 @@ tar_option_set(
 # Config
 config <- config::get()
 
-mapped_extraction <- tar_map(
+extract_raw_data <- tar_map(
   unlist = FALSE, # Return a nested list from tar_map()
   # Create a dataframe of participants' meta information using raw behavioral data response file names
   values = tibble(
@@ -67,9 +67,9 @@ mapped_extraction <- tar_map(
   )
 )
 
-combined_behavioral <- tar_combine(
-  extracted_behavioral_data_combined,
-  mapped_extraction$extracted_behavioral_data,
+combine_extracted_behavioral_data <- tar_combine(
+  combined_behavioral_data,
+  extract_raw_data$extracted_behavioral_data,
   command = dplyr::bind_rows(!!!.x) %>%
     dplyr::mutate(
       # Transform step and response types to 0 or 1 integer values to simulate boolean behavior.
@@ -79,6 +79,27 @@ combined_behavioral <- tar_combine(
     dplyr::select(-c(resp_type, step_type))
 )
 
+preprocess_behavioral_data <- list (
+  combine_extracted_behavioral_data,
+  tar_target(
+    behavioral_metadata,
+    get_behavioral_metadata(combined_behavioral_data)
+  ),
+  tar_target(
+    hits_given_signals,
+    get_hits_given_signals(combined_behavioral_data)
+  ),
+  tar_target(
+    false_alarms_given_responses,
+    get_false_alarms_given_responses(combined_behavioral_data, hits_given_signals)
+  ),
+  tar_render(
+    behavioral_data_preprocessing_notebook,
+    "notebooks/behavioral_data_preprocessing.Rmd",
+    output_dir = "output"
+  )
+)
+
 summary_reports <- list(
   tar_render(
     incompletes,
@@ -93,31 +114,7 @@ summary_reports <- list(
 )
 
 list(
-  mapped_extraction,
-  combined_behavioral,
-  tar_target( #TODO: need to migrate to function in 'R/'
-    metadata_behavioral,
-    extracted_behavioral_data_combined %>%
-      dplyr::select(id, p_signal, clock_side, task_begin, task_end) %>%
-      unique()
-  ),
-  tar_target(
-    all_hits_with_reaction_times,
-    get_all_hits_with_reaction_times(extracted_behavioral_data_combined)
-  ),
-  tar_target(
-    false_alarms,
-    extracted_behavioral_data_combined %>%
-      dplyr::filter(is_response == 1) %>%
-      dplyr::left_join(all_hits_with_reaction_times, by = c('trial', 'id', 'image_index')) %>%
-      tidyr::replace_na(list(is_hit = 0)) %>%
-      dplyr::mutate(is_false_alarm = as.integer(!is_hit)) %>%
-      dplyr::select(trial, id, image_index, resp_time, is_false_alarm)
-  ),
-  tar_render(
-    behavioral_data_preprocessing_notebook,
-    "notebooks/behavioral_data_preprocessing.Rmd",
-    output_dir = "output"
-  ),
+  extract_raw_data,
+  preprocess_behavioral_data,
   summary_reports
 )
diff --git a/man/get_behavioral_metadata.Rd b/man/get_behavioral_metadata.Rd
diff --git a/man/get_false_alarms_given_responses.Rd b/man/get_false_alarms_given_responses.Rd
diff --git a/man/get_all_hits_with_reaction_times.Rd → man/get_hits_given_signals.Rd b/man/get_all_hits_with_reaction_times.Rd → man/get_hits_given_signals.Rd
diff --git a/notebooks/behavioral_data_preprocessing.Rmd b/notebooks/behavioral_data_preprocessing.Rmd
@@ -42,7 +42,7 @@ Use `tar_read` to get the target object `extracted_behavioral_data_combined` and
 
 ```{r load-data}
 withr::with_dir(here::here(), {
-  combined_df <- tar_read(extracted_behavioral_data_combined)
+  combined_df <- tar_read(combined_behavioral_data)
 })
 ```
 
@@ -66,7 +66,7 @@ Read targets object `all_hits_with_reaction_times` and assign to the combined hi
 
 ```{r read-combined-hits}
 withr::with_dir(here::here(), {
-  combined_hits_df <- tar_read(all_hits_with_reaction_times)
+  combined_hits_df <- tar_read(hits_given_signals)
 })
 ```
 
@@ -132,7 +132,7 @@ combined_hits_df %>%
 
 ```{r read-false-alarms}
 withr::with_dir(here::here(), {
-  false_alarms_df <- tar_read(false_alarms)
+  false_alarms_df <- tar_read(false_alarms_given_responses)
 })
 ```
 
@@ -221,3 +221,20 @@ model_FA_resp_time_rfx = false_alarm_by_response_time_model(
 summary(model_FA_resp_time_rfx)
 ```
 
+```{r}
+scaled_false_alarms_df$pred <- predict(model_FA_resp_time_rfx, type = "response")
+ggplot(scaled_false_alarms_df, aes(x = resp_time)) +
+  geom_line(aes(x = resp_time, y = pred))
+```
+
+```{r}
+scaled_false_alarms_df$pred <- predict(model_FA_resp_time_rfx, type = "response")
+scaled_false_alarms_df
+```
+
+## Playground (in progress)
+
+```{r}
+combined_hits_df %>%
+  mutate(first_half_of_task = as.integer(signal_time < 1800))
+```
diff --git a/output/behavioral_data_preprocessing.html b/output/behavioral_data_preprocessing.html