From a754281db60e7f863aaa11e8ff655b7bb9631e62 Mon Sep 17 00:00:00 2001
From: Jacqueline Buros <jackinovik@gmail.com>
Date: Thu, 25 Jan 2024 12:08:32 -0500
Subject: [PATCH 1/2] use data.table for rolling joins

---
 DESCRIPTION    |  5 ++--
 R/utils_pkpd.R | 62 ++++++++++++++++++++++++++++----------------------
 2 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 30700bd..4a62775 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -34,7 +34,8 @@ Imports:
     table1,
     reticulate,
     boot,
-    cli
+    cli,
+    data.table
 Suggests: 
     testthat,
     knitr,
@@ -43,5 +44,5 @@ Suggests:
     scales,
     tidyverse,
     utils
-RoxygenNote: 7.2.0
+RoxygenNote: 7.3.1
 VignetteBuilder: knitr
diff --git a/R/utils_pkpd.R b/R/utils_pkpd.R
index dee33c0..7615c25 100644
--- a/R/utils_pkpd.R
+++ b/R/utils_pkpd.R
@@ -25,6 +25,7 @@ fetch_pkpd <- function(project = NULL, project_version_id = NULL, pd_measure = N
 #' @param pk_measure measurement_name of PK measurement (defaults to 'conc', NULL indicates no PK marker)
 #' @param pd_measure measurement_name of PD measurement (defaults to NULL - no PD marker)
 #' @return data.frame containing merged biomarker & dose data for the PK & PD parameter selected, with columns annotating cycles, time since last SDA, and measurement type.
+#' @import data.table
 #' @export
 prep_pkpd_data <- function(biomarkers_data, dose_data, pd_measure = NULL, pk_measure = NULL) {
   if (nrow(dose_data) == 0) {
@@ -40,37 +41,44 @@ prep_pkpd_data <- function(biomarkers_data, dose_data, pd_measure = NULL, pk_mea
   if (!'start_hours' %in% names(dose_data)) {
     stop('dose_data does not have start_hours data. Cannot prepare pkpd data without a formatted start time.')
   }
-  dose_data_renamed <- dose_data %>%
+  dosesDT <- dose_data %>%
     dplyr::rename_at(.vars = dplyr::vars(-.data$subject_id, -.data$drug), .funs = ~ stringr::str_c('dose_', .x)) %>%
-    dplyr::mutate(hours = .data$dose_start_hours)
-  if ('collection_timepoint' %in% names(biomarkers_data)) {
-    merged_data <- biomarkers_data %>%
-      dplyr::mutate(.dir = dplyr::if_else(.data$collection_timepoint == 'Pre-infusion', 'forward', 'reverse')) %>%
-      rolling_join(dose_data_renamed,
-                   by = 'subject_id',
-                   on = 'hours',
-                   direction_field = '.dir',
-                   how = 'left',
-                   suffix = c('', '.dose')) %>%
-      dplyr::select(-.data$hours.dose, -.data$.dir)
-  } else {
-    merged_data <- rolling_join(biomarkers_data,
-                                dose_data_renamed,
-                                by = 'subject_id',
-                                on = 'hours',
-                                direction = 'reverse',
-                                how = 'left',
-                                suffix = c('', '.dose')) %>%
-      dplyr::select(-.data$hours.dose)
-  }
+    dplyr::mutate(hours = .data$dose_start_hours) %>%
+    data.table::data.table()
+  biomarkersDT <- data.table::data.table(biomarkers_data)
+  .datatable.aware = TRUE
+  data.table::setkeyv(biomarkersDT, c('subject_id', 'hours'))
+  data.table::setkeyv(dosesDT, c('subject_id', 'hours'))
+  # for each PK measurement, identify the dose immediately preceding it
+  prior_dose <- dosesDT[biomarkersDT, roll = T]
+  # also identify the next dose (first dose at or after the measurement time)
+  next_dose <- dosesDT[biomarkersDT, roll = -Inf]
+
+  ## construct final data frame:
+  # for measurements with a preceding dose, use this as the "closest dose"
+  with_prior_dose <- prior_dose |>
+    dplyr::filter(!is.na(.data$dose_dose_id))
+  # otherwise (no dose at or before the measurement), fall back to the next dose
+  no_prior_dose <- next_dose |>
+    dplyr::anti_join(with_prior_dose, by = 'measurement_id')
+  merged_data <- dplyr::bind_rows(with_prior_dose,
+                           no_prior_dose) |>
+    dplyr::arrange(subject_id, hours) |>
+    dplyr::mutate(hours_since_SDA = hours - dose_start_hours)
+
+
+  # merged_data <- rolling_join(biomarkers_data,
+  #                             dose_data_renamed,
+  #                             by = 'subject_id',
+  #                             on = 'hours',
+  #                             direction = 'reverse',
+  #                             how = 'left',
+  #                             suffix = c('', '.dose')) %>%
+  #   dplyr::select(-.data$hours.dose)
   if (nrow(merged_data) != nrow(biomarkers_data)) {
     futile.logger::flog.warn(glue::glue("Number of records in biomarkers data changed after join, from {nrow(biomarkers_data)} to {nrow(merged_data)}."))
   }
-  pkpd_data <- annotate_pkpd_data(merged_data, pd_measure = pd_measure, pk_measure = pk_measure)
-  if (nrow(pkpd_data) != nrow(biomarkers_data)) {
-    futile.logger::flog.warn(glue::glue("Number of records in biomarkers data changed after annotation, from {nrow(biomarkers_data)} to {nrow(pkpd_data)}."))
-  }
-  pkpd_data
+  merged_data
 }
 
 #' @importFrom rlang !!

From 198ea71b8a1717bfc8700b4c6153742774f76a7b Mon Sep 17 00:00:00 2001
From: Jacqueline Buros <jackinovik@gmail.com>
Date: Thu, 25 Jan 2024 12:11:26 -0500
Subject: [PATCH 2/2] update doc

---
 NAMESPACE         |  2 ++
 R/rgeco-package.R |  1 -
 R/utils_pkpd.R    |  5 +++--
 man/re-exports.Rd | 14 +++++---------
 man/rgeco.Rd      |  9 +++++++++
 5 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 798599e..29899d1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -40,6 +40,8 @@ import(httr)
 importFrom(RJSONIO,fromJSON)
 importFrom(boot,inv.logit)
 importFrom(broom,tidy)
+importFrom(data.table,data.table)
+importFrom(data.table,setkeyv)
 importFrom(dplyr,arrange)
 importFrom(dplyr,desc)
 importFrom(dplyr,distinct)
diff --git a/R/rgeco-package.R b/R/rgeco-package.R
index 0658fc1..7ef42d0 100644
--- a/R/rgeco-package.R
+++ b/R/rgeco-package.R
@@ -66,7 +66,6 @@
 #'
 #' @docType package
 #' @name rgeco
-NULL
 
 xarray <- NULL
 
diff --git a/R/utils_pkpd.R b/R/utils_pkpd.R
index 7615c25..f7fb79f 100644
--- a/R/utils_pkpd.R
+++ b/R/utils_pkpd.R
@@ -18,6 +18,8 @@ fetch_pkpd <- function(project = NULL, project_version_id = NULL, pd_measure = N
   pkpd <- prep_pkpd_data(biomarkers_data = b, dose_data = d, pd_measure = pd_measure, pk_measure = pk_measure)
 }
 
+.datatable.aware = TRUE
+
 #' Merge and annotate pkpd biomarkers data with dosing data
 #' returns a data.frame suitable for plotting and analysis.
 #' @param biomarkers_data data.frame containing biomarkers data
@@ -25,7 +27,7 @@ fetch_pkpd <- function(project = NULL, project_version_id = NULL, pd_measure = N
 #' @param pk_measure measurement_name of PK measurement (defaults to 'conc', NULL indicates no PK marker)
 #' @param pd_measure measurement_name of PD measurement (defaults to NULL - no PD marker)
 #' @return data.frame containing merged biomarker & dose data for the PK & PD parameter selected, with columns annotating cycles, time since last SDA, and measurement type.
-#' @import data.table
+#' @importFrom data.table setkeyv data.table
 #' @export
 prep_pkpd_data <- function(biomarkers_data, dose_data, pd_measure = NULL, pk_measure = NULL) {
   if (nrow(dose_data) == 0) {
@@ -46,7 +48,6 @@ prep_pkpd_data <- function(biomarkers_data, dose_data, pd_measure = NULL, pk_mea
     dplyr::mutate(hours = .data$dose_start_hours) %>%
     data.table::data.table()
   biomarkersDT <- data.table::data.table(biomarkers_data)
-  .datatable.aware = TRUE
   data.table::setkeyv(biomarkersDT, c('subject_id', 'hours'))
   data.table::setkeyv(dosesDT, c('subject_id', 'hours'))
   # for each PK measurement, identify the dose immediately preceding it
diff --git a/man/re-exports.Rd b/man/re-exports.Rd
index 4922c38..311e361 100644
--- a/man/re-exports.Rd
+++ b/man/re-exports.Rd
@@ -1,16 +1,12 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/utils.R
-\docType{import}
 \name{re-exports}
 \alias{re-exports}
 \alias{flog.logger}
 \title{Re-exports}
-\keyword{internal}
+\usage{
+flog.logger(...)
+}
 \description{
-These objects are imported from other packages. Follow the links
-below to see their documentation.
-
-\describe{
-  \item{futile.logger}{\code{\link[futile.logger]{flog.logger}}}
-}}
-
+Re-exports
+}
diff --git a/man/rgeco.Rd b/man/rgeco.Rd
index f6294cb..214a386 100644
--- a/man/rgeco.Rd
+++ b/man/rgeco.Rd
@@ -2,8 +2,16 @@
 % Please edit documentation in R/rgeco-package.R
 \docType{package}
 \name{rgeco}
+\alias{-package}
 \alias{rgeco}
+\alias{xarray}
 \title{rgeco: R package for accessing the Generable API}
+\format{
+An object of class \code{python.builtin.module} (inherits from \code{python.builtin.object}) of length 0.
+}
+\usage{
+xarray
+}
 \description{
 The Generable API is a HTTP REST API that provides access
 to data and inferences. The rgeco package provides convenient
@@ -75,3 +83,4 @@ Key methods for working with inferences are:
 }
 }
 
+\keyword{datasets}