From a754281db60e7f863aaa11e8ff655b7bb9631e62 Mon Sep 17 00:00:00 2001 From: Jacqueline Buros Date: Thu, 25 Jan 2024 12:08:32 -0500 Subject: [PATCH 1/2] use data.table for rolling joins --- DESCRIPTION | 5 ++-- R/utils_pkpd.R | 62 ++++++++++++++++++++++++++++---------------------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 30700bd..4a62775 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,8 @@ Imports: table1, reticulate, boot, - cli + cli, + data.table Suggests: testthat, knitr, @@ -43,5 +44,5 @@ Suggests: scales, tidyverse, utils -RoxygenNote: 7.2.0 +RoxygenNote: 7.3.1 VignetteBuilder: knitr diff --git a/R/utils_pkpd.R b/R/utils_pkpd.R index dee33c0..7615c25 100644 --- a/R/utils_pkpd.R +++ b/R/utils_pkpd.R @@ -25,6 +25,7 @@ fetch_pkpd <- function(project = NULL, project_version_id = NULL, pd_measure = N #' @param pk_measure measurement_name of PK measurement (defaults to 'conc', NULL indicates no PK marker) #' @param pd_measure measurement_name of PD measurement (defaults to NULL - no PD marker) #' @return data.frame containing merged biomarker & dose data for the PK & PD parameter selected, with columns annotating cycles, time since last SDA, and measurement type. +#' @import data.table #' @export prep_pkpd_data <- function(biomarkers_data, dose_data, pd_measure = NULL, pk_measure = NULL) { if (nrow(dose_data) == 0) { @@ -40,37 +41,44 @@ prep_pkpd_data <- function(biomarkers_data, dose_data, pd_measure = NULL, pk_mea if (!'start_hours' %in% names(dose_data)) { stop('dose_data does not have start_hours data. Cannot prepare pkpd data without a formatted start time.') } - dose_data_renamed <- dose_data %>% + dosesDT <- dose_data %>% dplyr::rename_at(.vars = dplyr::vars(-.data$subject_id, -.data$drug), .funs = ~ stringr::str_c('dose_', .x)) %>% - dplyr::mutate(hours = .data$dose_start_hours) - if ('collection_timepoint' %in% names(biomarkers_data)) { - merged_data <- biomarkers_data %>% - dplyr::mutate(.dir = dplyr::if_else(.data$collection_timepoint == 'Pre-infusion', 'forward', 'reverse')) %>% - rolling_join(dose_data_renamed, - by = 'subject_id', - on = 'hours', - direction_field = '.dir', - how = 'left', - suffix = c('', '.dose')) %>% - dplyr::select(-.data$hours.dose, -.data$.dir) - } else { - merged_data <- rolling_join(biomarkers_data, - dose_data_renamed, - by = 'subject_id', - on = 'hours', - direction = 'reverse', - how = 'left', - suffix = c('', '.dose')) %>% - dplyr::select(-.data$hours.dose) - } + dplyr::mutate(hours = .data$dose_start_hours) %>% + data.table::data.table() + biomarkersDT <- data.table::data.table(biomarkers_data) + .datatable.aware = TRUE + data.table::setkeyv(biomarkersDT, c('subject_id', 'hours')) + data.table::setkeyv(dosesDT, c('subject_id', 'hours')) + # for each PK measurement, identify the dose immediately preceding it + prior_dose <- dosesDT[biomarkersDT, roll = T] + # also identify the next dose + next_dose <- dosesDT[biomarkersDT, roll = -Inf] + + ## construct final data frame: + # for measurements with a preceding dose, use this as the "closest dose" + with_prior_dose <- prior_dose |> + dplyr::filter(!is.na(.data$dose_dose_id)) + # otherwise, use next dose + no_prior_dose <- next_dose |> + dplyr::anti_join(with_prior_dose, by = 'measurement_id') + merged_data <- dplyr::bind_rows(with_prior_dose, + no_prior_dose) |> + dplyr::arrange(subject_id, hours) |> + dplyr::mutate(hours_since_SDA = hours - dose_start_hours) + + + # merged_data <- rolling_join(biomarkers_data, + # dose_data_renamed, + # by = 'subject_id', + # on = 'hours', + # direction = 'reverse', + # how = 'left', + # suffix = c('', '.dose')) %>% + # dplyr::select(-.data$hours.dose) if (nrow(merged_data) != nrow(biomarkers_data)) { futile.logger::flog.warn(glue::glue("Number of records in biomarkers data changed after join, from {nrow(biomarkers_data)} to {nrow(merged_data)}.")) } - pkpd_data <- annotate_pkpd_data(merged_data, pd_measure = pd_measure, pk_measure = pk_measure) - if (nrow(pkpd_data) != nrow(biomarkers_data)) { - futile.logger::flog.warn(glue::glue("Number of records in biomarkers data changed after annotation, from {nrow(biomarkers_data)} to {nrow(pkpd_data)}.")) - } - pkpd_data + merged_data } #' @importFrom rlang !! From 198ea71b8a1717bfc8700b4c6153742774f76a7b Mon Sep 17 00:00:00 2001 From: Jacqueline Buros Date: Thu, 25 Jan 2024 12:11:26 -0500 Subject: [PATCH 2/2] update doc --- NAMESPACE | 2 ++ R/rgeco-package.R | 1 - R/utils_pkpd.R | 5 +++-- man/re-exports.Rd | 14 +++++--------- man/rgeco.Rd | 9 +++++++++ 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 798599e..29899d1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,8 @@ import(httr) importFrom(RJSONIO,fromJSON) importFrom(boot,inv.logit) importFrom(broom,tidy) +importFrom(data.table,data.table) +importFrom(data.table,setkeyv) importFrom(dplyr,arrange) importFrom(dplyr,desc) importFrom(dplyr,distinct) diff --git a/R/rgeco-package.R b/R/rgeco-package.R index 0658fc1..7ef42d0 100644 --- a/R/rgeco-package.R +++ b/R/rgeco-package.R @@ -66,7 +66,6 @@ #' #' @docType package #' @name rgeco -NULL xarray <- NULL diff --git a/R/utils_pkpd.R b/R/utils_pkpd.R index 7615c25..f7fb79f 100644 --- a/R/utils_pkpd.R +++ b/R/utils_pkpd.R @@ -18,6 +18,8 @@ fetch_pkpd <- function(project = NULL, project_version_id = NULL, pd_measure = N pkpd <- prep_pkpd_data(biomarkers_data = b, dose_data = d, pd_measure = pd_measure, pk_measure = pk_measure) } +.datatable.aware = TRUE + #' Merge and annotate pkpd biomarkers data with dosing data #' returns a data.frame suitable for plotting and analysis. #' @param biomarkers_data data.frame containing biomarkers data @@ -25,7 +27,7 @@ fetch_pkpd <- function(project = NULL, project_version_id = NULL, pd_measure = N #' @param pk_measure measurement_name of PK measurement (defaults to 'conc', NULL indicates no PK marker) #' @param pd_measure measurement_name of PD measurement (defaults to NULL - no PD marker) #' @return data.frame containing merged biomarker & dose data for the PK & PD parameter selected, with columns annotating cycles, time since last SDA, and measurement type. -#' @import data.table +#' @importFrom data.table setkeyv data.table #' @export prep_pkpd_data <- function(biomarkers_data, dose_data, pd_measure = NULL, pk_measure = NULL) { if (nrow(dose_data) == 0) { @@ -46,7 +48,6 @@ prep_pkpd_data <- function(biomarkers_data, dose_data, pd_measure = NULL, pk_mea dplyr::mutate(hours = .data$dose_start_hours) %>% data.table::data.table() biomarkersDT <- data.table::data.table(biomarkers_data) - .datatable.aware = TRUE data.table::setkeyv(biomarkersDT, c('subject_id', 'hours')) data.table::setkeyv(dosesDT, c('subject_id', 'hours')) # for each PK measurement, identify the dose immediately preceding it diff --git a/man/re-exports.Rd b/man/re-exports.Rd index 4922c38..311e361 100644 --- a/man/re-exports.Rd +++ b/man/re-exports.Rd @@ -1,16 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R -\docType{import} \name{re-exports} \alias{re-exports} \alias{flog.logger} \title{Re-exports} -\keyword{internal} +\usage{ +flog.logger(...) +} \description{ -These objects are imported from other packages. Follow the links -below to see their documentation. - -\describe{ - \item{futile.logger}{\code{\link[futile.logger]{flog.logger}}} -}} - +Re-exports +} diff --git a/man/rgeco.Rd b/man/rgeco.Rd index f6294cb..214a386 100644 --- a/man/rgeco.Rd +++ b/man/rgeco.Rd @@ -2,8 +2,16 @@ % Please edit documentation in R/rgeco-package.R \docType{package} \name{rgeco} +\alias{-package} \alias{rgeco} +\alias{xarray} \title{rgeco: R package for accessing the Generable API} +\format{ +An object of class \code{python.builtin.module} (inherits from \code{python.builtin.object}) of length 0. +} +\usage{ +xarray +} \description{ The Generable API is a HTTP REST API that provides access to data and inferences. The rgeco package provides convenient @@ -75,3 +83,4 @@ Key methods for working with inferences are: } } +\keyword{datasets}