Skip to content

Commit

Permalink
Merge pull request #124 from generable/dev
Browse files Browse the repository at this point in the history
Use httpcache
  • Loading branch information
jburos authored Oct 9, 2024
2 parents 18c9e5e + e509cf4 commit eb45be7
Show file tree
Hide file tree
Showing 25 changed files with 489 additions and 78 deletions.
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: rgeco
Type: Package
Title: R package for accessing the Generable API
Version: 0.0.7
Version: 0.0.7-9000
Author: Generable Inc
Maintainer: Jacqueline Buros <[email protected]>
Description: The Generable API is a HTTP REST API that provides access to data and inferences. The rgeco package provides convenient access to the information in the API in an R-friendly format.
Expand Down Expand Up @@ -34,7 +34,10 @@ Imports:
table1,
reticulate,
boot,
cli
cli,
data.table,
httpcache,
keyring
Suggests:
testthat,
knitr,
Expand All @@ -43,5 +46,5 @@ Suggests:
scales,
tidyverse,
utils
RoxygenNote: 7.2.0
RoxygenNote: 7.3.1
VignetteBuilder: knitr
16 changes: 15 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

export(configure)
export(drop_configs)
export(extract_subsample_info)
export(fetch_aes)
export(fetch_association_state)
export(fetch_biomarker_params)
export(fetch_biomarkers)
Expand All @@ -23,6 +26,7 @@ export(find_runs)
export(flog.logger)
export(format_quantiles_as_widths)
export(list_biomarker_names)
export(list_configs)
export(list_datasets)
export(list_models)
export(list_parameter_names)
Expand All @@ -36,10 +40,12 @@ export(prep_pkpd_data)
export(sample_groups)
import(checkmate)
import(cli)
import(httr)
import(keyring)
importFrom(RJSONIO,fromJSON)
importFrom(boot,inv.logit)
importFrom(broom,tidy)
importFrom(data.table,data.table)
importFrom(data.table,setkeyv)
importFrom(dplyr,arrange)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
Expand All @@ -49,7 +55,15 @@ importFrom(dplyr,select)
importFrom(dplyr,semi_join)
importFrom(futile.logger,flog.logger)
importFrom(glue,glue_safe)
importFrom(httpcache,GET)
importFrom(httpcache,POST)
importFrom(httr,add_headers)
importFrom(httr,content)
importFrom(httr,http_error)
importFrom(httr,modify_url)
importFrom(httr,status_code)
importFrom(httr,user_agent)
importFrom(keyring,backend_file)
importFrom(lubridate,ymd_hms)
importFrom(magrittr,"%>%")
importFrom(rlang,"!!!")
Expand Down
75 changes: 75 additions & 0 deletions R/geco_aes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@

#' Fetch AE data from the Generable API
#'
#' Fetch AE data from the Generable API for a specific project.
#'
#' This function retrieves AE data from the Generable API.
#' It requires authentication (see \code{\link{login}}) prior to use
#' and this pulls data from the Generable API.
#'
#' @note
#' A project can be specified by using the project name or a specific project version.
#' \enumerate{
#' \item If a project is specified using the name, data is fetched for the latest version of the project.
#' \item If a project is specified using the project version, the project name is not required.
#' \item If neither a project nor a project version is provided, the default project or project version is used. These are set by the environment variables GECO_API_PROJECT and GECO_API_PROJECT_VERSION
#' }
#'
#' @param project Project name. If NULL, defaults to value of environment variable GECO_API_PROJECT
#' @param project_version_id Project version. If NULL, defaults to the most recent version of the project if provided, or the value of environment variable GECO_API_PROJECT_VERSION
#' @param event_type Limits the event_types to the names provided. NULL is unfiltered. Default is NULL.
#' @param ... Optional filters applied to events data, provided as name-value pairs to limit returned values.
#' Example: trial_id = unique(subjects$trial_id)
#' @return data.frame with one record per subject and event type
#' @export
fetch_aes <- function(project = NULL, project_version_id = NULL, event_type = NULL,
serious_event_flag = NULL, ...) {
where <- rlang::list2(...)
pv_id <- .process_project_inputs(project = project, project_version_id = project_version_id)
if (!is.null(event_type)) {
where <- .update_filter(where, event_type = event_type)
}
if (!is.null(serious_event_flag)) {
where <- .update_filter(where, serious_event_flag = serious_event_flag)
}
events <- .fetch_ae_data(project_version_id = pv_id, where = where)
if (nrow(events) == 0 && !is.null(project)) {
futile.logger::flog.info(glue::glue('No ae information available for this version of project {project} data.'))
} else if (nrow(events) == 0) {
futile.logger::flog.debug(glue::glue('No ae information available for this project_version_id: {project_version_id}.'))
}
events
}


#' @importFrom rlang .data
pivot_ae_wider <- function(.d) {
tidyr::pivot_wider(.d,
id_cols = c(.data$subject_id),
names_from = c(.data$event_type),
values_from = c(.data$event_flag, .data$event_trial_day, .data$event_id, .data$event_created_at),
names_glue = "{event_type}_{.value}")
}


#' @importFrom magrittr %>%
.fetch_ae_data <- function(project = NULL, project_version_id = NULL, where = list()) {
pv_id <- .process_project_inputs(project = project, project_version_id = project_version_id)
filter <- .prepare_filter(where, endpoint = 'AES')
events <- geco_api(AES, project_version_id = pv_id, url_query_parameters = filter)
d <- as_dataframe.geco_api_data(events, flatten_names = c('params'))
if ('params' %in% names(d)) {
d <- d %>%
tidyr::unnest_wider(.data$params, names_repair = 'universal')
}
d <- .apply_filters(d, where)
suppressWarnings({
d <- d %>%
dplyr::rename_at(.vars = dplyr::vars(-dplyr::one_of(c('subject_id'))),
.funs = ~ stringr::str_c('ae_', .x))
})
d
}



169 changes: 161 additions & 8 deletions R/geco_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ PROJECTVERSIONS <- 'data/project/{project}/projectversions'
PROJECTS <- 'data/projects'
LESIONS <- 'data/projectversion/{project_version_id}/lesions'
LESIONTV <- 'data/projectversion/{project_version_id}/lesiontvs'
AES <- 'data/projectversion/{project_version_id}/ae'

# ---- inference api endpoints ----
IDATA <- 'inferences/projectversion/{project_version_id}/dataset/attributes'
Expand Down Expand Up @@ -56,6 +57,20 @@ geco_api_url <- function(..., project = NULL, project_version_id = NULL, run_id=
glue::glue_safe(url)
}

#' Never cache certain endpoints
.clean_cache <- function() {
nocache_endpoints <- c(
LOGIN,
PROJECTVERSIONS,
PROJECTS,
IDATA,
IMODELS,
IRUNS
)
nocache_endpoints |>
purrr::walk(httpcache::dropOnly)
}

#' Log in to the Generable API
#'
#' This function logs the user into the Generable API.
Expand All @@ -72,20 +87,155 @@ geco_api_url <- function(..., project = NULL, project_version_id = NULL, run_id=
#' @param password User password. If not provided, will read the `GECO_API_PASSWORD` environment variable.
#' @return The OAuth 2.0 Bearer Token for the Generable API
#' @export
login <- function(user, password) {
if (missing(user)) {
user <- Sys.getenv('GECO_API_USER')
login <- function(user, password, host) {
url <- .get_url(host)
if (missing(password) || missing(user)) {
use_keyring <- Sys.getenv('GECO_API_NO_KEYRING') == ""
if (!use_keyring) {
user <- Sys.getenv('GECO_API_USER')
password <- Sys.getenv('GECO_API_PASSWORD')
} else {
# get credentials from keyring
creds <- .get_credentials(host = host, user = user)
user = creds[1]
password = creds[2]
}
}
if (missing(password)) {
password <- Sys.getenv('GECO_API_PASSWORD')
if (is.null(user)) {
cli::cli_inform('Note: credential storage has changed; please run `configure()` to migrate to the new storage.')
}
body <- list(email = user, password = password)
resp <- geco_api(LOGIN, body = body, encode = 'json', method = 'POST')
ENV$.GECO_AUTH <- resp$content
invisible(resp$content)
}

#' Configure Geco credentials, saving in keyring
#' @param user Geco user name (email address). Defaults to GECO_API_USER environment variable
#' @param password Geco password (will prompt if not provided). Defaults to GECO_API_PASSWORD environment variable
#' @param host (optional) alternate host for API, only used for testing
#' @import keyring
#' @export
configure <- function(user, password, host) {
cli::cli_inform('Configuring credentials for Geco API')
url <- .get_url(host)
if (missing(user)) {
cli::cli_inform('Reading username from environment variable: GECO_API_USER')
user <- Sys.getenv('GECO_API_USER', unset = '')
if (user == '') {
stop('Username not provided.')
}
}
if (missing(password)) {
cli::cli_inform('Reading password from environment variable: GECO_API_PASSWORD')
password <- Sys.getenv('GECO_API_PASSWORD', unset = '')
cli::cli_inform('GECO_API_PASSWORD not found. Prompting you for your geco password.')
if (password == '' && interactive()) {
password <- rstudioapi::askForPassword(prompt = 'Please enter your geco password.')
}
}
cli::cli_inform('Attempting to log in ...')
res <- tryCatch(login(user=user, password = password, host=host))
if (inherits(res, 'try-error')) {
cli::cli_alert_warning('Failed to authenticate.')
} else {
cli::cli_alert_success('Success!')
if (keyring::has_keyring_support() && interactive() && askYesNo("Do you want to save credentials in your keyring?")) {
keyring::key_set_with_value(service = .get_keyring_service(),
username = user, password = password)
cli::cli_alert_success('Credentials saved.')
} else {
cli::cli_inform('Populating credentials in environment variables.')
Sys.setenv('GECO_API_USER'=user)
Sys.setenv('GECO_API_PASSWORD'=password)
Sys.setenv('GECO_API_URL'=url)
}
}
}

#' List saved configurations
#' @param host Host identifier, defaults to 'geco'
#' @export
list_configs <- function(host='geco') {
service <- .get_keyring_service(host)
key_list(service)
}

#' Drop saved configurations
#' Warning! this will remove all saved configurations from the host.
#' @param host Host identifier, defaults to 'geco'
#' @param user Optional username, provided as a string.
#' @seealso [list_configs()]
#' @export
drop_configs <- function(user, host='geco') {
service <- .get_keyring_service(host)
keys <- key_list(service)
if (!missing(user)) {
keys <- keys %>%
filter(username == !!user)
}
if (nrow(keys) == 0) {
cli::cli_alert_info('No keys found.')
} else {
cli::cli_alert_warning('This will drop _ALL_ saved configs listed.')
print(keys)
confirm <- askYesNo("Do you want to drop these configs from your keyring?",
default = FALSE)
if (!is.na(confirm) && isTRUE(confirm)) {
keys %>%
dplyr::pull(username) %>%
purrr::walk(~ keyring::key_delete(service=service, username = .))
}
}
}


.get_keyring_service <- function(host) {
url <- .get_url(host)
stringr::str_c('R-GECO_API', url, sep = '-')
}
.get_url <- function(host) {
if (missing(host)) {
url <- Sys.getenv('GECO_API_URL', unset = 'https://geco.generable.com')
} else {
url <- glue::glue('https://{host}.generable.com')
Sys.setenv('GECO_API_URL' = url)
}
url
}

# returns vector as username, password
.get_credentials <- function(host, user) {
if (!keyring::has_keyring_support()) {
if (missing(user)) {
user <- Sys.getenv('GECO_API_USER')
}
password <- Sys.getenv('GECO_API_PASSWORD')
return(c(user, password))
}
# get user & reconcile with keychain
service <- .get_keyring_service(host)
keys <- key_list(service)
if (missing(user)) {
if (nrow(keys) == 1) {
user <- unique(keys$username)
} else {
user <- Sys.getenv('GECO_API_USER', unset = 'null')
if (is.null(user)) {
stop("Multiple users configured; please set default user with GECO_API_USER environment variable")
}
}
} else if (user %in% keys$username) {
Sys.setenv('GECO_API_USER' = user)
} else {
stop('User does not exist in keyring; please run `configure()`')
}
# get password
password <- key_get(service, username = user)
return(c(user, password))
}

#' @importFrom httr add_headers
get_auth <- function() {
if (!exists(envir = ENV, '.GECO_AUTH')) {
futile.logger::flog.error('Not logged in. Use `login(user, password)` to login.')
Expand All @@ -94,23 +244,26 @@ get_auth <- function() {
httr::add_headers(.headers = unlist(ENV$.GECO_AUTH))
}

#' @import httr
#' @importFrom httr user_agent http_error content status_code
#' @importFrom httpcache GET POST
#' @importFrom RJSONIO fromJSON
geco_api <- function(path, ..., method = c('GET', 'POST'), project = NULL, project_version_id = NULL, run_id=NULL, type=NULL, parameter=NULL, filters=NULL, url_query_parameters=NULL) {
url <- geco_api_url(path, project = project, project_version_id = project_version_id, run_id=run_id, type=type, parameter=parameter, filters=filters, url_query_parameters=url_query_parameters)

ua <- httr::user_agent("https://github.com/generable/rgeco")
.clean_cache()

method <- match.arg(method, several.ok = FALSE)
if (method == 'GET')
resp <- try(httr::GET(url, ..., get_auth(), ua))
resp <- try(httpcache::GET(url, ..., get_auth(), ua))
else if (method == 'POST')
resp <- try(httr::POST(url, ..., ua))
resp <- try(httpcache::POST(url, ..., ua))
#if (httr::http_type(resp) != "application/json") {
# stop("API did not return json", call. = FALSE)
#}
if (inherits(resp, 'try-error')) {
stop(glue::glue("Error connecting to API: {url} {print(resp)}"))
httpcache::dropOnly(url)
}

parsed <- try(RJSONIO::fromJSON(httr::content(resp, "text", encoding = 'UTF-8'), simplify = FALSE), silent = T)
Expand Down
2 changes: 1 addition & 1 deletion R/geco_biomarkers.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ fetch_biomarkers <- function(project = NULL, project_version_id = NULL, measurem
biomarkers <- geco_api(TIMEVARYING, project_version_id = pv_id, url_query_parameters = filters)
b <- as_dataframe.geco_api_data(biomarkers, flatten_names = 'params')
if (nrow(b) > 0 && 'params' %in% names(b) && isTRUE(annotate)) {
b <- b %>% tidyr::unnest_wider(.data$params)
b <- b %>% tidyr::unnest_wider(.data$params, names_repair = 'universal')
}
suppressWarnings({
b <- b %>%
Expand Down
Loading

0 comments on commit eb45be7

Please sign in to comment.