From c032a87a0d05e7b31a4f43c4312c662b2c4a1a89 Mon Sep 17 00:00:00 2001 From: Zac Davies <80654433+zacdav-db@users.noreply.github.com> Date: Thu, 14 Nov 2024 11:31:58 +1100 Subject: [PATCH] CRAN submission changes (#70) * adjusting URLs * updating Rd files * trying to be more specific for interactive session detection. * further improving tests * add error case for auth * fixing tests * adjusting dbfs tests to clean up after themselves better * Further adjustments of tests to use withr * Change to native pipe to drop magrittr dependency * Remove dependency on data.table * simplify multipart body code * Fixing REPL error and removing test that may cause CRAN issues --------- Co-authored-by: Zac Davies --- DESCRIPTION | 6 +- NAMESPACE | 1 - NOTICE | 10 --- R/clusters.R | 18 ++--- R/connection-pane.R | 6 +- R/data-structures.R | 2 +- R/dbfs.R | 12 +-- R/execution-context.R | 21 +++--- R/experiments.R | 2 +- R/feature-store.R | 4 +- R/misc-helpers.R | 6 +- R/package-auth.R | 34 +++++++-- R/request-helpers.R | 25 +++--- R/unity-catalog.R | 32 ++++---- R/vector-search.R | 4 +- R/volume-fs.R | 10 +-- R/workspaces.R | 9 +-- man/s3_storage_info.Rd | 2 +- tests/testthat/test-auth.R | 96 ++++++++++++------------ tests/testthat/test-clusters.R | 6 +- tests/testthat/test-dbfs.R | 38 ++++++---- tests/testthat/test-execution-contexts.R | 7 +- tests/testthat/test-experiments.R | 7 +- tests/testthat/test-feature-store.R | 8 +- tests/testthat/test-jobs.R | 7 +- tests/testthat/test-libraries.R | 7 +- tests/testthat/test-misc-helpers.R | 7 +- tests/testthat/test-mlflow-dbrx.R | 7 +- tests/testthat/test-repl.R | 6 +- tests/testthat/test-repos.R | 7 +- tests/testthat/test-secrets.R | 7 +- tests/testthat/test-sql-connector.R | 7 +- tests/testthat/test-sql-execution.R | 7 +- tests/testthat/test-unity-catalog.R | 7 +- tests/testthat/test-vector-search.R | 6 +- tests/testthat/test-volumes.R | 7 +- tests/testthat/test-warehouses.R | 7 +- tests/testthat/test-workspace-folder.R | 7 +- vignettes/cluster-management.Rmd | 2 +- vignettes/managing-jobs.Rmd | 2 +- vignettes/remote-repl.Rmd | 28 +++---- vignettes/setup-auth.Rmd | 2 +- 42 files changed, 279 insertions(+), 217 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3b5e30a..5a5dfec 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,18 +20,18 @@ Description: Collection of utilities that improve using 'Databricks' from R. 
License: Apache License (>= 2) Encoding: UTF-8 LazyData: true +Depends: + R (>= 4.1.0) Imports: arrow, base64enc, cli, curl, - data.table, dplyr, glue, httr2, ini, jsonlite, - magrittr, purrr, reticulate, R6 (>= 2.4.0), @@ -42,12 +42,10 @@ Suggests: testthat (>= 3.0.0), huxtable, htmltools, - htmlwidgets, knitr, magick, rmarkdown, rstudioapi, - rvest, withr Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 801160f..6a7afb5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -220,7 +220,6 @@ import(cli) import(httr2) import(tibble) importFrom(glue,glue) -importFrom(magrittr,`%>%`) importFrom(rlang,.data) importFrom(stats,setNames) importFrom(utils,object.size) diff --git a/NOTICE b/NOTICE index 9ea2598..d49f27a 100644 --- a/NOTICE +++ b/NOTICE @@ -22,9 +22,6 @@ Copyright 2021 httr2 authors jeroen/jsonlite - https://github.com/jeroen/jsonlite Copyright 2020 Jeroen Ooms -tidyverse/magrittr - https://github.com/tidyverse/magrittr -Copyright 2023 magrittr authors - tidyverse/purrr - https://github.com/tidyverse/purrr Copyright 2023 purrr authors @@ -41,13 +38,6 @@ rstudio/rstudionapi - https://github.com/rstudio/rstudioapi Copyright 2015 RStudio tidyverse/rvest - https://github.com/tidyverse/rvest Copyright 2023 rvest authors - -__________ -This Software contains code from the following open source projects, licensed under the MPL-2 license: - -Rdatatable/data.table - https://github.com/Rdatatable/data.table -Copyright data.table authors - __________ This Software contains code from the following open source projects, licensed under the GPL-2/GPL-3 licenses: diff --git a/R/clusters.R b/R/clusters.R index 4ae2cc5..cf4fb47 100644 --- a/R/clusters.R +++ b/R/clusters.R @@ -325,7 +325,7 @@ db_cluster_action <- function(cluster_id, token = token ) - req <- req %>% + req <- req |> httr2::req_body_json(body) if (perform_request) { @@ -558,12 +558,12 @@ db_cluster_get <- function(cluster_id, token = token ) - req <- req %>% + req <- req |> httr2::req_body_json(body) if (perform_request) { - req %>% - httr2::req_perform() %>% + req |> + httr2::req_perform() |> httr2::resp_body_json() } else { req @@ -872,26 +872,26 @@ get_latest_dbr <- function(lts, ml, gpu, photon, runtimes <- db_cluster_runtime_versions(host = host, token = token) - runtimes_adj <- runtimes[[1]] %>% + runtimes_adj <- runtimes[[1]] |> purrr::map_dfr(function(x) { list(key = x[["key"]], name = x[["name"]]) - }) %>% + }) |> dplyr::mutate( version = as.numeric(gsub("^(\\d+\\.\\d)\\..*", "\\1", .data$key)), lts = grepl("LTS", .data$name), ml = grepl("ml", .data$key), gpu = grepl("gpu", .data$key), photon = grepl("photon", .data$key), - ) %>% + ) |> dplyr::arrange(dplyr::desc(version)) - runtime_matches <- runtimes_adj %>% + runtime_matches <- runtimes_adj |> dplyr::filter( .data$lts == {{lts}}, .data$ml == {{ml}}, .data$gpu == {{gpu}}, .data$photon == {{photon}} - ) %>% + ) |> dplyr::slice_head(n = 1) list( diff --git a/R/connection-pane.R b/R/connection-pane.R index 52b425d..bef0880 100644 --- a/R/connection-pane.R +++ b/R/connection-pane.R @@ -140,7 +140,7 @@ get_uc_model_versions <- function(catalog, schema, model, host, token, aliases <- purrr::map( model_info$aliases, ~{ setNames(.x$version_num, .x$alias_name) - }) %>% + }) |> unlist() version_names <- purrr::map_chr(versions, function(x) { @@ -281,8 +281,8 @@ get_schema_objects <- function(catalog, schema, host, token) { # how many objects of each type exist # only show when objects exist within - sizes <- purrr::map_int(objects, nrow) %>% - 
purrr::keep(~.x > 0) %>% + sizes <- purrr::map_int(objects, nrow) |> + purrr::keep(~.x > 0) |> purrr::imap_chr(~ glue::glue("{.y} ({.x})")) data.frame( diff --git a/R/data-structures.R b/R/data-structures.R index aa3bc91..16ae48e 100644 --- a/R/data-structures.R +++ b/R/data-structures.R @@ -315,7 +315,7 @@ is.file_storage_info <- function(x) { #' `bucket-owner-full-control`. If `canned_acl` is set, the cluster instance #' profile must have `s3:PutObjectAcl` permission on the destination bucket and #' prefix. The full list of possible canned ACLs can be found in -#' [docs](https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl). +#' [docs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl). #' By default only the object owner gets full control. If you are using cross #' account role for writing data, you may want to set #' `bucket-owner-full-control` to make bucket owner able to read the logs. diff --git a/R/dbfs.R b/R/dbfs.R index 91e95b2..86aa287 100644 --- a/R/dbfs.R +++ b/R/dbfs.R @@ -454,14 +454,10 @@ db_dbfs_put <- function(path, file = NULL, contents = NULL, overwrite = FALSE, ) if (perform_request) { - req %>% - httr2::req_body_multipart( - path = body$path, - contents = body$contents, - overwrite = body$overwrite - ) %>% - httr2::req_error(body = db_req_error_body) %>% - httr2::req_perform() %>% + req |> + httr2::req_body_multipart(!!!body) |> + httr2::req_error(body = db_req_error_body) |> + httr2::req_perform() |> httr2::resp_body_json() } else { req diff --git a/R/execution-context.R b/R/execution-context.R index 12f5a9c..7d0df49 100644 --- a/R/execution-context.R +++ b/R/execution-context.R @@ -98,7 +98,7 @@ db_context_status <- function(cluster_id, token = token ) - req <- req %>% + req <- req |> httr2::req_url_query( clusterId = cluster_id, contextId = context_id ) @@ -254,7 +254,7 @@ db_context_command_status <- function(cluster_id, token = token ) - req <- req %>% + req <- req |> httr2::req_url_query( clusterId = cluster_id, contextId = context_id, @@ -292,7 +292,7 @@ db_context_command_cancel <- function(cluster_id, token = token ) - req <- req %>% + req <- req |> httr2::req_url_query( clusterId = cluster_id, contextId = context_id, @@ -327,13 +327,16 @@ db_context_command_parse <- function(x, language = c("r", "py", "scala", "sql")) } if (x$results$resultType == "table") { - schema <- data.table::rbindlist(x$results$schema) - tbl <- data.table::rbindlist(x$results$data) - names(tbl) <- schema$name + schema <- dplyr::bind_rows(x$results$schema) - output_tbl <- huxtable::hux(tbl) %>% - huxtable::set_all_borders(TRUE) %>% - huxtable::set_font_size(10) %>% + tbl <- purrr::list_transpose(x$results$data) |> + as.data.frame() + + names(tbl) <- schema$name + + output_tbl <- huxtable::hux(tbl) |> + huxtable::set_all_borders(TRUE) |> + huxtable::set_font_size(10) |> huxtable::set_position("left") huxtable::print_screen(output_tbl) diff --git a/R/experiments.R b/R/experiments.R index 2c162e9..bc38d84 100644 --- a/R/experiments.R +++ b/R/experiments.R @@ -54,7 +54,7 @@ db_experiments_get <- function(name = NULL, id = NULL, body = body, host = host, token = token - ) %>% + ) |> httr2::req_url_path_append(endpoint_suffix) if (perform_request) { diff --git a/R/feature-store.R b/R/feature-store.R index c477a86..54da575 100644 --- a/R/feature-store.R +++ b/R/feature-store.R @@ -42,7 +42,7 @@ db_feature_tables_get <- function(feature_table, version = "2.0", host = host, token = token - ) %>% + ) |> httr2::req_url_query(name =
feature_table) if (perform_request) { @@ -63,7 +63,7 @@ db_feature_table_features <- function(feature_table, version = "2.0", host = host, token = token - ) %>% + ) |> httr2::req_url_query(feature_table = feature_table) if (perform_request) { diff --git a/R/misc-helpers.R b/R/misc-helpers.R index e2f6a86..624bbd9 100644 --- a/R/misc-helpers.R +++ b/R/misc-helpers.R @@ -41,9 +41,9 @@ db_current_workspace_id <- function(host = db_host(), token = db_token(), ) if (perform_request) { - resp <- req %>% - httr2::req_error(body = db_req_error_body) %>% - httr2::req_perform() %>% + resp <- req |> + httr2::req_error(body = db_req_error_body) |> + httr2::req_perform() |> httr2::resp_headers() # workspace id can be extracted from response headers diff --git a/R/package-auth.R b/R/package-auth.R index be4b17b..c105313 100644 --- a/R/package-auth.R +++ b/R/package-auth.R @@ -68,7 +68,7 @@ db_host <- function(id = NULL, prefix = NULL, profile = default_config_profile() #' #' @description #' The function will check for a token in the `DATABRICKS_TOKEN` environment variable. -#' `.databrickscfg` will be searched if `db_profile` and `use_databrickscfg` are set or +#' `.databrickscfg` will be searched if `db_profile` and `use_databrickscfg` are set or #' if Posit Workbench managed OAuth credentials are detected. #' If none of the above are found then it will default to using the OAuth U2M flow. #' @@ -98,7 +98,7 @@ db_token <- function(profile = default_config_profile()) { #' @description #' Workspace ID, optionally specified to make connections pane more powerful. #' Specified as an environment variable `DATABRICKS_WSID`. -#' `.databrickscfg` will be searched if `db_profile` and `use_databrickscfg` are set or +#' `.databrickscfg` will be searched if `db_profile` and `use_databrickscfg` are set or #' if Posit Workbench managed OAuth credentials are detected. #' #' Refer to [api authentication docs](https://docs.databricks.com/dev-tools/api/latest/authentication.html) @@ -223,7 +223,6 @@ read_env_var <- function(key = c("token", "host", "wsid"), value <- Sys.getenv(key_name) - if (value == "") { if (error) { stop(cli::format_error(c( @@ -273,7 +272,7 @@ db_oauth_client <- function(host = db_host()) { #' Returns the default config profile #' @details Returns the config profile first looking at `DATABRICKS_CONFIG_PROFILE` #' and then the `db_profile` option. -#' +#' #' @return profile name #' @keywords internal default_config_profile <- function() { @@ -288,7 +287,7 @@ default_config_profile <- function() { #' Returns whether or not to use a `.databrickscfg` file #' @details Indicates `.databrickscfg` should be used instead of environment variables when #' either the `use_databrickscfg` option is set or Posit Workbench managed OAuth credentials are detected. -#' +#' #' @return boolean #' @keywords internal use_databricks_cfg <- function() { @@ -297,4 +296,27 @@ use_databricks_cfg <- function() { use_databricks_cfg <- TRUE } return(use_databricks_cfg) -} \ No newline at end of file +} + + +# Extended from {odbc} +# +# Try to determine whether we can redirect the user's browser to a server on +# localhost, which isn't possible if we are running on a hosted platform. +# +# This is based on the strategy pioneered by the {gargle} package and {httr2}.
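+#
+# Concretely, the checks below treat a session as hosted when it is running on
+# Databricks itself, on Google Colab (COLAB_RELEASE_TAG is set), or under
+# RStudio Server / Posit Workbench where the IDE is not served from localhost.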
+is_hosted_session <- function() { + + if (on_databricks()) { + return(TRUE) + } + + if (nzchar(Sys.getenv("COLAB_RELEASE_TAG"))) { + return(TRUE) + } + + # If RStudio Server or Posit Workbench is running locally (which is possible, + # though unusual), it's not acting as a hosted environment. + Sys.getenv("RSTUDIO_PROGRAM_MODE") == "server" && + !grepl("localhost", Sys.getenv("RSTUDIO_HTTP_REFERER"), fixed = TRUE) +} diff --git a/R/request-helpers.R b/R/request-helpers.R index 0ddf669..ffac172 100644 --- a/R/request-helpers.R +++ b/R/request-helpers.R @@ -14,7 +14,6 @@ #' #' @return request #' @import httr2 -#' @importFrom magrittr `%>%` db_request <- function(endpoint, method, version = NULL, body = NULL, host, token, ...) { url <- list( @@ -26,18 +25,18 @@ db_request <- function(endpoint, method, version = NULL, body = NULL, host, toke url <- httr2::url_build(url) user_agent_str <- paste0("brickster/", utils::packageVersion("brickster")) - req <- httr2::request(base_url = url) %>% - httr2::req_headers("User-Agent" = user_agent_str) %>% - httr2::req_user_agent(string = user_agent_str) %>% - httr2::req_url_path_append(endpoint) %>% - httr2::req_method(method) %>% + req <- httr2::request(base_url = url) |> + httr2::req_headers("User-Agent" = user_agent_str) |> + httr2::req_user_agent(string = user_agent_str) |> + httr2::req_url_path_append(endpoint) |> + httr2::req_method(method) |> httr2::req_retry(max_tries = 3, backoff = ~ 2) # if token is present use directly # otherwise initiate OAuth 2.0 U2M Workspace flow if (!is.null(token)) { req <- httr2::req_auth_bearer_token(req = req, token = token) - } else { + } else if (!is_hosted_session() && rlang::is_interactive()) { # fetch client oauth_client <- getOption( @@ -54,11 +53,13 @@ db_request <- function(endpoint, method, version = NULL, body = NULL, host, toke redirect_uri = "http://localhost:8020" ) + } else { + cli::cli_abort("cannot find token or initiate OAuth U2M flow") } if (!is.null(body)) { body <- base::Filter(length, body) - req <- req %>% + req <- req |> httr2::req_body_json(body, ...) } @@ -73,7 +74,7 @@ db_request <- function(endpoint, method, version = NULL, body = NULL, host, toke #' #' @family Request Helpers db_req_error_body <- function(resp) { - json <- resp %>% httr2::resp_body_json() + json <- resp |> httr2::resp_body_json() # if there is "message": if ("message" %in% names(json)) { paste(json$error_code, json$message, sep = ": ") @@ -91,9 +92,9 @@ db_req_error_body <- function(resp) { #' #' @family Request Helpers db_perform_request <- function(req, ...) { - req %>% - httr2::req_error(body = db_req_error_body) %>% - httr2::req_perform() %>% + req |> + httr2::req_error(body = db_req_error_body) |> + httr2::req_perform() |> httr2::resp_body_json(...) 
} diff --git a/R/unity-catalog.R b/R/unity-catalog.R index c1e9224..6033d50 100644 --- a/R/unity-catalog.R +++ b/R/unity-catalog.R @@ -50,7 +50,7 @@ db_uc_storage_creds_get <- function(name, body = body, host = host, token = token - ) %>% + ) |> httr2::req_url_path_append(name) if (perform_request) { @@ -90,7 +90,7 @@ db_uc_external_loc_get <- function(name, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_path_append(name) if (perform_request) { @@ -129,7 +129,7 @@ db_uc_catalogs_get <- function(catalog, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_path_append(catalog) if (perform_request) { @@ -149,7 +149,7 @@ db_uc_schemas_list <- function(catalog, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_query(catalog_name = catalog) if (perform_request) { @@ -170,7 +170,7 @@ db_uc_schemas_get <- function(catalog, schema, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_path_append(paste(catalog, schema, sep = ".")) if (perform_request) { @@ -191,7 +191,7 @@ db_uc_tables_list <- function(catalog, schema, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_query( catalog_name = catalog, schema_name = schema @@ -215,7 +215,7 @@ db_uc_tables_get <- function(catalog, schema, table, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_path_append(paste(catalog, schema, table, sep = ".")) if (perform_request) { @@ -235,7 +235,7 @@ db_uc_models_list <- function(catalog, schema, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_query( catalog_name = catalog, schema_name = schema, @@ -260,8 +260,8 @@ db_uc_models_get <- function(catalog, schema, model, version = "2.1", host = host, token = token - ) %>% - httr2::req_url_path_append(paste(catalog, schema, model, sep = ".")) %>% + ) |> + httr2::req_url_path_append(paste(catalog, schema, model, sep = ".")) |> httr2::req_url_query(include_aliases = 'true') if (perform_request) { @@ -282,9 +282,9 @@ db_uc_model_versions_get <- function(catalog, schema, model, version = "2.1", host = host, token = token - ) %>% - httr2::req_url_path_append(paste(catalog, schema, model, sep = ".")) %>% - httr2::req_url_path_append("versions") %>% + ) |> + httr2::req_url_path_append(paste(catalog, schema, model, sep = ".")) |> + httr2::req_url_path_append("versions") |> httr2::req_url_query(max_results = 1000) if (perform_request) { @@ -304,7 +304,7 @@ db_uc_funcs_list <- function(catalog, schema, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_query( catalog_name = catalog, schema_name = schema @@ -328,7 +328,7 @@ db_uc_funcs_get <- function(catalog, schema, func, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_path_append(paste(catalog, schema, func, sep = ".")) if (perform_request) { @@ -348,7 +348,7 @@ db_uc_volumes_list <- function(catalog, schema, version = "2.1", host = host, token = token - ) %>% + ) |> httr2::req_url_query( catalog_name = catalog, schema_name = schema diff --git a/R/vector-search.R b/R/vector-search.R index 5c92239..390a2fc 100644 --- a/R/vector-search.R +++ b/R/vector-search.R @@ -19,7 +19,7 @@ db_vs_endpoints_list <- function(page_token = NULL, token = token ) - req <- req %>% + req <- req |> httr2::req_url_query( page_token = page_token ) @@ -152,7 +152,7 @@ db_vs_indexes_list <- function(endpoint, page_token = NULL, token = token ) - req <- req %>% + req <- req |> httr2::req_url_query( endpoint_name = endpoint, page_token = 
page_token diff --git a/R/volume-fs.R b/R/volume-fs.R index e54a740..192b630 100644 --- a/R/volume-fs.R +++ b/R/volume-fs.R @@ -248,7 +248,7 @@ db_volume_action <- function(path, ) if (!is.null(overwrite)) { - req <- req %>% + req <- req |> httr2::req_url_query(overwrite = ifelse(overwrite, "true", "false")) } @@ -258,15 +258,15 @@ db_volume_action <- function(path, } # show progress when uploading and downloading files - req <- req %>% + req <- req |> httr2::req_progress(type = ifelse(action == "GET", "down", "up")) } if (perform_request) { - resp <- req %>% - httr2::req_error(is_error = function(resp) httr2::resp_status(resp) == 500) %>% - httr2::req_perform(path = destination) %>% + resp <- req |> + httr2::req_error(is_error = function(resp) httr2::resp_status(resp) == 500) |> + httr2::req_perform(path = destination) |> httr2::resp_check_status() if (action == "HEAD") { diff --git a/R/workspaces.R b/R/workspaces.R index 6924906..512bc83 100644 --- a/R/workspaces.R +++ b/R/workspaces.R @@ -213,14 +213,7 @@ db_workspace_import <- function(path, token = token ) - req <- req %>% - httr2::req_body_multipart( - path = body$path, - format = body$format, - overwrite = body$overwrite, - language = body$language, - content = body$content - ) + req <- httr2::req_body_multipart(req, !!!body) if (perform_request) { db_perform_request(req) diff --git a/man/s3_storage_info.Rd b/man/s3_storage_info.Rd index 061b3b1..fd81e4f 100644 --- a/man/s3_storage_info.Rd +++ b/man/s3_storage_info.Rd @@ -40,7 +40,7 @@ set to \code{sse-kms}.} \code{bucket-owner-full-control}. If \code{canned_acl} is set, the cluster instance profile must have \code{s3:PutObjectAcl} permission on the destination bucket and prefix. The full list of possible canned ACLs can be found in -\href{https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl}{docs}. +\href{https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl}{docs}. By default only the object owner gets full control. If you are using cross account role for writing data, you may want to set \code{bucket-owner-full-control} to make bucket owner able to read the logs.} diff --git a/tests/testthat/test-auth.R b/tests/testthat/test-auth.R index 620be3a..d7e2578 100644 --- a/tests/testthat/test-auth.R +++ b/tests/testthat/test-auth.R @@ -207,75 +207,71 @@ test_that("auth functions - host handling", { test_that("auth functions - workbench managed credentials detection", { - # Emulate the databricks.cfg file written by Workbench. 
- db_home <- tempfile("posit-workbench") - dir.create(db_home) - writeLines( - c( - '[workbench]', - 'host = some-host', - 'token = some-token' - ), - file.path(db_home, "databricks.cfg") + + db_home <- withr::local_tempdir("posit-workbench") + + withr::local_file( + .file = "databricks.cfg", + code = { + writeLines( + c( + '[workbench]', + 'host = some-host', + 'token = some-token' + ), + file.path(db_home, "databricks.cfg") + ) + } ) + + # Two env variables need to be set on Workbench for detection to succeed # DATABRICKS_CONFIG_FILE with the path to the databricks.cfg file # DATABRICKS_CONFIG_PROFILE = "workbench" to set the profile correctly - withr::local_envvar( - DATABRICKS_CONFIG_FILE = file.path(db_home, "databricks.cfg"), - DATABRICKS_CONFIG_PROFILE = "workbench" - ) - - token_w <- db_token() - host_w <- db_host() - - expect_true(is.character(token_w)) - expect_true(is.character(host_w)) - - expect_identical("some-host", host_w) - expect_identical("some-token", token_w) - - Sys.unsetenv("DATABRICKS_CONFIG_PROFILE") - expect_error(db_host()()) - expect_error(db_token()()) - - withr::local_envvar( - DATABRICKS_CONFIG_FILE = file.path(db_home, "databricks.cfg"), - DATABRICKS_CONFIG_PROFILE = "workbench" + withr::with_envvar( + new = c( + DATABRICKS_CONFIG_FILE = file.path(db_home, "databricks.cfg"), + DATABRICKS_CONFIG_PROFILE = "workbench" + ), + code = { + token_w <- db_token() + host_w <- db_host() + expect_true(is.character(token_w)) + expect_true(is.character(host_w)) + expect_identical("some-host", host_w) + expect_identical("some-token", token_w) + } ) - Sys.unsetenv("DATABRICKS_CONFIG_FILE") - expect_error(db_host()()) - expect_error(db_token()()) }) test_that("auth functions - workbench managed credentials override env var", { - withr::local_file("posit-workbench.cfg", { - writeLines( - c( - '[workbench]', - 'host = some-host', - 'token = some-token' - ), - "posit-workbench.cfg" - ) - }) - + db_home <- withr::local_tempdir("posit-workbench") + + withr::local_file( + .file = "databricks.cfg", + code = { + writeLines( + c( + '[workbench]', + 'host = some-host', + 'token = some-token' + ), + file.path(db_home, "databricks.cfg") + ) + } + ) - # # Emulate the databricks.cfg file written by Workbench. 
- # db_home <- tempfile("posit-workbench") - # dir.create(db_home) - # # Two env variables need to be set on Workbench for detection to succeed # DATABRICKS_CONFIG_FILE with the path to the databricks.cfg file # DATABRICKS_CONFIG_PROFILE = "workbench" to set the profile correctly # Add different `DATABRICKS_HOST` and `DATABRICKS_TOKEN` env variables to ensure # the credentials from Workbench still get used withr::local_envvar( - DATABRICKS_CONFIG_FILE = "posit-workbench.cfg", + DATABRICKS_CONFIG_FILE = file.path(db_home, "databricks.cfg"), DATABRICKS_CONFIG_PROFILE = "workbench", DATABRICKS_HOST = "env-based-host", DATABRICKS_TOKEN = "env-based-token" diff --git a/tests/testthat/test-clusters.R b/tests/testthat/test-clusters.R index c50910b..14238f6 100644 --- a/tests/testthat/test-clusters.R +++ b/tests/testthat/test-clusters.R @@ -1,6 +1,10 @@ -skip_unless_credentials_set() test_that("Clusters API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + # basic metadata functions resp_list <- db_cluster_list(perform_request = F) expect_s3_class(resp_list, "httr2_request") diff --git a/tests/testthat/test-dbfs.R b/tests/testthat/test-dbfs.R index 1c9f867..e9b93c2 100644 --- a/tests/testthat/test-dbfs.R +++ b/tests/testthat/test-dbfs.R @@ -1,9 +1,14 @@ -skip_unless_credentials_set() - test_that("DBFS API - don't perform", { - filename <- file.path("", basename(tempfile(fileext = ".txt"))) - dirname <- file.path("", basename(tempdir())) + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + + dirname <- tempdir() + filepath <- withr::local_tempfile(lines = "", fileext = ".txt") + filename <- file.path("", basename(filepath)) + dirname_base <- file.path("", basename(dirname)) con <- db_dbfs_create(path = filename, perform_request = FALSE) expect_s3_class(con, "httr2_request") @@ -28,27 +33,29 @@ test_that("DBFS API - don't perform", { list <- db_dbfs_list("/", perform_request = FALSE) expect_s3_class(list, "httr2_request") - mkdirs <- db_dbfs_mkdirs(dirname, perform_request = FALSE) + mkdirs <- db_dbfs_mkdirs(dirname_base, perform_request = FALSE) expect_s3_class(mkdirs, "httr2_request") move <- db_dbfs_move( source_path = filename, - destination_path = file.path(dirname, filename), + destination_path = file.path(dirname_base, filename), perform_request = FALSE ) expect_s3_class(move, "httr2_request") put <- db_dbfs_put( - path = file.path(dirname, "put.txt"), + path = file.path(dirname_base, "put.txt"), contents = "hello world 2", overwrite = TRUE, perform_request = FALSE ) expect_s3_class(put, "httr2_request") - delete <- db_dbfs_delete(dirname, recursive = TRUE, perform_request = FALSE) + delete <- db_dbfs_delete(dirname_base, recursive = TRUE, perform_request = FALSE) expect_s3_class(delete, "httr2_request") + unlink(dirname, recursive = TRUE) + }) skip_on_cran() @@ -57,8 +64,10 @@ skip_unless_aws_workspace() test_that("DBFS API", { - filename <- file.path("", basename(tempfile(fileext = ".txt"))) - dirname <- file.path("", basename(tempdir())) + dirname <- tempdir() + filepath <- withr::local_tempfile(lines = "", fileext = ".txt") + filename <- file.path("", basename(filepath)) + dirname_base <- file.path("", basename(dirname)) con <- db_dbfs_create(path = filename, overwrite = TRUE) expect_type(con, "character") @@ -81,25 +90,26 @@ test_that("DBFS API", { resp_list <- db_dbfs_list("/") expect_type(resp_list, "list") - resp_mkdirs <- db_dbfs_mkdirs(dirname) + resp_mkdirs <- 
db_dbfs_mkdirs(dirname_base) expect_identical(unname(resp_mkdirs), list()) resp_move <- db_dbfs_move( source_path = filename, - destination_path = file.path(dirname, filename) + destination_path = file.path(dirname_base, filename) ) expect_identical(unname(resp_move), list()) resp_put <- db_dbfs_put( - path = file.path(dirname, "put.txt"), + path = file.path(dirname_base, "put.txt"), contents = "hello world 2", overwrite = TRUE ) expect_identical(unname(resp_put), list()) - resp_delete <- db_dbfs_delete(dirname, recursive = TRUE) + resp_delete <- db_dbfs_delete(dirname_base, recursive = TRUE) expect_identical(unname(resp_delete), list()) + unlink(dirname, recursive = TRUE) }) diff --git a/tests/testthat/test-execution-contexts.R b/tests/testthat/test-execution-contexts.R index 14d7b4f..88383ac 100644 --- a/tests/testthat/test-execution-contexts.R +++ b/tests/testthat/test-execution-contexts.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Execution Contexts API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_ctx_create <- db_context_create( cluster_id = "some_cluster_id", language = "python", diff --git a/tests/testthat/test-experiments.R b/tests/testthat/test-experiments.R index 2825c4f..ed4153d 100644 --- a/tests/testthat/test-experiments.R +++ b/tests/testthat/test-experiments.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("experiments API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_list <- db_experiments_list(max_results = 1, perform_request = FALSE) expect_s3_class(resp_list, "httr2_request") diff --git a/tests/testthat/test-feature-store.R b/tests/testthat/test-feature-store.R index 4da942c..64f0457 100644 --- a/tests/testthat/test-feature-store.R +++ b/tests/testthat/test-feature-store.R @@ -1,6 +1,10 @@ -skip_unless_credentials_set() - test_that("Feature Store API - don't perform", { + + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_search <- db_feature_tables_search(perform_request = FALSE) expect_s3_class(resp_search, "httr2_request") diff --git a/tests/testthat/test-jobs.R b/tests/testthat/test-jobs.R index 4f262bc..457b848 100644 --- a/tests/testthat/test-jobs.R +++ b/tests/testthat/test-jobs.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Jobs API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_list <- db_jobs_list( perform_request = F ) diff --git a/tests/testthat/test-libraries.R b/tests/testthat/test-libraries.R index f5587f2..983a622 100644 --- a/tests/testthat/test-libraries.R +++ b/tests/testthat/test-libraries.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Libraries API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_all_statuses <- db_libs_all_cluster_statuses( perform_request = F ) diff --git a/tests/testthat/test-misc-helpers.R b/tests/testthat/test-misc-helpers.R index f50e72d..d47d841 100644 --- a/tests/testthat/test-misc-helpers.R +++ b/tests/testthat/test-misc-helpers.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Misc Helpers - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_wsid <- db_current_workspace_id(perform_request = F) 
expect_s3_class(resp_wsid, "httr2_request") diff --git a/tests/testthat/test-mlflow-dbrx.R b/tests/testthat/test-mlflow-dbrx.R index a9ece82..9530f67 100644 --- a/tests/testthat/test-mlflow-dbrx.R +++ b/tests/testthat/test-mlflow-dbrx.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("MLflow Registered Models API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_list <- db_mlflow_registered_models_list( perform_request = F ) diff --git a/tests/testthat/test-repl.R b/tests/testthat/test-repl.R index e5f2f8f..88f6525 100644 --- a/tests/testthat/test-repl.R +++ b/tests/testthat/test-repl.R @@ -31,9 +31,9 @@ test_that("REPL - helpers", { expect_equal(db_context_command_parse(cmd_res, "scala"), "hello world") expect_equal(db_context_command_parse(cmd_res, "py"), "hello world") - # python special case - cmd_res <- list(results = list(resultType = "text", data = "hello world")) - expect_equal(db_context_command_parse(cmd_res, "py"), NULL) + # # python special case + # cmd_res <- list(results = list(resultType = "text", data = "hello world")) + # expect_equal(db_context_command_parse(cmd_res, "py"), NULL) # error case cmd_res <- list(results = list(resultType = "error", summary = "err", cause = "err")) diff --git a/tests/testthat/test-repos.R b/tests/testthat/test-repos.R index 162a5d6..eac3cf9 100644 --- a/tests/testthat/test-repos.R +++ b/tests/testthat/test-repos.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Repos API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_get_all <- db_repo_get_all("/", perform_request = FALSE) expect_s3_class(resp_get_all, "httr2_request") diff --git a/tests/testthat/test-secrets.R b/tests/testthat/test-secrets.R index 0accbd6..78c0a95 100644 --- a/tests/testthat/test-secrets.R +++ b/tests/testthat/test-secrets.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Secrets API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_list <- db_secrets_list( scope = "some_scope", perform_request = FALSE diff --git a/tests/testthat/test-sql-connector.R b/tests/testthat/test-sql-connector.R index 4d679a4..612f46f 100644 --- a/tests/testthat/test-sql-connector.R +++ b/tests/testthat/test-sql-connector.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("SQL Connector Helpers", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + expect_no_error({ warehouse_path <- generate_http_path( id = "123", diff --git a/tests/testthat/test-sql-execution.R b/tests/testthat/test-sql-execution.R index 90d715b..5bf5f75 100644 --- a/tests/testthat/test-sql-execution.R +++ b/tests/testthat/test-sql-execution.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("SQL Execution API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_query <- db_sql_exec_query( statement = "select 1", warehouse_id = "some_warehouse_id", diff --git a/tests/testthat/test-unity-catalog.R b/tests/testthat/test-unity-catalog.R index c5d1680..5f8785b 100644 --- a/tests/testthat/test-unity-catalog.R +++ b/tests/testthat/test-unity-catalog.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Unity Catalog API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", +
"DATABRICKS_TOKEN" = "mock_token" + )) + resp_summary <- db_uc_metastore_summary(perform_request = F) expect_s3_class(resp_summary, "httr2_request") diff --git a/tests/testthat/test-vector-search.R b/tests/testthat/test-vector-search.R index 0dcc01e..e7da7c9 100644 --- a/tests/testthat/test-vector-search.R +++ b/tests/testthat/test-vector-search.R @@ -1,6 +1,10 @@ -skip_unless_credentials_set() test_that("Vector Search APIs - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + expect_no_error({ req_vse_create <- db_vs_endpoints_create( name = "mock_endpoint", diff --git a/tests/testthat/test-volumes.R b/tests/testthat/test-volumes.R index b9a4a6a..3a74278 100644 --- a/tests/testthat/test-volumes.R +++ b/tests/testthat/test-volumes.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Volumes API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + valid_volume_path <- "/Volumes/catalog/schema/volume/" expect_no_error(is_valid_volume_path(valid_volume_path)) diff --git a/tests/testthat/test-warehouses.R b/tests/testthat/test-warehouses.R index efdb5dd..acd082e 100644 --- a/tests/testthat/test-warehouses.R +++ b/tests/testthat/test-warehouses.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Warehouse API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_list <- db_sql_warehouse_list(perform_request = F) expect_s3_class(resp_list, "httr2_request") diff --git a/tests/testthat/test-workspace-folder.R b/tests/testthat/test-workspace-folder.R index 14568ef..bc312b7 100644 --- a/tests/testthat/test-workspace-folder.R +++ b/tests/testthat/test-workspace-folder.R @@ -1,7 +1,10 @@ -skip_unless_credentials_set() - test_that("Workspace API - don't perform", { + withr::local_envvar(c( + "DATABRICKS_HOST" = "mock_host", + "DATABRICKS_TOKEN" = "mock_token" + )) + resp_list <- db_workspace_list( path = "some_path", perform_request = F diff --git a/vignettes/cluster-management.Rmd b/vignettes/cluster-management.Rmd index a2c8d54..5696ec3 100644 --- a/vignettes/cluster-management.Rmd +++ b/vignettes/cluster-management.Rmd @@ -68,7 +68,7 @@ cluster_info$state You can edit Databricks clusters to change various parameters using `db_cluster_edit()`. For example, we may decide we want our cluster to autoscale between 2-8 nodes and add some tags. 
-```{r, results='hide', eval=FALSE} +```{r, results='hide'} # we are required to input all parameters db_cluster_edit( diff --git a/vignettes/managing-jobs.Rmd b/vignettes/managing-jobs.Rmd index 0dc13bb..5a7528c 100644 --- a/vignettes/managing-jobs.Rmd +++ b/vignettes/managing-jobs.Rmd @@ -4,7 +4,7 @@ output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Job Management} %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} + %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} diff --git a/vignettes/remote-repl.Rmd b/vignettes/remote-repl.Rmd index 4ef72c1..f516439 100644 --- a/vignettes/remote-repl.Rmd +++ b/vignettes/remote-repl.Rmd @@ -39,7 +39,7 @@ library(brickster) | (`db_repl`) | (Shared, Single User) | | +--------------------------------------------------------------------------------------------------------------------+-----------------------+-------------------------------------------------------------------------------+ -Databricks [REPL](https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop'=) (`db_repl()`) will be the focus of this article. +Databricks [REPL](https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop) (`db_repl()`) will be the focus of this article. # What is the Databricks REPL? @@ -62,19 +62,19 @@ After successfully connecting to the cluster you can run commands against the re The REPL has a shortcut you can enter `:` to change the active language. You can change between the following languages: -+----------------------------------+-----------------------------------+ -| Language | Shortcut | -+==================================+===================================+ -| R | `:r` | -+----------------------------------+-----------------------------------+ -| Python | `:py` | -+----------------------------------+-----------------------------------+ -| SQL | `:sql` | -+----------------------------------+-----------------------------------+ -| Scala | `:scala` | -+----------------------------------+-----------------------------------+ -| Shell | `:sh` | -+----------------------------------+-----------------------------------+ ++---------------------------------+-----------------------------------+ +| Language | Shortcut | ++=================================+===================================+ +| R | `:r` | ++---------------------------------+-----------------------------------+ +| Python | `:py` | ++---------------------------------+-----------------------------------+ +| SQL | `:sql` | ++---------------------------------+-----------------------------------+ +| Scala | `:scala` | ++---------------------------------+-----------------------------------+ +| Shell | `:sh` | ++---------------------------------+-----------------------------------+ When you change between languages all variables should persist unless REPL is exited. diff --git a/vignettes/setup-auth.Rmd b/vignettes/setup-auth.Rmd index 242425a..9984830 100644 --- a/vignettes/setup-auth.Rmd +++ b/vignettes/setup-auth.Rmd @@ -28,7 +28,7 @@ It's recommended to use option (1) when using `{brickster}` interactively, if yo Personal Access Tokens can be generated in a few steps, for a step-by-step breakdown [refer to the documentation](https://docs.databricks.com/dev-tools/api/latest/authentication.html). -Once you have a token you'll be able to store it alongside the workspace URL in an `.Renviron` file. The `.Renviron` is used for storing the variables, such as those which may be sensitive (e.g. 
credentials) and de-couple them from the code (additional reading: [1](https://support.posit.co/hc/en-us/articles/360047157094-Managing-R-with-Rprofile-Renviron-Rprofile-site-Renviron-site-rsession-conf-and-repos-conf), [2](https://CRAN.R-project.org/package=startup/vignettes/startup-intro.html)). +Once you have a token you'll be able to store it alongside the workspace URL in an `.Renviron` file. The `.Renviron` file is used to store variables, such as those which may be sensitive (e.g. credentials), and to decouple them from your code ([additional reading](https://CRAN.R-project.org/package=startup/vignettes/startup-intro.html)). To get started add the following to your `.Renviron`:
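For example (the values are placeholders; substitute your own workspace URL and personal access token):

```
DATABRICKS_HOST=<workspace-url>
DATABRICKS_TOKEN=<personal-access-token>
```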