From 4d39f0d01b8ba96eba3d38a1bed71a38577d9b72 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Fri, 30 Aug 2024 01:59:16 +1000 Subject: [PATCH 1/4] Preparing for CRAN resubmission --- DESCRIPTION | 11 +-- NAMESPACE | 2 - R/data-structures.R | 2 +- R/databricks-helpers.R | 13 --- R/knitr-engines.R | 8 +- R/notebook-helpers.R | 79 ------------------- README.md | 4 +- _pkgdown.yml | 2 - .../skeleton/skeleton.Rmd | 6 ++ man/cron_schedule.Rd | 2 +- man/notebook_enable_htmlwidgets.Rd | 38 --------- man/notebook_use_posit_repo.Rd | 22 ------ tests/testthat/test-notebook-helpers.R | 2 - vignettes/setup-auth.Rmd | 7 +- 14 files changed, 25 insertions(+), 173 deletions(-) delete mode 100644 man/notebook_enable_htmlwidgets.Rd delete mode 100644 man/notebook_use_posit_repo.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 90c14ab..61ae6e0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: brickster -Title: R Toolkit for Databricks +Title: R Toolkit for 'Databricks' Version: 0.2.4 Authors@R: c( @@ -13,9 +13,10 @@ Authors@R: email = "rafi.kurlansik@databricks.com"), person("Databricks", role = c("cph", "fnd")) ) -Description: Collection of utilities that improve using Databricks from R. - Primarily functions that wrap specific Databricks APIs, RStudio connection - pane support, quality of life functions to make Databricks simpler to use. +Description: Collection of utilities that improve using 'Databricks' from R. + Primarily functions that wrap specific 'Databricks' APIs + (), 'RStudio' connection pane support, quality + of life functions to make 'Databricks' simpler to use. 
License: Apache License (>= 2) Encoding: UTF-8 LazyData: true @@ -49,5 +50,5 @@ Suggests: Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 VignetteBuilder: knitr -URL: https://github.com/zacdav-db/brickster +URL: https://github.com/databrickslabs/brickster Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index c1d6e6b..71ac3cb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -200,9 +200,7 @@ export(lib_pypi) export(lib_whl) export(libraries) export(new_cluster) -export(notebook_enable_htmlwidgets) export(notebook_task) -export(notebook_use_posit_repo) export(open_workspace) export(pipeline_task) export(py_db_sql_connector) diff --git a/R/data-structures.R b/R/data-structures.R index b9cc53d..aa3bc91 100644 --- a/R/data-structures.R +++ b/R/data-structures.R @@ -840,7 +840,7 @@ is.email_notifications <- function(x) { #' #' @param quartz_cron_expression Cron expression using Quartz syntax that #' describes the schedule for a job. -#' See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) +#' See [Cron Trigger](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) #' for details. #' @param timezone_id Java timezone ID. The schedule for a job is resolved with #' respect to this timezone. 
diff --git a/R/databricks-helpers.R b/R/databricks-helpers.R index fc5d11a..766a63a 100644 --- a/R/databricks-helpers.R +++ b/R/databricks-helpers.R @@ -3,19 +3,6 @@ on_databricks <- function() { dbr != "" } -in_databricks_nb <- function() { - ("/databricks/spark/R/lib" %in% .libPaths()) && - exists("DATABRICKS_GUID", envir = .GlobalEnv) -} - -use_posit_repo <- function() { - if (in_databricks_nb()) { - codename <- system("lsb_release -c --short", intern = T) - mirror <- paste0("https://packagemanager.posit.co/cran/__linux__/", codename, "/latest") - options(repos = c(POSIT = mirror)) - } -} - #' Determine brickster virtualenv #' #' @details Returns `NULL` when running within Databricks, diff --git a/R/knitr-engines.R b/R/knitr-engines.R index 069e45f..a158b96 100644 --- a/R/knitr-engines.R +++ b/R/knitr-engines.R @@ -97,9 +97,6 @@ clean_command_results <- function(x, options, language) { schema <- data.table::rbindlist(x$results$schema) tbl <- data.table::rbindlist(x$results$data) names(tbl) <- schema$name - if (!is.null(options$keep_as)) { - base::assign(options$keep_as, value = tbl, envir = .GlobalEnv) - } if (isTRUE(getOption('knitr.in.progress'))) { outputs$table <- knitr::engine_output( options = options, @@ -139,11 +136,14 @@ clean_command_results <- function(x, options, language) { if (isTRUE(getOption('knitr.in.progress'))) { outputs$plot <- knitr::engine_output( options = options, - out = list(knitr::include_graphics(path = file)) + out = list(knitr::include_graphics(path = file, dpi = options$dpi)) ) } else { res <- structure(file, class = c("knit_image_paths", "knit_asis"), dpi = options$dpi) print(res) + # img <- magick::image_read(raw) + # grid::grid.newpage() + # grid::grid.raster(img) } } diff --git a/R/notebook-helpers.R b/R/notebook-helpers.R index 8fdfa62..f823ba4 100644 --- a/R/notebook-helpers.R +++ b/R/notebook-helpers.R @@ -10,82 +10,3 @@ in_databricks_nb <- function() { ("/databricks/spark/R/lib" %in% .libPaths()) && 
exists("DATABRICKS_GUID", envir = .GlobalEnv) } - -#' Setup Databricks Notebook with Posit Package Manager -#' -#' @details -#' Databricks notebooks default repo for package installation is CRAN. -#' CRAN doesn't provide pre-compiled binaries for linux and this results in -#' packages taking longer than desired. -#' -#' This function can be called within a Databricks notebook to easily switch to -#' Posit and retrieve pre-compiled binaries. -#' -#' This function will behave correctly across different Databricks Runtimes, -#' even when the underlying linux version changes. -#' -#' @export -notebook_use_posit_repo <- function() { - if (in_databricks_nb()) { - agent <- sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version["platform"], R.version["arch"], R.version["os"])) - codename <- system("lsb_release -c --short", intern = T) - mirror <- paste0("https://packagemanager.posit.co/cran/__linux__/", codename, "/latest") - options( - HTTPUserAgent = agent, - repos = c(POSIT = mirror, getOption("repos")) - ) - } -} - -#' Enable htmlwidgets in Databricks Notebook -#' -#' @details -#' Databricks notebooks by default don't currently support htmlwidgets. -#' This behaviour can be corrected by: -#' - adjusting the print method in htmltools -#' - installing pandoc -#' -#' This is a invasive method to correct the behaviour as htmltools isn't -#' flexible to adjust via the `viewer` option directly. -#' -#' It only runs within a Databricks notebook cell. -#' -#' The height can be adjusted without running the function again by using the -#' `db_htmlwidget_height` option (e.g. `options(db_htmlwidget_height = 300)`). -#' -#' -#' @param height Measurement passed to height of htmlwidget. This overrides -#' existing values that may often be `NULL` to ensure the height is correctly -#' displayed within the iframe of notebook results cells (via `displayHTML()`). -#' Default is 450. 
-#' -#' @export -#' -#' @examples -#' notebook_enable_htmlwidgets() -#' # set default height to 800px -#' notebook_enable_htmlwidgets(height = 800) -notebook_enable_htmlwidgets <- function(height = 450) { - if (in_databricks_nb()) { - - # new option to control default widget height, default is 450px - options(db_htmlwidget_height = height) - - system("apt-get --yes install pandoc", intern = T) - if (!base::require("htmlwidgets")) { - utils::install.packages("htmlwidgets") - } - - # new method will fetch height based on new option, or default to 450px - new_method <- function(x, ...) { - x$height <- getOption("db_htmlwidget_height", 450) - file <- tempfile(fileext = ".html") - htmlwidgets::saveWidget(x, file = file) - contents <- as.character(rvest::read_html(file)) - displayHTML(contents) - } - - utils::assignInNamespace("print.htmlwidget", new_method, ns = "htmlwidgets") - invisible(list(default_height = height, print = new_method)) - } -} diff --git a/README.md b/README.md index c4408ff..15e3a12 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# [brickster](https://databrickslabs.github.io/brickster/) +# [brickster](https://databrickslabs.github.io/brickster/) -[![R-CMD-check](https://github.com/zacdav-db/brickster/workflows/R-CMD-check/badge.svg)](https://github.com/zacdav-db/brickster/actions) [![Codecov test coverage](https://codecov.io/gh/zacdav-db/brickster/branch/main/graph/badge.svg)](https://app.codecov.io/gh/zacdav-db/brickster?branch=main) +[![R-CMD-check](https://github.com/databrickslabs/brickster/workflows/R-CMD-check/badge.svg)](https://github.com/databrickslabs/brickster/actions) [![Codecov test coverage](https://codecov.io/gh/zacdav-db/brickster/branch/main/graph/badge.svg)](https://app.codecov.io/gh/zacdav-db/brickster?branch=main) diff --git a/_pkgdown.yml b/_pkgdown.yml index 92a146f..2e2ce94 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -52,8 +52,6 @@ reference: - title: Databricks Notebook Helpers contents: - in_databricks_nb - - 
notebook_use_posit_repo - - notebook_enable_htmlwidgets - title: DBFS contents: starts_with("db_dbfs", internal = TRUE) - title: Volume FileSystem diff --git a/inst/rmarkdown/templates/databricks-remote-notebook/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/databricks-remote-notebook/skeleton/skeleton.Rmd index f0c28b3..09337d5 100644 --- a/inst/rmarkdown/templates/databricks-remote-notebook/skeleton/skeleton.Rmd +++ b/inst/rmarkdown/templates/databricks-remote-notebook/skeleton/skeleton.Rmd @@ -48,6 +48,12 @@ print("hello from Databricks") ``` +```{python, engine = "databricks_py"} +# install folium +!pip install folium +``` + + ```{python, engine = "databricks_py"} import folium m = folium.Map(location=[45.5236, -122.6750]) diff --git a/man/cron_schedule.Rd b/man/cron_schedule.Rd index 273609e..5e7a53e 100644 --- a/man/cron_schedule.Rd +++ b/man/cron_schedule.Rd @@ -13,7 +13,7 @@ cron_schedule( \arguments{ \item{quartz_cron_expression}{Cron expression using Quartz syntax that describes the schedule for a job. -See \href{http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html}{Cron Trigger} +See \href{https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html}{Cron Trigger} for details.} \item{timezone_id}{Java timezone ID. The schedule for a job is resolved with diff --git a/man/notebook_enable_htmlwidgets.Rd b/man/notebook_enable_htmlwidgets.Rd deleted file mode 100644 index a74bb7d..0000000 --- a/man/notebook_enable_htmlwidgets.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/notebook-helpers.R -\name{notebook_enable_htmlwidgets} -\alias{notebook_enable_htmlwidgets} -\title{Enable htmlwidgets in Databricks Notebook} -\usage{ -notebook_enable_htmlwidgets(height = 450) -} -\arguments{ -\item{height}{Measurement passed to height of htmlwidget. 
This overrides -existing values that may often be \code{NULL} to ensure the height is correctly -displayed within the iframe of notebook results cells (via \code{displayHTML()}). -Default is 450.} -} -\description{ -Enable htmlwidgets in Databricks Notebook -} -\details{ -Databricks notebooks by default don't currently support htmlwidgets. -This behaviour can be corrected by: -\itemize{ -\item adjusting the print method in htmltools -\item installing pandoc -} - -This is a invasive method to correct the behaviour as htmltools isn't -flexible to adjust via the \code{viewer} option directly. - -It only runs within a Databricks notebook cell. - -The height can be adjusted without running the function again by using the -\code{db_htmlwidget_height} option (e.g. \code{options(db_htmlwidget_height = 300)}). -} -\examples{ -notebook_enable_htmlwidgets() -# set default height to 800px -notebook_enable_htmlwidgets(height = 800) -} diff --git a/man/notebook_use_posit_repo.Rd b/man/notebook_use_posit_repo.Rd deleted file mode 100644 index c5d88fb..0000000 --- a/man/notebook_use_posit_repo.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/notebook-helpers.R -\name{notebook_use_posit_repo} -\alias{notebook_use_posit_repo} -\title{Setup Databricks Notebook with Posit Package Manager} -\usage{ -notebook_use_posit_repo() -} -\description{ -Setup Databricks Notebook with Posit Package Manager -} -\details{ -Databricks notebooks default repo for package installation is CRAN. -CRAN doesn't provide pre-compiled binaries for linux and this results in -packages taking longer than desired. - -This function can be called within a Databricks notebook to easily switch to -Posit and retrieve pre-compiled binaries. - -This function will behave correctly across different Databricks Runtimes, -even when the underlying linux version changes. 
-} diff --git a/tests/testthat/test-notebook-helpers.R b/tests/testthat/test-notebook-helpers.R index 169c402..18ab19e 100644 --- a/tests/testthat/test-notebook-helpers.R +++ b/tests/testthat/test-notebook-helpers.R @@ -2,7 +2,5 @@ test_that("Databricks Notebook Helpers", { # currently running tests outside of a databricks notebook expect_false(in_databricks_nb()) - expect_no_error(notebook_use_posit_repo()) - expect_no_error(notebook_enable_htmlwidgets()) }) diff --git a/vignettes/setup-auth.Rmd b/vignettes/setup-auth.Rmd index 76a01fd..0912991 100644 --- a/vignettes/setup-auth.Rmd +++ b/vignettes/setup-auth.Rmd @@ -20,13 +20,16 @@ knitr::opts_chunk$set( The `{brickster}` package connects to a Databricks workspace is two ways: 1. [OAuth user-to-machine (U2M) authentication](https://docs.databricks.com/en/dev-tools/auth/oauth-u2m.html#oauth-user-to-machine-u2m-authentication) -2. [Personal Access Tokens (PAT)](https://docs.databricks.com/en/dev-tools/auth/pat.htmlhttps://docs.databricks.com/en/dev-tools/auth/pat.html) +2. [Personal Access Tokens (PAT)](https://docs.databricks.com/en/dev-tools/auth/pat.html) It's recommended to use option (1) when using `{brickster}` interactively, if you need to run code via an automated process the only option currently is (2). Personal Access Tokens can be generated in a few steps, for a step-by-step breakdown [refer to the documentation](https://docs.databricks.com/dev-tools/api/latest/authentication.html). -Once you have a token you'll be able to store it alongside the workspace URL in an `.Renviron` file. The `.Renviron` is used for storing the variables, such as those which may be sensitive (e.g. credentials) and de-couple them from the code (additional reading: [1](https://support.rstudio.com/hc/en-us/articles/360047157094-Managing-R-with-Rprofile-Renviron-Rprofile-site-Renviron-site-rsession-conf-and-repos-conf), [2](https://cran.r-project.org/web/packages/startup/vignettes/startup-intro.html)). 
+Once you have a token you'll be able to store it alongside the workspace URL in an `.Renviron` file. The `.Renviron` is used for storing the variables, such as those which may be sensitive (e.g. credentials) and de-couple them from the code (additional reading: [1](https://support.posit.co/hc/en-us/articles/360047157094-Managing-R-with-Rprofile-Renviron-Rprofile-site-Renviron-site-rsession-conf-and-repos-conf), [2](https://CRAN.R-project.org/package=startup/vignettes/startup-intro.html)). + + + To get started add the following to your `.Renviron`: From 8a7ba1f814d580c564bd734c668257247601ebc8 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Fri, 30 Aug 2024 02:06:50 +1000 Subject: [PATCH 2/4] Incrementing version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 61ae6e0..6b2624c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: brickster Title: R Toolkit for 'Databricks' -Version: 0.2.4 +Version: 0.2.5 Authors@R: c( person(given = "Zac", From 171812d0f5e1a269ea1c02520ddb0d9a7b65bd63 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Fri, 30 Aug 2024 02:23:34 +1000 Subject: [PATCH 3/4] Adopting `output.var` instead of `keep_as` to conform with existing standards. 
--- R/knitr-engines.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/knitr-engines.R b/R/knitr-engines.R index a158b96..815015d 100644 --- a/R/knitr-engines.R +++ b/R/knitr-engines.R @@ -96,6 +96,7 @@ clean_command_results <- function(x, options, language) { if (options$eval) { schema <- data.table::rbindlist(x$results$schema) tbl <- data.table::rbindlist(x$results$data) + names(tbl) <- schema$name if (isTRUE(getOption('knitr.in.progress'))) { outputs$table <- knitr::engine_output( @@ -106,6 +107,12 @@ clean_command_results <- function(x, options, language) { knitr::knit_print(tbl) } + # when the `output.var` chunk option is set, assign the table to that name in the knit environment + varname <- options$output.var + if (!is.null(varname)) { + assign(varname, tbl, envir = knitr::knit_global()) + } + } return(do.call(paste, outputs)) From 45a39c904c1cb513e223d1556f4ce0570c7048b2 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Fri, 30 Aug 2024 02:24:42 +1000 Subject: [PATCH 4/4] Adjusting vignettes to reflect `keep_as` to `output.var` change. --- vignettes/rmarkdown-databricks-notebook.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/rmarkdown-databricks-notebook.Rmd b/vignettes/rmarkdown-databricks-notebook.Rmd index fc5854a..70cf03c 100644 --- a/vignettes/rmarkdown-databricks-notebook.Rmd +++ b/vignettes/rmarkdown-databricks-notebook.Rmd @@ -126,10 +126,10 @@ Results that are detected as tabular in **any** `databricks_*` chunk will be ren ### Persisting Tabular Results -When a result is rendered as a table you can persist a copy to the R session (`.GlobalEnv`) by using the `keep_as` chunk option +When a result is rendered as a table you can persist a copy to the R session (the knitr evaluation environment, typically `.GlobalEnv`) by using the `output.var` chunk option. ```` r -`r ''````{sql, engine = "databricks_sql", keep_as = "tables"} +`r ''````{sql, engine = "databricks_sql", output.var = "tables"} show databases ``` ````