From e0e8b8a0fafedacf21b5a6fce206d2fa94eae84b Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Fri, 3 Nov 2023 17:53:28 +1100 Subject: [PATCH 01/10] adding sql execution api functions --- NAMESPACE | 4 + R/sql-exec-client.R | 88 ++++++++++++++++++++++ R/sql-query-execution.R | 161 ++++++++++++++++++++++++++++++++++++++++ man/db_sql_cancel.Rd | 33 ++++++++ man/db_sql_query.Rd | 61 +++++++++++++++ man/db_sql_result.Rd | 36 +++++++++ man/db_sql_status.Rd | 33 ++++++++ 7 files changed, 416 insertions(+) create mode 100644 R/sql-exec-client.R create mode 100644 R/sql-query-execution.R create mode 100644 man/db_sql_cancel.Rd create mode 100644 man/db_sql_query.Rd create mode 100644 man/db_sql_result.Rd create mode 100644 man/db_sql_status.Rd diff --git a/NAMESPACE b/NAMESPACE index fe61f93..b4885e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -92,8 +92,12 @@ export(db_secrets_scope_acl_put) export(db_secrets_scope_create) export(db_secrets_scope_delete) export(db_secrets_scope_list_all) +export(db_sql_cancel) export(db_sql_global_warehouse_get) +export(db_sql_query) export(db_sql_query_history) +export(db_sql_result) +export(db_sql_status) export(db_sql_warehouse_create) export(db_sql_warehouse_delete) export(db_sql_warehouse_edit) diff --git a/R/sql-exec-client.R b/R/sql-exec-client.R new file mode 100644 index 0000000..589abb6 --- /dev/null +++ b/R/sql-exec-client.R @@ -0,0 +1,88 @@ +# library(brickster) +# library(arrow) +# library(furrr) +# library(progressr) +# +# handlers("cli") +# plan(sequential) +# +# # Notes: +# # Wait 0 seconds +# # format default to arrow with external links +# # poll every 5 seconds +# # +# # Steps: +# # 1. Send Query +# # 2. If result, parse +# # 3. Otherwise poll for result +# # 4. Distribute fetching into arrow format +# +# +# query <- db_sql_query( +# statement = "select * from zacdav.default.big_example LIMIT 1000000", +# warehouse_id = "18243426e662e2ad", +# format = "ARROW_STREAM", +# disposition = "EXTERNAL_LINKS", +# wait_timeout = "10s", +# on_wait_timeout = "CONTINUE" +# ) +# +# # poll till "SUCCEEDED" +# while (query$status$state %in% c("PENDING", "RUNNING")) { +# Sys.sleep(2) +# message("pending") +# query <- db_sql_status(statement_id = query$statement_id) +# } +# +# # check if results were truncated and warn +# if (query$manifest$truncated) { +# warning("Results are truncated...") +# } +# +# chunks <- seq.int(from = 0, length.out = query$manifest$total_chunk_count) +# +# chunks_to_tables <- function(statement_id, chunks, links_only = FALSE) { +# p <- progressr::progressor(along = chunks) +# purrr::map(chunks, function(x) { +# chunk <- db_sql_result(query$statement_id, chunk = x) +# table <- chunk$external_links[[1]]$external_link +# print(table) +# if (!links_only) { +# table <- arrow::read_ipc_stream(file = table, as_data_frame = FALSE) +# } +# p(message = paste0("fetching chunk: ", x)) +# table +# }) +# } +# +# with_progress(chunk_tables <- chunks_to_tables(query$statement_id, chunks)) +# +# +# +# arrow::read_ipc_stream( +# file = db_sql_result(query$statement_id, chunk = 1)$external_links[[1]]$external_link, +# as_data_frame = FALSE +# ) +# +# +# result <- do.call(arrow::concat_tables, chunk_tables) +# result +# +# +# arrow::as_record_batch(chunk_tables[[1]]) +# class((chunk_tables[[1]])) +# +# #### +# # # only works for CSV but saves needing to go multi-core route +# # +# # with_progress(chunk_links <- chunks_to_tables(query$statement_id, chunks, links_only = TRUE)) +# # +# # library(duckdb) +# # library(DBI) +# # +# # chunk_links[[1]] +# # +# # conn 
<- DBI::dbConnect(duckdb::duckdb()) +# # dbGetQuery(conn, "select * from read_csv_auto('https://e2-demo-field-eng-dbfs.s3.us-west-2.amazonaws.com/oregon-prod/1444828305810485.jobs/sql/extended/results_2023-11-03T13%3A09%3A55Z_9b14c6b9-8be2-4bce-b220-3a0b3d118a4b?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEPT%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaDnVzLXdlc3QtMi1maXBzIkYwRAIgZSyHdUi0o5vhf7kenVUOLWSXjfBeMiyjnxRtDrCiyKECIBN6fuDvbsS8V3wSm6gH8%2BlziYCznpZM5RehkZcq5%2FfkKtwDCC0QABoMNDE0MzUxNzY3ODI2IgxYNkjDOj7c5KykvlYquQNlnebmntDBqyZSfzzkhp5HZKnxkKcYuip15LE5x93iyEmGXSRlXv5DHTBlbNk1biNuHTZRDWtXsC6w%2FAZqrq%2BGXKlegxoZIELaAbEX2p94bHcC%2FUFa8o%2B9jBTZh7KeXvCZOUR8b8M9%2F8%2FdJ1ZmRMIyV7q%2Fa7D39bfYz4FLrzFQ8xs5Q%2B%2FmLvTaYT%2BL%2FxFdjtlPjuxfVM3CwkwvAvAb%2BwkGKMRLmwcq%2Bh8bDNMC7eKi9r0CAzr3x4Va6%2B5ibPuNjTqqDPONCLnH2QcVZS1oah6U3qQN6hbkvhsZRroKEJc0232HDz0qPdWZS%2B9G4Lew%2BY8tQ04MbeOkx6buQVnYi1UomEAk%2BJpPEeGGyrmcr6jQ8a36wlyhcqbZWl0zZA8INhHe2rAJQjDxDBL3IhqvTIHnlWZvAV0SzpEzD8l4Z%2FnBy2lwwXCH3MnVDqgpYNHUSOTAwx%2BqkgGZvaoWLKj4GoWPcoeQ1O%2FWqNwLSD%2FsBqwrQdzyfnpGRm81oJ6mBiRQr%2FdsXqbqAa44V9iGz4JhKicKjKC%2BVFcRyKhz%2BEo7UpVbu3TjqROiMOP6sFAgUzDQQ9IqJhZQXOGZhDMw4qOOqgY65AFaQw6xv4n44og2XQUWpj33wK0mjIT4HNnyZJmwdwG%2FGJNHr37Cn48b7%2FCVEw10OXLdJpQ8OfSeZ4ihybA9UsBZ%2BP%2BDqANQp6DjUSQ7ttUxkhoMTYYeJZT9mhgYlCn7590rFMd3BgXJimhYMvPRigmbPKKncX3Brvl4OhH2%2BKXMRKQJLoMEZNMADQXQBTtbzLZAbHX2Yrm2qby1SU9Ez6i8Mi5i95MuioL0pHWnk0xLbGDEykkXI0eVYnxoeaZatDuLUg5%2FdaESa2TZqfMqxWYtsETLCQll6wZuS%2BdEY1qRRu5s%2BJY%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20231102T121250Z&X-Amz-SignedHeaders=host&X-Amz-Expires=899&X-Amz-Credential=ASIAWA6KKHEJAE4HS7TR%2F20231102%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Signature=ed812a12073aa6b187ab4162b0be10047a54c7a5c23596950a92e9e46c6db845')") +# # +# diff --git a/R/sql-query-execution.R b/R/sql-query-execution.R new file mode 100644 index 0000000..311d5eb --- /dev/null +++ b/R/sql-query-execution.R @@ -0,0 +1,161 @@ +# https://docs.databricks.com/api/workspace/statementexecution +# https://docs.databricks.com/en/sql/admin/sql-execution-tutorial.html#language-curl + + +#' Execute SQL Query +#' +#' @details TODO +#' +#' @param statement TODO +#' @param warehouse_id TODO +#' @param catalog TODO +#' @param schema TODO +#' @param parameters TODO +#' @param row_limit TODO +#' @param byte_limit TODO +#' @param disposition TODO +#' @param format TODO +#' @param wait_timeout TODO +#' @inheritParams auth_params +#' +#' @family SQL Execution APIs +#' +#' @export +db_sql_query <- function(statement, warehouse_id, + catalog = NULL, schema = NULL, parameters = NULL, + row_limit = NULL, byte_limit = NULL, + disposition = c("INLINE", "EXTERNAL_LINKS"), + format = c("JSON_ARRAY", "ARROW_STREAM", "CSV"), + wait_timeout = "10s", + on_wait_timeout = c("CONTINUE", "CANCEL"), + host = db_host(), token = db_token(), + perform_request = TRUE) { + + disposition <- match.arg(disposition) + format <- match.arg(format) + on_wait_timeout <- match.arg(on_wait_timeout) + + body <- list( + statement = statement, + warehouse_id = warehouse_id, + catalog = catalog, + schema = schema, + parameters = parameters, + row_limit = row_limit, + byte_limit = byte_limit, + disposition = disposition, + format = format, + wait_timeout = wait_timeout, + on_wait_timeout = on_wait_timeout + ) + + req <- db_request( + endpoint = "sql/statements", + method = "POST", + version = "2.0", + body = body, + host = host, + token = token + ) + + if (perform_request) { + db_perform_request(req) + } else { + req + } + +} + +#' Cancel SQL Query +#' +#' @details TODO +#' +#' 
@param statement_id TODO +#' @inheritParams auth_params +#' +#' @family SQL Execution APIs +#' +#' @export +db_sql_cancel <- function(statement_id, + host = db_host(), token = db_token(), + perform_request = TRUE) { + + req <- db_request( + endpoint = paste0("sql/statements/", statement_id, "/cancel"), + method = "POST", + version = "2.0", + host = host, + token = token + ) + + if (perform_request) { + db_perform_request(req) + } else { + req + } + +} + + +#' Get SQL Query Status +#' +#' @details TODO +#' +#' @param statement_id TODO +#' @inheritParams auth_params +#' +#' @family SQL Execution APIs +#' +#' @export +db_sql_status <- function(statement_id, + host = db_host(), token = db_token(), + perform_request = TRUE) { + + req <- db_request( + endpoint = paste0("sql/statements/", statement_id), + method = "GET", + version = "2.0", + host = host, + token = token + ) + + if (perform_request) { + db_perform_request(req) + } else { + req + } + +} + + +#' Get SQL Query Results +#' +#' @details TODO +#' +#' @param statement_id TODO +#' @param chunk_index TODO +#' @inheritParams auth_params +#' +#' @family SQL Execution APIs +#' +#' @export +db_sql_result <- function(statement_id, chunk_index, + host = db_host(), token = db_token(), + perform_request = TRUE) { + + req <- db_request( + endpoint = paste0("sql/statements/", statement_id, "/result/chunks/", chunk_index), + method = "GET", + version = "2.0", + host = host, + token = token + ) + + if (perform_request) { + db_perform_request(req) + } else { + req + } + +} + diff --git a/man/db_sql_cancel.Rd b/man/db_sql_cancel.Rd new file mode 100644 index 0000000..631d72e --- /dev/null +++ b/man/db_sql_cancel.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sql-query-execution.R +\name{db_sql_cancel} +\alias{db_sql_cancel} +\title{Cancel SQL Query} +\usage{ +db_sql_cancel( + statement_id, + host = db_host(), + token = db_token(), + perform_request = TRUE +) +} +\arguments{ +\item{statement_id}{TODO} + +\item{host}{Databricks workspace URL, defaults to calling \code{\link[=db_host]{db_host()}}.} + +\item{token}{Databricks workspace token, defaults to calling \code{\link[=db_token]{db_token()}}.} +} +\description{ +Cancel SQL Query +} +\details{ +TODO +} +\seealso{ +Other SQL Execution APIs: +\code{\link{db_sql_query}()}, +\code{\link{db_sql_result}()}, +\code{\link{db_sql_status}()} +} +\concept{SQL Execution APIs} diff --git a/man/db_sql_query.Rd b/man/db_sql_query.Rd new file mode 100644 index 0000000..65dc7c6 --- /dev/null +++ b/man/db_sql_query.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sql-query-execution.R +\name{db_sql_query} +\alias{db_sql_query} +\title{Execute SQL Query} +\usage{ +db_sql_query( + statement, + warehouse_id, + catalog = NULL, + schema = NULL, + parameters = NULL, + row_limit = NULL, + byte_limit = NULL, + disposition = c("INLINE", "EXTERNAL_LINKS"), + format = c("JSON_ARRAY", "ARROW_STREAM", "CSV"), + wait_timeout = "10s", + on_wait_timeout = c("CONTINUE", "CANCEL"), + host = db_host(), + token = db_token(), + perform_request = TRUE +) +} +\arguments{ +\item{statement}{TODO} + +\item{warehouse_id}{TODO} + +\item{catalog}{TODO} + +\item{schema}{TODO} + +\item{parameters}{TODO} + +\item{row_limit}{TODO} + +\item{byte_limit}{TODO} + +\item{disposition}{TODO} + +\item{format}{TODO} + +\item{wait_timeout}{TODO} + +\item{host}{Databricks workspace URL, defaults to calling \code{\link[=db_host]{db_host()}}.} + 
+\item{token}{Databricks workspace token, defaults to calling \code{\link[=db_token]{db_token()}}.} +} +\description{ +Execute SQL Query +} +\details{ +TODO +} +\seealso{ +Other SQL Execution APIs: +\code{\link{db_sql_cancel}()}, +\code{\link{db_sql_result}()}, +\code{\link{db_sql_status}()} +} +\concept{SQL Execution APIs} diff --git a/man/db_sql_result.Rd b/man/db_sql_result.Rd new file mode 100644 index 0000000..5c85afc --- /dev/null +++ b/man/db_sql_result.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sql-query-execution.R +\name{db_sql_result} +\alias{db_sql_result} +\title{Get SQL Query Results} +\usage{ +db_sql_result( + statement_id, + chunk_index, + host = db_host(), + token = db_token(), + perform_request = TRUE +) +} +\arguments{ +\item{statement_id}{TODO} + +\item{chunk_index}{TODO} + +\item{host}{Databricks workspace URL, defaults to calling \code{\link[=db_host]{db_host()}}.} + +\item{token}{Databricks workspace token, defaults to calling \code{\link[=db_token]{db_token()}}.} +} +\description{ +Get SQL Query Results +} +\details{ +TODO +} +\seealso{ +Other SQL Execution APIs: +\code{\link{db_sql_cancel}()}, +\code{\link{db_sql_query}()}, +\code{\link{db_sql_status}()} +} +\concept{SQL Execution APIs} diff --git a/man/db_sql_status.Rd b/man/db_sql_status.Rd new file mode 100644 index 0000000..e8af7b4 --- /dev/null +++ b/man/db_sql_status.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sql-query-execution.R +\name{db_sql_status} +\alias{db_sql_status} +\title{Get SQL Query Status} +\usage{ +db_sql_status( + statement_id, + host = db_host(), + token = db_token(), + perform_request = TRUE +) +} +\arguments{ +\item{statement_id}{TODO} + +\item{host}{Databricks workspace URL, defaults to calling \code{\link[=db_host]{db_host()}}.} + +\item{token}{Databricks workspace token, defaults to calling \code{\link[=db_token]{db_token()}}.} +} +\description{ +Get SQL Query Status +} +\details{ +TODO +} +\seealso{ +Other SQL Execution APIs: +\code{\link{db_sql_cancel}()}, +\code{\link{db_sql_query}()}, +\code{\link{db_sql_result}()} +} +\concept{SQL Execution APIs} From 38428a01545a0a93d2b0b53a6e15db4c33cab6b5 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Fri, 3 Nov 2023 17:55:35 +1100 Subject: [PATCH 02/10] remove presigned url --- R/sql-exec-client.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/sql-exec-client.R b/R/sql-exec-client.R index 589abb6..c29a162 100644 --- a/R/sql-exec-client.R +++ b/R/sql-exec-client.R @@ -83,6 +83,3 @@ # # chunk_links[[1]] # # # # conn <- DBI::dbConnect(duckdb::duckdb()) -# # dbGetQuery(conn, "select * from 
read_csv_auto('https://e2-demo-field-eng-dbfs.s3.us-west-2.amazonaws.com/oregon-prod/1444828305810485.jobs/sql/extended/results_2023-11-03T13%3A09%3A55Z_9b14c6b9-8be2-4bce-b220-3a0b3d118a4b?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEPT%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaDnVzLXdlc3QtMi1maXBzIkYwRAIgZSyHdUi0o5vhf7kenVUOLWSXjfBeMiyjnxRtDrCiyKECIBN6fuDvbsS8V3wSm6gH8%2BlziYCznpZM5RehkZcq5%2FfkKtwDCC0QABoMNDE0MzUxNzY3ODI2IgxYNkjDOj7c5KykvlYquQNlnebmntDBqyZSfzzkhp5HZKnxkKcYuip15LE5x93iyEmGXSRlXv5DHTBlbNk1biNuHTZRDWtXsC6w%2FAZqrq%2BGXKlegxoZIELaAbEX2p94bHcC%2FUFa8o%2B9jBTZh7KeXvCZOUR8b8M9%2F8%2FdJ1ZmRMIyV7q%2Fa7D39bfYz4FLrzFQ8xs5Q%2B%2FmLvTaYT%2BL%2FxFdjtlPjuxfVM3CwkwvAvAb%2BwkGKMRLmwcq%2Bh8bDNMC7eKi9r0CAzr3x4Va6%2B5ibPuNjTqqDPONCLnH2QcVZS1oah6U3qQN6hbkvhsZRroKEJc0232HDz0qPdWZS%2B9G4Lew%2BY8tQ04MbeOkx6buQVnYi1UomEAk%2BJpPEeGGyrmcr6jQ8a36wlyhcqbZWl0zZA8INhHe2rAJQjDxDBL3IhqvTIHnlWZvAV0SzpEzD8l4Z%2FnBy2lwwXCH3MnVDqgpYNHUSOTAwx%2BqkgGZvaoWLKj4GoWPcoeQ1O%2FWqNwLSD%2FsBqwrQdzyfnpGRm81oJ6mBiRQr%2FdsXqbqAa44V9iGz4JhKicKjKC%2BVFcRyKhz%2BEo7UpVbu3TjqROiMOP6sFAgUzDQQ9IqJhZQXOGZhDMw4qOOqgY65AFaQw6xv4n44og2XQUWpj33wK0mjIT4HNnyZJmwdwG%2FGJNHr37Cn48b7%2FCVEw10OXLdJpQ8OfSeZ4ihybA9UsBZ%2BP%2BDqANQp6DjUSQ7ttUxkhoMTYYeJZT9mhgYlCn7590rFMd3BgXJimhYMvPRigmbPKKncX3Brvl4OhH2%2BKXMRKQJLoMEZNMADQXQBTtbzLZAbHX2Yrm2qby1SU9Ez6i8Mi5i95MuioL0pHWnk0xLbGDEykkXI0eVYnxoeaZatDuLUg5%2FdaESa2TZqfMqxWYtsETLCQll6wZuS%2BdEY1qRRu5s%2BJY%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20231102T121250Z&X-Amz-SignedHeaders=host&X-Amz-Expires=899&X-Amz-Credential=ASIAWA6KKHEJAE4HS7TR%2F20231102%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Signature=ed812a12073aa6b187ab4162b0be10047a54c7a5c23596950a92e9e46c6db845')") -# # -# From a9b265496d67c25aebce860570e39c9442ed5c6d Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Sat, 4 Nov 2023 01:51:28 +1100 Subject: [PATCH 03/10] removing client code --- R/sql-connector.R | 0 R/sql-exec-client.R | 85 --------------------------------------------- 2 files changed, 85 deletions(-) create mode 100644 R/sql-connector.R delete mode 100644 R/sql-exec-client.R diff --git a/R/sql-connector.R b/R/sql-connector.R new file mode 100644 index 0000000..e69de29 diff --git a/R/sql-exec-client.R b/R/sql-exec-client.R deleted file mode 100644 index c29a162..0000000 --- a/R/sql-exec-client.R +++ /dev/null @@ -1,85 +0,0 @@ -# library(brickster) -# library(arrow) -# library(furrr) -# library(progressr) -# -# handlers("cli") -# plan(sequential) -# -# # Notes: -# # Wait 0 seconds -# # format default to arrow with external links -# # poll every 5 seconds -# # -# # Steps: -# # 1. Send Query -# # 2. If result, parse -# # 3. Otherwise poll for result -# # 4. 
Distribute fetching into arrow format -# -# -# query <- db_sql_query( -# statement = "select * from zacdav.default.big_example LIMIT 1000000", -# warehouse_id = "18243426e662e2ad", -# format = "ARROW_STREAM", -# disposition = "EXTERNAL_LINKS", -# wait_timeout = "10s", -# on_wait_timeout = "CONTINUE" -# ) -# -# # poll till "SUCCEEDED" -# while (query$status$state %in% c("PENDING", "RUNNING")) { -# Sys.sleep(2) -# message("pending") -# query <- db_sql_status(statement_id = query$statement_id) -# } -# -# # check if results were truncated and warn -# if (query$manifest$truncated) { -# warning("Results are truncated...") -# } -# -# chunks <- seq.int(from = 0, length.out = query$manifest$total_chunk_count) -# -# chunks_to_tables <- function(statement_id, chunks, links_only = FALSE) { -# p <- progressr::progressor(along = chunks) -# purrr::map(chunks, function(x) { -# chunk <- db_sql_result(query$statement_id, chunk = x) -# table <- chunk$external_links[[1]]$external_link -# print(table) -# if (!links_only) { -# table <- arrow::read_ipc_stream(file = table, as_data_frame = FALSE) -# } -# p(message = paste0("fetching chunk: ", x)) -# table -# }) -# } -# -# with_progress(chunk_tables <- chunks_to_tables(query$statement_id, chunks)) -# -# -# -# arrow::read_ipc_stream( -# file = db_sql_result(query$statement_id, chunk = 1)$external_links[[1]]$external_link, -# as_data_frame = FALSE -# ) -# -# -# result <- do.call(arrow::concat_tables, chunk_tables) -# result -# -# -# arrow::as_record_batch(chunk_tables[[1]]) -# class((chunk_tables[[1]])) -# -# #### -# # # only works for CSV but saves needing to go multi-core route -# # -# # with_progress(chunk_links <- chunks_to_tables(query$statement_id, chunks, links_only = TRUE)) -# # -# # library(duckdb) -# # library(DBI) -# # -# # chunk_links[[1]] -# # -# # conn <- DBI::dbConnect(duckdb::duckdb()) From 7566b7f05c6d932883b174242d6000def5337d50 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Sun, 5 Nov 2023 21:41:54 +1100 Subject: [PATCH 04/10] changes: - sql execution functions renamed - updated DESCRIPTION (version, libs) - README updated - Request helpers improved - SQL connector added via reticulate - onLoad behaviour added --- DESCRIPTION | 3 +- NAMESPACE | 11 +- R/package-auth.R | 3 +- R/request-helpers.R | 15 +- R/sql-connector.R | 184 ++++++++++++++++++ R/sql-query-execution.R | 9 +- R/zzz.R | 11 ++ README.md | 1 + _pkgdown.yml | 2 + man/db_sql_client.Rd | 50 +++++ ...db_sql_cancel.Rd => db_sql_exec_cancel.Rd} | 12 +- man/{db_sql_query.Rd => db_sql_exec_query.Rd} | 12 +- ...db_sql_result.Rd => db_sql_exec_result.Rd} | 12 +- ...db_sql_status.Rd => db_sql_exec_status.Rd} | 12 +- man/install_db_sql_connector.Rd | 24 +++ man/py_db_sql_connector.Rd | 19 ++ vignettes/databricks-sql-connector.Rmd | 30 +++ 17 files changed, 373 insertions(+), 37 deletions(-) create mode 100644 man/db_sql_client.Rd rename man/{db_sql_cancel.Rd => db_sql_exec_cancel.Rd} (76%) rename man/{db_sql_query.Rd => db_sql_exec_query.Rd} (85%) rename man/{db_sql_result.Rd => db_sql_exec_result.Rd} (77%) rename man/{db_sql_status.Rd => db_sql_exec_status.Rd} (76%) create mode 100644 man/install_db_sql_connector.Rd create mode 100644 man/py_db_sql_connector.Rd create mode 100644 vignettes/databricks-sql-connector.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index d2521e2..d9d906c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: brickster Title: R interface to Databricks REST 2.0 APIs -Version: 0.2.0 +Version: 0.2.1 Authors@R: c( person(given = "Zac", @@ -31,6 +31,7 @@ Imports: 
purrr, rlang Suggests: + arrow, testthat (>= 3.0.0), DT, htmltools, diff --git a/NAMESPACE b/NAMESPACE index b4885e4..3206de0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -92,12 +92,13 @@ export(db_secrets_scope_acl_put) export(db_secrets_scope_create) export(db_secrets_scope_delete) export(db_secrets_scope_list_all) -export(db_sql_cancel) +export(db_sql_client) +export(db_sql_exec_cancel) +export(db_sql_exec_query) +export(db_sql_exec_result) +export(db_sql_exec_status) export(db_sql_global_warehouse_get) -export(db_sql_query) export(db_sql_query_history) -export(db_sql_result) -export(db_sql_status) export(db_sql_warehouse_create) export(db_sql_warehouse_delete) export(db_sql_warehouse_edit) @@ -126,6 +127,7 @@ export(get_and_start_cluster) export(get_and_start_warehouse) export(git_source) export(init_script_info) +export(install_db_sql_connector) export(is.access_control_req_group) export(is.access_control_req_user) export(is.access_control_request) @@ -172,6 +174,7 @@ export(new_cluster) export(notebook_task) export(open_workspace) export(pipeline_task) +export(py_db_sql_connector) export(python_wheel_task) export(remove_lib_path) export(s3_storage_info) diff --git a/R/package-auth.R b/R/package-auth.R index 1c079f4..cec513f 100644 --- a/R/package-auth.R +++ b/R/package-auth.R @@ -39,9 +39,10 @@ db_host <- function(id = NULL, prefix = NULL, profile = getOption("db_profile", if (is.null(id) && is.null(prefix)) { host <- read_env_var(key = "host", profile = profile) + host <- httr2::url_parse(host)$hostname } else { # otherwise construct host string - host <- paste0("https://", prefix, id, ".cloud.databricks.com") + host <- paste0(prefix, id, ".cloud.databricks.com") } host diff --git a/R/request-helpers.R b/R/request-helpers.R index 8343857..f15029e 100644 --- a/R/request-helpers.R +++ b/R/request-helpers.R @@ -17,10 +17,19 @@ #' @importFrom magrittr `%>%` db_request <- function(endpoint, method, version = NULL, body = NULL, host, token, ...) { - req <- httr2::request(base_url = paste0(host, "api", "/", version, "/")) %>% + url <- list( + scheme = "https", + hostname = host, + path = paste0("/api/", version) + ) + + url <- httr2::url_build(url) + user_agent_str <- paste0("brickster/", packageVersion("brickster")) + + req <- httr2::request(base_url = url) %>% httr2::req_auth_bearer_token(token) %>% - httr2::req_headers("User-Agent" = "brickster/1.0") %>% - httr2::req_user_agent(string = "brickster/1.0") %>% + httr2::req_headers("User-Agent" = user_agent_str) %>% + httr2::req_user_agent(string = user_agent_str) %>% httr2::req_url_path_append(endpoint) %>% httr2::req_method(method) %>% httr2::req_retry(max_tries = 3, backoff = ~ 2) diff --git a/R/sql-connector.R b/R/sql-connector.R index e69de29..e9b3e44 100644 --- a/R/sql-connector.R +++ b/R/sql-connector.R @@ -0,0 +1,184 @@ +#' Install Databricks SQL Connector (Python) +#' +#' @param envname TODO +#' @param method TODO +#' @param ... TODO +#' +#' @details TODO +#' +#' @export +#' +#' @examples +#' TODO +install_db_sql_connector <- function(envname = "r-brickster", method = "auto", ...) { + reticulate::py_install( + "databricks-sql-connector", + envname = envname, + method = method, + ... 
+ ) +} + +#' Create Databricks SQL Connector Client +#' +#' @details TODO +#' +#' @param id TODO +#' @param catalog TODO +#' @param schema TODO +#' @param compute_type TODO +#' @param use_cloud_fetch TODO +#' @param session_configuration TODO +#' @param host TODO +#' @param token TODO +#' @param workspace_id TODO +#' +#' @return TODO +#' @export +#' +#' @examples TODO +db_sql_client <- function(id, + catalog = NULL, schema = NULL, + compute_type = c("warehouse", "cluster"), + use_cloud_fetch = FALSE, + session_configuration = list(), + host = db_host(), token = db_token(), + workspace_id = db_wsid(), + ...) { + + compute_type <- match.arg(compute_type) + http_path <- generate_http_path( + id = id, + is_warehouse = compute_type == "warehouse", + workspace_id = workspace_id + ) + + DatabricksSqlClient$new( + host = host, + token = token, + http_path = http_path, + catalog = catalog, + schema = schema, + use_cloud_fetch = use_cloud_fetch, + session_configuration = session_configuration, + ... + ) + +} + +DatabricksSqlClient <- R6::R6Class( + classname = "db_sql_client", + public = list( + + initialize = function(host, token, http_path, + catalog, schema, + use_cloud_fetch, session_configuration, + ...) { + + private$connection <- py_db_sql_connector$connect( + server_hostname = host, + access_token = token, + http_path = http_path, + use_cloud_fetch = use_cloud_fetch, + session_configuration = session_configuration, + ... + ) + }, + + use_cloud_fetch = function() { + private$connection$use_cloud_fetch + }, + + set_cloud_fetch = function(enabled = TRUE) { + private$connection$use_cloud_fetch <- enabled + }, + + columns = function(catalog_name = NULL, schema_name = NULL, + table_name = NULL, column_name = NULL, + as_tibble = TRUE) { + cursor <- private$connection$cursor() + on.exit(cursor$close()) + cursor$columns( + catalog_name = catalog_name, + schema_name = schema_name, + table_name = table_name, + column_name = column_name + ) + handle_results(cursor$fetchall_arrow(), as_tibble) + }, + + catalogs = function(as_tibble = TRUE) { + cursor <- private$connection$cursor() + on.exit(cursor$close()) + cursor$catalogs() + handle_results(cursor$fetchall_arrow(), as_tibble) + }, + + schemas = function(catalog_name = NULL, schema_name = NULL, + as_tibble = TRUE) { + cursor <- private$connection$cursor() + on.exit(cursor$close()) + cursor$schemas( + catalog_name = catalog_name, + schema_name = schema_name + ) + handle_results(cursor$fetchall_arrow(), as_tibble) + }, + + tables = function(catalog_name = NULL, schema_name = NULL, + table_name = NULL, table_types = NULL, + as_tibble = TRUE) { + cursor <- private$connection$cursor() + on.exit(cursor$close()) + cursor$tables( + catalog_name = catalog_name, + schema_name = schema_name, + table_name = table_name, + table_types = table_types + ) + handle_results(cursor$fetchall_arrow(), as_tibble) + }, + + execute = function(operation, parameters = NULL, as_tibble = TRUE) { + cursor <- private$connection$cursor() + on.exit(cursor$close()) + cursor$execute( + operation = operation, + parameters = parameters + ) + handle_results(cursor$fetchall_arrow(), as_tibble) + }, + + execute_many = function(operation, seq_of_parameters = NULL, + as_tibble = TRUE) { + cursor <- private$connection$cursor() + on.exit(cursor$close()) + cursor$executemany( + operation = operation, + seq_of_parameters = seq_of_parameters + ) + handle_results(cursor$fetchall_arrow(), as_tibble) + } + + ), + private = list( + connection = NULL + ) +) + +generate_http_path <- function(id, 
is_warehouse = TRUE, + workspace_id = db_wsid()) { + if (is_warehouse) { + paste0("/sql/1.0/warehouses/", id) + } else { + paste0("/sql/protocolv1/o/", workspace_id, "/", id) + } +} + +handle_results <- function(x, as_tibble) { + if (as_tibble) { + x <- dplyr::collect(x) + } + x +} + diff --git a/R/sql-query-execution.R b/R/sql-query-execution.R index 311d5eb..0a61581 100644 --- a/R/sql-query-execution.R +++ b/R/sql-query-execution.R @@ -1,6 +1,7 @@ # https://docs.databricks.com/api/workspace/statementexecution # https://docs.databricks.com/en/sql/admin/sql-execution-tutorial.html#language-curl +# "https://", #' Execute SQL Query #' @@ -21,7 +22,7 @@ #' @family SQL Execution APIs #' #' @export -db_sql_query <- function(statement, warehouse_id, +db_sql_exec_query <- function(statement, warehouse_id, catalog = NULL, schema = NULL, parameters = NULL, row_limit = NULL, byte_limit = NULL, disposition = c("INLINE", "EXTERNAL_LINKS"), @@ -76,7 +77,7 @@ db_sql_query <- function(statement, warehouse_id, #' @family SQL Execution APIs #' #' @export -db_sql_cancel <- function(statement_id, +db_sql_exec_cancel <- function(statement_id, host = db_host(), token = db_token(), perform_request = TRUE) { @@ -107,7 +108,7 @@ db_sql_cancel <- function(statement_id, #' @family SQL Execution APIs #' #' @export -db_sql_status <- function(statement_id, +db_sql_exec_status <- function(statement_id, host = db_host(), token = db_token(), perform_request = TRUE) { @@ -139,7 +140,7 @@ db_sql_status <- function(statement_id, #' @family SQL Execution APIs #' #' @export -db_sql_result <- function(statement_id, chunk_index, +db_sql_exec_result <- function(statement_id, chunk_index, host = db_host(), token = db_token(), perform_request = TRUE) { diff --git a/R/zzz.R b/R/zzz.R index 419fffe..a607a7e 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,10 @@ +#' Databricks SQL Connector (Python) +#' +#' @details TODO +#' +#' @export +py_db_sql_connector <- NULL + .onLoad <- function(libname, pkgname) { if (requireNamespace("knitr", quietly = TRUE)) { knitr::knit_engines$set( @@ -13,4 +20,8 @@ db_sh = db_engine_sh ) } + + py_db_sql_connector <<- reticulate::import("databricks.sql", delay_load = TRUE) + reticulate::use_virtualenv("r-brickster", required = FALSE) + } diff --git a/README.md b/README.md index e9657e9..dc97475 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ that provides details on how to connect to a Databricks workspace. 
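For reference, a minimal sketch of how the renamed execution functions compose (the warehouse ID and statement are placeholders; the 2-second polling interval is arbitrary):

# submit a statement to a SQL warehouse, returning early if still running
res <- db_sql_exec_query(
  statement = "SELECT 1 AS x",
  warehouse_id = "<warehouse-id>",
  wait_timeout = "10s",
  on_wait_timeout = "CONTINUE"
)

# poll until the statement leaves the PENDING/RUNNING states
while (res$status$state %in% c("PENDING", "RUNNING")) {
  Sys.sleep(2)
  res <- db_sql_exec_status(statement_id = res$statement_id)
}

# fetch the first chunk of results once the statement has SUCCEEDED
if (res$status$state == "SUCCEEDED") {
  chunk <- db_sql_exec_result(statement_id = res$statement_id, chunk_index = 0)
}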
| [Endpoints](https://docs.databricks.com/sql/api/sql-endpoints.html) | Yes | 2.0 | | [Query History](https://docs.databricks.com/sql/api/query-history.html) | Yes | 2.0 | | [Jobs](https://docs.databricks.com/dev-tools/api/latest/jobs.html) | Yes | 2.1 | +| [SQL Statement Execution](https://docs.databricks.com/api/workspace/statementexecution) | Yes | 2.0 | | [REST 1.2 Commands](https://docs.databricks.com/dev-tools/api/1.2/index.html) | Partially | 1.2 | | [Unity Catalog](https://api-docs.databricks.com/rest/latest/unity-catalog-api-specification-2-1.html) | Partially | 2.1 | | [Tokens](https://docs.databricks.com/dev-tools/api/latest/tokens.html) | Undecided | 2.0 | diff --git a/_pkgdown.yml b/_pkgdown.yml index 85e2666..67ec657 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -80,6 +80,8 @@ reference: contents: starts_with("db_repo", internal = TRUE) - title: Secrets contents: starts_with("db_secrets", internal = TRUE) +- title: SQL Statement Execution + contents: starts_with("db_sql_exec", internal = TRUE) - title: Warehouses contents: - starts_with("db_sql", internal = TRUE) diff --git a/man/db_sql_client.Rd b/man/db_sql_client.Rd new file mode 100644 index 0000000..a2b5803 --- /dev/null +++ b/man/db_sql_client.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sql-connector.R +\name{db_sql_client} +\alias{db_sql_client} +\title{Create Databricks SQL Connector Client} +\usage{ +db_sql_client( + id, + catalog = NULL, + schema = NULL, + compute_type = c("warehouse", "cluster"), + use_cloud_fetch = FALSE, + session_configuration = list(), + host = db_host(), + token = db_token(), + workspace_id = db_wsid(), + ... +) +} +\arguments{ +\item{id}{TODO} + +\item{catalog}{TODO} + +\item{schema}{TODO} + +\item{compute_type}{TODO} + +\item{use_cloud_fetch}{TODO} + +\item{session_configuration}{TODO} + +\item{host}{TODO} + +\item{token}{TODO} + +\item{workspace_id}{TODO} +} +\value{ +TODO +} +\description{ +Create Databricks SQL Connector Client +} +\details{ +TODO +} +\examples{ +TODO +} diff --git a/man/db_sql_cancel.Rd b/man/db_sql_exec_cancel.Rd similarity index 76% rename from man/db_sql_cancel.Rd rename to man/db_sql_exec_cancel.Rd index 631d72e..c8fbee1 100644 --- a/man/db_sql_cancel.Rd +++ b/man/db_sql_exec_cancel.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/sql-query-execution.R -\name{db_sql_cancel} -\alias{db_sql_cancel} +\name{db_sql_exec_cancel} +\alias{db_sql_exec_cancel} \title{Cancel SQL Query} \usage{ -db_sql_cancel( +db_sql_exec_cancel( statement_id, host = db_host(), token = db_token(), @@ -26,8 +26,8 @@ TODO } \seealso{ Other SQL Execution APIs: -\code{\link{db_sql_query}()}, -\code{\link{db_sql_result}()}, -\code{\link{db_sql_status}()} +\code{\link{db_sql_exec_query}()}, +\code{\link{db_sql_exec_result}()}, +\code{\link{db_sql_exec_status}()} } \concept{SQL Execution APIs} diff --git a/man/db_sql_query.Rd b/man/db_sql_exec_query.Rd similarity index 85% rename from man/db_sql_query.Rd rename to man/db_sql_exec_query.Rd index 65dc7c6..4eeb2cc 100644 --- a/man/db_sql_query.Rd +++ b/man/db_sql_exec_query.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/sql-query-execution.R -\name{db_sql_query} -\alias{db_sql_query} +\name{db_sql_exec_query} +\alias{db_sql_exec_query} \title{Execute SQL Query} \usage{ -db_sql_query( +db_sql_exec_query( statement, warehouse_id, catalog = NULL, @@ -54,8 +54,8 @@ TODO } \seealso{ Other SQL Execution 
APIs: -\code{\link{db_sql_cancel}()}, -\code{\link{db_sql_result}()}, -\code{\link{db_sql_status}()} +\code{\link{db_sql_exec_cancel}()}, +\code{\link{db_sql_exec_result}()}, +\code{\link{db_sql_exec_status}()} } \concept{SQL Execution APIs} diff --git a/man/db_sql_result.Rd b/man/db_sql_exec_result.Rd similarity index 77% rename from man/db_sql_result.Rd rename to man/db_sql_exec_result.Rd index 5c85afc..933b939 100644 --- a/man/db_sql_result.Rd +++ b/man/db_sql_exec_result.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/sql-query-execution.R -\name{db_sql_result} -\alias{db_sql_result} +\name{db_sql_exec_result} +\alias{db_sql_exec_result} \title{Get SQL Query Results} \usage{ -db_sql_result( +db_sql_exec_result( statement_id, chunk_index, host = db_host(), @@ -29,8 +29,8 @@ TODO } \seealso{ Other SQL Execution APIs: -\code{\link{db_sql_cancel}()}, -\code{\link{db_sql_query}()}, -\code{\link{db_sql_status}()} +\code{\link{db_sql_exec_cancel}()}, +\code{\link{db_sql_exec_query}()}, +\code{\link{db_sql_exec_status}()} } \concept{SQL Execution APIs} diff --git a/man/db_sql_status.Rd b/man/db_sql_exec_status.Rd similarity index 76% rename from man/db_sql_status.Rd rename to man/db_sql_exec_status.Rd index e8af7b4..03c6ed7 100644 --- a/man/db_sql_status.Rd +++ b/man/db_sql_exec_status.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/sql-query-execution.R -\name{db_sql_status} -\alias{db_sql_status} +\name{db_sql_exec_status} +\alias{db_sql_exec_status} \title{Get SQL Query Status} \usage{ -db_sql_status( +db_sql_exec_status( statement_id, host = db_host(), token = db_token(), @@ -26,8 +26,8 @@ TODO } \seealso{ Other SQL Execution APIs: -\code{\link{db_sql_cancel}()}, -\code{\link{db_sql_query}()}, -\code{\link{db_sql_result}()} +\code{\link{db_sql_exec_cancel}()}, +\code{\link{db_sql_exec_query}()}, +\code{\link{db_sql_exec_result}()} } \concept{SQL Execution APIs} diff --git a/man/install_db_sql_connector.Rd b/man/install_db_sql_connector.Rd new file mode 100644 index 0000000..f627450 --- /dev/null +++ b/man/install_db_sql_connector.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sql-connector.R +\name{install_db_sql_connector} +\alias{install_db_sql_connector} +\title{Install Databricks SQL Connector (Python)} +\usage{ +install_db_sql_connector(envname = "r-brickster", method = "auto", ...) +} +\arguments{ +\item{envname}{TODO} + +\item{method}{TODO} + +\item{...}{TODO} +} +\description{ +Install Databricks SQL Connector (Python) +} +\details{ +TODO +} +\examples{ +TODO +} diff --git a/man/py_db_sql_connector.Rd b/man/py_db_sql_connector.Rd new file mode 100644 index 0000000..9120d89 --- /dev/null +++ b/man/py_db_sql_connector.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zzz.R +\docType{data} +\name{py_db_sql_connector} +\alias{py_db_sql_connector} +\title{Databricks SQL Connector (Python)} +\format{ +An object of class \code{python.builtin.module} (inherits from \code{python.builtin.object}) of length 0. 
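As a hedged illustration of what this exported module enables (hostname, token, and HTTP path are placeholders; `connect()` and the cursor methods belong to the connector's documented Python API):

# assumes install_db_sql_connector() has already been run
con <- py_db_sql_connector$connect(
  server_hostname = "<workspace>.cloud.databricks.com",
  access_token = Sys.getenv("DATABRICKS_TOKEN"),
  http_path = "/sql/1.0/warehouses/<warehouse-id>"
)
cursor <- con$cursor()
cursor$execute("SELECT 1 AS x")
cursor$fetchall()
cursor$close()
con$close()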
+} +\usage{ +py_db_sql_connector +} +\description{ +Databricks SQL Connector (Python) +} +\details{ +TODO +} +\keyword{datasets} diff --git a/vignettes/databricks-sql-connector.Rmd b/vignettes/databricks-sql-connector.Rmd new file mode 100644 index 0000000..a9568e8 --- /dev/null +++ b/vignettes/databricks-sql-connector.Rmd @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 9320776e8d622078fd7a0f462b98fc3c69f1eb67 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Mon, 6 Nov 2023 09:52:21 +1100 Subject: [PATCH 05/10] Updating DESCRIPTION --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d9d906c..c890382 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: brickster -Title: R interface to Databricks REST 2.0 APIs +Title: R interface to Databricks REST APIs Version: 0.2.1 Authors@R: c( @@ -18,6 +18,7 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Imports: + arrow, base64enc, cli, curl, @@ -29,11 +30,10 @@ Imports: jsonlite, magrittr, purrr, + reticulate, rlang Suggests: - arrow, testthat (>= 3.0.0), - DT, htmltools, knitr, magick, From 1755d5e84ce62eeae35ade6b37579b086c4c69c8 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Mon, 6 Nov 2023 11:53:52 +1100 Subject: [PATCH 06/10] Changes: - improved detection of env on Databricks for sql connector - Arrow imports --- NAMESPACE | 2 ++ R/databricks-helpers.R | 18 ++++++++++++++++++ R/sql-connector.R | 4 +++- R/zzz.R | 4 +++- man/detect_brickster_venv.Rd | 15 +++++++++++++++ man/install_db_sql_connector.Rd | 6 +++++- 6 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 R/databricks-helpers.R create mode 100644 man/detect_brickster_venv.Rd diff --git a/NAMESPACE b/NAMESPACE index 3206de0..b4b3860 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,6 +119,7 @@ export(db_workspace_list) export(db_workspace_mkdirs) export(db_wsid) export(dbfs_storage_info) +export(detect_brickster_venv) export(docker_image) export(email_notifications) export(file_storage_info) @@ -184,6 +185,7 @@ export(spark_jar_task) export(spark_python_task) export(spark_submit_task) export(wait_for_lib_installs) +import(arrow) import(cli) import(httr2) importFrom(glue,glue) diff --git a/R/databricks-helpers.R b/R/databricks-helpers.R new file mode 100644 index 0000000..1dc7031 --- /dev/null +++ b/R/databricks-helpers.R @@ -0,0 +1,18 @@ +on_databricks <- function() { + dbr <- Sys.getenv("DATABRICKS_RUNTIME_VERSION") + dbr != "" +} + +#' Detect brickster virtualenv +#' +#' @details Returns `NULL` when running within Databricks, +#' otherwise "r-brickster" +#' +#' @export +detect_brickster_venv <- function() { + if (on_databricks()) { + NULL + } else { + "r-brickster" + } +} diff --git a/R/sql-connector.R b/R/sql-connector.R index e9b3e44..9d6e5bf 100644 --- a/R/sql-connector.R +++ b/R/sql-connector.R @@ -10,7 +10,8 @@ #' #' @examples #' TODO -install_db_sql_connector <- function(envname = "r-brickster", method = "auto", ...) { +install_db_sql_connector <- function(envname = detect_brickster_venv(), + method = "auto", ...) { reticulate::py_install( "databricks-sql-connector", envname = envname, @@ -34,6 +35,7 @@ install_db_sql_connector <- function(envname = "r-brickster", method = "auto", . 
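A small sketch of the detection behaviour added above, toggling the environment variable purely for illustration:

# off Databricks the runtime variable is unset, so the dedicated
# virtualenv name is returned
Sys.setenv("DATABRICKS_RUNTIME_VERSION" = "")
detect_brickster_venv()  # "r-brickster"

# on Databricks the variable is set, so NULL is returned and the
# existing Python environment is used instead
Sys.setenv("DATABRICKS_RUNTIME_VERSION" = "14.0")
detect_brickster_venv()  # NULL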
#' @param workspace_id TODO #' #' @return TODO +#' @import arrow #' @export #' #' @examples TODO diff --git a/R/zzz.R b/R/zzz.R index a607a7e..15d80a2 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -22,6 +22,8 @@ py_db_sql_connector <- NULL } py_db_sql_connector <<- reticulate::import("databricks.sql", delay_load = TRUE) - reticulate::use_virtualenv("r-brickster", required = FALSE) + + venv <- detect_brickster_venv() + reticulate::use_virtualenv(virtualenv = venv, required = FALSE) } diff --git a/man/detect_brickster_venv.Rd b/man/detect_brickster_venv.Rd new file mode 100644 index 0000000..adf9294 --- /dev/null +++ b/man/detect_brickster_venv.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/databricks-helpers.R +\name{detect_brickster_venv} +\alias{detect_brickster_venv} +\title{Detect brickster virtualenv} +\usage{ +detect_brickster_venv() +} +\description{ +Detect brickster virtualenv +} +\details{ +Returns \code{NULL} when running within Databricks, +otherwise "r-brickster" +} diff --git a/man/install_db_sql_connector.Rd b/man/install_db_sql_connector.Rd index f627450..6a5c593 100644 --- a/man/install_db_sql_connector.Rd +++ b/man/install_db_sql_connector.Rd @@ -4,7 +4,11 @@ \alias{install_db_sql_connector} \title{Install Databricks SQL Connector (Python)} \usage{ -install_db_sql_connector(envname = "r-brickster", method = "auto", ...) +install_db_sql_connector( + envname = detect_brickster_venv(), + method = "auto", + ... +) } \arguments{ \item{envname}{TODO} From 5c434421900217bf106f3a93fb1a3a919ce97f29 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Mon, 6 Nov 2023 13:05:47 +1100 Subject: [PATCH 07/10] Trying to improve db_host() to work when https:// is omitted. --- R/package-auth.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/package-auth.R b/R/package-auth.R index cec513f..315edd6 100644 --- a/R/package-auth.R +++ b/R/package-auth.R @@ -39,6 +39,9 @@ db_host <- function(id = NULL, prefix = NULL, profile = getOption("db_profile", if (is.null(id) && is.null(prefix)) { host <- read_env_var(key = "host", profile = profile) + if (!grepl("^https://", host)) { + host <- paste0("https://", host) + } host <- httr2::url_parse(host)$hostname } else { # otherwise construct host string From 1591454128b3019eb1319841bbbace2ec86d9429 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Mon, 6 Nov 2023 17:19:41 +1100 Subject: [PATCH 08/10] Changes: - fix DESCRIPTION - add examples - update README - rename `detect_brickster_venv` to be `determine...` - Update auth + add tests --- DESCRIPTION | 11 +++--- NAMESPACE | 4 +- R/databricks-helpers.R | 6 +-- R/package-auth.R | 9 ++++- R/request-helpers.R | 3 +- R/sql-connector.R | 33 ++++++++--------- R/sql-query-execution.R | 2 - R/zzz.R | 11 +++++- README.md | 25 ++++++------- man/db_sql_client.Rd | 4 +- man/detect_brickster_venv.Rd | 15 -------- man/determine_brickster_venv.Rd | 15 ++++++++ man/install_db_sql_connector.Rd | 23 +++++++++--- man/py_db_sql_connector.Rd | 9 ++++- tests/testthat/test-auth.R | 47 +++++++++++++++++++----- tests/testthat/test-databricks-helpers.R | 13 +++++++ tests/testthat/test-repos.R | 28 -------------- vignettes/databricks-sql-connector.Rmd | 5 +-- 18 files changed, 152 insertions(+), 111 deletions(-) delete mode 100644 man/detect_brickster_venv.Rd create mode 100644 man/determine_brickster_venv.Rd create mode 100644 tests/testthat/test-databricks-helpers.R delete mode 100644 tests/testthat/test-repos.R diff --git a/DESCRIPTION b/DESCRIPTION index c890382..62626a3 100644 --- 
a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: brickster -Title: R interface to Databricks REST APIs +Title: Databricks Utilities in R Version: 0.2.1 Authors@R: c( @@ -13,7 +13,7 @@ Authors@R: email = "rafi.kurlansik@databricks.com"), person("Databricks", role = c("cph", "fnd")) ) -Description: Toolkit to work with Databricks from R +Description: Toolkit to work with Databricks from R. License: MIT + file LICENSE Encoding: UTF-8 LazyData: true @@ -31,15 +31,16 @@ Imports: magrittr, purrr, reticulate, - rlang + R6 (>= 2.4.0), + rlang, + utils Suggests: testthat (>= 3.0.0), htmltools, knitr, magick, rmarkdown, - rstudioapi, - rvest + rstudioapi Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.0 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index b4b3860..6ca53fe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -92,7 +92,6 @@ export(db_secrets_scope_acl_put) export(db_secrets_scope_create) export(db_secrets_scope_delete) export(db_secrets_scope_list_all) -export(db_sql_client) export(db_sql_exec_cancel) export(db_sql_exec_query) export(db_sql_exec_result) @@ -119,7 +118,7 @@ export(db_workspace_list) export(db_workspace_mkdirs) export(db_wsid) export(dbfs_storage_info) -export(detect_brickster_venv) +export(determine_brickster_venv) export(docker_image) export(email_notifications) export(file_storage_info) @@ -193,3 +192,4 @@ importFrom(magrittr,`%>%`) importFrom(rlang,.data) importFrom(stats,setNames) importFrom(utils,object.size) +importFrom(utils,packageVersion) diff --git a/R/databricks-helpers.R b/R/databricks-helpers.R index 1dc7031..c19d13e 100644 --- a/R/databricks-helpers.R +++ b/R/databricks-helpers.R @@ -3,13 +3,13 @@ on_databricks <- function() { dbr != "" } -#' Detect brickster virtualenv +#' Determine brickster virtualenv #' #' @details Returns `NULL` when running within Databricks, -#' otherwise "r-brickster" +#' otherwise `"r-brickster"` #' #' @export -detect_brickster_venv <- function() { +determine_brickster_venv <- function() { if (on_databricks()) { NULL } else { diff --git a/R/package-auth.R b/R/package-auth.R index 315edd6..9b30847 100644 --- a/R/package-auth.R +++ b/R/package-auth.R @@ -39,10 +39,15 @@ db_host <- function(id = NULL, prefix = NULL, profile = getOption("db_profile", if (is.null(id) && is.null(prefix)) { host <- read_env_var(key = "host", profile = profile) - if (!grepl("^https://", host)) { - host <- paste0("https://", host) + parsed_url <- httr2::url_parse(host) + + # inject scheme if not present then re-build with https + if (is.null(parsed_url$scheme)) { + parsed_url$scheme <- "https" + host <- httr2::url_build(parsed_url) } host <- httr2::url_parse(host)$hostname + } else { # otherwise construct host string host <- paste0(prefix, id, ".cloud.databricks.com") diff --git a/R/request-helpers.R b/R/request-helpers.R index f15029e..ecb0078 100644 --- a/R/request-helpers.R +++ b/R/request-helpers.R @@ -14,6 +14,7 @@ #' #' @return request #' @import httr2 +#' @importFrom utils packageVersion #' @importFrom magrittr `%>%` db_request <- function(endpoint, method, version = NULL, body = NULL, host, token, ...)
{ @@ -24,7 +25,7 @@ db_request <- function(endpoint, method, version = NULL, body = NULL, host, toke ) url <- httr2::url_build(url) - user_agent_str <- paste0("brickster/", packageVersion("brickster")) + user_agent_str <- paste0("brickster/", utils::packageVersion("brickster")) req <- httr2::request(base_url = url) %>% httr2::req_auth_bearer_token(token) %>% diff --git a/R/sql-connector.R b/R/sql-connector.R index 9d6e5bf..d9c3982 100644 --- a/R/sql-connector.R +++ b/R/sql-connector.R @@ -1,16 +1,17 @@ #' Install Databricks SQL Connector (Python) #' -#' @param envname TODO -#' @param method TODO -#' @param ... TODO +#' @inheritParams reticulate::py_install +#' @details Installs [`databricks-sql-connector`](https://github.com/databricks/databricks-sql-python). +#' Environment is resolved by [determine_brickster_venv()] which defaults to +#' `r-brickster` virtualenv. #' -#' @details TODO +#' When running within Databricks it will use the existing python environment. #' #' @export #' #' @examples -#' TODO -install_db_sql_connector <- function(envname = detect_brickster_venv(), +#' \dontrun{install_db_sql_connector()} +install_db_sql_connector <- function(envname = determine_brickster_venv(), method = "auto", ...) { reticulate::py_install( "databricks-sql-connector", @@ -23,21 +23,31 @@ #' Create Databricks SQL Connector Client #' -#' @details TODO +#' @details Create client using Databricks SQL Connector. #' -#' @param id TODO -#' @param catalog TODO -#' @param schema TODO -#' @param compute_type TODO -#' @param use_cloud_fetch TODO -#' @param session_configuration TODO -#' @param host TODO -#' @param token TODO -#' @param workspace_id TODO +#' @param id String, ID of either the SQL warehouse or all purpose cluster. +#' Important to set `compute_type` to the associated type of `id`. +#' @param compute_type One of `"warehouse"` (default) or `"cluster"`, corresponding to +#' associated compute type of the resource specified in `id`.
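A hedged usage sketch of the client documented in this commit (the warehouse ID is a placeholder; per the R6 methods defined earlier, results come back as a tibble by default, or an arrow Table when `as_tibble = FALSE`):

client <- db_sql_client(id = "<warehouse-id>")
client$execute("SELECT 1 AS x")                      # tibble
client$execute("SELECT 1 AS x", as_tibble = FALSE)   # arrow Table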
#' #' @export py_db_sql_connector <- NULL @@ -23,7 +30,7 @@ py_db_sql_connector <- NULL py_db_sql_connector <<- reticulate::import("databricks.sql", delay_load = TRUE) - venv <- detect_brickster_venv() + venv <- determine_brickster_venv() reticulate::use_virtualenv(virtualenv = venv, required = FALSE) } diff --git a/README.md b/README.md index dc97475..01a3f7e 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -# brickster +# brickster -[![R-CMD-check](https://github.com/zacdav-db/brickster/workflows/R-CMD-check/badge.svg)](https://github.com/zacdav-db/brickster/actions) -[![Codecov test -coverage](https://codecov.io/gh/zacdav-db/brickster/branch/main/graph/badge.svg)](https://app.codecov.io/gh/zacdav-db/brickster?branch=main) + +[![R-CMD-check](https://github.com/zacdav-db/brickster/workflows/R-CMD-check/badge.svg)](https://github.com/zacdav-db/brickster/actions) [![Codecov test coverage](https://codecov.io/gh/zacdav-db/brickster/branch/main/graph/badge.svg)](https://app.codecov.io/gh/zacdav-db/brickster?branch=main) + `{brickster}` aims to reduce friction for R users on Databricks by: @@ -16,7 +16,7 @@ coverage](https://codecov.io/gh/zacdav-db/brickster/branch/main/graph/badge.svg) - Utility functions to streamline workloads -- Shiny widgets for RStudio +- Expose the [`databricks-sql-connector`](https://github.com/databricks/databricks-sql-python) via `{reticulate}` ## Installation @@ -24,13 +24,12 @@ coverage](https://codecov.io/gh/zacdav-db/brickster/branch/main/graph/badge.svg) ## Setup Authentication -Docs website has [an article](https://zacdav-db.github.io/brickster/articles/setup-auth.html) -that provides details on how to connect to a Databricks workspace. +Docs website has [an article](https://zacdav-db.github.io/brickster/articles/setup-auth.html) that provides details on how to connect to a Databricks workspace. ## API Coverage | API | Available | Version | -|----------------------------------------------------------------------------------------------------------------------|-----------|---------| +|----------------------------------------------------|----------|----------| | [DBFS](https://docs.databricks.com/dev-tools/api/latest/dbfs.html) | Yes | 2.0 | | [Secrets](https://docs.databricks.com/dev-tools/api/latest/secrets.html) | Yes | 2.0 | | [Repos](https://docs.databricks.com/dev-tools/api/latest/repos.html) | Yes | 2.0 | @@ -45,12 +44,12 @@ that provides details on how to connect to a Databricks workspace. 
| [SQL Statement Execution](https://docs.databricks.com/api/workspace/statementexecution) | Yes | 2.0 | | [REST 1.2 Commands](https://docs.databricks.com/dev-tools/api/1.2/index.html) | Partially | 1.2 | | [Unity Catalog](https://api-docs.databricks.com/rest/latest/unity-catalog-api-specification-2-1.html) | Partially | 2.1 | -| [Tokens](https://docs.databricks.com/dev-tools/api/latest/tokens.html) | Undecided | 2.0 | -| [Delta Live Tables](https://docs.databricks.com/data-engineering/delta-live-tables/delta-live-tables-api-guide.html) | Undecided | 2.0 | | mlflow webhooks (Private Preview) | Later | 2.0 | -| [Queries & Dashboards](https://docs.databricks.com/sql/api/queries-dashboards.html) | Undecided | 2.0 | -| [Instance Pools](https://docs.databricks.com/dev-tools/api/latest/instance-pools.html) | Undecided | 2.0 | -| mlflow OSS | Undecided | 2.0 | +| [Tokens](https://docs.databricks.com/dev-tools/api/latest/tokens.html) | Never | 2.0 | +| [Delta Live Tables](https://docs.databricks.com/data-engineering/delta-live-tables/delta-live-tables-api-guide.html) | Never | 2.0 | +| [Queries & Dashboards](https://docs.databricks.com/sql/api/queries-dashboards.html) | Never | 2.0 | +| [Instance Pools](https://docs.databricks.com/dev-tools/api/latest/instance-pools.html) | Never | 2.0 | +| mlflow OSS | Never | 2.0 | | [Cluster Policies](https://docs.databricks.com/dev-tools/api/latest/policies.html) | Never | 2.0 | | [Permissions](https://docs.databricks.com/dev-tools/api/latest/permissions.html) | Never | 2.0 | | [Token Management](https://docs.databricks.com/dev-tools/api/latest/token-management.html) | Never | 2.0 | diff --git a/man/db_sql_client.Rd b/man/db_sql_client.Rd index a2b5803..c9d2a0e 100644 --- a/man/db_sql_client.Rd +++ b/man/db_sql_client.Rd @@ -46,5 +46,7 @@ Create Databricks SQL Connector Client TODO } \examples{ -TODO +\dontrun{ + client <- db_sql_client(id = "", use_cloud_fetch = TRUE) +} } diff --git a/man/detect_brickster_venv.Rd b/man/detect_brickster_venv.Rd deleted file mode 100644 index adf9294..0000000 --- a/man/detect_brickster_venv.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/databricks-helpers.R -\name{detect_brickster_venv} -\alias{detect_brickster_venv} -\title{Detect brickster virtualenv} -\usage{ -detect_brickster_venv() -} -\description{ -Detect brickster virtualenv -} -\details{ -Returns \code{NULL} when running within Databricks, -otherwise "r-brickster" -} diff --git a/man/determine_brickster_venv.Rd b/man/determine_brickster_venv.Rd new file mode 100644 index 0000000..e648452 --- /dev/null +++ b/man/determine_brickster_venv.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/databricks-helpers.R +\name{determine_brickster_venv} +\alias{determine_brickster_venv} +\title{Determine brickster virtualenv} +\usage{ +determine_brickster_venv() +} +\description{ +Determine brickster virtualenv +} +\details{ +Returns \code{NULL} when running within Databricks, +otherwise \code{"r-brickster"} +} diff --git a/man/install_db_sql_connector.Rd b/man/install_db_sql_connector.Rd index 6a5c593..0589e30 100644 --- a/man/install_db_sql_connector.Rd +++ b/man/install_db_sql_connector.Rd @@ -5,24 +5,35 @@ \title{Install Databricks SQL Connector (Python)} \usage{ install_db_sql_connector( - envname = detect_brickster_venv(), + envname = determine_brickster_venv(), method = "auto", ... 
) } \arguments{ -\item{envname}{TODO} +\item{envname}{The name, or full path, of the environment in which Python +packages are to be installed. When \code{NULL} (the default), the active +environment as set by the \code{RETICULATE_PYTHON_ENV} variable will be used; +if that is unset, then the \code{r-reticulate} environment will be used.} -\item{method}{TODO} +\item{method}{Installation method. By default, "auto" automatically finds a +method that will work in the local environment. Change the default to force +a specific installation method. Note that the "virtualenv" method is not +available on Windows.} -\item{...}{TODO} +\item{...}{Additional arguments passed to \code{\link[reticulate:conda_install]{conda_install()}} +or \code{\link[reticulate:virtualenv_install]{virtualenv_install()}}.} } \description{ Install Databricks SQL Connector (Python) } \details{ -TODO +Installs \href{https://github.com/databricks/databricks-sql-python}{\code{databricks-sql-connector}}. +Environment is resolved by \code{\link[=determine_brickster_venv]{determine_brickster_venv()}} which defaults to +\code{r-brickster} virtualenv. + +When running within Databricks it will use the existing python environment. } \examples{ -TODO +\dontrun{install_db_sql_connector()} } diff --git a/man/py_db_sql_connector.Rd b/man/py_db_sql_connector.Rd index 9120d89..f493e2b 100644 --- a/man/py_db_sql_connector.Rd +++ b/man/py_db_sql_connector.Rd @@ -11,9 +11,14 @@ An object of class \code{python.builtin.module} (inherits from \code{python.buil py_db_sql_connector } \description{ -Databricks SQL Connector (Python) +Access the Databricks SQL connector from Python via +{reticulate}. } \details{ -TODO +This requires that the connector has been installed via +\code{\link[=install_db_sql_connector]{install_db_sql_connector()}}. + +For more documentation of the methods, refer to the +\href{https://github.com/databricks/databricks-sql-python}{python documentation}.
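The host normalisation exercised by the new tests below can be sketched as follows (values mirror the test table; assumes `db_host()` reads the `DATABRICKS_HOST` environment variable):

Sys.setenv("DATABRICKS_HOST" = "https://mock.cloud.databricks.com/")
db_host()  # "mock.cloud.databricks.com"

Sys.setenv("DATABRICKS_HOST" = "mock.cloud.databricks.com")
db_host()  # scheme injected internally, hostname unchanged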
} \keyword{datasets} diff --git a/tests/testthat/test-auth.R b/tests/testthat/test-auth.R index 91996c8..50a2d67 100644 --- a/tests/testthat/test-auth.R +++ b/tests/testthat/test-auth.R @@ -24,15 +24,6 @@ test_that("auth functions - baseline behaviour", { Sys.setenv("DATABRICKS_HOST" = "") expect_error(db_host()) - expect_identical( - db_host(id = "mock", prefix = "dev-"), - "https://dev-mock.cloud.databricks.com" - ) - expect_identical( - db_host(id = "mock"), - "https://mock.cloud.databricks.com" - ) - }) test_that("auth functions - switching profile", { @@ -90,3 +81,41 @@ test_that("auth functions - switching profile", { expect_identical(db_wsid(), wsid) }) + +test_that("auth functions - host handling", { + + expect_identical( + db_host(id = "mock", prefix = "dev-"), + "dev-mock.cloud.databricks.com" + ) + + expect_identical( + db_host(id = "mock"), + "mock.cloud.databricks.com" + ) + + expect_identical( + db_host(id = "mock", prefix = "dev-"), + "dev-mock.cloud.databricks.com" + ) + + # input and output pairs to check + hostname_mapping <- list( + "https://mock.cloud.databricks.com" = "mock.cloud.databricks.com", + "https://mock.cloud.databricks.com/" = "mock.cloud.databricks.com", + "http://mock.cloud.databricks.com" = "mock.cloud.databricks.com", + "mock.cloud.databricks.com" = "mock.cloud.databricks.com", + "mock.cloud.databricks.com/" = "mock.cloud.databricks.com", + "mock.cloud.databricks.com//" = "mock.cloud.databricks.com", + "://mock.cloud.databricks.com" = NULL, + "//mock.cloud.databricks.com" = "mock.cloud.databricks.com", + "tps://mock.cloud.databricks.com" = "mock.cloud.databricks.com" + ) + + purrr::iwalk(hostname_mapping, function(output, input) { + Sys.setenv("DATABRICKS_HOST" = input) + expect_no_error(db_host()) + expect_identical(db_host(), output) + }) + +}) diff --git a/tests/testthat/test-databricks-helpers.R b/tests/testthat/test-databricks-helpers.R new file mode 100644 index 0000000..639cafd --- /dev/null +++ b/tests/testthat/test-databricks-helpers.R @@ -0,0 +1,13 @@ +test_that("databricks helpers - runtime detection", { + + Sys.setenv("DATABRICKS_RUNTIME_VERSION" = "") + expect_no_error(on_databricks()) + expect_identical(on_databricks(), FALSE) + expect_identical(determine_brickster_venv(), "r-brickster") + + Sys.setenv("DATABRICKS_RUNTIME_VERSION" = "14.0") + expect_no_error(on_databricks()) + expect_identical(on_databricks(), TRUE) + expect_null(determine_brickster_venv()) + +}) diff --git a/tests/testthat/test-repos.R b/tests/testthat/test-repos.R deleted file mode 100644 index c9ba86c..0000000 --- a/tests/testthat/test-repos.R +++ /dev/null @@ -1,28 +0,0 @@ -# http_mock_status <- function(status) { -# expr <- substitute( -# expression( -# function(x) { -# httr2::response(status_code = status) -# } -# ), list(status = status)) -# eval(expr, envir = NULL) -# } -# -# a <- http_mock_status(200) -# a -# -# httr2::with_mock(my_mock, { -# db_repo_get(repo_id = 1, perform_request = FALSE) %>% -# httr2::req_perform() -# }) -# -# -# test_that("repos api behaviour", { -# -# expect_error() -# expect_s3_class() -# expect_ -# -# -# -# }) diff --git a/vignettes/databricks-sql-connector.Rmd b/vignettes/databricks-sql-connector.Rmd index a9568e8..4b52427 100644 --- a/vignettes/databricks-sql-connector.Rmd +++ b/vignettes/databricks-sql-connector.Rmd @@ -10,9 +10,8 @@ - - - + + From 39f3019cfafb37d293e8068a1e7f86bb8d30eba4 Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Mon, 6 Nov 2023 20:28:05 +1100 Subject: [PATCH 09/10] Lots of docs updates --- DESCRIPTION | 2 +- 
 NAMESPACE | 1 +
 R/sql-connector.R | 178 ++++++++++-
 R/sql-query-execution.R | 122 ++++++--
 _pkgdown.yml | 5 +
 man/DatabricksSqlClient.Rd | 415 +++++++++++++++++++++++++
 man/db_sql_client.Rd | 38 ++-
 man/db_sql_exec_cancel.Rd | 10 +-
 man/db_sql_exec_query.Rd | 77 ++++-
 man/db_sql_exec_result.Rd | 21 +-
 man/db_sql_exec_status.Rd | 17 +-
 man/install_db_sql_connector.Rd | 2 +-
 vignettes/databricks-sql-connector.Rmd | 29 --
 13 files changed, 825 insertions(+), 92 deletions(-)
 create mode 100644 man/DatabricksSqlClient.Rd
 delete mode 100644 vignettes/databricks-sql-connector.Rmd

diff --git a/DESCRIPTION b/DESCRIPTION
index 62626a3..7696821 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -41,7 +41,7 @@ Suggests:
 magick,
 rmarkdown,
 rstudioapi
-Roxygen: list(markdown = TRUE)
+Roxygen: list(markdown = TRUE, r6 = TRUE)
 RoxygenNote: 7.2.0
 VignetteBuilder: knitr
 URL: https://github.com/zacdav-db/brickster
diff --git a/NAMESPACE b/NAMESPACE
index 6ca53fe..8181f16 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand

+export(DatabricksSqlClient)
 export(access_control_req_group)
 export(access_control_req_user)
 export(access_control_request)
diff --git a/R/sql-connector.R b/R/sql-connector.R
index d9c3982..51b31b1 100644
--- a/R/sql-connector.R
+++ b/R/sql-connector.R
@@ -10,7 +10,7 @@
 #' @export
 #'
 #' @examples
-#' \dontrun{install_db_sql_connector()}
+#' \donttest{install_db_sql_connector()}
 install_db_sql_connector <- function(envname = determine_brickster_venv(),
                                      method = "auto", ...) {
   reticulate::py_install(
@@ -23,21 +23,31 @@
 #' Create Databricks SQL Connector Client
 #'
-#' @details TODO
+#' @details Creates a client using the Databricks SQL Connector (Python).
 #'
-#' @param id TODO
-#' @param catalog TODO
-#' @param schema TODO
-#' @param compute_type TODO
-#' @param use_cloud_fetch TODO
-#' @param session_configuration TODO
-#' @param host TODO
-#' @param token TODO
-#' @param workspace_id TODO
+#' @param id String, ID of either the SQL warehouse or all-purpose cluster.
+#' Ensure `compute_type` is set to the type associated with `id`.
+#' @param compute_type One of `"warehouse"` (default) or `"cluster"`, corresponding to
+#' the compute type of the resource specified in `id`.
+#' @param use_cloud_fetch Boolean (default is `FALSE`). `TRUE` to send fetch
+#' requests directly to the cloud object store to download chunks of data.
+#' `FALSE` to send fetch requests directly to Databricks.
#'
+#' If `use_cloud_fetch` is set to `TRUE` but network access is blocked, then
+#' the fetch requests will fail.
+#' @param session_configuration An optional named list of Spark session
+#' configuration parameters. Setting a configuration is equivalent to using the
+#' `SET key=val` SQL command.
+#' Run the SQL command `SET -v` to get a full list of available configurations.
+#' @param workspace_id String, workspace ID used to build the HTTP path for the
+#' connection. This defaults to using [db_wsid()] to get the `DATABRICKS_WSID`
+#' environment variable. Not required if `compute_type` is `"cluster"`.
+#' @param ... Passed on to [DatabricksSqlClient()].
+#' @inheritParams db_sql_exec_query +#' @inheritParams auth_params #' +#' @import arrow +#' @returns [DatabricksSqlClient()] #' @examples #' \dontrun{ #' client <- db_sql_client(id = "", use_cloud_fetch = TRUE) @@ -71,10 +81,37 @@ db_sql_client <- function(id, } +#' @title Databricks SQL Connector +#' +#' @description +#' Wraps the [`databricks-sql-connector`](https://github.com/databricks/databricks-sql-python) +#' using [reticulate](https://rstudio.github.io/reticulate/). +#' +#' [API reference on Databricks docs](https://docs.databricks.com/en/dev-tools/python-sql-connector.html#api-reference) +#' @export DatabricksSqlClient <- R6::R6Class( classname = "db_sql_client", public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + #' + #' Note that this object is typically constructed via [db_sql_client()]. + #' + #' @param host (`character(1)`)\cr + #' See [db_sql_client()]. + #' @param token (`character(1)`)\cr + #' See [db_sql_client()]. + #' @param http_path (`character(1)`)\cr + #' See [db_sql_client()]. + #' @param catalog (`character(1)`)\cr + #' See [db_sql_client()]. + #' @param use_cloud_fetch (`logical(1)`)\cr + #' See [db_sql_client()]. + #' @param session_configuration (`list(...)`)\cr + #' See [db_sql_client()]. + #' @param ... Parameters passed to [connection method](https://docs.databricks.com/en/dev-tools/python-sql-connector.html#methods) + #' @return [DatabricksSqlClient]. initialize = function(host, token, http_path, catalog, schema, use_cloud_fetch, session_configuration, @@ -90,6 +127,30 @@ DatabricksSqlClient <- R6::R6Class( ) }, + #' @description + #' Execute a metadata query about the columns. + #' + #' @param catalog_name (`character(1)`)\cr + #' A catalog name to retrieve information about. + #' The `%` character is interpreted as a wildcard. + #' @param schema_name (`character(1)`)\cr + #' A schema name to retrieve information about. + #' The `%` character is interpreted as a wildcard. + #' @param table_name (`character(1)`)\cr + #' A table name to retrieve information about. + #' The `%` character is interpreted as a wildcard. + #' @param column_name (`character(1)`)\cr + #' A column name to retrieve information about. + #' The `%` character is interpreted as a wildcard. + #' @param as_tibble (`logical(1)`)\cr + #' If `TRUE` (default) will return [tibble::tibble], otherwise returns + #' [arrow::Table]. + #' @examples + #' \dontrun{ + #' client$columns(catalog_name = "defa%") + #' client$columns(catalog_name = "default", table_name = "gold_%") + #' } + #' @return [tibble::tibble] or [arrow::Table]. columns = function(catalog_name = NULL, schema_name = NULL, table_name = NULL, column_name = NULL, as_tibble = TRUE) { @@ -104,6 +165,17 @@ DatabricksSqlClient <- R6::R6Class( handle_results(cursor$fetchall_arrow(), as_tibble) }, + #' @description + #' Execute a metadata query about the catalogs. + #' + #' @param as_tibble (`logical(1)`)\cr + #' If `TRUE` (default) will return [tibble::tibble], otherwise returns + #' [arrow::Table]. + #' @examples + #' \dontrun{ + #' client$catalogs() + #' } + #' @return [tibble::tibble] or [arrow::Table]. catalogs = function(as_tibble = TRUE) { cursor <- private$connection$cursor() on.exit(cursor$close()) @@ -111,6 +183,23 @@ DatabricksSqlClient <- R6::R6Class( handle_results(cursor$fetchall_arrow(), as_tibble) }, + #' @description + #' Execute a metadata query about the schemas. + #' + #' @param catalog_name (`character(1)`)\cr + #' A catalog name to retrieve information about. 
+ #' The `%` character is interpreted as a wildcard. + #' @param schema_name (`character(1)`)\cr + #' A schema name to retrieve information about. + #' The `%` character is interpreted as a wildcard. + #' @param as_tibble (`logical(1)`)\cr + #' If `TRUE` (default) will return [tibble::tibble], otherwise returns + #' [arrow::Table]. + #' @examples + #' \dontrun{ + #' client$schemas(catalog_name = "main") + #' } + #' @return [tibble::tibble] or [arrow::Table]. schemas = function(catalog_name = NULL, schema_name = NULL, as_tibble = TRUE) { cursor <- private$connection$cursor() @@ -122,6 +211,24 @@ DatabricksSqlClient <- R6::R6Class( handle_results(cursor$fetchall_arrow(), as_tibble) }, + #' @description + #' Execute a metadata query about tables and views + #' + #' @param catalog_name (`character(1)`)\cr + #' A catalog name to retrieve information about. + #' The `%` character is interpreted as a wildcard. + #' @param schema_name (`character(1)`)\cr + #' A schema name to retrieve information about. + #' The `%` character is interpreted as a wildcard. + #' @param table_name (`character(1)`)\cr + #' A table name to retrieve information about. + #' The `%` character is interpreted as a wildcard. + #' @param table_types (`character()`)\cr + #' A list of table types to match, for example `"TABLE"` or `"VIEW"`. + #' @param as_tibble (`logical(1)`)\cr + #' If `TRUE` (default) will return [tibble::tibble], otherwise returns + #' [arrow::Table]. + #' @return [tibble::tibble] or [arrow::Table]. tables = function(catalog_name = NULL, schema_name = NULL, table_name = NULL, table_types = NULL, as_tibble = TRUE) { @@ -136,6 +243,26 @@ DatabricksSqlClient <- R6::R6Class( handle_results(cursor$fetchall_arrow(), as_tibble) }, + #' @description + #' Prepares and then runs a database query or command. + #' + #' @param operation (`character(1)`)\cr + #' The query or command to prepare and then run. + #' @param parameters (`list()`)\cr + #' Optional. A sequence of parameters to use with the operation parameter. + #' @param as_tibble (`logical(1)`)\cr + #' If `TRUE` (default) will return [tibble::tibble], otherwise returns + #' [arrow::Table]. + #' @examples + #' \dontrun{ + #' client$execute("select 1") + #' client$execute("select * from x.y.z limit 100") + #' client$execute( + #' operation = "select * from x.y.z where a < %(threshold)s limit 1000", + #' parameters = list(threshold = 100) + #' ) + #'} + #' @return [tibble::tibble] or [arrow::Table]. execute = function(operation, parameters = NULL, as_tibble = TRUE) { cursor <- private$connection$cursor() on.exit(cursor$close()) @@ -146,6 +273,31 @@ DatabricksSqlClient <- R6::R6Class( handle_results(cursor$fetchall_arrow(), as_tibble) }, + #' @description + #' Prepares and then runs a database query or command using all parameter + #' sequences in the seq_of_parameters argument. Only the final result set + #' is retained. + #' + #' @param operation (`character(1)`)\cr + #' The query or command to prepare and then run. + #' @param seq_of_parameters (`list(list())`)\cr + #' A sequence of many sets of parameter values to use with the operation + #' parameter. + #' @param as_tibble (`logical(1)`)\cr + #' If `TRUE` (default) will return [tibble::tibble], otherwise returns + #' [arrow::Table]. 
+  #' @examples
+  #' \dontrun{
+  #' client$execute_many(
+  #'   operation = "select * from x.y.z where a < %(threshold)s limit 1000",
+  #'   seq_of_parameters = list(
+  #'     list(threshold = 100),
+  #'     list(threshold = 200),
+  #'     list(threshold = 300)
+  #'   )
+  #' )
+  #'}
+  #' @return [tibble::tibble] or [arrow::Table].
 execute_many = function(operation, seq_of_parameters = NULL,
                         as_tibble = TRUE) {
   cursor <- private$connection$cursor()
diff --git a/R/sql-query-execution.R b/R/sql-query-execution.R
index 3f62322..d78df79 100644
--- a/R/sql-query-execution.R
+++ b/R/sql-query-execution.R
@@ -3,19 +3,72 @@

 #' Execute SQL Query
 #'
-#' @details TODO
-#'
-#' @param statement TODO
-#' @param warehouse_id TODO
-#' @param catalog TODO
-#' @param schema TODO
-#' @param parameters TODO
-#' @param row_limit TODO
-#' @param byte_limit TODO
-#' @param disposition TODO
-#' @param format TODO
-#' @param wait_timeout TODO
+#' @details Refer to the
+#' [web documentation](https://docs.databricks.com/api/workspace/statementexecution/executestatement)
+#' for detailed material on the interaction of the various parameters and general recommendations.
+#'
+#' @param statement String, the SQL statement to execute. The statement can
+#' optionally be parameterized, see `parameters`.
+#' @param warehouse_id String, ID of warehouse upon which to execute a statement.
+#' @param catalog String, sets default catalog for statement execution, similar
+#' to `USE CATALOG` in SQL.
+#' @param schema String, sets default schema for statement execution, similar
+#' to `USE SCHEMA` in SQL.
+#' @param parameters List of Named Lists, parameters to pass into a SQL
+#' statement containing parameter markers.
+#'
+#' A parameter consists of a name, a value, and *optionally* a type.
+#' To represent a `NULL` value, the value field may be omitted or set to `NULL`
+#' explicitly.
+#'
+#' See [docs](https://docs.databricks.com/api/workspace/statementexecution/executestatement)
+#' for more details.
+#' @param row_limit Integer, applies the given row limit to the statement's
+#' result set, but unlike the `LIMIT` clause in SQL, it also sets the
+#' `truncated` field in the response to indicate whether the result was trimmed
+#' due to the limit or not.
+#' @param byte_limit Integer, applies the given byte limit to the statement's
+#' result size. Byte counts are based on internal data representations and
+#' might not match the final size in the requested format. If the result was
+#' truncated due to the byte limit, then `truncated` in the response is set to
+#' true. When using `EXTERNAL_LINKS` disposition, a default byte_limit of
+#' 100 GiB is applied if `byte_limit` is not explicitly set.
+#' @param disposition One of `"INLINE"` (default) or `"EXTERNAL_LINKS"`. See
+#' [docs](https://docs.databricks.com/api/workspace/statementexecution/executestatement)
+#' for details.
+#' @param format One of `"JSON_ARRAY"` (default), `"ARROW_STREAM"`, or `"CSV"`.
+#' See [docs](https://docs.databricks.com/api/workspace/statementexecution/executestatement)
+#' for details.
+#' @param wait_timeout String, default is `"10s"`. The time in seconds the call
+#' will wait for the statement's result set as `Ns`, where `N` can be set to
+#' `0` or to a value between `5` and `50`.
+#' When set to `0s`, the statement will execute in asynchronous mode and the
+#' call will not wait for the execution to finish. In this case, the call
+#' returns directly with `PENDING` state and a statement ID which can be used
+#' for polling with [db_sql_exec_status()].
+#'
+#' When set between `5` and `50` seconds, the call will behave synchronously up
+#' to this timeout and wait for the statement execution to finish. If the
+#' execution finishes within this time, the call returns immediately with a
+#' manifest and result data (or a `FAILED` state in case of an execution error).
+#'
+#' If the statement takes longer to execute, `on_wait_timeout` determines what
+#' should happen after the timeout is reached.
+#'
+#' @param on_wait_timeout One of `"CONTINUE"` (default) or `"CANCEL"`.
+#' When `wait_timeout` > `0s`, the call will block up to the specified time.
+#' If the statement execution doesn't finish within this time,
+#' `on_wait_timeout` determines whether the execution should continue or be
+#' canceled.
+#'
+#' When set to `CONTINUE`, the statement execution continues asynchronously and
+#' the call returns a statement ID which can be used for polling with
+#' [db_sql_exec_status()].
+#'
+#' When set to `CANCEL`, the statement execution is canceled and the call
+#' returns with a `CANCELED` state.
 #' @inheritParams auth_params
+#' @inheritParams db_sql_warehouse_create
 #'
 #' @family SQL Execution APIs
 #'
@@ -67,10 +120,15 @@ db_sql_exec_query <- function(statement, warehouse_id,

 #' Cancel SQL Query
 #'
-#' @details TODO
+#' @details
+#' Requests that an executing statement be canceled. Callers must poll for
+#' status to see the terminal state.
 #'
-#' @param statement_id TODO
+#' [Read more on Databricks API docs](https://docs.databricks.com/api/workspace/statementexecution/cancelexecution)
+#'
+#' @param statement_id String, query execution `statement_id`.
 #' @inheritParams auth_params
+#' @inheritParams db_sql_warehouse_create
 #'
 #' @family SQL Execution APIs
 #'
@@ -98,10 +156,22 @@ db_sql_exec_cancel <- function(statement_id,

 #' Get SQL Query Status
 #'
-#' @details TODO
+#' @details
+#' This request can be used to poll for the statement's status.
+#' When the `status.state` field is `SUCCEEDED` it will also return the result
+#' manifest and the first chunk of the result data.
+#'
+#' When the statement is in the terminal states `CANCELED`, `CLOSED` or
+#' `FAILED`, it returns HTTP `200` with the state set.
+#'
+#' After at least 12 hours in terminal state, the statement is removed from the
+#' warehouse and further calls will receive an HTTP `404` response.
+#'
+#' [Read more on Databricks API docs](https://docs.databricks.com/api/workspace/statementexecution/getstatement)
 #'
-#' @param statement_id TODO
 #' @inheritParams auth_params
+#' @inheritParams db_sql_exec_cancel
+#' @inheritParams db_sql_warehouse_create
 #'
 #' @family SQL Execution APIs
 #'
@@ -129,11 +199,25 @@ db_sql_exec_status <- function(statement_id,

 #' Get SQL Query Results
 #'
-#' @details TODO
+#' @details
+#' After the statement execution has `SUCCEEDED`, this request can be used to
+#' fetch any chunk by index.
+#'
+#' Whereas the first chunk with chunk_index = `0` is typically fetched with
+#' [db_sql_exec_query()] or [db_sql_exec_status()], this request can be used
+#' to fetch subsequent chunks.
+#'
+#' The response structure is identical to the nested result element described
+#' in the [db_sql_exec_status()] request, and similarly includes the
+#' `next_chunk_index` and `next_chunk_internal_link` fields for simple
+#' iteration through the result set.
+#' +#' [Read more on Databricks API docs](https://docs.databricks.com/api/workspace/statementexecution/getstatementresultchunkn) #' -#' @param statement_id TODO -#' @param chunk_index TODO +#' @param chunk_index Integer, chunk index to fetch result. Starts from `0`. +#' @inheritParams db_sql_exec_cancel #' @inheritParams auth_params +#' @inheritParams db_sql_warehouse_create #' #' @family SQL Execution APIs #' diff --git a/_pkgdown.yml b/_pkgdown.yml index 67ec657..867be2a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -82,6 +82,11 @@ reference: contents: starts_with("db_secrets", internal = TRUE) - title: SQL Statement Execution contents: starts_with("db_sql_exec", internal = TRUE) +- title: SQL Connector + contents: + - install_db_sql_connector + - db_sql_client + - DatabricksSqlClient - title: Warehouses contents: - starts_with("db_sql", internal = TRUE) diff --git a/man/DatabricksSqlClient.Rd b/man/DatabricksSqlClient.Rd new file mode 100644 index 0000000..7d51b22 --- /dev/null +++ b/man/DatabricksSqlClient.Rd @@ -0,0 +1,415 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sql-connector.R +\name{DatabricksSqlClient} +\alias{DatabricksSqlClient} +\title{Databricks SQL Connector} +\description{ +Wraps the \href{https://github.com/databricks/databricks-sql-python}{\code{databricks-sql-connector}} +using \href{https://rstudio.github.io/reticulate/}{reticulate}. + +\href{https://docs.databricks.com/en/dev-tools/python-sql-connector.html#api-reference}{API reference on Databricks docs} +} +\examples{ + +## ------------------------------------------------ +## Method `DatabricksSqlClient$columns` +## ------------------------------------------------ + +\dontrun{ + client$columns(catalog_name = "defa\%") + client$columns(catalog_name = "default", table_name = "gold_\%") +} + +## ------------------------------------------------ +## Method `DatabricksSqlClient$catalogs` +## ------------------------------------------------ + +\dontrun{ + client$catalogs() +} + +## ------------------------------------------------ +## Method `DatabricksSqlClient$schemas` +## ------------------------------------------------ + +\dontrun{ + client$schemas(catalog_name = "main") +} + +## ------------------------------------------------ +## Method `DatabricksSqlClient$execute` +## ------------------------------------------------ + +\dontrun{ + client$execute("select 1") + client$execute("select * from x.y.z limit 100") + client$execute( + operation = "select * from x.y.z where a < \%(threshold)s limit 1000", + parameters = list(threshold = 100) + ) +} + +## ------------------------------------------------ +## Method `DatabricksSqlClient$execute_many` +## ------------------------------------------------ + +\dontrun{ + client$execute_many( + operation = "select * from x.y.z where a < \%(threshold)s limit 1000", + seq_of_parameters = list( + list(threshold = 100), + list(threshold = 200), + list(threshold = 300) + ) + ) +} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-db_sql_client-new}{\code{DatabricksSqlClient$new()}} +\item \href{#method-db_sql_client-columns}{\code{DatabricksSqlClient$columns()}} +\item \href{#method-db_sql_client-catalogs}{\code{DatabricksSqlClient$catalogs()}} +\item \href{#method-db_sql_client-schemas}{\code{DatabricksSqlClient$schemas()}} +\item \href{#method-db_sql_client-tables}{\code{DatabricksSqlClient$tables()}} +\item \href{#method-db_sql_client-execute}{\code{DatabricksSqlClient$execute()}} +\item 
\href{#method-db_sql_client-execute_many}{\code{DatabricksSqlClient$execute_many()}} +\item \href{#method-db_sql_client-clone}{\code{DatabricksSqlClient$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-db_sql_client-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. + +Note that this object is typically constructed via \code{\link[=db_sql_client]{db_sql_client()}}. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DatabricksSqlClient$new( + host, + token, + http_path, + catalog, + schema, + use_cloud_fetch, + session_configuration, + ... +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{host}}{(\code{character(1)})\cr +See \code{\link[=db_sql_client]{db_sql_client()}}.} + +\item{\code{token}}{(\code{character(1)})\cr +See \code{\link[=db_sql_client]{db_sql_client()}}.} + +\item{\code{http_path}}{(\code{character(1)})\cr +See \code{\link[=db_sql_client]{db_sql_client()}}.} + +\item{\code{catalog}}{(\code{character(1)})\cr +See \code{\link[=db_sql_client]{db_sql_client()}}.} + +\item{\code{use_cloud_fetch}}{(\code{logical(1)})\cr +See \code{\link[=db_sql_client]{db_sql_client()}}.} + +\item{\code{session_configuration}}{(\code{list(...)})\cr +See \code{\link[=db_sql_client]{db_sql_client()}}.} + +\item{\code{...}}{Parameters passed to \href{https://docs.databricks.com/en/dev-tools/python-sql-connector.html#methods}{connection method}} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +\link{DatabricksSqlClient}. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-db_sql_client-columns}{}}} +\subsection{Method \code{columns()}}{ +Execute a metadata query about the columns. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DatabricksSqlClient$columns( + catalog_name = NULL, + schema_name = NULL, + table_name = NULL, + column_name = NULL, + as_tibble = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{catalog_name}}{(\code{character(1)})\cr +A catalog name to retrieve information about. +The \verb{\%} character is interpreted as a wildcard.} + +\item{\code{schema_name}}{(\code{character(1)})\cr +A schema name to retrieve information about. +The \verb{\%} character is interpreted as a wildcard.} + +\item{\code{table_name}}{(\code{character(1)})\cr +A table name to retrieve information about. +The \verb{\%} character is interpreted as a wildcard.} + +\item{\code{column_name}}{(\code{character(1)})\cr +A column name to retrieve information about. +The \verb{\%} character is interpreted as a wildcard.} + +\item{\code{as_tibble}}{(\code{logical(1)})\cr +If \code{TRUE} (default) will return \link[tibble:tibble]{tibble::tibble}, otherwise returns +\link[arrow:Table-class]{arrow::Table}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +\link[tibble:tibble]{tibble::tibble} or \link[arrow:Table-class]{arrow::Table}. +} +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{\dontrun{ + client$columns(catalog_name = "defa\%") + client$columns(catalog_name = "default", table_name = "gold_\%") +} +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-db_sql_client-catalogs}{}}} +\subsection{Method \code{catalogs()}}{ +Execute a metadata query about the catalogs. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DatabricksSqlClient$catalogs(as_tibble = TRUE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{as_tibble}}{(\code{logical(1)})\cr +If \code{TRUE} (default) will return \link[tibble:tibble]{tibble::tibble}, otherwise returns +\link[arrow:Table-class]{arrow::Table}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +\link[tibble:tibble]{tibble::tibble} or \link[arrow:Table-class]{arrow::Table}. +} +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{\dontrun{ + client$catalogs() +} +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-db_sql_client-schemas}{}}} +\subsection{Method \code{schemas()}}{ +Execute a metadata query about the schemas. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DatabricksSqlClient$schemas( + catalog_name = NULL, + schema_name = NULL, + as_tibble = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{catalog_name}}{(\code{character(1)})\cr +A catalog name to retrieve information about. +The \verb{\%} character is interpreted as a wildcard.} + +\item{\code{schema_name}}{(\code{character(1)})\cr +A schema name to retrieve information about. +The \verb{\%} character is interpreted as a wildcard.} + +\item{\code{as_tibble}}{(\code{logical(1)})\cr +If \code{TRUE} (default) will return \link[tibble:tibble]{tibble::tibble}, otherwise returns +\link[arrow:Table-class]{arrow::Table}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +\link[tibble:tibble]{tibble::tibble} or \link[arrow:Table-class]{arrow::Table}. +} +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{\dontrun{ + client$schemas(catalog_name = "main") +} +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-db_sql_client-tables}{}}} +\subsection{Method \code{tables()}}{ +Execute a metadata query about tables and views +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DatabricksSqlClient$tables( + catalog_name = NULL, + schema_name = NULL, + table_name = NULL, + table_types = NULL, + as_tibble = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{catalog_name}}{(\code{character(1)})\cr +A catalog name to retrieve information about. +The \verb{\%} character is interpreted as a wildcard.} + +\item{\code{schema_name}}{(\code{character(1)})\cr +A schema name to retrieve information about. +The \verb{\%} character is interpreted as a wildcard.} + +\item{\code{table_name}}{(\code{character(1)})\cr +A table name to retrieve information about. +The \verb{\%} character is interpreted as a wildcard.} + +\item{\code{table_types}}{(\code{character()})\cr +A list of table types to match, for example \code{"TABLE"} or \code{"VIEW"}.} + +\item{\code{as_tibble}}{(\code{logical(1)})\cr +If \code{TRUE} (default) will return \link[tibble:tibble]{tibble::tibble}, otherwise returns +\link[arrow:Table-class]{arrow::Table}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +\link[tibble:tibble]{tibble::tibble} or \link[arrow:Table-class]{arrow::Table}. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-db_sql_client-execute}{}}} +\subsection{Method \code{execute()}}{ +Prepares and then runs a database query or command. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DatabricksSqlClient$execute(operation, parameters = NULL, as_tibble = TRUE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{operation}}{(\code{character(1)})\cr +The query or command to prepare and then run.} + +\item{\code{parameters}}{(\code{list()})\cr +Optional. A sequence of parameters to use with the operation parameter.} + +\item{\code{as_tibble}}{(\code{logical(1)})\cr +If \code{TRUE} (default) will return \link[tibble:tibble]{tibble::tibble}, otherwise returns +\link[arrow:Table-class]{arrow::Table}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +\link[tibble:tibble]{tibble::tibble} or \link[arrow:Table-class]{arrow::Table}. +} +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{\dontrun{ + client$execute("select 1") + client$execute("select * from x.y.z limit 100") + client$execute( + operation = "select * from x.y.z where a < \%(threshold)s limit 1000", + parameters = list(threshold = 100) + ) +} +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-db_sql_client-execute_many}{}}} +\subsection{Method \code{execute_many()}}{ +Prepares and then runs a database query or command using all parameter +sequences in the seq_of_parameters argument. Only the final result set +is retained. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DatabricksSqlClient$execute_many( + operation, + seq_of_parameters = NULL, + as_tibble = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{operation}}{(\code{character(1)})\cr +The query or command to prepare and then run.} + +\item{\code{seq_of_parameters}}{(\code{list(list())})\cr +A sequence of many sets of parameter values to use with the operation +parameter.} + +\item{\code{as_tibble}}{(\code{logical(1)})\cr +If \code{TRUE} (default) will return \link[tibble:tibble]{tibble::tibble}, otherwise returns +\link[arrow:Table-class]{arrow::Table}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +\link[tibble:tibble]{tibble::tibble} or \link[arrow:Table-class]{arrow::Table}. +} +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{\dontrun{ + client$execute_many( + operation = "select * from x.y.z where a < \%(threshold)s limit 1000", + seq_of_parameters = list( + list(threshold = 100), + list(threshold = 200), + list(threshold = 300) + ) + ) +} +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-db_sql_client-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DatabricksSqlClient$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}}
+}
+}
+}
diff --git a/man/db_sql_client.Rd b/man/db_sql_client.Rd
index c9d2a0e..9ceffaf 100644
--- a/man/db_sql_client.Rd
+++ b/man/db_sql_client.Rd
@@ -18,32 +18,48 @@ db_sql_client(
 )
 }
 \arguments{
-\item{id}{TODO}
+\item{id}{String, ID of either the SQL warehouse or all-purpose cluster.
+Ensure \code{compute_type} is set to the type associated with \code{id}.}

-\item{catalog}{TODO}
+\item{catalog}{String, sets default catalog for statement execution, similar
+to \verb{USE CATALOG} in SQL.}

-\item{schema}{TODO}
+\item{schema}{String, sets default schema for statement execution, similar
+to \verb{USE SCHEMA} in SQL.}

-\item{compute_type}{TODO}
+\item{compute_type}{One of \code{"warehouse"} (default) or \code{"cluster"}, corresponding to
+the compute type of the resource specified in \code{id}.}

-\item{use_cloud_fetch}{TODO}
+\item{use_cloud_fetch}{Boolean (default is \code{FALSE}). \code{TRUE} to send fetch
+requests directly to the cloud object store to download chunks of data.
+\code{FALSE} to send fetch requests directly to Databricks.

-\item{session_configuration}{TODO}
+If \code{use_cloud_fetch} is set to \code{TRUE} but network access is blocked, then
+the fetch requests will fail.}

-\item{host}{TODO}
+\item{session_configuration}{An optional named list of Spark session
+configuration parameters. Setting a configuration is equivalent to using the
+\verb{SET key=val} SQL command.
+Run the SQL command \code{SET -v} to get a full list of available configurations.}

-\item{token}{TODO}
+\item{host}{Databricks workspace URL, defaults to calling \code{\link[=db_host]{db_host()}}.}

-\item{workspace_id}{TODO}
+\item{token}{Databricks workspace token, defaults to calling \code{\link[=db_token]{db_token()}}.}
+
+\item{workspace_id}{String, workspace ID used to build the HTTP path for the
+connection. This defaults to using \code{\link[=db_wsid]{db_wsid()}} to get the \code{DATABRICKS_WSID}
+environment variable. Not required if \code{compute_type} is \code{"cluster"}.}
+
+\item{...}{Passed on to \code{\link[=DatabricksSqlClient]{DatabricksSqlClient()}}.}
 }
 \value{
-TODO
+\code{\link[=DatabricksSqlClient]{DatabricksSqlClient()}}
 }
 \description{
 Create Databricks SQL Connector Client
 }
 \details{
-TODO
+Creates a client using the Databricks SQL Connector (Python).
 }
 \examples{
 \dontrun{
diff --git a/man/db_sql_exec_cancel.Rd b/man/db_sql_exec_cancel.Rd
index c8fbee1..323123b 100644
--- a/man/db_sql_exec_cancel.Rd
+++ b/man/db_sql_exec_cancel.Rd
@@ -12,17 +12,23 @@ db_sql_exec_cancel(
 )
 }
 \arguments{
-\item{statement_id}{TODO}
+\item{statement_id}{String, query execution \code{statement_id}.}

 \item{host}{Databricks workspace URL, defaults to calling \code{\link[=db_host]{db_host()}}.}

 \item{token}{Databricks workspace token, defaults to calling \code{\link[=db_token]{db_token()}}.}
+
+\item{perform_request}{If \code{TRUE} (default) the request is performed, if
+\code{FALSE} the httr2 request is returned \emph{without} being performed.}
 }
 \description{
 Cancel SQL Query
 }
 \details{
-TODO
+Requests that an executing statement be canceled. Callers must poll for
+status to see the terminal state.
+ +\href{https://docs.databricks.com/api/workspace/statementexecution/cancelexecution}{Read more on Databricks API docs} } \seealso{ Other SQL Execution APIs: diff --git a/man/db_sql_exec_query.Rd b/man/db_sql_exec_query.Rd index 4eeb2cc..db22301 100644 --- a/man/db_sql_exec_query.Rd +++ b/man/db_sql_exec_query.Rd @@ -22,35 +22,90 @@ db_sql_exec_query( ) } \arguments{ -\item{statement}{TODO} +\item{statement}{String, the SQL statement to execute. The statement can +optionally be parameterized, see \code{parameters}.} -\item{warehouse_id}{TODO} +\item{warehouse_id}{String, ID of warehouse upon which to execute a statement.} -\item{catalog}{TODO} +\item{catalog}{String, sets default catalog for statement execution, similar +to \verb{USE CATALOG} in SQL.} -\item{schema}{TODO} +\item{schema}{String, sets default schema for statement execution, similar +to \verb{USE SCHEMA} in SQL.} -\item{parameters}{TODO} +\item{parameters}{List of Named Lists, parameters to pass into a SQL +statement containing parameter markers. -\item{row_limit}{TODO} +A parameter consists of a name, a value, and \emph{optionally} a type. +To represent a \code{NULL} value, the value field may be omitted or set to \code{NULL} +explicitly. -\item{byte_limit}{TODO} +See \href{https://docs.databricks.com/api/workspace/statementexecution/executestatement}{docs} +for more details.} -\item{disposition}{TODO} +\item{row_limit}{Integer, applies the given row limit to the statement's +result set, but unlike the \code{LIMIT} clause in SQL, it also sets the +\code{truncated} field in the response to indicate whether the result was trimmed +due to the limit or not.} -\item{format}{TODO} +\item{byte_limit}{Integer, applies the given byte limit to the statement's +result size. Byte counts are based on internal data representations and +might not match the final size in the requested format. If the result was +truncated due to the byte limit, then \code{truncated} in the response is set to +true. When using \code{EXTERNAL_LINKS} disposition, a default byte_limit of +100 GiB is applied if \code{byte_limit} is not explicitly set.} -\item{wait_timeout}{TODO} +\item{disposition}{One of \code{"INLINE"} (default) or \code{"EXTERNAL_LINKS"}. See +\href{https://docs.databricks.com/api/workspace/statementexecution/executestatement}{docs} +for details.} + +\item{format}{One of \code{"JSON_ARRAY"} (default), \code{"ARROW_STREAM"}, or \code{"CSV"}. +See \href{https://docs.databricks.com/api/workspace/statementexecution/executestatement}{docs} +for details.} + +\item{wait_timeout}{String, default is \code{"10s"}. The time in seconds the call +will wait for the statement's result set as \code{Ns}, where \code{N} can be set to +\code{0} or to a value between \code{5} and \code{50}. +When set to \verb{0s}, the statement will execute in asynchronous mode and the +call will not wait for the execution to finish. In this case, the call +returns directly with \code{PENDING} state and a statement ID which can be used +for polling with \code{\link[=db_sql_exec_status]{db_sql_exec_status()}}. + +When set between \code{5} and \code{50} seconds, the call will behave synchronously up +to this timeout and wait for the statement execution to finish. If the +execution finishes within this time, the call returns immediately with a +manifest and result data (or a \code{FAILED} state in case of an execution error). 
+
+If the statement takes longer to execute, \code{on_wait_timeout} determines what
+should happen after the timeout is reached.}
+
+\item{on_wait_timeout}{One of \code{"CONTINUE"} (default) or \code{"CANCEL"}.
+When \code{wait_timeout} > \verb{0s}, the call will block up to the specified time.
+If the statement execution doesn't finish within this time,
+\code{on_wait_timeout} determines whether the execution should continue or be
+canceled.
+
+When set to \code{CONTINUE}, the statement execution continues asynchronously and
+the call returns a statement ID which can be used for polling with
+\code{\link[=db_sql_exec_status]{db_sql_exec_status()}}.
+
+When set to \code{CANCEL}, the statement execution is canceled and the call
+returns with a \code{CANCELED} state.}

 \item{host}{Databricks workspace URL, defaults to calling \code{\link[=db_host]{db_host()}}.}

 \item{token}{Databricks workspace token, defaults to calling \code{\link[=db_token]{db_token()}}.}
+
+\item{perform_request}{If \code{TRUE} (default) the request is performed, if
+\code{FALSE} the httr2 request is returned \emph{without} being performed.}
 }
 \description{
 Execute SQL Query
 }
 \details{
-TODO
+Refer to the
+\href{https://docs.databricks.com/api/workspace/statementexecution/executestatement}{web documentation}
+for detailed material on the interaction of the various parameters and general recommendations.
 }
 \seealso{
 Other SQL Execution APIs:
diff --git a/man/db_sql_exec_result.Rd b/man/db_sql_exec_result.Rd
index 933b939..4ce8f41 100644
--- a/man/db_sql_exec_result.Rd
+++ b/man/db_sql_exec_result.Rd
@@ -13,19 +13,34 @@ db_sql_exec_result(
 )
 }
 \arguments{
-\item{statement_id}{TODO}
+\item{statement_id}{String, query execution \code{statement_id}.}

-\item{chunk_index}{TODO}
+\item{chunk_index}{Integer, chunk index to fetch result. Starts from \code{0}.}

 \item{host}{Databricks workspace URL, defaults to calling \code{\link[=db_host]{db_host()}}.}

 \item{token}{Databricks workspace token, defaults to calling \code{\link[=db_token]{db_token()}}.}
+
+\item{perform_request}{If \code{TRUE} (default) the request is performed, if
+\code{FALSE} the httr2 request is returned \emph{without} being performed.}
 }
 \description{
 Get SQL Query Results
 }
 \details{
-TODO
+After the statement execution has \code{SUCCEEDED}, this request can be used to
+fetch any chunk by index.
+
+Whereas the first chunk with chunk_index = \code{0} is typically fetched with
+\code{\link[=db_sql_exec_query]{db_sql_exec_query()}} or \code{\link[=db_sql_exec_status]{db_sql_exec_status()}}, this request can be used
+to fetch subsequent chunks.
+
+The response structure is identical to the nested result element described
+in the \code{\link[=db_sql_exec_status]{db_sql_exec_status()}} request, and similarly includes the
+\code{next_chunk_index} and \code{next_chunk_internal_link} fields for simple
+iteration through the result set.
+ +\href{https://docs.databricks.com/api/workspace/statementexecution/getstatementresultchunkn}{Read more on Databricks API docs} } \seealso{ Other SQL Execution APIs: diff --git a/man/db_sql_exec_status.Rd b/man/db_sql_exec_status.Rd index 03c6ed7..12cb1f2 100644 --- a/man/db_sql_exec_status.Rd +++ b/man/db_sql_exec_status.Rd @@ -12,17 +12,30 @@ db_sql_exec_status( ) } \arguments{ -\item{statement_id}{TODO} +\item{statement_id}{String, query execution \code{statement_id}} \item{host}{Databricks workspace URL, defaults to calling \code{\link[=db_host]{db_host()}}.} \item{token}{Databricks workspace token, defaults to calling \code{\link[=db_token]{db_token()}}.} + +\item{perform_request}{If \code{TRUE} (default) the request is performed, if +\code{FALSE} the httr2 request is returned \emph{without} being performed.} } \description{ Get SQL Query Status } \details{ -TODO +This request can be used to poll for the statement's status. +When the \code{status.state} field is \code{SUCCEEDED} it will also return the result +manifest and the first chunk of the result data. + +When the statement is in the terminal states \code{CANCELED}, \code{CLOSED} or +\code{FAILED}, it returns HTTP \code{200} with the state set. + +After at least 12 hours in terminal state, the statement is removed from the +warehouse and further calls will receive an HTTP \code{404} response. + +\href{https://docs.databricks.com/api/workspace/statementexecution/getstatement}{Read more on Databricks API docs} } \seealso{ Other SQL Execution APIs: diff --git a/man/install_db_sql_connector.Rd b/man/install_db_sql_connector.Rd index 0589e30..ea84f1a 100644 --- a/man/install_db_sql_connector.Rd +++ b/man/install_db_sql_connector.Rd @@ -35,5 +35,5 @@ Environemnt is resolved by \code{\link[=determine_brickster_venv]{determine_bric When running within Databricks it will use the existing python environment. } \examples{ -\dontrun{install_db_sql_connector()} +\donttest{install_db_sql_connector()} } diff --git a/vignettes/databricks-sql-connector.Rmd b/vignettes/databricks-sql-connector.Rmd deleted file mode 100644 index 4b52427..0000000 --- a/vignettes/databricks-sql-connector.Rmd +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From a4d69273b670dec59e4e3b67f147381409f3804e Mon Sep 17 00:00:00 2001 From: Zac Davies Date: Mon, 6 Nov 2023 20:58:27 +1100 Subject: [PATCH 10/10] Fixing exports and imports --- NAMESPACE | 2 ++ R/sql-connector.R | 4 +++- man/install_db_sql_connector.Rd | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8181f16..7d97eb8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -93,6 +93,7 @@ export(db_secrets_scope_acl_put) export(db_secrets_scope_create) export(db_secrets_scope_delete) export(db_secrets_scope_list_all) +export(db_sql_client) export(db_sql_exec_cancel) export(db_sql_exec_query) export(db_sql_exec_result) @@ -185,6 +186,7 @@ export(spark_jar_task) export(spark_python_task) export(spark_submit_task) export(wait_for_lib_installs) +import(R6) import(arrow) import(cli) import(httr2) diff --git a/R/sql-connector.R b/R/sql-connector.R index 51b31b1..6cf66ff 100644 --- a/R/sql-connector.R +++ b/R/sql-connector.R @@ -10,7 +10,7 @@ #' @export #' #' @examples -#' \donttest{install_db_sql_connector()} +#' \dontrun{install_db_sql_connector()} install_db_sql_connector <- function(envname = determine_brickster_venv(), method = "auto", ...) 
{ reticulate::py_install( @@ -52,6 +52,7 @@ install_db_sql_connector <- function(envname = determine_brickster_venv(), #' \dontrun{ #' client <- db_sql_client(id = "", use_cloud_fetch = TRUE) #' } +#' @export db_sql_client <- function(id, catalog = NULL, schema = NULL, compute_type = c("warehouse", "cluster"), @@ -88,6 +89,7 @@ db_sql_client <- function(id, #' using [reticulate](https://rstudio.github.io/reticulate/). #' #' [API reference on Databricks docs](https://docs.databricks.com/en/dev-tools/python-sql-connector.html#api-reference) +#' @import R6 #' @export DatabricksSqlClient <- R6::R6Class( classname = "db_sql_client", diff --git a/man/install_db_sql_connector.Rd b/man/install_db_sql_connector.Rd index ea84f1a..0589e30 100644 --- a/man/install_db_sql_connector.Rd +++ b/man/install_db_sql_connector.Rd @@ -35,5 +35,5 @@ Environemnt is resolved by \code{\link[=determine_brickster_venv]{determine_bric When running within Databricks it will use the existing python environment. } \examples{ -\donttest{install_db_sql_connector()} +\dontrun{install_db_sql_connector()} }
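
The statement execution functions documented above compose into a simple submit/poll/fetch workflow. The sketch below is illustrative only: the warehouse ID is a placeholder, credentials are assumed to resolve via db_host()/db_token() from the environment, and the parsed response is assumed to expose statement_id and status$state as described in the API documentation.

library(brickster)

# submit the statement; with on_wait_timeout = "CONTINUE" the execution
# keeps running asynchronously if it outlives the 10 second wait
query <- db_sql_exec_query(
  statement       = "SELECT 1 AS x",
  warehouse_id    = "<warehouse-id>",  # placeholder, not a real warehouse ID
  wait_timeout    = "10s",
  on_wait_timeout = "CONTINUE"
)

# poll with db_sql_exec_status() until a terminal state is reached
while (query$status$state %in% c("PENDING", "RUNNING")) {
  Sys.sleep(2)
  query <- db_sql_exec_status(statement_id = query$statement_id)
}

# on success, fetch result chunks by index (chunk_index starts from 0)
if (query$status$state == "SUCCEEDED") {
  chunk <- db_sql_exec_result(
    statement_id = query$statement_id,
    chunk_index  = 0
  )
}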