Skip to content

Commit

Permalink
add roxygen pal
Browse files Browse the repository at this point in the history
  • Loading branch information
simonpcouch committed Oct 8, 2024
1 parent c8f2dae commit 7fe8d97
Show file tree
Hide file tree
Showing 12 changed files with 457 additions and 43 deletions.
48 changes: 48 additions & 0 deletions R/addin.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# replace selection with refactored code
rs_update_selection <- function(context, role) {
# check if pal exists
if (exists(paste0(".last_pal_", role))) {
Expand Down Expand Up @@ -64,6 +65,7 @@ stream_selection <- function(selection, context, pal, n_lines_orig) {
output_lines <- character(0)
stream <- pal[[".__enclos_env__"]][["private"]]$.stream(selection_text)
coro::loop(for (chunk in stream) {
if (identical(chunk, "")) {next}
output_lines <- paste(output_lines, sub("\n$", "", chunk), sep = "")
n_lines <- nchar(gsub("[^\n]+", "", output_lines)) + 1
if (n_lines_orig - n_lines > 0) {
Expand Down Expand Up @@ -107,6 +109,48 @@ stream_selection <- function(selection, context, pal, n_lines_orig) {
rstudioapi::executeCommand("reindent")
}

# prefix selection with new code -----------------------------------------------
rs_prefix_selection <- function(context, role) {
# check if pal exists
if (exists(paste0(".last_pal_", role))) {
pal <- get(paste0(".last_pal_", role))
} else {
tryCatch(
pal <- pal(role),
error = function(e) {
rstudioapi::showDialog("Error", "Unable to create a pal. See `?pal()`.")
return(NULL)
}
)
}

selection <- rstudioapi::primary_selection(context)

if (selection[["text"]] == "") {
rstudioapi::showDialog("Error", "No code selected. Please highlight some code first.")
return(NULL)
}

# add one blank line before the selection
rstudioapi::modifyRange(selection$range, paste0("\n", selection[["text"]]), context$id)

# make the "current selection" that blank line
first_line <- selection$range
first_line$start[["column"]] <- 1
first_line$end[["row"]] <- selection$range$start[["row"]]
first_line$end[["column"]] <- Inf
selection$range <- first_line
rstudioapi::setCursorPosition(selection$range$start)

# start streaming into it--will be interactively appended to if need be
tryCatch(
stream_selection(selection, context, pal, n_lines_orig = 1),
error = function(e) {
rstudioapi::showDialog("Error", paste("The pal ran into an issue: ", e$message))
}
)
}

# pal-specific helpers ---------------------------------------------------------
rs_pal_cli <- function(context = rstudioapi::getActiveDocumentContext()) {
rs_update_selection(context = context, role = "cli")
Expand All @@ -115,3 +159,7 @@ rs_pal_cli <- function(context = rstudioapi::getActiveDocumentContext()) {
rs_pal_testthat <- function(context = rstudioapi::getActiveDocumentContext()) {
rs_update_selection(context = context, role = "testthat")
}

rs_pal_roxygen <- function(context = rstudioapi::getActiveDocumentContext()) {
rs_prefix_selection(context = context, role = "roxygen")
}
2 changes: 1 addition & 1 deletion R/pal.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,4 @@ pal <- function(
)
}

supported_roles <- c("cli", "testthat")
supported_roles <- c("cli", "testthat", "roxygen")
106 changes: 106 additions & 0 deletions R/pal_roxygen.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#' The roxygen pal
#'
#' @description
#'
#' The roxygen pal prefixes the selected function with a minimal roxygen2
#' documentation template. The pal is instructed to only generate a subset
#' of a complete documentation entry, to be then completed by a developer:
#'
#' * Stub `@param` descriptions based on defaults and inferred types
#' * Stub `@returns` entry that describes the return value as well as important
#' errors and warnings users might encounter.
#'
#'
#' @section Creating a roxygen pal:
#'
#' Create a roxygen pal with:
#'
#' ```r
#' pal("roxygen")
#' ```
#'
#' @section Cost:
#'
#' The system prompt from a roxygen pal includes something like 1,000 tokens.
#' Add in 200 tokens for the code that's actually highlighted
#' and also sent off to the model and you're looking at 1,200 input tokens.
#' The model returns maybe 10 to 15 lines of relatively barebones royxgen
#' documentation, so we'll call that 200 output tokens per refactor.
#'
#' As of the time of writing (October 2024), the default pal model Claude
#' Sonnet 3.5 costs \\$3 per million input tokens and $15 per million output
#' tokens. So, using the default model,
#' **roxygen pals cost around \\$4 for every 1,000 generated roxygen documentation
#' entries**. GPT-4o Mini, by contrast, doesn't tend to infer argument types
#' correctly as often and
#' often fails to line-break properly, but _does_ usually return syntactically
#' valid documentation entries, and it would cost around
#' 20 cents per 1,000 generated roxygen documentation entries.
#'
#' @section Gallery:
#'
#' This section includes a handful of examples
#' "[from the wild](https://github.com/hadley/elmer/tree/e497d627e7be01206df6f1420ca36235141dc22a/R)"
#' and are generated with the default model, Claude Sonnet 3.5.
#'
#' ```{r}
#' library(pal)
#'
#' roxygen_pal <- pal("roxygen")
#' ```
#'
#' ```{r}
#' roxygen_pal$chat({
#' deferred_method_transform <- function(lambda_expr, transformer, eval_env) {
#' transformer <- enexpr(transformer)
#' force(eval_env)
#'
#' unique_id <- new_id()
#' env_bind_lazy(
#' generators,
#' !!unique_id := inject((!!transformer)(!!lambda_expr)),
#' eval.env = eval_env
#' )
#'
#' inject(
#' function(...) {
#' (!!generators)[[!!unique_id]](self, private, ...)
#' }
#' )
#' }
#' })
#' ```
#'
#' ```{r}
#' roxygen_pal$chat({
#' set_default <- function(value, default, arg = caller_arg(value)) {
#' if (is.null(value)) {
#' if (!is_testing() || is_snapshot()) {
#' cli::cli_inform("Using {.field {arg}} = {.val {default}}.")
#' }
#' default
#' } else {
#' value
#' }
#' }
#' })
#' ```
#'
#' ```{r}
#' roxygen_pal$chat({
#' find_index <- function(left, e_right) {
#' if (!is.list(e_right) || !has_name(e_right, "index") || !is.numeric(e_right$index)) {
#' return(NA)
#' }
#'
#' matches_idx <- map_lgl(left, function(e_left) e_left$index == e_right$index)
#' if (sum(matches_idx) != 1) {
#' return(NA)
#' }
#' which(matches_idx)[[1]]
#' }
#' })
#' ```
#'
#' @name pal_roxygen
NULL
6 changes: 3 additions & 3 deletions R/pal_testthat.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,16 @@
#'
#' @section Cost:
#'
#' The system prompt from a pal includes something like 1,000 tokens.
#' The system prompt from a testthat pal includes something like 1,000 tokens.
#' Add in (a generous) 100 tokens for the code that's actually highlighted
#' and also sent off to the model and you're looking at 4,100 input tokens.
#' and also sent off to the model and you're looking at 1,100 input tokens.
#' The model returns approximately the same number of output tokens as it
#' receives, so we'll call that 100 output tokens per refactor.
#'
#' As of the time of writing (October 2024), the default pal model Claude
#' Sonnet 3.5 costs \\$3 per million input tokens and $15 per million output
#' tokens. So, using the default model,
#' **cli pals cost around \\$4 for every 1,000 refactored pieces of code**. GPT-4o
#' **testthat pals cost around \\$4 for every 1,000 refactored pieces of code**. GPT-4o
#' Mini, by contrast, doesn't tend to get many pieces of formatting right and
#' often fails to line-break properly, but _does_ usually return syntactically
#' valid calls to testthat functions, and it would cost around
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ As-is, the package provides ergonomic LLM assistants for R package development:

* `"cli"`: [Convert to cli](https://simonpcouch.github.io/pal/reference/pal_cli.html)
* `"testthat"`: [Convert to testthat 3](https://simonpcouch.github.io/pal/reference/pal_testthat.html)
* `"roxygen"`: Document functions with roxygen
* `"roxygen"`: [Document functions with roxygen](https://simonpcouch.github.io/pal/reference/pal_roxygen.html)

That said, the package provides infrastructure for others to make LLM assistants for any task in R, from authoring to interactive data analysis. With only a markdown file and a function call, users can extend pal to assist with their own repetitive but hard-to-automate tasks.

Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ development:
cli](https://simonpcouch.github.io/pal/reference/pal_cli.html)
- `"testthat"`: [Convert to testthat
3](https://simonpcouch.github.io/pal/reference/pal_testthat.html)
- `"roxygen"`: Document functions with roxygen
- `"roxygen"`: [Document functions with
roxygen](https://simonpcouch.github.io/pal/reference/pal_roxygen.html)

That said, the package provides infrastructure for others to make LLM
assistants for any task in R, from authoring to interactive data
Expand Down
118 changes: 118 additions & 0 deletions inst/prompts/roxygen.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Templating function documentation

You are a terse assistant designed to help R package developers quickly template out their function documentation using roxygen2. Given some highlighted function code, return minimal documentation on the function's parameters and return type. Beyond those two elements, be sparing so as not to describe things you don't have context for. Respond with *only* R `#'` roxygen2 comments—no backticks or newlines around the response, no further commentary.

For function parameters in `@params`, describe each according to their type (e.g. "A numeric vector" or "A single string") and note if the parameter isn't required by writing "Optional" if it has a default value. If the parameters have a default enum (e.g. `arg = c("a", "b", "c")`), write them out as 'one of `"a"`, `"b"`, or `"c"`.' If there are ellipses in the function signature, note what happens to them. If they're checked with `rlang::check_dots_empty()` or otherwise, document them as "Currently unused; must be empty." If the ellipses are passed along to another function, note which function they're passed to.

For the return type in `@returns`, note any important errors or warnings that might occur and under what conditions. If the `output` is returned with `invisible(output)`, note that it's returned "invisibly."

Here's an example:

``` r
# given:
key_get <- function(name, error_call = caller_env()) {
val <- Sys.getenv(name)
if (!identical(val, "")) {
val
} else {
if (is_testing()) {
testthat::skip(sprintf("%s env var is not configured", name))
} else {
cli::cli_abort("Can't find env var {.code {name}}.", call = error_call)
}
}
}

# reply with:
#' Get key
#'
#' @description
#' A short description...
#'
#' @param name A single string.
#' @param error_call A call to mention in error messages. Optional.
#'
#' @returns
#' If found, the value corresponding to the provided `name`. Otherwise,
#' the function will error.
#'
#' @export
```

Another:

``` r
# given:
chat_perform <- function(provider,
mode = c("value", "stream", "async-stream", "async-value"),
turns,
tools = list(),
extra_args = list()) {

mode <- arg_match(mode)
stream <- mode %in% c("stream", "async-stream")

req <- chat_request(
provider = provider,
turns = turns,
tools = tools,
stream = stream,
extra_args = extra_args
)

switch(mode,
"value" = chat_perform_value(provider, req),
"stream" = chat_perform_stream(provider, req),
"async-value" = chat_perform_async_value(provider, req),
"async-stream" = chat_perform_async_stream(provider, req)
)
}

# reply with:
#' Perform chat
#'
#' @description
#' A short description...
#'
#' @param provider A provider.
#' @param mode One of `"value"`, `"stream"`, `"async-stream"`, or `"async-value"`.
#' @param turns Turns.
#' @param tools Optional. A list of tools.
#' @param extra_args Optional. A list of extra arguments.
#'
#' @returns
#' A result.
#'
#' @export
```

``` r
# given:
check_args <- function(fn, ...) {
rlang::check_dots_empty()
arg_names <- names(formals(fn))
if (length(arg_names) < 2) {
cli::cli_abort("Function must have at least two arguments.", .internal = TRUE)
} else if (arg_names[[1]] != "self") {
cli::cli_abort("First argument must be {.arg self}.", .internal = TRUE)
} else if (arg_names[[2]] != "private") {
cli::cli_abort("Second argument must be {.arg private}.", .internal = TRUE)
}
invisible(fn)
}

# reply with:
#' Check a function's arguments
#'
#' @description
#' A short description...
#'
#' @param fn A function.
#' @param ... Currently unused; must be empty.
#'
#' @returns
#' `fn`, invisibly. The function will instead raise an error if the function
#' doesn't take first argument `self` and second argument `private`.
#'
#' @export
```
5 changes: 5 additions & 0 deletions inst/rstudio/addins.dcf
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,8 @@ Name: Pal: convert to testthat
Description: Replaces selected unit testing code with a version adapted to testthat 3
Binding: rs_pal_testthat
Interactive: false

Name: Pal: template roxygen documentation
Description: Prefixes selected function with templated roxygen2 documentation
Binding: rs_pal_roxygen
Interactive: false
2 changes: 1 addition & 1 deletion man/pal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7fe8d97

Please sign in to comment.