Skip to content

Commit

Permalink
Merge pull request #280 from yjunechoe/output-raw
Browse files Browse the repository at this point in the history
Support for `output="raw"`
  • Loading branch information
yjunechoe authored Sep 25, 2024
2 parents 1ed6887 + 679a619 commit c3f64e5
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 16 deletions.
49 changes: 36 additions & 13 deletions R/oa_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@ oa_entities <- function() {
#' @param abstract Logical. If TRUE, the function returns also the abstract of each item.
#' Default to \code{abstract = TRUE}.
#' The argument is ignored if entity is different from "works".
#' @param output Character. Type of output, either a list or a tibble/data.frame.
#' @param output Character.
#' Type of output, one of `"tibble"`, `"dataframe"`, `"list"`, or `"raw"`.
##' \itemize{
##' \item{"tibble"}: {a tibble tidy data}
##' \item{"dataframe"}: {a base data.frame tidy data}
##' \item{"list"}: {a list of parsed JSON contents}
##' \item{"raw"}: {a list of raw JSON strings (length depends on query)}
##' }
#'
#' @return A data.frame or a list. Result of the query.
#' @export
Expand Down Expand Up @@ -67,7 +74,7 @@ oa_fetch <- function(entity = if (is.null(identifier)) NULL else id_type(shorten
options = NULL,
search = NULL,
group_by = NULL,
output = c("tibble", "dataframe", "list"),
output = c("tibble", "dataframe", "list", "raw"),
abstract = TRUE,
endpoint = "https://api.openalex.org",
per_page = 200,
Expand Down Expand Up @@ -134,6 +141,7 @@ oa_fetch <- function(entity = if (is.null(identifier)) NULL else id_type(shorten
count_only = count_only,
mailto = mailto,
api_key = api_key,
parse = output != "raw",
verbose = verbose
)
}
Expand All @@ -143,7 +151,7 @@ oa_fetch <- function(entity = if (is.null(identifier)) NULL else id_type(shorten
}
final_res <- unlist(final_res, recursive = FALSE)

if (output == "list") {
if (output %in% c("list", "raw")) {
return(final_res)
}

Expand Down Expand Up @@ -185,6 +193,8 @@ oa_fetch <- function(entity = if (is.null(identifier)) NULL else id_type(shorten
#' Gives OpenAlex an email to enter the polite pool.
#' @param api_key Character string.
#' Your OpenAlex Premium API key, if available.
#' @param parse Logical.
#' If FALSE, returns the raw JSON response as string.
#' @param verbose Logical.
#' If TRUE, print information about the querying process. Defaults to TRUE.
#'
Expand Down Expand Up @@ -301,6 +311,7 @@ oa_request <- function(query_url,
count_only = FALSE,
mailto = oa_email(),
api_key = oa_apikey(),
parse = TRUE,
verbose = FALSE) {
# https://httr.r-lib.org/articles/api-packages.html#set-a-user-agent
ua <- httr::user_agent("https://github.com/ropensci/openalexR/")
Expand Down Expand Up @@ -394,12 +405,20 @@ oa_request <- function(query_url,
Sys.sleep(1 / 10)
next_page <- get_next_page(paging, i, res)
query_ls[[paging]] <- next_page
res <- api_request(query_url, ua, query = query_ls)
if (!is.null(res[[result_name]])) data[[i]] <- res[[result_name]]
}

if (parse) {
res <- api_request(query_url, ua, query = query_ls, parse = TRUE)
if (!is.null(res[[result_name]])) data[[i]] <- res[[result_name]]
} else {
raw <- api_request(query_url, ua, query = query_ls, parse = FALSE)
data[[i]] <- raw
}
}
data <- unlist(data, recursive = FALSE)

# `output = "raw"` early exit
if (!parse) return(data)

if (grepl("filter", query_url) && grepl("works", query_url)) {
truncated <- unlist(truncated_authors(data))
if (length(truncated)) {
Expand Down Expand Up @@ -683,36 +702,40 @@ oa_random <- function(entity = oa_entities(),
final_res
}

api_request <- function(query_url, ua, query, api_key = oa_apikey()) {
api_request <- function(query_url, ua, query, api_key = oa_apikey(), parse = TRUE) {
res <- httr::GET(query_url, ua, query = query, httr::add_headers(api_key = api_key))

empty_res <- if (parse) list() else "{}"

if (httr::status_code(res) == 400) {
stop("HTTP status 400 Request Line is too large")
}

if (httr::status_code(res) == 429) {
message("HTTP status 429 Too Many Requests")
return(list())
return(empty_res)
}

m <- httr::content(res, "text", encoding = "UTF-8")
if (parse) {
m <- jsonlite::fromJSON(m, simplifyVector = FALSE)
}

if (httr::status_code(res) == 503) {
mssg <- regmatches(m, regexpr("(?<=<title>).*?(?=<\\/title>)", m, perl = TRUE))
message(mssg, ". Please try setting `per_page = 25` in your function call!")
return(list())
return(empty_res)
}

parsed <- jsonlite::fromJSON(m, simplifyVector = FALSE)

if (httr::status_code(res) == 200) {
if (httr::http_type(res) != "application/json") {
stop("API did not return json", call. = FALSE)
}
return(parsed)
return(m) # Depending on `parse`, results can be raw JSON or parsed R list
}

if (httr::http_error(res)) {
parsed <- jsonlite::fromJSON(m, simplifyVector = FALSE)
stop(
sprintf(
"OpenAlex API request failed [%s]\n%s\n<%s>",
Expand All @@ -726,6 +749,6 @@ api_request <- function(query_url, ua, query, api_key = oa_apikey()) {

if (httr::status_code(res) != 429 & httr::status_code(res) != 200) {
message("HTTP status ", httr::status_code(res))
return(list())
return(empty_res)
}
}
11 changes: 9 additions & 2 deletions man/oa_fetch.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion man/oa_random.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/oa_request.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions tests/testthat/test-oa_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -453,3 +453,37 @@ test_that("pages works", {
)
expect_equal(w1[11:20,], w2)
})

test_that("output=raw works", {
skip_on_cran()

output_raw <- oa_fetch(
entity = "works",
search = "language",
per_page = 2,
options = list(sample = 5, seed = 1),
output = "raw"
)

# length and type checks
expect_type(output_raw, "character")
expect_length(output_raw, ceiling(5 / 2)) # length determined by pages
raw_parsed <- lapply(output_raw, function(x) {
jsonlite::fromJSON(x, simplifyVector = FALSE)$results
})
expect_equal(lengths(raw_parsed), c(2, 2, 1)) # num results in each page
raw_parsed_flattened <- unlist(raw_parsed, recursive = FALSE)

# equivalence check to tibble format
expect_identical(
works2df(raw_parsed_flattened),
oa_fetch(
entity = "works",
search = "language",
per_page = 2,
options = list(sample = 5, seed = 1),
output = "tibble"
)
)

})

0 comments on commit c3f64e5

Please sign in to comment.