
Commit

Merge pull request #15 from seroanalytics/paging
Paging
hillalex authored Sep 25, 2024
2 parents ec8bfad + fc0c48c commit 36abddb
Showing 7 changed files with 188 additions and 89 deletions.
144 changes: 80 additions & 64 deletions R/api.R
@@ -14,68 +14,61 @@ target_post_dataset <- function(req, res) {
parsed <- mime::parse_multipart(req)
xcol <- get_xcol(parsed)
name <- get_dataset_name(parsed)
if (is.null(xcol)) {
res$status <- 400L
msg <- "Missing required field: xcol."
return(bad_request_response(msg))
}
if (is.null(parsed$file$type) || parsed$file$type != "text/csv") {
res$status <- 400L
msg <- "Invalid file type; please upload file of type text/csv."
return(bad_request_response(msg))
return(invalid_file_type(res))
}
file_body <- utils::read.csv(parsed$file$datapath)
path <- file.path("uploads", session_id, name)
if (dir.exists(path)) {
res$status <- 400L
msg <- paste(name, "already exists.",
"Please choose a unique name for this dataset.")
return(bad_request_response(msg))
return(duplicate_dataset_name(res, name))
}
required_cols <- c("value", "biomarker", xcol)
missing_cols <- required_cols[!(required_cols %in% colnames(file_body))]
if (length(missing_cols) > 0) {
res$status <- 400L
msg <- paste("Missing required columns:",
paste(missing_cols, collapse = ", "))
return(bad_request_response(msg))
return(missing_columns(res, missing_cols))
}

if (suppressWarnings(all(is.na(as.numeric(file_body[, xcol]))))) {
xtype <- "date"
suppressWarnings({
file_body[, xcol] <- parse_date(file_body[, xcol])
})
if (all(is.na(file_body[, xcol]))) {
res$status <- 400L
msg <- paste("Invalid x column values:",
"these should be numbers or dates in a standard format.")
return(bad_request_response(msg))
}
logger::log_info("Detected date values in x column")
} else {
logger::log_info("Detected numeric values in x column")
xtype <- "number"
file_body[, xcol] <- get_parsed_values(file_body[, xcol])

if (all(is.na(file_body[, xcol]))) {
return(invalid_xcol(res))
}

logger::log_info(paste("Saving dataset", name, "to disk"))
save_dataset(path, file_body, xcol)

response_success(jsonlite::unbox(name))
}

save_dataset <- function(path, file_body, xcol) {
xtype <- get_xtype(file_body[, xcol])
dir.create(path, recursive = TRUE)
utils::write.csv(file_body, file.path(path, "data"), row.names = FALSE)
write(xcol, file.path(path, "xcol"))
write(xtype, file.path(path, "xtype"))
response_success(jsonlite::unbox(name))
}

target_delete_dataset <- function(name, req) {
session_id <- get_or_create_session_id(req)
path <- file.path("uploads", session_id, name)
if (!file.exists(path)) {
porcelain::porcelain_stop(paste("Did not find dataset with name:", name),
code = "DATASET_NOT_FOUND", status_code = 404L)
get_parsed_values <- function(raw_values) {
suppressWarnings({
values <- as.numeric(raw_values)
})

if (all(is.na(values))) {
suppressWarnings({
values <- parse_date(raw_values)
})
}
values
}

get_xtype <- function(values) {
if (is.numeric(values)) {
logger::log_info("Detected numeric values in x column")
return("number")
} else {
logger::log_info("Detected date values in x column")
return("date")
}
logger::log_info(paste("Deleting dataset: ", name))
fs::dir_delete(path)
jsonlite::unbox(name)
}

get_dataset_name <- function(parsed) {
@@ -100,6 +93,18 @@ get_xcol <- function(parsed) {
return(xcol)
}

target_delete_dataset <- function(name, req) {
session_id <- get_or_create_session_id(req)
path <- file.path("uploads", session_id, name)
if (!file.exists(path)) {
porcelain::porcelain_stop(paste("Did not find dataset with name:", name),
code = "DATASET_NOT_FOUND", status_code = 404L)
}
logger::log_info(paste("Deleting dataset: ", name))
fs::dir_delete(path)
jsonlite::unbox(name)
}

target_get_dataset <- function(name, req) {
logger::log_info(paste("Requesting metadata for dataset:", name))
dataset <- read_dataset(req, name, "natural")
@@ -200,32 +205,14 @@ target_get_individual <- function(req,
}

dat <- apply_filters(dat, filter)
if (is.null(color)) {
if (is.null(linetype)) {
aes <- ggplot2::aes(x = .data[[xcol]], y = value)
} else {
aes <- ggplot2::aes(x = .data[[xcol]], y = value,
linetype = .data[[linetype]])
}
} else {
if (is.null(linetype)) {
aes <- ggplot2::aes(x = .data[[xcol]], y = value,
color = .data[[color]])
} else {
aes <- ggplot2::aes(x = .data[[xcol]], y = value,
color = .data[[color]],
linetype = .data[[linetype]])
}
}
aes <- get_aes(color, linetype, xcol)

warnings <- NULL
ids <- unique(dat[[pidcol]])
if (length(ids) > 20) {
msg <- paste(length(ids),
"individuals identified; only the first 20 will be shown.")
warnings <- c(warnings, msg)
dat <- dat[dat[[pidcol]] %in% ids[1:20], ]
}
page_length <- 20
num_pages <- ceiling(length(ids) / page_length)
paged_ids <- get_paged_ids(ids, page, page_length)
dat <- dat[dat[[pidcol]] %in% paged_ids, ]

# Facets in plotlyjs are quite a pain. Using ggplot2 and plotly R
# packages to generate the plotly data and layout objects is a bit slower
@@ -244,10 +231,39 @@ target_get_individual <- function(req,
jsonlite::toJSON(
list(data = as.list(q$x$data),
layout = as.list(q$x$layout),
page = page,
numPages = num_pages,
warnings = warnings),
auto_unbox = TRUE, null = "null")
}

get_paged_ids <- function(ids, current_page, page_length) {
page_start <- ((current_page - 1) * page_length) + 1
page_end <- min(length(ids), page_start + (page_length - 1))
ids[page_start:page_end]
}

get_aes <- function(color, linetype, xcol) {
if (is.null(color)) {
if (is.null(linetype)) {
aes <- ggplot2::aes(x = .data[[xcol]], y = value)
} else {
aes <- ggplot2::aes(x = .data[[xcol]], y = value,
linetype = .data[[linetype]])
}
} else {
if (is.null(linetype)) {
aes <- ggplot2::aes(x = .data[[xcol]], y = value,
color = .data[[color]])
} else {
aes <- ggplot2::aes(x = .data[[xcol]], y = value,
color = .data[[color]],
linetype = .data[[linetype]])
}
}
return(aes)
}

read_dataset <- function(req, name, scale) {
validate_scale(scale)
session_id <- get_or_create_session_id(req)
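For reference, a minimal sketch of how the new paging pieces fit together, using the helpers defined in this file. The ids vector here is made up, and page_length mirrors the constant hard-coded in target_get_individual:

ids <- paste0("p", 1:25)                         # 25 hypothetical individual ids
page_length <- 20                                # fixed page size from target_get_individual
num_pages <- ceiling(length(ids) / page_length)  # 2 pages for 25 ids
get_paged_ids(ids, 1, page_length)               # "p1" ... "p20"
get_paged_ids(ids, 2, page_length)               # "p21" ... "p25"

Note that get_paged_ids assumes 1 <= current_page <= num_pages; a page outside that range would index past the end of the vector.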
26 changes: 26 additions & 0 deletions R/dataset-validation.R
@@ -0,0 +1,26 @@
invalid_file_type <- function(res) {
res$status <- 400L
msg <- "Invalid file type; please upload file of type text/csv."
bad_request_response(msg)
}

duplicate_dataset_name <- function(res, name) {
res$status <- 400L
msg <- paste(name, "already exists.",
"Please choose a unique name for this dataset.")
bad_request_response(msg)
}

missing_columns <- function(res, missing_cols) {
res$status <- 400L
msg <- paste("Missing required columns:",
paste(missing_cols, collapse = ", "))
bad_request_response(msg)
}

invalid_xcol <- function(res) {
res$status <- 400L
msg <- paste("Invalid x column values:",
"these should be numbers or dates in a standard format.")
bad_request_response(msg)
}
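
These helpers rely on plumber's res being a reference object (an R6 instance backed by an environment), so the res$status <- 400L assignment inside each helper is visible to the calling handler; the response body comes from the existing bad_request_response helper. A rough sketch of the calling pattern, with a plain environment standing in for the real response object:

res <- new.env()   # stand-in with reference semantics, like plumber's response
res$status <- 200L
body <- missing_columns(res, c("value", "biomarker"))
res$status         # now 400L
# body is the bad_request_response payload for the message
# "Missing required columns: value, biomarker"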
3 changes: 2 additions & 1 deletion R/router.R
@@ -119,7 +119,8 @@ get_individual <- function() {
porcelain::porcelain_input_query(scale = "string",
color = "string",
filter = "string",
linetype = "string"),
linetype = "string",
page = "numeric"),
returning = porcelain::porcelain_returning_json("Plotly"))
}

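With page registered as a numeric query parameter, clients select a page of individuals via the query string. A sketch of the request shape, mirroring tests/testthat/test-paging.R below (router, make_req and cookie are helpers from the test suite):

res <- router$call(make_req("GET",
                            "/dataset/testdataset/individual/pid/",
                            qs = "page=2",
                            HTTP_COOKIE = cookie))
body <- jsonlite::fromJSON(res$body)
body$data$page       # 2
body$data$numPages   # page count at the fixed page length of 20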
3 changes: 2 additions & 1 deletion docker/push
@@ -4,7 +4,8 @@ set -ex
HERE=$(realpath "$(dirname $0)")
. $HERE/common

docker push $DOCKER_BRANCH_TAG

if [[ "$GIT_BRANCH" == "main" ]]; then
docker push $DOCKER_BRANCH_TAG
docker push $DOCKER_COMMIT_TAG
fi
8 changes: 7 additions & 1 deletion inst/schema/Plotly.schema.json
@@ -26,8 +26,14 @@
"items": {
"type": "string"
}
},
"numPages": {
"type": "number"
},
"page": {
"type": "number"
}
},
"required": ["data", "layout", "warnings"],
"required": ["data", "layout", "warnings", "numPages", "page"],
"additionalProperties": false
}
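
For illustration, an abridged response body that satisfies the updated schema (a real response carries full plotly trace and layout objects):

{
  "data": [],
  "layout": {},
  "warnings": [],
  "page": 1,
  "numPages": 2
}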
71 changes: 71 additions & 0 deletions tests/testthat/test-paging.R
@@ -0,0 +1,71 @@
test_that("page 1 is first n ids if n < length(ids)", {
ids <- c("a", "b", "c", "d", "e")
result <- get_paged_ids(ids, 1, 2)
expect_equal(result, c("a", "b"))
})

test_that("page 1 is all ids if n >= length(ids)", {
ids <- c("a", "b", "c", "d", "e")
result <- get_paged_ids(ids, 1, 10)
expect_equal(result, ids)
})

test_that("page 2 is second n ids", {
ids <- c("a", "b", "c", "d", "e")
result <- get_paged_ids(ids, 2, 2)
expect_equal(result, c("c", "d"))
})

test_that("last page is last m ids where m <= n", {
ids <- c("a", "b", "c", "d", "e")
result <- get_paged_ids(ids, 3, 2)
expect_equal(result, c("e"))
})

test_that("first page of results returned by default", {
dat <- data.frame(biomarker = "ab",
pid = 1:25,
value = 1,
day = 1:25)
router <- build_routes(cookie_key)
post_request <- local_POST_dataset_request(dat,
"testdataset",
cookie = cookie)
expect_equal(router$call(post_request)$status, 200)
res <- router$call(make_req("GET",
"/dataset/testdataset/individual/pid/",
HTTP_COOKIE = cookie))

expect_equal(res$status, 200)

body <- jsonlite::fromJSON(res$body)
expect_equal(body$data$page, 1)
expect_equal(body$data$numPages, 2)

data <- body$data$data
expect_equal(nrow(data), 20)
})

test_that("correct page of results returned", {
dat <- data.frame(biomarker = "ab",
pid = 1:25,
value = 1,
day = 1:25)
router <- build_routes(cookie_key)
post_request <- local_POST_dataset_request(dat,
"testdataset",
cookie = cookie)
expect_equal(router$call(post_request)$status, 200)
res <- router$call(make_req("GET",
"/dataset/testdataset/individual/pid/",
qs = "page=2",
HTTP_COOKIE = cookie))

expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
expect_equal(body$data$page, 2)
expect_equal(body$data$numPages, 2)

data <- body$data$data
expect_equal(nrow(data), 5)
})
22 changes: 0 additions & 22 deletions tests/testthat/test-read-individual.R
@@ -213,25 +213,3 @@ test_that("can get dataset with dates", {
expect_equal(unlist(data[1, "x"]), as.numeric(lubridate::ydm(dates)))
expect_equal(unlist(data[1, "y"]), 1:5)
})

test_that("only first 20 individuals are returned", {
dat <- data.frame(biomarker = "ab",
pid = 1:25,
value = 1,
day = 1:25)
router <- build_routes(cookie_key)
post_request <- local_POST_dataset_request(dat,
"testdataset",
cookie = cookie)
expect_equal(router$call(post_request)$status, 200)
res <- router$call(make_req("GET",
"/dataset/testdataset/individual/pid/",
HTTP_COOKIE = cookie))
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
warnings <- body$data$warnings
expect_equal(warnings, "25 individuals identified; only the first 20 will be shown.")

data <- body$data$data
expect_equal(nrow(data), 20)
})
