diff --git a/DESCRIPTION b/DESCRIPTION index 544bd38..9aee150 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,6 +16,7 @@ Imports: fs, jsonlite, logger, + lubridate, mgcv, mime, plumber, diff --git a/R/api.R b/R/api.R index 17549fa..c18485c 100644 --- a/R/api.R +++ b/R/api.R @@ -46,10 +46,28 @@ target_post_dataset <- function(req, res) { return(bad_request_response(msg)) } + if (suppressWarnings(all(is.na(as.numeric(file_body[, xcol]))))) { + xtype <- "date" + suppressWarnings({ + file_body[, xcol] <- parse_date(file_body[, xcol]) + }) + if (all(is.na(file_body[, xcol]))) { + res$status <- 400L + msg <- paste("Invalid x column values:", + "these should be numbers or dates in a standard format.") + return(bad_request_response(msg)) + } + logger::log_info("Detected date values in x column") + } else { + logger::log_info("Detected numeric values in x column") + xtype <- "number" + } + logger::log_info(paste("Saving dataset", filename, "to disk")) dir.create(path, recursive = TRUE) utils::write.csv(file_body, file.path(path, "data"), row.names = FALSE) write(xcol, file.path(path, "xcol")) + write(xtype, file.path(path, "xtype")) response_success(jsonlite::unbox(filename)) } @@ -72,7 +90,7 @@ target_get_dataset <- function(name, req) { variables <- list() for (col in cols) { lvls <- unique(dat[, col]) - if (length(lvls) < 12) { + if (length(lvls) < 25) { variables[[col]] <- list(name = jsonlite::unbox(col), levels = lvls) } } @@ -100,6 +118,7 @@ target_get_trace <- function(name, dataset <- read_dataset(req, name, scale) dat <- dataset$data xcol <- dataset$xcol + xtype <- dataset$xtype cols <- colnames(dat) if (!is.null(filter)) { filters <- strsplit(filter, "+", fixed = TRUE)[[1]] @@ -117,6 +136,7 @@ target_get_trace <- function(name, return(lapply(seq_along(groups), function(i) { model <- with_warnings(model_out(groups[[i]], xcol = xcol, + xtype = xtype, method = method, span = span, k = k)) @@ -129,6 +149,7 @@ target_get_trace <- function(name, logger::log_info("Returning single trace") model <- with_warnings(model_out(dat, xcol = xcol, + xtype = xtype, method = method, span = span, k = k)) @@ -157,10 +178,19 @@ read_dataset <- function(req, name, scale) { dat$value <- log2(dat$value) } xcol <- readLines(file.path(path, "xcol")) - list(data = dat, xcol = xcol) + xtype <- readLines(file.path(path, "xtype")) + logger::log_info("Parsing x column values") + if (xtype == "date") { + dat[, xcol] <- as.Date(dat[, xcol], origin = "1970-01-01") + } + list(data = dat, xcol = xcol, xtype = xtype) } -model_out <- function(dat, xcol, method = "auto", span = 0.75, k = 10) { +model_out <- function(dat, xcol, + xtype = "number", + method = "auto", + span = 0.75, + k = 10) { n <- nrow(dat) if (n == 0) { return(list(x = list(), y = list())) @@ -170,13 +200,16 @@ model_out <- function(dat, xcol, method = "auto", span = 0.75, k = 10) { m <- mgcv::gam(eval(parse(text = fmla)), data = dat, method = "REML") } else { - fmla <- sprintf("value ~ %s", xcol) + fmla <- sprintf("value ~ as.numeric(%s)", xcol) m <- stats::loess(fmla, data = dat, span = span) } range <- range(dat[, xcol], na.rm = TRUE) xseq <- range[1]:range[2] xdf <- tibble::tibble(xcol = xseq) names(xdf) <- xcol + if (xtype == "date") { + xseq <- format(as.Date(xseq, origin = "1970-01-01")) + } list(x = xseq, y = stats::predict(m, xdf)) } diff --git a/R/utils.R b/R/utils.R index f0b6a3c..d09e7c9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -22,3 +22,8 @@ validate_scale <- function(scale) { ) } } + +parse_date <- function(dat) { + as.Date(lubridate::parse_date_time(dat, + c("dmy", "mdy", "ymd", "ydm"))) +} diff --git a/docker/Dockerfile b/docker/Dockerfile index a622ec4..883dd43 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,6 +17,7 @@ RUN install_packages --repo=https://mrc-ide.r-universe.dev \ docopt \ fs \ logger \ + lubridate \ jsonlite \ jsonvalidate \ plumber \ diff --git a/inst/schema/DataSeries.schema.json b/inst/schema/DataSeries.schema.json index 15d17f9..bc642f4 100644 --- a/inst/schema/DataSeries.schema.json +++ b/inst/schema/DataSeries.schema.json @@ -13,7 +13,7 @@ "x": { "type": "array", "items": { - "type": "number" + "type": ["number", "string"] } }, "y": { @@ -32,7 +32,7 @@ "x": { "type": "array", "items": { - "type": "number" + "type": ["number", "string"] } }, "y": { diff --git a/inst/schema/Variable.schema.json b/inst/schema/Variable.schema.json index b963a79..2edc88f 100644 --- a/inst/schema/Variable.schema.json +++ b/inst/schema/Variable.schema.json @@ -8,7 +8,7 @@ "levels": { "type": "array", "items": { - "type": "string" + "type": ["string", "number", "null"] } } }, diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index de16728..0ea6413 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -22,6 +22,7 @@ local_add_dataset <- function(dat, name, session = session_id, env = parent.fram dir.create(filepath, recursive = TRUE) write.csv(dat, file.path(filepath, "data"), row.names = FALSE) write("day", file.path(filepath, "xcol")) + write("number", file.path(filepath, "xtype")) withr::defer(fs::dir_delete(filepath), envir = env) name } diff --git a/tests/testthat/test-dates.R b/tests/testthat/test-dates.R new file mode 100644 index 0000000..11f7d05 --- /dev/null +++ b/tests/testthat/test-dates.R @@ -0,0 +1,83 @@ +test_that("d/m/y", { + router <- build_routes(cookie_key) + request <- local_POST_dataset_request(data.frame(biomarker = "ab", + day = c("01/01/2024", + "02/01/2024"), + value = 1:10), + "testdataset", + cookie = cookie) + upload_res <- router$call(request) + expect_equal(upload_res$status, 200) + + res <- router$call(make_req("GET", + "/dataset/testdataset/trace/ab/", + HTTP_COOKIE = cookie)) + + expect_equal(res$status, 200) + body <- jsonlite::fromJSON(res$body) + data <- body$data + expect_equal(unlist(data$raw[1, "x"]), rep(c("2024-01-01", "2024-01-02"), 5)) +}) + +test_that("d-m-y", { + router <- build_routes(cookie_key) + request <- local_POST_dataset_request(data.frame(biomarker = "ab", + day = c("01-01-2024", + "02-01-2024"), + value = 1:10), + "testdataset", + cookie = cookie) + upload_res <- router$call(request) + expect_equal(upload_res$status, 200) + + res <- router$call(make_req("GET", + "/dataset/testdataset/trace/ab/", + HTTP_COOKIE = cookie)) + + expect_equal(res$status, 200) + body <- jsonlite::fromJSON(res$body) + data <- body$data + expect_equal(unlist(data$raw[1, "x"]), rep(c("2024-01-01", "2024-01-02"), 5)) +}) + +test_that("y-m-d", { + router <- build_routes(cookie_key) + request <- local_POST_dataset_request(data.frame(biomarker = "ab", + day = c("2024-01-13", + "2024-01-14"), + value = 1:10), + "testdataset", + cookie = cookie) + upload_res <- router$call(request) + expect_equal(upload_res$status, 200) + + res <- router$call(make_req("GET", + "/dataset/testdataset/trace/ab/", + HTTP_COOKIE = cookie)) + + expect_equal(res$status, 200) + body <- jsonlite::fromJSON(res$body) + data <- body$data + expect_equal(unlist(data$raw[1, "x"]), rep(c("2024-01-13", "2024-01-14"), 5)) +}) + +test_that("y/d/m", { + router <- build_routes(cookie_key) + request <- local_POST_dataset_request(data.frame(biomarker = "ab", + day = c("2024/14/01", + "2024/15/01"), + value = 1:10), + "testdataset", + cookie = cookie) + upload_res <- router$call(request) + expect_equal(upload_res$status, 200) + + res <- router$call(make_req("GET", + "/dataset/testdataset/trace/ab/", + HTTP_COOKIE = cookie)) + + expect_equal(res$status, 200) + body <- jsonlite::fromJSON(res$body) + data <- body$data + expect_equal(unlist(data$raw[1, "x"]), rep(c("2024-01-14", "2024-01-15"), 5)) +}) diff --git a/tests/testthat/test-model.R b/tests/testthat/test-model.R index 61ed0b6..7310684 100644 --- a/tests/testthat/test-model.R +++ b/tests/testthat/test-model.R @@ -66,3 +66,19 @@ test_that("model uses loess options", { expect_true(all(res$y == expected)) }) + +test_that("model can handle dates", { + dates <- sapply(1:50, function(x) as.Date(2 * x, origin = "2023-01-01")) + full_range <- sapply(2:100, function(x) as.Date(x, origin = "2023-01-01")) + dat <- data.frame(date = dates, value = rnorm(50)) + res <- model_out(dat, xcol = "date", xtype = "date") + + m <- stats::loess(value ~ as.numeric(date), data = dat) + xdf <- tibble::tibble(date = full_range) + expected <- stats::predict(m, xdf) + + expect_true(all(res$y == expected)) + expect_equal(res$x[1], "2023-01-03") + expect_equal(res$x[2], "2023-01-04") + expect_equal(res$x[99], "2023-04-11") +}) diff --git a/tests/testthat/test-read.R b/tests/testthat/test-read.R index c0b0d27..6dc032d 100644 --- a/tests/testthat/test-read.R +++ b/tests/testthat/test-read.R @@ -289,3 +289,34 @@ test_that("error running the model results in a 400", { expect_equal(body$errors[1, "detail"], "day has insufficient unique values to support 10 knots: reduce k.") }) + +test_that("can get dataset with dates", { + dates <- c("2023/15/01", "2023/16/01", "2023/17/01", "2023/18/01", "2023/20/01") + dat <- data.frame(biomarker = "ab", + value = 1:5, + day = dates) + router <- build_routes(cookie_key) + post_request <- local_POST_dataset_request(dat, + "testdataset", + cookie = cookie) + expect_equal(router$call(post_request)$status, 200) + res <- router$call(make_req("GET", + "/dataset/testdataset/trace/ab/", + HTTP_COOKIE = cookie)) + expect_equal(res$status, 200) + body <- jsonlite::fromJSON(res$body) + data <- body$data + expect_equal(nrow(data), 1) + expect_equal(data$name, "all") + expect_equal(unlist(data$raw[1, "x"]), c("2023-01-15", "2023-01-16", "2023-01-17", "2023-01-18", "2023-01-20")) + expect_equal(unlist(data$raw[1, "y"]), 1:5) + + full_range_dates <- c("2023-01-15", "2023-01-16", "2023-01-17", "2023-01-18", "2023-01-19", "2023-01-20") + expect_equal(unlist(data$model[1, "x"]), full_range_dates) + parsed <- lubridate::parse_date_time(dates, c("dmy", "mdy", "ymd", "ydm")) + suppressWarnings({m <- stats::loess(value ~ as.numeric(day), data = data.frame(day = parsed, + value = 1:5))}) + parsed_full_range <- lubridate::parse_date_time(full_range_dates, c("dmy", "mdy", "ymd", "ydm")) + expected <- stats::predict(m, parsed_full_range) + expect_equal(unlist(data$model[1, "y"]), expected) +}) \ No newline at end of file diff --git a/tests/testthat/test-upload.R b/tests/testthat/test-upload.R index a338036..d8ec997 100644 --- a/tests/testthat/test-upload.R +++ b/tests/testthat/test-upload.R @@ -46,6 +46,23 @@ test_that("uploading dataset with duplicate name returns 400", { "testdataset already exists. Please choose a unique name for this dataset.") }) +test_that("uploading dataset with invalid xcol values returns 400", { + router <- build_routes(cookie_key) + request <- local_POST_dataset_request(data.frame(biomarker = "ab", + day = c("a", "b"), + value = 1), + "testdataset", + cookie = cookie) + res <- router$call(request) + expect_equal(res$status, 400) + + res <- router$call(request) + body <- jsonlite::fromJSON(res$body) + validate_failure_schema(res$body) + expect_equal(body$errors[1, "detail"], + "Invalid x column values: these should be numbers or dates in a standard format.") +}) + test_that("can upload dataset with different xcol", { router <- build_routes(cookie_key) request <- local_POST_dataset_request(data.frame(biomarker = "ab",