
Commit

Merge pull request #6 from seroanalytics/dates
support date columns
hillalex authored Sep 16, 2024
2 parents 2e7d650 + a6096dd commit 12b55a9
Showing 11 changed files with 195 additions and 7 deletions.
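
The main change is in `target_post_dataset` (R/api.R): when none of the x column values can be read as numbers, the column is treated as dates, parsed with the new `parse_date` helper, and the detected type is stored alongside the dataset. Below is a minimal standalone sketch of that detection step; the `detect_xtype` helper name is illustrative and not part of this PR, but the parsing call matches what R/utils.R adds.

```r
library(lubridate)

# Illustrative helper (not in the PR): classify an x column as "number" or "date",
# mirroring the branching added to target_post_dataset.
detect_xtype <- function(values) {
  if (suppressWarnings(all(is.na(as.numeric(values))))) {
    parsed <- suppressWarnings(
      as.Date(lubridate::parse_date_time(values, c("dmy", "mdy", "ymd", "ydm")))
    )
    if (all(is.na(parsed))) {
      stop("Invalid x column values: these should be numbers or dates in a standard format.")
    }
    "date"
  } else {
    "number"
  }
}

detect_xtype(c("1", "2.5", "3"))              # "number"
detect_xtype(c("01/01/2024", "02/01/2024"))   # "date"
```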
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -16,6 +16,7 @@ Imports:
fs,
jsonlite,
logger,
lubridate,
mgcv,
mime,
plumber,
41 changes: 37 additions & 4 deletions R/api.R
@@ -46,10 +46,28 @@ target_post_dataset <- function(req, res) {
return(bad_request_response(msg))
}

if (suppressWarnings(all(is.na(as.numeric(file_body[, xcol]))))) {
xtype <- "date"
suppressWarnings({
file_body[, xcol] <- parse_date(file_body[, xcol])
})
if (all(is.na(file_body[, xcol]))) {
res$status <- 400L
msg <- paste("Invalid x column values:",
"these should be numbers or dates in a standard format.")
return(bad_request_response(msg))
}
logger::log_info("Detected date values in x column")
} else {
logger::log_info("Detected numeric values in x column")
xtype <- "number"
}

logger::log_info(paste("Saving dataset", filename, "to disk"))
dir.create(path, recursive = TRUE)
utils::write.csv(file_body, file.path(path, "data"), row.names = FALSE)
write(xcol, file.path(path, "xcol"))
write(xtype, file.path(path, "xtype"))
response_success(jsonlite::unbox(filename))
}

@@ -72,7 +90,7 @@ target_get_dataset <- function(name, req) {
variables <- list()
for (col in cols) {
lvls <- unique(dat[, col])
if (length(lvls) < 12) {
if (length(lvls) < 25) {
variables[[col]] <- list(name = jsonlite::unbox(col), levels = lvls)
}
}
@@ -100,6 +118,7 @@ target_get_trace <- function(name,
dataset <- read_dataset(req, name, scale)
dat <- dataset$data
xcol <- dataset$xcol
xtype <- dataset$xtype
cols <- colnames(dat)
if (!is.null(filter)) {
filters <- strsplit(filter, "+", fixed = TRUE)[[1]]
@@ -117,6 +136,7 @@
return(lapply(seq_along(groups), function(i) {
model <- with_warnings(model_out(groups[[i]],
xcol = xcol,
xtype = xtype,
method = method,
span = span,
k = k))
@@ -129,6 +149,7 @@
logger::log_info("Returning single trace")
model <- with_warnings(model_out(dat,
xcol = xcol,
xtype = xtype,
method = method,
span = span,
k = k))
@@ -157,10 +178,19 @@ read_dataset <- function(req, name, scale) {
dat$value <- log2(dat$value)
}
xcol <- readLines(file.path(path, "xcol"))
list(data = dat, xcol = xcol)
xtype <- readLines(file.path(path, "xtype"))
logger::log_info("Parsing x column values")
if (xtype == "date") {
dat[, xcol] <- as.Date(dat[, xcol], origin = "1970-01-01")
}
list(data = dat, xcol = xcol, xtype = xtype)
}

model_out <- function(dat, xcol, method = "auto", span = 0.75, k = 10) {
model_out <- function(dat, xcol,
xtype = "number",
method = "auto",
span = 0.75,
k = 10) {
n <- nrow(dat)
if (n == 0) {
return(list(x = list(), y = list()))
@@ -170,13 +200,16 @@ model_out <- function(dat, xcol, method = "auto", span = 0.75, k = 10) {
m <- mgcv::gam(eval(parse(text = fmla)),
data = dat, method = "REML")
} else {
fmla <- sprintf("value ~ %s", xcol)
fmla <- sprintf("value ~ as.numeric(%s)", xcol)
m <- stats::loess(fmla, data = dat, span = span)
}
range <- range(dat[, xcol], na.rm = TRUE)
xseq <- range[1]:range[2]
xdf <- tibble::tibble(xcol = xseq)
names(xdf) <- xcol
if (xtype == "date") {
xseq <- format(as.Date(xseq, origin = "1970-01-01"))
}
list(x = xseq, y = stats::predict(m, xdf))
}

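For the trace endpoint, date support works by fitting the smoother on the numeric representation of the dates (`as.numeric(day)` is days since 1970-01-01), predicting over every day in the observed range, and formatting the x values back to ISO date strings for the response. A hedged sketch of that flow, assuming the `loess` branch of `model_out` with its default span; the data and column name are made up for illustration.

```r
library(tibble)

# Sketch of the date branch of model_out (R/api.R).
set.seed(1)
dat <- data.frame(day = as.Date("2023-01-01") + seq(2, 100, by = 2),
                  value = rnorm(50))

fmla <- sprintf("value ~ as.numeric(%s)", "day")
m <- stats::loess(fmla, data = dat, span = 0.75)

rng <- range(dat$day, na.rm = TRUE)
xseq <- rng[1]:rng[2]                       # `:` on Dates yields days since 1970-01-01
xdf <- tibble::tibble(day = xseq)
y <- stats::predict(m, xdf)

x <- format(as.Date(xseq, origin = "1970-01-01"))   # ISO strings returned to the client
head(x)   # "2023-01-03" "2023-01-04" ...
head(y)
```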
5 changes: 5 additions & 0 deletions R/utils.R
@@ -22,3 +22,8 @@ validate_scale <- function(scale) {
)
}
}

parse_date <- function(dat) {
as.Date(lubridate::parse_date_time(dat,
c("dmy", "mdy", "ymd", "ydm")))
}
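
A brief usage sketch of the new `parse_date` helper: it tries the day-month-year, month-day-year, year-month-day and year-day-month orderings and returns `Date` values, or `NA` where nothing matches (which the upload endpoint turns into a 400). The exact format chosen for genuinely ambiguous inputs follows lubridate's format guessing, so the outputs in the comments are indicative.

```r
parse_date(c("13/01/2024", "14/01/2024"))    # unambiguous d/m/y -> "2024-01-13" "2024-01-14"
parse_date(c("2024-01-13", "2024-01-14"))    # y-m-d -> "2024-01-13" "2024-01-14"
suppressWarnings(parse_date(c("a", "b")))    # no format matches -> NA NA
```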
1 change: 1 addition & 0 deletions docker/Dockerfile
@@ -17,6 +17,7 @@ RUN install_packages --repo=https://mrc-ide.r-universe.dev \
docopt \
fs \
logger \
lubridate \
jsonlite \
jsonvalidate \
plumber \
4 changes: 2 additions & 2 deletions inst/schema/DataSeries.schema.json
@@ -13,7 +13,7 @@
"x": {
"type": "array",
"items": {
"type": "number"
"type": ["number", "string"]
}
},
"y": {
@@ -32,7 +32,7 @@
"x": {
"type": "array",
"items": {
"type": "number"
"type": ["number", "string"]
}
},
"y": {
2 changes: 1 addition & 1 deletion inst/schema/Variable.schema.json
@@ -8,7 +8,7 @@
"levels": {
"type": "array",
"items": {
"type": "string"
"type": ["string", "number", "null"]
}
}
},
1 change: 1 addition & 0 deletions tests/testthat/helper.R
@@ -22,6 +22,7 @@ local_add_dataset <- function(dat, name, session = session_id, env = parent.fram
dir.create(filepath, recursive = TRUE)
write.csv(dat, file.path(filepath, "data"), row.names = FALSE)
write("day", file.path(filepath, "xcol"))
write("number", file.path(filepath, "xtype"))
withr::defer(fs::dir_delete(filepath), envir = env)
name
}
83 changes: 83 additions & 0 deletions tests/testthat/test-dates.R
@@ -0,0 +1,83 @@
test_that("d/m/y", {
router <- build_routes(cookie_key)
request <- local_POST_dataset_request(data.frame(biomarker = "ab",
day = c("01/01/2024",
"02/01/2024"),
value = 1:10),
"testdataset",
cookie = cookie)
upload_res <- router$call(request)
expect_equal(upload_res$status, 200)

res <- router$call(make_req("GET",
"/dataset/testdataset/trace/ab/",
HTTP_COOKIE = cookie))

expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(unlist(data$raw[1, "x"]), rep(c("2024-01-01", "2024-01-02"), 5))
})

test_that("d-m-y", {
router <- build_routes(cookie_key)
request <- local_POST_dataset_request(data.frame(biomarker = "ab",
day = c("01-01-2024",
"02-01-2024"),
value = 1:10),
"testdataset",
cookie = cookie)
upload_res <- router$call(request)
expect_equal(upload_res$status, 200)

res <- router$call(make_req("GET",
"/dataset/testdataset/trace/ab/",
HTTP_COOKIE = cookie))

expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(unlist(data$raw[1, "x"]), rep(c("2024-01-01", "2024-01-02"), 5))
})

test_that("y-m-d", {
router <- build_routes(cookie_key)
request <- local_POST_dataset_request(data.frame(biomarker = "ab",
day = c("2024-01-13",
"2024-01-14"),
value = 1:10),
"testdataset",
cookie = cookie)
upload_res <- router$call(request)
expect_equal(upload_res$status, 200)

res <- router$call(make_req("GET",
"/dataset/testdataset/trace/ab/",
HTTP_COOKIE = cookie))

expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(unlist(data$raw[1, "x"]), rep(c("2024-01-13", "2024-01-14"), 5))
})

test_that("y/d/m", {
router <- build_routes(cookie_key)
request <- local_POST_dataset_request(data.frame(biomarker = "ab",
day = c("2024/14/01",
"2024/15/01"),
value = 1:10),
"testdataset",
cookie = cookie)
upload_res <- router$call(request)
expect_equal(upload_res$status, 200)

res <- router$call(make_req("GET",
"/dataset/testdataset/trace/ab/",
HTTP_COOKIE = cookie))

expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(unlist(data$raw[1, "x"]), rep(c("2024-01-14", "2024-01-15"), 5))
})
16 changes: 16 additions & 0 deletions tests/testthat/test-model.R
@@ -66,3 +66,19 @@ test_that("model uses loess options", {

expect_true(all(res$y == expected))
})

test_that("model can handle dates", {
dates <- sapply(1:50, function(x) as.Date(2 * x, origin = "2023-01-01"))
full_range <- sapply(2:100, function(x) as.Date(x, origin = "2023-01-01"))
dat <- data.frame(date = dates, value = rnorm(50))
res <- model_out(dat, xcol = "date", xtype = "date")

m <- stats::loess(value ~ as.numeric(date), data = dat)
xdf <- tibble::tibble(date = full_range)
expected <- stats::predict(m, xdf)

expect_true(all(res$y == expected))
expect_equal(res$x[1], "2023-01-03")
expect_equal(res$x[2], "2023-01-04")
expect_equal(res$x[99], "2023-04-11")
})
31 changes: 31 additions & 0 deletions tests/testthat/test-read.R
@@ -289,3 +289,34 @@ test_that("error running the model results in a 400", {
expect_equal(body$errors[1, "detail"],
"day has insufficient unique values to support 10 knots: reduce k.")
})

test_that("can get dataset with dates", {
dates <- c("2023/15/01", "2023/16/01", "2023/17/01", "2023/18/01", "2023/20/01")
dat <- data.frame(biomarker = "ab",
value = 1:5,
day = dates)
router <- build_routes(cookie_key)
post_request <- local_POST_dataset_request(dat,
"testdataset",
cookie = cookie)
expect_equal(router$call(post_request)$status, 200)
res <- router$call(make_req("GET",
"/dataset/testdataset/trace/ab/",
HTTP_COOKIE = cookie))
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(nrow(data), 1)
expect_equal(data$name, "all")
expect_equal(unlist(data$raw[1, "x"]), c("2023-01-15", "2023-01-16", "2023-01-17", "2023-01-18", "2023-01-20"))
expect_equal(unlist(data$raw[1, "y"]), 1:5)

full_range_dates <- c("2023-01-15", "2023-01-16", "2023-01-17", "2023-01-18", "2023-01-19", "2023-01-20")
expect_equal(unlist(data$model[1, "x"]), full_range_dates)
parsed <- lubridate::parse_date_time(dates, c("dmy", "mdy", "ymd", "ydm"))
suppressWarnings({m <- stats::loess(value ~ as.numeric(day), data = data.frame(day = parsed,
value = 1:5))})
parsed_full_range <- lubridate::parse_date_time(full_range_dates, c("dmy", "mdy", "ymd", "ydm"))
expected <- stats::predict(m, parsed_full_range)
expect_equal(unlist(data$model[1, "y"]), expected)
})
17 changes: 17 additions & 0 deletions tests/testthat/test-upload.R
@@ -46,6 +46,23 @@ test_that("uploading dataset with duplicate name returns 400", {
"testdataset already exists. Please choose a unique name for this dataset.")
})

test_that("uploading dataset with invalid xcol values returns 400", {
router <- build_routes(cookie_key)
request <- local_POST_dataset_request(data.frame(biomarker = "ab",
day = c("a", "b"),
value = 1),
"testdataset",
cookie = cookie)
res <- router$call(request)
expect_equal(res$status, 400)

res <- router$call(request)
body <- jsonlite::fromJSON(res$body)
validate_failure_schema(res$body)
expect_equal(body$errors[1, "detail"],
"Invalid x column values: these should be numbers or dates in a standard format.")
})

test_that("can upload dataset with different xcol", {
router <- build_routes(cookie_key)
request <- local_POST_dataset_request(data.frame(biomarker = "ab",
