Skip to content

Commit

Permalink
Merge pull request #52 from billdenney/single-value-group-51
Browse files Browse the repository at this point in the history
Single value group 51
  • Loading branch information
certara-jcraig authored Sep 14, 2023
2 parents 23bd54a + a5e220f commit c2e43a1
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 71 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* `simulated.tidyvpcobj()` detects if the number of simulated rows is not an integer multiple of the number of observed rows and adds the new `xsim` argument to test that x values match between replicated simulations. It will suggest that MDV filtering may not have occurred if either of these fails [#35](https://github.com/certara/tidyvpc/issues/35).
* Prevent division by zero in `predcorrect()` transformation [#31](https://github.com/certara/tidyvpc/issues/31).
* Usability enhancements for prediction corrected VPC (pcVPC), which include support for `binning.tidyvpcobj()` either before or after usage of `predcorrect.tidyvpcobj()`, and automatically performing LOESS pcVPC when `binless.tidyvpcobj()` is used. As a result, the `loess.ypc` argument is no longer required[#43](https://github.com/certara/tidyvpc/issues/43).
* VPC can work with a single value in a group [#51](https://github.com/certara/tidyvpc/issues/51)

# tidyvpc 1.4.0
* Fix for npde calculation fix npde calc [#16](https://github.com/certara/tidyvpc/pull/16)
Expand Down
59 changes: 32 additions & 27 deletions R/vpcstats.R
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ binning.tidyvpcobj <- function(o, bin, data=o$data, xbin="xmedian", centers, bre
stop("Invalid xbin")
}
vpc.method <- list(method = "binning")

# check if user supplied predcorrect before binning
if (!is.null(o$predcor) && o$predcor) {
pred <- o$pred
Expand All @@ -540,7 +540,7 @@ binning.tidyvpcobj <- function(o, bin, data=o$data, xbin="xmedian", centers, bre
o$sim[, ypc := ifelse(rep(pred, times = nrow(o$sim) / nrow(o$obs)) == 0, 0, (mpred / pred) * y)]
}
}

update(o, xbin=xbin, vpc.method = vpc.method)
}

Expand Down Expand Up @@ -606,13 +606,13 @@ predcorrect.tidyvpcobj <- function(o, pred, data=o$data, ..., log=FALSE) {

stratbin <- o$.stratbin
# predcorrect after binning, check if binning/binless has already been specified

if (!is.null(o$vpc.method)) {
if(o$vpc.method$method == "binless") {
o$vpc.method$loess.ypc <- TRUE
} else { #binning specified, perform ypc calculcation
mpred <- data.table(stratbin, pred)[, mpred := median(pred), by = stratbin]$mpred

if (log) {
o$obs[, ypc := (mpred - pred) + y]
o$sim[, ypc := (mpred - pred) + y]
Expand Down Expand Up @@ -858,32 +858,37 @@ bin_by_classInt <- function(style, nbins=NULL) {
nbins <- .check_nbins(nbins)
}
function(x, ...) {
args <- list(var=x, style=style)
if (!is.null(nbins)) {
nbins <- .resolve_nbins(nbins, ...)
args$n <- nbins
}
args <- c(args, list(...))
if (style %in% c("kmeans", "hclust", "dpih")) {
# These don't accept '...' arguments
args1 <- args[intersect(names(args), methods::formalArgs(classInt::classIntervals))]
args2 <- if (style == "kmeans") {
args[intersect(names(args), methods::formalArgs(stats::kmeans))]
} else if (style == "hclust") {
args[intersect(names(args), methods::formalArgs(stats::hclust))]
} else if (style == "dpih") {
has_KernSmooth <- requireNamespace("KernSmooth", quietly=TRUE)
if (!has_KernSmooth) {
stop("Package 'KernSmooth' is required to use the binning method. Please install it.")
if (length(unique(x)) > 1) {
args <- list(var=x, style=style)
if (!is.null(nbins)) {
nbins <- .resolve_nbins(nbins, ...)
args$n <- nbins
}
args <- c(args, list(...))
if (style %in% c("kmeans", "hclust", "dpih")) {
# These don't accept '...' arguments
args1 <- args[intersect(names(args), methods::formalArgs(classInt::classIntervals))]
args2 <- if (style == "kmeans") {
args[intersect(names(args), methods::formalArgs(stats::kmeans))]
} else if (style == "hclust") {
args[intersect(names(args), methods::formalArgs(stats::hclust))]
} else if (style == "dpih") {
has_KernSmooth <- requireNamespace("KernSmooth", quietly=TRUE)
if (!has_KernSmooth) {
stop("Package 'KernSmooth' is required to use the binning method. Please install it.")
}
args[intersect(names(args), methods::formalArgs(KernSmooth::dpih))]
} else {
list()
}
args[intersect(names(args), methods::formalArgs(KernSmooth::dpih))]
} else {
list()
args <- c(args1, args2)
}
args <- c(args1, args2)
args <- args[!duplicated(args)]
breaks <- do.call(classInt::classIntervals, args)$brks
} else {
# If a group has a single value, `classInt::classIntervals` gives an error
breaks <- rep(1, length(x))
}
args <- args[!duplicated(args)]
breaks <- do.call(classInt::classIntervals, args)$brks
cut_at(breaks)(x)
}
}
Expand Down
106 changes: 62 additions & 44 deletions tests/testthat/test-binning.R
Original file line number Diff line number Diff line change
@@ -1,46 +1,46 @@
test_that("obs bins equal stats bins", {
obs_data <- as.data.table(tidyvpc::obs_data)
sim_data <- as.data.table(tidyvpc::sim_data)

## Subest MDV = 0
obs_data <- obs_data[MDV == 0]
sim_data <- sim_data[MDV == 0]

unique_bins_obs <- as.factor(unique(obs_data$NTIME))
#Assign observed and simulated data to tidyvpc object
vpc <- observed(obs_data, x = TIME, y = DV )

vpc <- simulated(vpc, sim_data, y = DV)

vpc <- binning(vpc, bin = NTIME)

vpc <- vpcstats(vpc)

unique_bins_vpc <- unique(vpc$stats$bin)

#Check that bins match for binning on xvar NTIME
expect_equal(unique_bins_obs, unique_bins_vpc)

})


test_that("cat obs vpcstats is correct", {
obs_cat_data <- as.data.table(tidyvpc::obs_cat_data)
sim_cat_data <- as.data.table(tidyvpc::sim_cat_data)

vpc <- observed(obs_cat_data, x = agemonths, y = zlencat )
vpc <- simulated(vpc, sim_cat_data, y = DV)
vpc <- binning(vpc, bin = round(agemonths, 0))
vpc <- vpcstats(vpc, vpc.type = "categorical")

location <- system.file("extdata/Binning","cat_stats.csv",package="tidyvpc")

stats <- fread(location, colClasses = c(pname = "factor"))
stats$bin <- as.factor(stats$bin)

setkeyv(stats, c("xbin"))


#Check for equality, dispatches to data.table::all.equal method
expect_identical(all.equal(vpc$stats, stats), TRUE)

Expand All @@ -51,129 +51,147 @@ test_that("cat obs vpcstats is correct", {
test_that("cat obs strat vpcstats is correct", {
obs_cat_data <- as.data.table(tidyvpc::obs_cat_data)
sim_cat_data <- as.data.table(tidyvpc::sim_cat_data)

vpc <- observed(obs_cat_data, x = agemonths, y = zlencat )
vpc <- simulated(vpc, sim_cat_data, y = DV)
vpc <- stratify(vpc, ~ Country_ID_code)
vpc <- binning(vpc, bin = round(agemonths, 0))
vpc <- vpcstats(vpc, vpc.type = "categorical")

location <- system.file("extdata/Binning","cat_strat_stats.csv",package="tidyvpc")

stats <- fread(location, colClasses = c(pname = "factor"))
stats$bin <- as.factor(stats$bin)

setkeyv(stats, c(names(vpc$strat), "xbin"))


#Check for equality, dispatches to data.table::all.equal method
expect_identical(all.equal(vpc$stats, stats), TRUE)

})

test_that("binning methods are valid", {

## Subest MDV = 0
obs <- obs_data[MDV == 0]
sim <- sim_data[MDV == 0]

vpc <- observed(obs, x = TIME, y = DV )
vpc <- simulated(vpc, sim, y = DV)

centers <- c(0,1,5,8,12)
vpc <- binning(vpc, bin = "centers", centers = centers)
expect_equal(vpc$xbin$bin, as.factor(centers))

vpc <- binning(vpc, bin = "breaks", breaks = c(1,3,6,9,11))
expect_true(length(levels(vpc$xbin$bin)) == 11)

vpc <- binning(vpc, bin = "breaks", breaks = c(1,3,6,9,11))
expect_true(length(levels(vpc$xbin$bin)) == 11)

vpc <- binning(vpc, bin = "pam", nbins = 6)
expect_true(max(vpc$xbin$xbin) < 12)

vpc <- binning(vpc, bin = "ntile", nbins = 6)
expect_true(nrow(vpc$xbin) == 6)

vpc <- binning(vpc, bin = "eqcut", nbins = 12)
expect_true(nrow(vpc$xbin) == 12)

vpc <- binning(vpc, bin = "sd", nbins = 4)
expect_true(nrow(vpc$xbin) == 6)

})


test_that("binning by stratum works", {
obs_data <- obs_data[MDV == 0]
sim_data <- sim_data[MDV == 0]
obs_data$PRED <- sim_data[REP == 1, PRED]

vpc <- observed(obs_data, x=TIME, y=DV)
vpc <- simulated(vpc, sim_data, y=DV)
vpc <- stratify(vpc, ~ GENDER + STUDY)
vpc <- stratify(vpc, ~ GENDER + STUDY)
vpc <- binning(vpc, stratum = list(GENDER = "M", STUDY = "Study A"), bin = "jenks", nbins = 5, by.strata = T)
vpc <- binning(vpc, stratum = list(GENDER = "F", STUDY = "Study A"), bin = "centers", centers = c(0.5,3,5,10,15), by.strata = T)
vpc <- binning(vpc, stratum = list(GENDER = "M", STUDY = "Study B"), bin = "kmeans", by.strata = T)
vpc <- binning(vpc, stratum = list(GENDER = "F", STUDY = "Study B"), bin = "pam", nbins = 5, by.strata = T)
vpc <- predcorrect(vpc, pred=PRED)
vpc <- predcorrect(vpc, pred=PRED)
vpc <- vpcstats(vpc)

expect_true(inherits(vpc, "tidyvpcobj") && vpc$bin.by.strata)

})


test_that("binning errors are valid", {

obs <- obs_data[MDV == 0]
sim <- sim_data[MDV == 0]

vpc <- observed(obs, x = TIME, y = DV )
vpc <- simulated(vpc, sim, y = DV)
expect_true(inherits(binning(vpc, xbin = NTIME), "tidyvpcobj"))
expect_error(binning(vpc, xbin = c(1:5)))

})

test_that("binning can be used after predcorrect", {
obs_data <- obs_data[MDV == 0]
sim_data <- sim_data[MDV == 0]
obs_data$PRED <- sim_data[REP == 1, PRED]

vpc <- observed(obs_data, x = TIME, y = DV )
vpc <- simulated(vpc, sim_data, y = DV)
vpc <- stratify(vpc, ~ GENDER)
vpc <- predcorrect(vpc, pred = PRED)
vpc <- binning(vpc, bin = NTIME)
vpc <- vpcstats(vpc)

location <- system.file("extdata/Binning","predcor_strat_stats.csv",package="tidyvpc")
stats <- fread(location, colClasses = c(qname = "factor"))
stats[, bin := factor(bin, levels = levels(vpc$stats$bin))]
setkeyv(stats, c(names(vpc$strat), "xbin"))

expect_equal(vpc$stats, stats)
})

test_that("binning can be used before predcorrect", {
obs_data <- obs_data[MDV == 0]
sim_data <- sim_data[MDV == 0]
obs_data$PRED <- sim_data[REP == 1, PRED]

vpc <- observed(obs_data, x = TIME, y = DV )
vpc <- simulated(vpc, sim_data, y = DV)
vpc <- stratify(vpc, ~ GENDER)
vpc <- binning(vpc, bin = NTIME)
vpc <- predcorrect(vpc, pred = PRED)
vpc <- vpcstats(vpc)

location <- system.file("extdata/Binning","predcor_strat_stats.csv",package="tidyvpc")
stats <- fread(location, colClasses = c(qname = "factor"))
stats[, bin := factor(bin, levels = levels(vpc$stats$bin))]
setkeyv(stats, c(names(vpc$strat), "xbin"))

expect_equal(vpc$stats, stats)
})


test_that("binning works with single-value groups (#51)", {
d_obs <-
data.frame(
group = rep(c("Patient", "Healthy"), each = 5),
conc = c(rep(0, 5), 1:5),
value = 1:10
)

d_sim <-
d_obs[rep(1:nrow(d_obs), 5), ]

value <-
observed(d_obs, x = conc, yobs = value) %>%
simulated(d_sim, xsim = conc, ysim = value) %>%
stratify(~group) %>%
binning(bin = "jenks")
expect_s3_class(value, "tidyvpcobj")
})

0 comments on commit c2e43a1

Please sign in to comment.