Skip to content

Commit

Permalink
implement na.omit
Browse files Browse the repository at this point in the history
  • Loading branch information
mnwright committed Jul 2, 2024
1 parent e779a65 commit 57a07eb
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 5 deletions.
20 changes: 15 additions & 5 deletions R/ranger.R
Original file line number Diff line number Diff line change
Expand Up @@ -332,16 +332,22 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
stop("Error: Missing data in dependent variable.", call. = FALSE)
}
} else if (na.action == "na.omit") {
# TODO: Implement na.omit
stop("na.omit not implemented yet.")
if (anyNA(x)) {
idx_keep <- stats::complete.cases(x)
x <- x[idx_keep, , drop = FALSE]
y <- y[idx_keep, drop = FALSE]
if (nrow(x) < 1) {
stop("Error: No observations left after removing missing values.")
}
}
} else if (na.action == "na.learn") {
if (anyNA(y)) {
stop("Error: Missing data in dependent variable.", call. = FALSE)
}
if (anyNA(x)) {
any.na <- TRUE
if (!is.null(splitrule) && !(splitrule %in% c("gini", "variance", "logrank"))) {
stop("Error: Missing value handling currently only implemented for gini, variance and logrank splitrules.")
if (!is.null(splitrule) && !(splitrule %in% c("gini", "variance"))) {
stop("Error: Missing value handling currently only implemented for gini and variance splitrules.")
}
}
} else {
Expand Down Expand Up @@ -378,6 +384,11 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
stop("Error: Unsupported type of dependent variable.")
}

## No missing value handling for survival yet
if (any.na & treetype == 5) {
stop("Error: Missing value handling not yet implemented for survival forests.")
}

## Number of levels
if (treetype %in% c(1, 9)) {
if (is.factor(y)) {
Expand Down Expand Up @@ -447,7 +458,6 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
## Don't order if only one level
levels.ordered <- levels(xx)
} else if (inherits(y, "Surv")) {
# TODO: Fix missings here
## Use median survival if available or largest quantile available in all strata if median not available
levels.ordered <- largest.quantile(y ~ xx)

Expand Down
50 changes: 50 additions & 0 deletions tests/testthat/test_missings.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,53 @@ test_that("Missing values for survival not yet working", {

expect_error(ranger(Surv(time, status) ~ ., dat, num.trees = 5), "Error: Missing value handling not yet implemented for survival forests\\.")
})

test_that("na.omit leads to same result as manual removal, classification", {
dat <- iris
dat[1, 1] <- NA
rf1 <- ranger(Species ~ ., dat, num.trees = 5, seed = 10, na.action = "na.omit")

dat2 <- na.omit(dat)
rf2 <- ranger(Species ~ ., dat2, num.trees = 5, seed = 10)

expect_equal(rf1$predictions, rf2$predictions)
})

test_that("na.omit leads to same result as manual removal, probability", {
dat <- iris
dat[1, 1] <- NA
rf1 <- ranger(Species ~ ., dat, num.trees = 5, probability = TRUE, seed = 10, na.action = "na.omit")

dat2 <- na.omit(dat)
rf2 <- ranger(Species ~ ., dat2, num.trees = 5, probability = TRUE, seed = 10)

expect_equal(rf1$predictions, rf2$predictions)
})

test_that("na.omit leads to same result as manual removal, regression", {
dat <- iris
dat[1, 1] <- NA
rf1 <- ranger(Sepal.Width ~ ., dat, num.trees = 5, seed = 10, na.action = "na.omit")

dat2 <- na.omit(dat)
rf2 <- ranger(Sepal.Width ~ ., dat2, num.trees = 5, seed = 10)

expect_equal(rf1$predictions, rf2$predictions)
})

test_that("na.omit leads to same result as manual removal, survival", {
dat <- veteran
dat[1, 1] <- NA
rf1 <- ranger(Surv(time, status) ~ ., dat, num.trees = 5, seed = 10, na.action = "na.omit")

dat2 <- na.omit(dat)
rf2 <- ranger(Surv(time, status) ~ ., dat2, num.trees = 5, seed = 10)

expect_equal(rf1$chf, rf2$chf)
})

test_that("na.omit not working if no observations left", {
dat <- iris
dat[1:150, 1] <- NA
expect_error(ranger(Species ~ ., dat, num.trees = 5, na.action = "na.omit"), "Error: No observations left after removing missing values\\.")
})

0 comments on commit 57a07eb

Please sign in to comment.