Skip to content

Commit

Permalink
make coxSnell and Nagelkerke computationally more robust (#265)
Browse files Browse the repository at this point in the history
* make coxSnell and Nagelkerke computationally more robust

* fix typo

* 1-exp -> -exmp1
  • Loading branch information
Kucharssim authored Oct 27, 2023
1 parent e0770da commit 248f4c8
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 11 deletions.
10 changes: 7 additions & 3 deletions R/commonglm.R
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@
l0 <- -0.5*nullModel[["deviance"]]
lm <- as.numeric(logLik(glmModel))
n <- length(glmModel[["y"]])
coxSnell <- 1 - exp(l0 - lm)^(2 / n)
denom <- 1 - exp(l0)^(2 / n)
coxSnell <- .coxSnellCompute(l0, lm, n)
denom <- -expm1(2*l0/n)
return(max(c(0,coxSnell / denom)))
}
}
Expand All @@ -195,11 +195,15 @@
l0 <- -0.5*nullModel[["deviance"]]
lm <- as.numeric(logLik(glmModel))
n <- length(glmModel[["y"]])
coxSnell <- 1 - exp(l0 - lm)^(2 / n)
coxSnell <- .coxSnellCompute(l0, lm, n)
return(max(c(0,coxSnell)))
}
}

.coxSnellCompute <- function(l0, lm, n) {
return(-expm1(2*(l0 - lm)/n))
}

.bic <- function(glmModel) {
return(log(length(glmModel[["y"]]))*length(coef(glmModel))+glmModel[["deviance"]])
}
Expand Down
46 changes: 38 additions & 8 deletions tests/testthat/test-regressionlogistic.R
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ test_that("Confusion Matrix Table Matches", {
list(components="contNormal", isNuisance=FALSE),
list(components="contOutlier", isNuisance=FALSE)
)

options$confusionMatrix <- TRUE
results <- jaspTools::runAnalysis("RegressionLogistic", "debug.csv", options)
table <- results[["results"]][["perfDiag"]][["collection"]][["perfDiag_confusionMatrix"]][["data"]]
Expand Down Expand Up @@ -254,35 +254,35 @@ test_that("Confusion Matrix Table Matches", {
options$covariates <- c("contNormal", "contOutlier")
options$factors <- c("facFive")
options$dependent <- "facGender"

options$modelTerms <- list(
list(components="contNormal", isNuisance=FALSE),
list(components="contOutlier", isNuisance=FALSE),
list(components="facFive", isNuisance=FALSE)
)

options$multicollinearity <- TRUE
results <- jaspTools::runAnalysis("RegressionLogistic", "debug.csv", options)
table <- results[["results"]][["multicolliTable"]][["data"]]
jaspTools::expect_equal_tables(table, list("contNormal", 0.9536754, 1.048575,
"contOutlier", 0.9742628, 1.026417,
"contOutlier", 0.9742628, 1.026417,
"facFive", 0.9377347, 1.0664))

options <- jaspTools::analysisOptions("RegressionLogistic")
options$covariates <- c("contNormal", "contOutlier")
options$dependent <- "facGender"

options$modelTerms <- list(
list(components="contNormal", isNuisance=FALSE),
list(components="contOutlier", isNuisance=FALSE)
)

options$multicollinearity <- TRUE
results <- jaspTools::runAnalysis("RegressionLogistic", "debug.csv", options)
table <- results[["results"]][["multicolliTable"]][["data"]]
jaspTools::expect_equal_tables(table, list("contNormal", 0.9958289, 1.004189,
"contOutlier", 0.9958289, 1.004189))

})

test_that("Error Handling", {
Expand Down Expand Up @@ -353,6 +353,36 @@ test_that("Pseudo R-squared are correct", {
jaspTools::expect_equal_tables(r_squared,
list(0.0856291418878957, 0.141844242772774, 0.0962310669224921, 0.100864461712579)
)


# Another test, inspired by the issue: https://github.com/jasp-stats/jasp-issues/issues/2368
# That issue was caused by a numerical overflow for a somewhat large dataset
set.seed(1)
n <- 1e5
x <- runif(n, min = 0, max = 1)
y_hat <- 5 * x
p <- plogis(y_hat)
y <- rbinom(n, 1, p)

df <- data.frame(x, y)

# library(performance) # version 0.10.5
# fit <- glm(y~x, data = df, family = binomial)
# performance::r2_mcfadden(fit)$R2 # 0.2066583
# performance::r2_nagelkerke(fit) # 0.2767422
# performance::r2_tjur(fit) # 0.1713724
# performance::r2_coxsnell(fit) # 0.1524201

options <- jaspTools::analysisOptions("RegressionLogistic")
options$dependent <- "y"
options$covariates <- "x"
options$modelTerms <- list(list(components = "x", isNuisance = FALSE))

results <- jaspTools::runAnalysis("RegressionLogistic", df, options)
r_squared <- results$results$modelSummary$data[[2]][c("fad", "nag", "tju", "cas")]
jaspTools::expect_equal_tables(r_squared,
list(0.206658284548968, 0.276742170706529, 0.171372447470285, 0.152420076012601)
)
})

test_that("Performance plots match", {
Expand Down

0 comments on commit 248f4c8

Please sign in to comment.