From a82bd4ab4737d8fa160112664939e26e29a8ce14 Mon Sep 17 00:00:00 2001 From: Johnny van Doorn <15704203+JohnnyDoorn@users.noreply.github.com> Date: Wed, 24 Apr 2024 15:02:50 +0200 Subject: [PATCH] blocked regression logistic, use casewise functions from lin reg (#301) * blocked regression logistic, added casewise * Fix unit tests for blocked models logistic reg * update verified tests --- R/commonglm.R | 145 ++++---- R/regressionlinear.R | 2 +- R/regressionlogistic.R | 317 +++++------------- inst/qml/RegressionLinear.qml | 1 + inst/qml/RegressionLogistic.qml | 64 ++-- inst/qml/common/OutlierComponent.qml | 6 +- tests/testthat/helper-regression.R | 20 ++ tests/testthat/test-regressionlogistic.R | 250 +++++++------- .../test-regressionlogisticVerification.R | 36 +- 9 files changed, 344 insertions(+), 497 deletions(-) create mode 100644 tests/testthat/helper-regression.R diff --git a/R/commonglm.R b/R/commonglm.R index fb4a7321..ec2dd4b1 100644 --- a/R/commonglm.R +++ b/R/commonglm.R @@ -1,20 +1,33 @@ # Compute Model -.reglogisticComputeModel <- function(jaspResults, dataset, options) { - if(!is.null(jaspResults[["glmRes"]])) - return(jaspResults[["glmRes"]]$object) - type <- "binomial" - if (type == "binomial") { +.reglogisticComputeModel <- function(jaspResults, dataset, options, ready) { + if(!is.null(jaspResults[["glmRes"]]) || isFALSE(ready)) + return() + # Logistic regression - ff <- .createLogisticRegFormula(options) - nf <- .createNullFormula(options) - # calculate null and full models - nullMod <- glm(nf, family = "binomial", data = dataset) - fullMod <- glm(ff, family = "binomial", data = dataset) + modelTerms <- options[["modelTerms"]] + dependent <- options[["dependent"]] + interceptTerm <- options[["interceptTerm"]] + if (options[["method"]] == "enter") { + + glmRes <- list() + for (modelIndex in seq_along(modelTerms)) { + + model <- glm(.createLogisticRegFormula(modelTerms[[modelIndex]], dependent, interceptTerm), + family = "binomial", dataset) + glmRes[[modelIndex]] <- model + + } + + } else if (options[["method"]] != "enter") { + nullMod <- glm(.createLogisticRegFormula(modelTerms[[1]], dependent, interceptTerm), + family = "binomial", dataset) + fullMod <- glm(.createLogisticRegFormula(modelTerms[[length(modelTerms)]], dependent, interceptTerm), + family = "binomial", dataset) glmRes <- .glmStep(nullMod, fullMod, dataset, method = options$method) - } else - .quitAnalysis("GLM type not supported") + } + jaspResults[["glmRes"]] <- createJaspState(glmRes) - jaspResults[["glmRes"]]$dependOn(optionsFromObject = jaspResults[["modelSummary"]]) + jaspResults[["glmRes"]]$dependOn( c("dependent", "method", "modelTerms", "interceptTerm")) return(glmRes) } @@ -64,34 +77,33 @@ return(f) } -.createLogisticRegFormula <- function(options) { +.createLogisticRegFormula <- function(modelOptions, dependent, interceptTerm) { # this function outputs a formula name with base64 values as varnames f <- NULL - dependent <- options$dependent if (dependent == "") f <- 0~1 # mock formula, always works - modelTerms <- options$modelTerms - interceptTerm <- options$interceptTerm + modelTerms <- modelOptions$components + interceptTerm <- interceptTerm if (length(modelTerms) == 0) { if (interceptTerm) - f <- formula(paste(.v(dependent), "~ 1")) + f <- formula(paste(dependent, "~ 1")) else - f <- formula(paste(.v(dependent), "~ 0")) + f <- formula(paste(dependent, "~ 0")) } else { if (interceptTerm) t <- character(0) else t <- "0" for (i in seq_along(modelTerms)) { - term <- modelTerms[[i]][["components"]] + term <- modelTerms[[i]] if (length(term) == 1) - t <- c(t, .v(term)) + t <- c(t, term) else - t <- c(t, paste(.v(unlist(term)), collapse = ":")) + t <- c(t, paste(unlist(term), collapse = ":")) } - f <- formula(paste(.v(dependent), "~", paste(t, collapse = "+"))) + f <- formula(paste(dependent, "~", paste(t, collapse = "+"))) } return(f) } @@ -114,7 +126,7 @@ if (length(term) == 1) t <- c(t, .v(term)) else - t <- c(t, paste(.v(unlist(term)), collapse = ":")) + t <- c(t, paste(unlist(term), collapse = ":")) } } if (!interceptTerm) @@ -122,7 +134,7 @@ else t <- c(t, "1") - return(formula(paste(.v(dependent), "~", paste(t, collapse = "+")))) + return(formula(paste(dependent, "~", paste(t, collapse = "+")))) } .glmStep <- function(nullModel, fullModel, dataset, method = "enter") { @@ -374,7 +386,9 @@ if (isFALSE(options[["residualsSavedToData"]])) return() - if (is.null(container[["residualsSavedToDataColumn"]]) && options[["residualsSavedToDataColumn"]] != "") { + if (is.null(container[["residualsSavedToDataColumn"]]) && + isFALSE(is.null(options[["residualsSavedToDataColumn"]])) && + options[["residualsSavedToDataColumn"]] != "") { residuals <- model[["residuals"]] # extract residuals @@ -737,8 +751,8 @@ # Table: Influential cases -.glmInfluenceTable <- function(jaspResults, model, dataset, options, ready, position, linRegAnalysis = FALSE) { - +.glmInfluenceTable <- function(jaspResults, model, dataset, options, ready, position, logisticRegression = FALSE) { + tableOptionsOn <- c(options[["dfbetas"]], options[["dffits"]], options[["covarianceRatio"]], @@ -756,16 +770,16 @@ tableOptionsClicked <- c("cooksDistance", tableOptionsClicked) if (is.null(jaspResults[["influenceTable"]])) { - influenceTable <- createJaspTable(gettext("Table: Influential Cases")) - influenceTable$dependOn(optionsFromObject = jaspResults[["modelSummary"]], - options = tableOptions) - influenceTable$dependOn(c("residualCasewiseDiagnostic", "residualCasewiseDiagnosticType", - "residualCasewiseDiagnosticZThreshold", "residualCasewiseDiagnosticCooksDistanceThreshold")) + influenceTable <- createJaspTable(gettext("Influential Cases")) + influenceTable$dependOn(c(tableOptions, "dependent", "method", "modelTerms", "interceptTerm", + "residualCasewiseDiagnostic", "residualCasewiseDiagnosticType", + "residualCasewiseDiagnosticZThreshold", + "residualCasewiseDiagnosticCooksDistanceThreshold")) influenceTable$position <- position influenceTable$showSpecifiedColumnsOnly <- TRUE jaspResults[["influenceTable"]] <- influenceTable } - + tableOptionToColName <- function(x) { switch(x, "dfbetas" = "DFBETAS", @@ -783,13 +797,15 @@ } } else { - colNameList <- c() + + depType <- if (isFALSE(logisticRegression)) "number" else "string" influenceTable$addColumnInfo(name = "caseN", title = "Case Number", type = "integer") influenceTable$addColumnInfo(name = "stdResidual", title = gettext("Std. Residual"), type = "number", format = "dp:3") - influenceTable$addColumnInfo(name = "dependent", title = options$dependent, type = "number") + influenceTable$addColumnInfo(name = "dependent", title = options$dependent, type = depType) influenceTable$addColumnInfo(name = "predicted", title = gettext("Predicted Value"), type = "number") influenceTable$addColumnInfo(name = "residual", title = gettext("Residual"), type = "number", format = "dp:3") + colNameList <- c() alwaysPresent <- c("caseN", "stdResidual", "dependent", "predicted", "residual") for (option in tableOptionsClicked) { if (option == "dfbetas") { @@ -809,6 +825,7 @@ influenceTable$addColumnInfo(name = option, title = gettext(colTitle), type = "number") } } + .glmInfluenceTableFill(influenceTable, dataset, options, ready, model = model, influenceMeasures = tableOptionsClicked, @@ -817,7 +834,7 @@ } .glmInfluenceTableFill <- function(influenceTable, dataset, options, ready, model, influenceMeasures, colNames) { - + influenceRes <- influence.measures(model) nDFBETAS <- length(names(model$coefficients)) @@ -845,7 +862,7 @@ influenceResData[["dependent"]] <- model.frame(model)[[options$dependent]] influenceResData[["predicted"]] <- model$fitted.values influenceResData[["residual"]] <- model$residual - # browser() + modelMatrix <- as.data.frame(model.matrix(model)) modelMatrix <- modelMatrix[colnames(modelMatrix) != "(Intercept)"] influenceResData[["mahalanobis"]] <- mahalanobis(modelMatrix, center = colMeans(modelMatrix), cov = cov(modelMatrix)) @@ -856,7 +873,7 @@ index <- which(abs(influenceResData[["stdResidual"]]) > options$residualCasewiseDiagnosticZThreshold) else # all index <- seq.int(nrow(influenceResData)) - + # funky statement to ensure a df even if only 1 row influenceResSig <- subset(influenceRes[["is.inf"]], 1:nrow(influenceResData) %in% index, select = colInd) colnames(influenceResSig) <- colNames[1:length(colInd)] @@ -887,56 +904,6 @@ } } -.casewiseDiagnosticsLogisticRegression <- function(dataset, model, options) { - last <- length(model) - - # Values for all cases - dependentAll <- dataset[[.v(options$dependent)]] - dependentAllNumeric <- rep(0, nrow(dataset)) - dependentAllNumeric[dependentAll == levels(dataset[[.v(options$dependent)]])[2]] <- 1 - predictedAll <- predict(model[[last]], dataset, type = "response") - predictedGroupAll <- rep(levels(dataset[[.v(options$dependent)]])[1], nrow(dataset)) - predictedGroupAll[predictedAll >= 0.5] <- levels(dataset[[.v(options$dependent)]])[2] - residualAll <- resid(model[[last]], type = "response") - residualZAll <- resid(model[[last]], type = "pearson") - cooksDAll <- cooks.distance(model[[last]]) - - # These will be the variables for the return object - dependent <- NA - predicted <- NA - predictedGroup <- NA - residual <- NA - residualZ <- NA - cooksD <- NA - - - if (options$residualCasewiseDiagnosticType == "residualZ") - index <- which(abs(residualZAll) > options$residualCasewiseDiagnosticZThreshold) - else if (options$residualCasewiseDiagnosticType == "cooksDistance") - index <- which(abs(cooksDAll) > options$residualCasewiseDiagnosticCooksDistanceThreshold) - else - index <- seq_along(cooksDAll) - - if (length(index) == 0) - index <- NA - else { - dependent <- dependentAll[index] - predicted <- predictedAll[index] - predictedGroup <- predictedGroupAll[index] - residual <- residualAll[index] - residualZ <- residualZAll[index] - cooksD <- cooksDAll[index] - } - casewiseDiag <- list(index = unname(index), - dependent = as.character(dependent), - predicted = unname(predicted), - predictedGroup = as.character(predictedGroup), - residual = unname(residual), - residualZ = unname(residualZ), - cooksD = unname(cooksD)) - return(casewiseDiag) -} - .reglogisticVovkSellke <- function(table, options) { table$addColumnInfo(name = "vsmpr", title = gettextf("VS-MPR%s", "\u002A"), type = "number") message <- gettextf("Vovk-Sellke Maximum p-Ratio: Based on the p-value, diff --git a/R/regressionlinear.R b/R/regressionlinear.R index 9aeffa6b..62c89cf9 100755 --- a/R/regressionlinear.R +++ b/R/regressionlinear.R @@ -52,7 +52,7 @@ RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) { finalModel <- model[[length(model)]] if (options$residualCasewiseDiagnostic && is.null(modelContainer[["influenceTable"]])) - .glmInfluenceTable(modelContainer, finalModel$fit, dataset, options, ready = ready, position = 9, linRegAnalysis = TRUE) + .glmInfluenceTable(modelContainer, finalModel$fit, dataset, options, ready = ready, position = 9) .regressionExportResiduals(modelContainer, finalModel$fit, dataset, options, ready = ready) diff --git a/R/regressionlogistic.R b/R/regressionlogistic.R index d76c2264..f1aae796 100644 --- a/R/regressionlogistic.R +++ b/R/regressionlogistic.R @@ -22,12 +22,19 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... .reglogisticCheckErrors(dataset, options) } # Output tables + model <- .reglogisticComputeModel( jaspResults, dataset, options, ready) .reglogisticModelSummaryTable( jaspResults, dataset, options, ready) .reglogisticEstimatesTable( jaspResults, dataset, options, ready) .reglogisticEstimatesTableBootstrap( jaspResults, dataset, options, ready) .reglogisticMulticolliTable( jaspResults, dataset, options, ready) - .reglogisticFactorDescriptivesTable( jaspResults, dataset, options) - .reglogisticCasewiseDiagnosticsTable(jaspResults, dataset, options, ready) + .reglogisticFactorDescriptivesTable( jaspResults, dataset, options, ready) + + if (options$residualCasewiseDiagnostic && is.null(jaspResults[["influenceTable"]])) { + finalModel <- model[[length(options[["modelTerms"]])]] + .glmInfluenceTable(jaspResults, finalModel, dataset, options, ready, position = 5, logisticRegression = TRUE) + .regressionExportResiduals(jaspResults, finalModel, dataset, options, ready = ready) + } + .reglogisticConfusionMatrixTable( jaspResults, dataset, options, ready) .reglogisticPerformanceMetricsTable( jaspResults, dataset, options, ready) @@ -100,18 +107,12 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... modelSummary$position <- 1 modelSummary$showSpecifiedColumnsOnly <- TRUE - if(options$method == "enter") - chiName <- "\u03A7\u00B2" - else - chiName <- "\u0394\u03A7\u00B2" - - modelSummary$addColumnInfo(name = "mod", title = gettext("Model"), type = "string") modelSummary$addColumnInfo(name = "dev", title = gettext("Deviance"), type = "number") modelSummary$addColumnInfo(name = "aic", title = gettext("AIC"), type = "number", format="dp:3") modelSummary$addColumnInfo(name = "bic", title = gettext("BIC"), type = "number", format="dp:3") modelSummary$addColumnInfo(name = "dof", title = gettext("df"), type = "integer") - modelSummary$addColumnInfo(name = "chi", title = chiName, type = "number", format="dp:3") + modelSummary$addColumnInfo(name = "chi", title = "\u0394\u03A7\u00B2",type = "number", format="dp:3") modelSummary$addColumnInfo(name = "pvl", title = gettext("p"), type = "pvalue") modelSummary$addColumnInfo(name = "fad", title = gettextf("McFadden R%s","\u00B2"), type = "number") modelSummary$addColumnInfo(name = "nag", title = gettextf("Nagelkerke R%s","\u00B2"), type = "number") @@ -140,11 +141,9 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... tmp <- .reglogisticEstimatesInfo(options) ciTitle <- tmp[["ciTitle"]] seTitle <- tmp[["seTitle"]] - multimod <- tmp[["multimod"]] paramtitle <- tmp[["paramtitle"]] - if(options$method != "enter") - estimatesTable$addColumnInfo(name = "model", title = gettext("Model"), type = "string", combine = TRUE) + estimatesTable$addColumnInfo(name = "model", title = gettext("Model"), type = "string", combine = TRUE) estimatesTable$addColumnInfo(name = "param", title = paramtitle, type = "string") estimatesTable$addColumnInfo(name = "est", title = gettext("Estimate"), type = "number", format="dp:3") estimatesTable$addColumnInfo(name = "se", title = seTitle, type = "number", format="dp:3") @@ -171,7 +170,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... jaspResults[["estimatesTable"]] <- estimatesTable if(!ready) return() - res <- try(.reglogisticEstimatesFill(jaspResults, dataset, options, multimod)) + res <- try(.reglogisticEstimatesFill(jaspResults, dataset, options)) .reglogisticSetError(res, estimatesTable) } @@ -194,7 +193,6 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... tmp <- .reglogisticEstimatesInfo(options, addBCA = TRUE) ciTitle <- tmp[["ciTitle"]] seTitle <- tmp[["seTitle"]] - multimod <- tmp[["multimod"]] paramtitle <- tmp[["paramtitle"]] if(options$method != "enter") @@ -218,7 +216,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... jaspResults[["estimatesTableBootstrapping"]] <- estimatesTableBootstrap if(!ready) return() - res <- try(.reglogisticEstimatesBootstrapFill(jaspResults, dataset, options, multimod)) + res <- try(.reglogisticEstimatesBootstrapFill(jaspResults, dataset, options)) .reglogisticSetError(res, estimatesTableBootstrap) @@ -241,7 +239,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... multicolliTable$addColumnInfo(name = "var", title = "", type = "string") if (ready) { - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] } else { glmObj <- NULL } @@ -256,38 +254,9 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... .reglogisticSetError(res, multicolliTable) } -.reglogisticCasewiseDiagnosticsTable <- function(jaspResults, dataset, options, ready){ - if(!options$residualCasewiseDiagnostic || !is.null(jaspResults[["casewiseDiagnosticsTable"]])) - return() - - casDiag <- createJaspTable(gettext("Casewise Diagnostics")) - casDiag$dependOn(optionsFromObject = jaspResults[["modelSummary"]], - options = c("residualCasewiseDiagnostic", - "residualCasewiseDiagnosticZThreshold", - "residualCasewiseDiagnosticCooksDistanceThreshold", - "residualCasewiseDiagnosticType")) - casDiag$position <- 5 - casDiag$showSpecifiedColumnsOnly <- TRUE - - casDiag$addColumnInfo(name = "caseNumber", title = gettext("Case Number"), type = "integer") - casDiag$addColumnInfo(name = "observed", title = gettext("Observed"), type = "string") - casDiag$addColumnInfo(name = "predicted", title = gettext("Predicted"), type = "number") - casDiag$addColumnInfo(name = "predGroup", title = gettext("Predicted Group"), type = "string") - casDiag$addColumnInfo(name = "residual", title = gettext("Residual"), type = "number") - casDiag$addColumnInfo(name = "residualZ", title = gettext("Studentized Residual"), type = "number") - casDiag$addColumnInfo(name = "cooksD", title = gettext("Cook's Distance"), type = "number") - - jaspResults[["casewiseDiagnosticsTable"]] <- casDiag - - if(!ready) return() - res <- try(.reglogisticCasewiseDiagnosticsFill(jaspResults, dataset, options)) - - .reglogisticSetError(res, casDiag) -} - -.reglogisticFactorDescriptivesTable <- function(jaspResults, dataset, options){ +.reglogisticFactorDescriptivesTable <- function(jaspResults, dataset, options, ready){ if(!options$descriptives || - !is.null(jaspResults[["factorDescriptives"]])) + !is.null(jaspResults[["factorDescriptives"]]) || isFALSE(ready)) return() if(is.null(dataset)) dataset <- .reglogisticReadData(dataset, options) @@ -329,7 +298,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... confusionMatrix$addColumnInfo(name = "obs", title = gettext("Observed"), type = "string") if (ready) { # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] #modelTerms <- c() #for(i in seq_along(options$modelTerms)) # modelTerms <- c(modelTerms, options$modelTerms[[i]]$components) @@ -379,56 +348,19 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... .reglogisticModelSummaryFill <- function(jaspResults, dataset, options, ready) { if(ready) { # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) - hasNuisance <- .hasNuisance(options) - if (hasNuisance) { - terms <- rownames(summary(glmObj[[1]])[["coefficients"]]) - terms <- sapply(terms[terms!="(Intercept)"], .formatTerm, - glmModel=glmObj[[1]]) - message <- gettextf("Null model contains nuisance parameters: %s", - paste(terms, collapse = ", ")) - jaspResults[["modelSummary"]]$addFootnote(message) - } - if (options$method == "enter") { - # Two rows: h0 and h1 - lr <- .lrtest(glmObj[[1]], glmObj[[2]]) - - rows <- list( - list(mod = "H\u2080", - dev = glmObj[[1]][["deviance"]], - aic = glmObj[[1]][["aic"]], - bic = .bic(glmObj[[1]]), - dof = glmObj[[1]][["df.residual"]], - chi = "", - pvl = "", - fad = "", - nag = "", - tju = "", - cas = ""), - list(mod = "H\u2081", - dev = glmObj[[2]][["deviance"]], - aic = glmObj[[2]][["aic"]], - bic = .bic(glmObj[[2]]), - dof = glmObj[[2]][["df.residual"]], - chi = lr[["stat"]], - pvl = lr[["pval"]], - fad = .mcFadden( glmObj[[2]], glmObj[[1]]), - nag = .nagelkerke(glmObj[[2]], glmObj[[1]]), - tju = .tjur(glmObj[[2]]), - cas = .coxSnell(glmObj[[2]], glmObj[[1]]) - ) - ) - + glmObj <- jaspResults[["glmRes"]][["object"]] - } else { # multiple rows: m1 - mk rows <- vector("list", length(glmObj)) - for (midx in 1:length(glmObj)) { + for (midx in seq_along(glmObj)) { + .linregAddPredictorsInModelFootnote(jaspResults[["modelSummary"]], + options[["modelTerms"]][[midx]][["components"]], midx) mObj <- glmObj[[midx]] if (midx > 1) { if (options$method == "forward" || - options$method == "stepwise") { + options$method == "stepwise" || + options$method == "enter") { fadden <- .mcFadden(mObj, glmObj[[1]]) nagel <- .nagelkerke(mObj, glmObj[[1]]) coxSn <- .coxSnell(mObj, glmObj[[1]]) @@ -438,9 +370,9 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... coxSn <- -1*.coxSnell(glmObj[[1]], mObj) } - lr <- .lrtest(glmObj[[midx]], glmObj[[midx-1]]) + lr <- .lrtest(glmObj[[midx-1]], mObj) rows[[midx]] <- list( - mod = as.character(midx), + mod = gettextf("M%s", intToUtf8(0x2080 + midx - 1, multiple = FALSE)), dev = mObj[["deviance"]], aic = mObj[["aic"]], bic = .bic(mObj), @@ -454,7 +386,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... ) } else { rows[[midx]] <- list( - mod = as.character(midx), + mod = gettextf("M%s", intToUtf8(0x2080, multiple = FALSE)), dev = mObj[["deviance"]], aic = mObj[["aic"]], bic = .bic(mObj), @@ -467,7 +399,6 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... cas = .coxSnell(mObj, mObj) ) } - } } } else { @@ -481,16 +412,18 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... jaspResults[["modelSummary"]]$addRows(rows) } -.reglogisticEstimatesFill <- function(jaspResults, dataset, options, multimod) { +.reglogisticEstimatesFill <- function(jaspResults, dataset, options) { # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] alpha <- qnorm(1 - (1 - options$coefficientCiLevel) / 2) - if (!multimod) { - s <- summary(glmObj[[2]])[["coefficients"]] - rn <- rownames(s) + + for (midx in seq_along(glmObj)) { + mObj <- glmObj[[midx]] + s <- summary(mObj)[["coefficients"]] + rn <- rownames(s) rn[which(rn == "(Intercept)")] <- gettext("(Intercept)") - beta <- .stdEst(glmObj[[2]], type = "X") # stand. X continuous vars + beta <- .stdEst(mObj, type = "X") # stand. X continuous vars # Confidence intervals on the odds ratio scale if (options$coefficientCiAsOddsRatio) @@ -498,119 +431,57 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... else expon <- function(x) x + if (length(rn) == 1) { s <- unname(s) if (options$robustSe) { - s[2] <- unname(.glmRobustSE(glmObj[[2]])) # new se + s[2] <- unname(.glmRobustSE(mObj)) # new se s[3] <- s[1]/s[2] # new z s[4] <- 2*pnorm(-abs(s[3])) # new p } - waldtest <- .waldTest(glmObj[[2]], term = 1) - jaspResults[["estimatesTable"]]$addRows( - list( param = .formatTerm(rn, glmObj[[2]]), - est = s[1], - se = s[2], - std = as.numeric(beta), - or = exp(s[1]), - zval = s[3], - pval = s[4], - waldsta = as.numeric(waldtest), - walddf = as.numeric(1), - vsmpr = VovkSellkeMPR(s[4]), - cilo = expon(s[1] - alpha * s[2]), - ciup = expon(s[1] + alpha * s[2]))) - + waldtest <- .waldTest(mObj, term = 1) + + jaspResults[["estimatesTable"]]$addRows(list( + model = gettextf("M%s", intToUtf8(0x2080 + midx - 1, multiple = FALSE)), + param = .formatTerm(rn, mObj), + est = s[1], + se = s[2], + std = as.numeric(beta), + or = exp(s[1]), + zval = s[3], + pval = s[4], + waldsta = as.numeric(waldtest), + walddf = as.numeric(1), + vsmpr = VovkSellkeMPR(s[4]), + cilo = expon(s[1] - alpha * s[2]), + ciup = expon(s[1] + alpha * s[2]), + .isNewGroup = TRUE + )) } else { if (options$robustSe) { - s[,2] <- unname(.glmRobustSE(glmObj[[2]])) # new se + s[,2] <- unname(.glmRobustSE(mObj)) # new se s[,3] <- s[,1]/s[,2] # new z s[,4] <- 2*pnorm(-abs(s[,3])) # new p } for (i in seq_along(rn)) { - waldtest <- .waldTest(glmObj[[2]], term = i) - + waldtest <- .waldTest(mObj, term = i) jaspResults[["estimatesTable"]]$addRows(list( - param = .formatTerm(rn[i], glmObj[[2]]), - est = s[i,1], - se = s[i,2], - std = as.numeric(beta[i]), - or = exp(s[i,1]), - zval = s[i,3], - pval = s[i,4], - waldsta = as.numeric(waldtest), - walddf = as.numeric(1), - vsmpr = VovkSellkeMPR(s[i,4]), - cilo = expon(s[i,1] - alpha * s[i,2]), - ciup = expon(s[i,1] + alpha * s[i,2]))) - } - } - } else { - for (midx in 1:length(glmObj)) { - mObj <- glmObj[[midx]] - s <- summary(mObj)[["coefficients"]] - rn <- rownames(s) - rn[which(rn == "(Intercept)")] <- gettext("(Intercept)") - beta <- .stdEst(mObj, type = "X") # stand. X continuous vars - - # Confidence intervals on the odds ratio scale - if (options$coefficientCiAsOddsRatio) - expon <- function(x) exp(x) - else - expon <- function(x) x - - - if (length(rn) == 1) { - s <- unname(s) - if (options$robustSe) { - s[2] <- unname(.glmRobustSE(mObj)) # new se - s[3] <- s[1]/s[2] # new z - s[4] <- 2*pnorm(-abs(s[3])) # new p - } - waldtest <- .waldTest(mObj, term = 1) - - jaspResults[["estimatesTable"]]$addRows(list( - model = as.character(midx), - param = .formatTerm(rn, mObj), - est = s[1], - se = s[2], - std = as.numeric(beta), - or = exp(s[1]), - zval = s[3], - pval = s[4], + model = gettextf("M%s", intToUtf8(0x2080 + midx - 1, multiple = FALSE)), + param = .formatTerm(rn[i], mObj), + est = s[i,1], + se = s[i,2], + std = as.numeric(beta[i]), + or = exp(s[i,1]), + zval = s[i,3], + pval = s[i,4], waldsta = as.numeric(waldtest), walddf = as.numeric(1), - vsmpr = VovkSellkeMPR(s[4]), - cilo = expon(s[1] - alpha * s[2]), - ciup = expon(s[1] + alpha * s[2]), - .isNewGroup = TRUE + vsmpr = VovkSellkeMPR(s[i,4]), + cilo = expon(s[i,1] - alpha * s[i,2]), + ciup = expon(s[i,1] + alpha * s[i,2]), + .isNewGroup = (i == 1) )) - } else { - if (options$robustSe) { - s[,2] <- unname(.glmRobustSE(mObj)) # new se - s[,3] <- s[,1]/s[,2] # new z - s[,4] <- 2*pnorm(-abs(s[,3])) # new p - } - for (i in seq_along(rn)) { - - waldtest <- .waldTest(mObj, term = i) - jaspResults[["estimatesTable"]]$addRows(list( - model = as.character(midx), - param = .formatTerm(rn[i], mObj), - est = s[i,1], - se = s[i,2], - std = as.numeric(beta[i]), - or = exp(s[i,1]), - zval = s[i,3], - pval = s[i,4], - waldsta = as.numeric(waldtest), - walddf = as.numeric(1), - vsmpr = VovkSellkeMPR(s[i,4]), - cilo = expon(s[i,1] - alpha * s[i,2]), - ciup = expon(s[i,1] + alpha * s[i,2]), - .isNewGroup = (i == 1) - )) - } } } } @@ -621,9 +492,9 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... jaspResults[["estimatesTable"]]$addFootnote(gettextf("%1$s level '%2$s' coded as class 1.", .unv(predVar), predLevel)) } -.reglogisticEstimatesBootstrapFill <- function(jaspResults, dataset, options, multimod){ +.reglogisticEstimatesBootstrapFill <- function(jaspResults, dataset, options){ # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] ci.fails <- FALSE if (is.null(jaspResults[["bootstrapResults"]])) { @@ -636,10 +507,10 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... expon <- if (options$coefficientCiAsOddsRatio) function(x) exp(x) else identity startProgressbar(options$coefficientBootstrapSamples * - if (multimod) length(glmObj) else 1) + length(glmObj)) for (i in 1:length(glmObj)) { - if (!multimod && i != 2) + if (i != 2) next rn <- rownames(summary(glmObj[[i]])[["coefficients"]]) @@ -754,32 +625,6 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... } } -.reglogisticCasewiseDiagnosticsFill <- function(jaspResults, dataset, options){ - # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) - if (is.null(glmObj)) - return() - else { - casewiseDiag <- .casewiseDiagnosticsLogisticRegression(dataset, glmObj, options) - caseNumbers <- casewiseDiag$index - if (length(caseNumbers) == 1L && is.na(caseNumbers)) # .casewiseDiagnosticsLogisticRegression returns NA if there are no cases. - return() - else { - for (case in seq_along(caseNumbers)) - jaspResults[["casewiseDiagnosticsTable"]]$addRows( - list(caseNumber = caseNumbers[case], - observed = casewiseDiag$dependent[case], - predicted = casewiseDiag$predicted[case], - predGroup = casewiseDiag$predictedGroup[case], - residual = casewiseDiag$residual[case], - residualZ = casewiseDiag$residualZ[case], - cooksD = casewiseDiag$cooksD[case] - ) - ) - } - } -} - .reglogisticFactorDescriptivesFill <- function(jaspResults, dataset, options){ if (length(options$factors) > 0) { @@ -878,7 +723,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... .reglogisticPerformanceMetricsFill <- function(jaspResults, container, dataset, options, ready){ if(ready) { # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] mObj <- glmObj[[length(glmObj)]] m <- .confusionMatrix(mObj, cutoff = 0.5)[["matrix"]] n = sum(m) @@ -938,7 +783,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... return() } # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] mObj <- glmObj[[length(glmObj)]] predictors <- character(0) @@ -977,7 +822,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... if(ready){ # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] mObj <- glmObj[[length(glmObj)]] p <- try(.reglogisticResidPlotFill(options, mObj)) if(isTryError(p)) @@ -1001,7 +846,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... return() } # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] mObj <- glmObj[[length(glmObj)]] predictors <- character(0) @@ -1038,7 +883,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... if(ready){ # Compute/Get Model - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] mObj <- glmObj[[length(glmObj)]] p <- try(.reglogisticSquaredPearsonResidualsFill(mObj)) if(isTryError(p)) @@ -1064,7 +909,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... rocPlot$dependOn(c("rocPlot", "rocPlotCutoffStep")) if (ready) { - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] mObj <- glmObj[[length(glmObj)]] cutoffInt <- seq(0, 1, by = options$rocPlotCutoffStep) @@ -1090,7 +935,7 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... prPlot$dependOn(c("precisionRecallPlot", "precisionRecallPlotCutoffStep")) if (ready) { - glmObj <- .reglogisticComputeModel(jaspResults, dataset, options) + glmObj <- jaspResults[["glmRes"]][["object"]] mObj <- glmObj[[length(glmObj)]] cutoffInt <- seq(0, 1, by = options$precisionRecallPlotCutoffStep) @@ -1374,13 +1219,11 @@ RegressionLogisticInternal <- function(jaspResults, dataset = NULL, options, ... seTitle <- gettextf("Robust
%s", seTitle) if (options$method == "enter") { - multimod <- FALSE paramtitle <- "" } else { - multimod <- TRUE paramtitle <- gettext("Parameter") } - return(list(ciTitle = ciTitle, seTitle = seTitle, multimod = multimod, paramtitle = paramtitle)) + return(list(ciTitle = ciTitle, seTitle = seTitle, paramtitle = paramtitle)) } # wrapper for wald test diff --git a/inst/qml/RegressionLinear.qml b/inst/qml/RegressionLinear.qml index 39630e2a..a5e6c67a 100644 --- a/inst/qml/RegressionLinear.qml +++ b/inst/qml/RegressionLinear.qml @@ -79,6 +79,7 @@ Form label: "Nested" name: "nested" checked: true + visible: false } CheckBox { name: "interceptTerm"; label: qsTr("Include intercept"); checked: true } diff --git a/inst/qml/RegressionLogistic.qml b/inst/qml/RegressionLogistic.qml index 22be9a43..67a5be46 100644 --- a/inst/qml/RegressionLogistic.qml +++ b/inst/qml/RegressionLogistic.qml @@ -18,6 +18,7 @@ import QtQuick import JASP import JASP.Controls +import "./common" as Common Form { @@ -54,23 +55,34 @@ Form Section { title: qsTr("Model") - - VariablesForm + + FactorsForm { - preferredHeight: jaspTheme.smallDefaultVariablesFormHeight - - AvailableVariablesList - { - name: "availableTerms" - title: qsTr("Components") - source: ['covariates', 'factors'] - width: parent.width / 4 - } - ModelTermsList { width: parent.width * 5 / 9 } + id: factors + name: "modelTerms" + allowAll: true + nested: nested.checked + allowInteraction: true + initNumberFactors: 2 + baseName: "model" + baseTitle: "Model" + availableVariablesList.source: ['covariates', 'factors'] + startIndex: 0 + availableVariablesListName: "availableTerms" } - + + CheckBox + { + id: nested + label: "Nested" + name: "nested" + checked: true + visible: false + } + CheckBox { name: "interceptTerm"; label: qsTr("Include intercept"); checked: true } } + Section { @@ -138,31 +150,9 @@ Form CheckBox { name: "brierScore"; label: qsTr("Brier score") } CheckBox { name: "hMeasure"; label: qsTr("H-measure") } } + + Common.OutlierComponent { id: outlierComponent} - Group - { title: qsTr("Residuals") - CheckBox - { - name: "residualCasewiseDiagnostic"; label: qsTr("Casewise diagnostics") - RadioButtonGroup - { - name: "residualCasewiseDiagnosticType" - RadioButton - { - value: "residualZ"; label: qsTr("Standard residual >"); checked: true - childrenOnSameRow: true - DoubleField { name: "residualCasewiseDiagnosticZThreshold"; defaultValue: 3 } - } - RadioButton - { - value: "cooksDistance"; label: qsTr("Cook's distance >") - childrenOnSameRow: true - DoubleField { name: "residualCasewiseDiagnosticCooksDistanceThreshold"; defaultValue: 1 } - } - RadioButton { value: "allCases"; label: qsTr("All") } - } - } - } } diff --git a/inst/qml/common/OutlierComponent.qml b/inst/qml/common/OutlierComponent.qml index 2ec819f6..676cc054 100644 --- a/inst/qml/common/OutlierComponent.qml +++ b/inst/qml/common/OutlierComponent.qml @@ -30,11 +30,14 @@ Group name: "residualStatistic" label: qsTr("Statistics") visible: analysis === Type.Analysis.LinearRegression + value: false } CheckBox { - name: "residualCasewiseDiagnostic"; label: qsTr("Casewise diagnostics") + name: "residualCasewiseDiagnostic" + label: qsTr("Casewise diagnostics") + value: false columns: 2 RadioButtonGroup { @@ -71,6 +74,7 @@ Group id: residualsSavedToData name: "residualsSavedToData" text: qsTr("Append residuals to data") + value: false ComputedColumnField { diff --git a/tests/testthat/helper-regression.R b/tests/testthat/helper-regression.R new file mode 100644 index 00000000..648553bf --- /dev/null +++ b/tests/testthat/helper-regression.R @@ -0,0 +1,20 @@ +initClassicalRegressionOptions <- function(analysis = c("RegressionLinear", "RegressionLogistic")) { + analysis <- match.arg(analysis) + options <- c( + jaspTools::analysisOptions(analysis), + classicalRegressionCommonOptions() + ) + + return(options) +} + +classicalRegressionCommonOptions <- function() { + path <- testthat::test_path("..", "..", "inst", "qml", "common") + files <- list.files(path, full.names = TRUE) + options <- lapply(files, jaspTools:::readQML) |> + lapply(function(x) {x$plotWidth <- NULL; x$plotHeight <- NULL; return(x)}) |> + (function(x) { do.call(c, x)})() + + return(options) +} + diff --git a/tests/testthat/test-regressionlogistic.R b/tests/testthat/test-regressionlogistic.R index 12ec64e0..23589791 100644 --- a/tests/testthat/test-regressionlogistic.R +++ b/tests/testthat/test-regressionlogistic.R @@ -4,16 +4,18 @@ context("Logistic Regression") # Chapter 10 test_that("Fields Book - Chapter 10 results match", { - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$dependent <- "delivered" options$factors <- c("treat") options$modelTerms <- list( - list(components="treat", isNuisance=FALSE) + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("treat"), name="model1", title = "Model 1") ) options$descriptives <- TRUE options$oddsRatio <- TRUE options$coefficientCi <- TRUE options$coefficientCiAsOddsRatio <- TRUE + results <- jaspTools::runAnalysis("RegressionLogistic", dataset = "santas_log.csv", options) output1 <- results[["results"]][["factorDescriptives"]][["data"]] jaspTools::expect_equal_tables(output1, @@ -22,98 +24,129 @@ test_that("Fields Book - Chapter 10 results match", { ) output2 <- results[["results"]][["modelSummary"]][["data"]] jaspTools::expect_equal_tables(output2, - list("H", 529.250590526386, 531.250590526386, 535.242055073494, - 399, "", "", "", "", "", "", - "H", 460.494488544053, - 464.494488544053, 472.477417638269, 398, 68.7561019823323, 1.11022302462516e-16, - 0.129912187559297, 0.215249724800182, 0.162128420464791, 0.157928418719378) + list(531.250590526391, 535.242055073499, 529.250590526391, 399, 0, + "M", 0, 464.494488544052, 472.477417638268, 0.157928418719392, + 68.7561019823392, 460.494488544052, 398, 0.129912187559308, + "M", 0.215249724800201, 1.11022302462516e-16, 0.162128420464791 + ) ) output3 <- results[["results"]][["estimatesTable"]][["data"]] jaspTools::expect_equal_tables(output3, - list("(Intercept)", 1.678431, 0.2058631, - 5.357143, 8.15314, 3.54595e-16, 66.47369, 1, - 3.57851, 8.019813, - "treat (1)", -1.877282, 0.2461223, - 0.1530055, -7.627436, 2.394692e-14, 58.17778, 1, - 0.09445116, 0.2478601) + list("TRUE", 1.36124731046877, 2.04061213301582, 0.510825623765981, + "M", 1.66666666666665, "(Intercept)", 7.57353027162946e-07, + 0.103279553032665, 1, 24.4633905355499, 4.94604797141616, "TRUE", + 3.57850992169288, 8.01981276525923, 1.67843078320261, "M", + 5.35714285329404, "(Intercept)", 3.54594990009727e-16, 0.205863115653286, + 1, 66.4736903106761, 8.15313990501056, "FALSE", 0.0944511613464108, + 0.247860077995534, -1.87728164144707, "M", 0.15300546466741, + "treat (1)", 2.39469198254028e-14, 0.246122251011963, 1, 58.1777752035539, + -7.62743568989958) ) - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$dependent <- "delivered" options$factors <- c("treat") options$covariates <- c("quantity") options$modelTerms <- list( - list(components="treat", isNuisance=FALSE), - list(components="quantity", isNuisance=FALSE), - list(components=list("treat", "quantity"), isNuisance=FALSE) + list(components = NULL, name="model0", title = "Model 0"), + list(components=list("treat", "quantity", list("treat", "quantity")), + name="model1", title = "Model 1") ) + options$residualCasewiseDiagnostic <- FALSE options$oddsRatio <- TRUE options$coefficientCi <- TRUE options$coefficientCiAsOddsRatio <- TRUE results <- jaspTools::runAnalysis("RegressionLogistic", dataset = "santas_log.csv", options) output4 <- results[["results"]][["modelSummary"]][["data"]] jaspTools::expect_equal_tables(output4, - list("H", 529.250590526386, 531.250590526386, 535.242055073494, - 399, "", "", "", "", "", "", - "H", 390.185034282251, - 398.185034282251, 414.150892470683, 396, 139.065556244135, 0, - 0.262759378512591, 0.400251224473979, 0.331341240867329, 0.293663757434995) + list(531.250590526391, 535.242055073499, 529.250590526391, 399, 0, + "M", 0, 398.18503428225, 414.150892470682, 0.293663757435006, + 139.065556244141, 390.18503428225, 396, 0.262759378512601, "M", + 0.400251224473992, 0, 0.331341240867328) ) output5 <- results[["results"]][["estimatesTable"]][["data"]] jaspTools::expect_equal_tables(output5, - list("(Intercept)", 1.829715, 0.3810035, 6.232108, 4.802356, 1.568094e-06, 23.06263, 1, 2.953413, 13.1506, - "treat (1)", 0.199482, 0.520007, 1.22077, 0.3836141, 0.7012645, 0.1471597, 1, 0.4405581, 3.382709, - "quantity", -0.08100546, 0.1678705, 0.9221887, -0.4825472, 0.6294173, 0.2328518, 1, 0.6636332, 1.281479, - "treat (1) * quantity", -1.027643, 0.2309776, 0.3578493, -4.449103, 8.622966e-06, 19.79452, 1, 0.2275578, 0.5627412) + list("TRUE", 1.36124731046877, 2.04061213301582, 0.510825623765981, + "M", 1.66666666666665, "(Intercept)", 7.57353027162946e-07, + 0.103279553032665, 1, 24.4633905355499, 4.94604797141616, "TRUE", + 2.95341293211689, 13.1506047478064, 1.8297145868812, "M", + 6.23210767937535, "(Intercept)", 1.56809395778112e-06, 0.381003510779674, + 1, 23.0626254414104, 4.80235623849484, "FALSE", 0.440558078388745, + 3.38270938575308, 0.199481993012083, "M", 1.22077022683835, + "treat (1)", 0.701264521685676, 0.520006998514146, 1, 0.147159742726858, + 0.383614054391724, "FALSE", 0.663633201382873, 1.28147885872813, + -0.0810054601474337, "M", 0.922188656144837, "quantity", + 0.629417289982631, 0.167870548715548, 1, 0.23285178241894, -0.482547181546986, + "FALSE", 0.22755778991235, 0.562741214139028, -1.02764323147157, + "M", 0.357849335589812, "treat (1) * quantity", 8.6229655831139e-06, + 0.230977621709245, 1, 19.7945175736481, -4.44910300775877) ) - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$dependent <- "delivered" options$covariates <- c("quantity") options$modelTerms <- list( - list(components="quantity", isNuisance=FALSE) + list(components = NULL, name="model0", title = "Model 0"), + list(components= list("quantity"), name="model1", title = "Model 1") ) options$oddsRatio <- TRUE options$coefficientCi <- TRUE options$coefficientCiAsOddsRatio <- TRUE options$conditionalEstimatePlot <- TRUE options$conditionalEstimatePlotPoints <- FALSE + options$residualCasewiseDiagnostic <- FALSE results <- jaspTools::runAnalysis("RegressionLogistic", dataset = "santas_log_subset_treat0.csv", options) output6 <- results[["results"]][["estimatesTable"]][["data"]] jaspTools::expect_equal_tables(output6, - list("(Intercept)", 1.829715, 0.3810035, 6.232108, 4.802356, 1.568094e-06, 23.06263, 1, 2.953413, 13.1506, - "quantity", -0.08100546, 0.1678705, 0.9221887, -0.4825472, 0.6294173, 0.2328518, 1, 0.6636332, 1.281479) + list("TRUE", 3.57850992169287, 8.01981276525911, 1.6784307832026, "M", + 5.35714285329399, "(Intercept)", 3.54594990009555e-16, 0.205863115653283, + 1, 66.4736903106765, 8.15313990501062, "TRUE", 2.95341293211689, + 13.1506047478064, 1.8297145868812, "M", 6.23210767937535, + "(Intercept)", 1.56809395778108e-06, 0.381003510779673, 1, 23.0626254414104, + 4.80235623849485, "FALSE", 0.663633201382873, 1.28147885872813, + -0.0810054601474335, "M", 0.922188656144837, "quantity", + 0.629417289982632, 0.167870548715548, 1, 0.232851782418939, + -0.482547181546985) ) unnumberedFigureA <- results[["state"]][["estimatesPlots"]][["collection"]][[1]] #expect_equal_plots(unnumberedFigureA, "?", dir="Ancova") # This command needs to be updated - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$dependent <- "delivered" options$covariates <- c("quantity") options$modelTerms <- list( - list(components="quantity", isNuisance=FALSE) + list(components = NULL, name="model0", title = "Model 0"), + list(components= list("quantity"), name="model1", title = "Model 1") ) options$oddsRatio <- TRUE options$coefficientCi <- TRUE options$coefficientCiAsOddsRatio <- TRUE options$conditionalEstimatePlot <- TRUE options$conditionalEstimatePlotPoints <- FALSE + options$residualCasewiseDiagnostic <- FALSE results <- jaspTools::runAnalysis("RegressionLogistic", dataset = "santas_log_subset_treat1.csv", options) output7 <- results[["results"]][["estimatesTable"]][["data"]] jaspTools::expect_equal_tables(output7, - list("(Intercept)", 2.029197, 0.3538977, 7.607972, 5.73385, 9.817603e-09, 32.87704, 1, 3.802162, 15.22324, - "quantity", -1.108649, 0.158651, 0.3300046, -6.987972, 2.788889e-12, 48.83175, 1, 0.241811, 0.4503643) + list("TRUE", 0.629242023574777, 1.06773288827061, -0.198850858244477, + "M", 0.819672131557941, "(Intercept)", 0.140449081313175, + 0.134894551619842, 1, 2.17303199956226, -1.47412075474238, "TRUE", + 3.80216170583388, 15.2232426986615, 2.02919657989328, "M", + 7.60797150543205, "(Intercept)", 9.81760318548822e-09, 0.353897729969058, + 1, 32.8770384443136, 5.73385022862593, "FALSE", 0.241810977511836, + 0.450364312443746, -1.10864869161901, "M", 0.33000459788989, + "quantity", 2.78888928052724e-12, 0.158651002531974, 1, 48.8317463436682, + -6.98797154714214) ) unnumberedFigureB <- results[["state"]][["estimatesPlots"]][["collection"]][[1]] #expect_equal_plots(unnumberedFigureB, "?", dir="Ancova") # This command needs to be updated - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$dependent <- "delivered" options$factors <- c("treat") options$covariates <- c("quantity") options$modelTerms <- list( - list(components="treat", isNuisance=FALSE), - list(components="quantity", isNuisance=FALSE), - list(components=list("treat", "quantity"), isNuisance=FALSE) + list(components = NULL, name="model0", title = "Model 0"), + list(components=list("treat", "quantity", list("treat", "quantity")), + name="model1", title = "Model 1") ) options$residualCasewiseDiagnostic <- TRUE options$residualCasewiseDiagnosticZThreshold <- 2 @@ -121,43 +154,22 @@ test_that("Fields Book - Chapter 10 results match", { options$coefficientBootstrapSamples <- 1000 set.seed(1) # For Bootstrapping Unit Tests results <- jaspTools::runAnalysis("RegressionLogistic", dataset = "santas_log.csv", options) - output9 <- results[["results"]][["casewiseDiagnosticsTable"]][["data"]] - + output9 <- results[["results"]][["influenceTable"]][["data"]] + # used to output pearson resdual, switched to deviance using rstandard for std residuals jaspTools::expect_equal_tables(output9, - list(18, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 40, 0.00763627653324742, 0, 1, 0.841269420104102, -0.841269420104102, -2.30216925615302, - 51, 0.0279057279786341, 0, 1, 0.861727722493428, -0.861727722493428, -2.49641897112151, - 64, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 78, 0.0133723433832867, 0, 1, 0.830151056615241, -0.830151056615241, -2.21078819931717, - 90, 0.00763627653324742, 0, 1, 0.841269420104102, -0.841269420104102, -2.30216925615302, - 93, 0.0272626386242438, 1, 0, 0.0827619829298745, 0.917238017070126, 3.32909033263681, - 96, 0.0133723433832867, 0, 1, 0.830151056615241, -0.830151056615241, -2.21078819931717, - 98, 0.0291505277819276, 0, 1, 0.818421994612829, -0.818421994612829, -2.12303437254972, - 105, 0.0279057279786341, 0, 1, 0.861727722493428, -0.861727722493428, -2.49641897112151, - 111, 0.0279057279786341, 0, 1, 0.861727722493428, -0.861727722493428, -2.49641897112151, - 112, 0.00763627653324742, 0, 1, 0.841269420104102, -0.841269420104102, -2.30216925615302, - 113, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 138, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 162, 0.0291505277819276, 0, 1, 0.818421994612829, -0.818421994612829, -2.12303437254972, - 170, 0.00763627653324742, 0, 1, 0.841269420104102, -0.841269420104102, -2.30216925615302, - 188, 0.0272626386242438, 1, 0, 0.0827619829298745, 0.917238017070126, 3.32909033263681, - 195, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 214, 0.0251000549472288, 0, 1, 0.883828611727055, -0.883828611727055, -2.75825515596946, - 215, 0.0133723433832867, 0, 1, 0.830151056615241, -0.830151056615241, -2.21078819931717, - 219, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 222, 0.0291505277819276, 0, 1, 0.818421994612829, -0.818421994612829, -2.12303437254972, - 249, 0.00763627653324742, 0, 1, 0.841269420104102, -0.841269420104102, -2.30216925615302, - 258, 0.0133723433832867, 0, 1, 0.830151056615241, -0.830151056615241, -2.21078819931717, - 265, 0.0272626386242438, 1, 0, 0.0827619829298745, 0.917238017070126, 3.32909033263681, - 270, 0.0291505277819276, 0, 1, 0.818421994612829, -0.818421994612829, -2.12303437254972, - 285, 0.0133723433832867, 0, 1, 0.830151056615241, -0.830151056615241, -2.21078819931717, - 288, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 301, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 307, 0.0251000549472288, 0, 1, 0.883828611727055, -0.883828611727055, -2.75825515596946, - 335, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 370, 0.0121239460461442, 0, 1, 0.851789911140433, -0.851789911140433, -2.39732747153848, - 374, 0.0133723433832867, 0, 1, 0.830151056615241, -0.830151056615241, -2.21078819931717, - 382, 0.0251000549472288, 0, 1, 0.883828611727055, -0.883828611727055, -2.75825515596946) + list(51, 0.027905727978634, 0, 0.861727722493428, -7.23210767937535, + -2.00666634304773, 93, 0.0272626386242436, 1, 0.0827619829298749, + 12.0828424428558, 2.24324229328406, 105, 0.027905727978634, + 0, 0.861727722493428, -7.23210767937535, -2.00666634304773, + 111, 0.027905727978634, 0, 0.861727722493428, -7.23210767937535, + -2.00666634304773, 188, 0.0272626386242436, 1, 0.0827619829298749, + 12.0828424428558, 2.24324229328406, 214, 0.0251000549472288, + 0, 0.883828611727055, -8.60797150543206, -2.08841173681896, + 265, 0.0272626386242436, 1, 0.0827619829298749, 12.0828424428558, + 2.24324229328406, 307, 0.0251000549472288, 0, 0.883828611727055, + -8.60797150543206, -2.08841173681896, 382, 0.0251000549472288, + 0, 0.883828611727055, -8.60797150543206, -2.08841173681896 + ) ) output10 <- results[["results"]][["estimatesTableBootstrapping"]][["data"]] jaspTools::expect_equal_tables(output10, @@ -171,11 +183,14 @@ test_that("Fields Book - Chapter 10 results match", { }) # test the methods for entering predictors -options <- jaspTools::analysisOptions("RegressionLogistic") +options <- initClassicalRegressionOptions("RegressionLogistic") +options$residualCasewiseDiagnostic <- FALSE options$covariates <- list("contNormal") options$dependent <- "contBinom" -options$modelTerms <- list(list(components = list("contNormal"), isNuisance = FALSE)) - +options$modelTerms <- options$modelTerms <- list( + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("contNormal"), name="model1", title = "Model 1") +) #backward test_that("Method=backward model summary table results match", { options$method <- "backward" @@ -208,14 +223,16 @@ test_that("Method=stepwise model summary table results match", { }) test_that("Confusion Matrix Table Matches", { - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") + options$residualCasewiseDiagnostic <- FALSE + options$covariates <- c("contNormal", "contOutlier") options$dependent <- "facGender" - options$modelTerms <- list( - list(components="contNormal", isNuisance=FALSE), - list(components="contOutlier", isNuisance=FALSE) + options$modelTerms <- options$modelTerms <- list( + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("contNormal", "contOutlier"), name="model1", title = "Model 1") ) - + options$residualCasewiseDiagnostic <- FALSE options$confusionMatrix <- TRUE results <- jaspTools::runAnalysis("RegressionLogistic", "debug.csv", options) table <- results[["results"]][["perfDiag"]][["collection"]][["perfDiag_confusionMatrix"]][["data"]] @@ -226,12 +243,14 @@ test_that("Confusion Matrix Table Matches", { }) test_that("Performance Metrics Table Matches", { - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$covariates <- list("contNormal") options$dependent <- "contBinom" - options$modelTerms <- list( - list(components="contNormal", isNuisance=FALSE) + options$modelTerms <- options$modelTerms <- list( + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("contNormal"), name="model1", title = "Model 1") ) + options$residualCasewiseDiagnostic <- FALSE options$accuracy <- TRUE options$auc <- TRUE options$sensitivity <- TRUE @@ -250,15 +269,14 @@ test_that("Performance Metrics Table Matches", { }) test_that("Confusion Matrix Table Matches", { - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$covariates <- c("contNormal", "contOutlier") options$factors <- c("facFive") options$dependent <- "facGender" - - options$modelTerms <- list( - list(components="contNormal", isNuisance=FALSE), - list(components="contOutlier", isNuisance=FALSE), - list(components="facFive", isNuisance=FALSE) + options$residualCasewiseDiagnostic <- FALSE + options$modelTerms <- options$modelTerms <- list( + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("contNormal", "contOutlier", "facFive"), name="model1", title = "Model 1") ) options$multicollinearity <- TRUE @@ -268,13 +286,13 @@ test_that("Confusion Matrix Table Matches", { "contOutlier", 0.9742628, 1.026417, "facFive", 0.9377347, 1.0664)) - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$covariates <- c("contNormal", "contOutlier") options$dependent <- "facGender" - - options$modelTerms <- list( - list(components="contNormal", isNuisance=FALSE), - list(components="contOutlier", isNuisance=FALSE) + options$residualCasewiseDiagnostic <- FALSE + options$modelTerms <- options$modelTerms <- list( + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("contNormal", "contOutlier"), name="model1", title = "Model 1") ) options$multicollinearity <- TRUE @@ -287,7 +305,7 @@ test_that("Confusion Matrix Table Matches", { test_that("Error Handling", { # factor levels not equal to 2 - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$covariates <- list("contNormal") options$dependent <- "facFive" results <- jaspTools::runAnalysis("RegressionLogistic", "debug.csv", options) @@ -295,7 +313,7 @@ test_that("Error Handling", { expect_identical(status, "validationError") # infinity check - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$covariates <- list("debInf") options$dependent <- "contBinom" results <- jaspTools::runAnalysis("RegressionLogistic", "debug.csv", options) @@ -303,7 +321,7 @@ test_that("Error Handling", { expect_identical(status, "validationError") # <2 observations - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$covariates <- list("debMiss99") options$dependent <- "contBinom" results <- jaspTools::runAnalysis("RegressionLogistic", "debug.csv", options) @@ -311,7 +329,7 @@ test_that("Error Handling", { expect_identical(status, "validationError") # variance check - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$covariates <- list("debSame") options$dependent <- "contBinom" results <- jaspTools::runAnalysis("RegressionLogistic", "debug.csv", options) @@ -338,16 +356,15 @@ test_that("Pseudo R-squared are correct", { # # performance::r2_coxsnell(fit) # Cox & Snell's R2: 0.1008645 - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$dependent <- "low" options$covariates <- c("age", "lwt") options$factors <- c("race", "smoke") - options$modelTerms <- list(list(components = "age", isNuisance = FALSE), - list(components = "lwt", isNuisance = FALSE), - list(components = "race", isNuisance = FALSE), - list(components = "smoke", isNuisance = FALSE) + options$modelTerms <- options$modelTerms <- list( + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("age", "lwt", "race", "smoke"), name="model1", title = "Model 1") ) - + options$residualCasewiseDiagnostic <- FALSE results <- jaspTools::runAnalysis("RegressionLogistic", "lowbwt.csv", options) r_squared <- results$results$modelSummary$data[[2]][c("fad", "nag", "tju", "cas")] jaspTools::expect_equal_tables(r_squared, @@ -373,11 +390,14 @@ test_that("Pseudo R-squared are correct", { # performance::r2_tjur(fit) # 0.1713724 # performance::r2_coxsnell(fit) # 0.1524201 - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$dependent <- "y" options$covariates <- "x" - options$modelTerms <- list(list(components = "x", isNuisance = FALSE)) - + options$modelTerms <- options$modelTerms <- options$modelTerms <- list( + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("x"), name="model1", title = "Model 1") + ) + options$residualCasewiseDiagnostic <- FALSE results <- jaspTools::runAnalysis("RegressionLogistic", df, options) r_squared <- results$results$modelSummary$data[[2]][c("fad", "nag", "tju", "cas")] jaspTools::expect_equal_tables(r_squared, @@ -386,16 +406,16 @@ test_that("Pseudo R-squared are correct", { }) test_that("Performance plots match", { - options <- jaspTools::analysisOptions("RegressionLogistic") + options <- initClassicalRegressionOptions("RegressionLogistic") options$dependent <- "low" options$covariates <- c("age", "lwt") options$factors <- c("race", "smoke") - options$modelTerms <- list(list(components = "age", isNuisance = FALSE), - list(components = "lwt", isNuisance = FALSE), - list(components = "race", isNuisance = FALSE), - list(components = "smoke", isNuisance = FALSE) + options$modelTerms <- options$modelTerms <- list( + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("age", "lwt", "race", "smoke"), name="model1", title = "Model 1") ) - + options$residualCasewiseDiagnostic <- FALSE + options$rocPlot <- TRUE options$precisionRecallPlot <- TRUE diff --git a/tests/testthat/test-regressionlogisticVerification.R b/tests/testthat/test-regressionlogisticVerification.R index 6f998410..39af297e 100644 --- a/tests/testthat/test-regressionlogisticVerification.R +++ b/tests/testthat/test-regressionlogisticVerification.R @@ -4,15 +4,14 @@ context("Logistic Regression -- Verification project") ## Testing Titanice -options <- jaspTools::analysisOptions("RegressionLogistic") +options <- initClassicalRegressionOptions("RegressionLogistic") options$dependent <- "Survived" options$factors <- c("PClass", "Sex") options$covariates <- "Age" options$modelTerms <- list( - list(components=c("Age"), isNuisance=FALSE), - list(components=c("PClass"), isNuisance=FALSE), - list(components=c("Sex"), isNuisance=FALSE) + list(components= NULL, name="model0", title = "Model 0"), + list(components=c("Age", "PClass", "Sex"), name="model1", title = "Model 1") ) options$method <- "enter" @@ -26,10 +25,10 @@ test_that("Main table results match R, SPSS, SAS and MiniTab", { jaspTools::expect_equal_tables( "test"=resultTable, - "ref"=list(1027.57254724533, 1032.20058862151, "", "", 1025.57254724533, - 755, "", "H", "", "", "", 705.140778094395, 728.280984975293, - 0.354079636893354, 330.431769150933, 695.140778094395, 751, - 0.322192486566131, "H", 0.476901084612013, 0, 0.393957934297113) + "ref"=list(1027.57254724532, 1032.2005886215, 1025.57254724532, 755, 0, "M", + 0, 705.140778094396, 728.280984975294, 0.354079636893343, 330.431769150921, + 695.140778094396, 751, 0.322192486566122, "M", 0.476901084612001, + 0, 0.393957934297112) ) }) @@ -40,13 +39,16 @@ test_that("Estimates table results match R, SPSS, SAS and MiniTab", { jaspTools::expect_equal_tables( "test"=resultTable, - "ref"=list(3.75966210120454, "(Intercept)", 3.17912938532242e-21, 0.397567324274317, - 1, 89.4285652804951, 9.45666776832598, -0.0391768149758485, - "Age", 2.69139209369272e-07, 0.0076162175667569, 1, 26.4593741560195, - -5.14386762621471, -1.29196239983359, "PClass (2nd)", 6.77732368246485e-07, - 0.260075781079861, 1, 24.6774298532547, -4.96763825708504, -2.52141915270095, - "PClass (3rd)", 7.94813113283019e-20, 0.276656805343585, 1, - 83.0629554227781, -9.11388805191166, -2.63135683476562, "Sex (male)", - 5.68409321426263e-39, 0.201505378991115, 1, 170.524272319145, - -13.0584942592607)) + "ref"=list("TRUE", -0.347366579504982, "M", "(Intercept)", 2.54655183336123e-06, + 0.0738391799970593, 1, 22.1310660444386, -4.70436669961416, + "TRUE", 3.75966210120455, "M", "(Intercept)", 3.1791293853215e-21, + 0.397567324274317, 1, 89.4285652804962, 9.45666776832601, "FALSE", + -0.0391768149758488, "M", "Age", 2.6913920936924e-07, + 0.00761621756675692, 1, 26.45937415602, -5.14386762621473, "FALSE", + -1.2919623998336, "M", "PClass (2nd)", 6.77732368246352e-07, + 0.260075781079861, 1, 24.6774298532553, -4.96763825708507, "FALSE", + -2.52141915270095, "M", "PClass (3rd)", 7.94813113282811e-20, + 0.276656805343584, 1, 83.0629554227789, -9.11388805191169, "FALSE", + -2.63135683476561, "M", "Sex (male)", 5.68409321426302e-39, + 0.201505378991114, 1, 170.524272319145, -13.0584942592607)) }) \ No newline at end of file