Skip to content

Commit

Permalink
Regression block models + influence measures + consistent interface (j…
Browse files Browse the repository at this point in the history
…asp-stats#296)

* consistent influence table + covariance

* move durbin watson to model summary table

* add options for verified test

* Update helpfile

* add mahalanobis

* bug cook

* add Brunos widget

* Make R work with widget

* better check empty model

* add more blocked functionality

* make it work with new modelterms format

* testPlots and update linreg unit test structure

* More updating unit tests

* dataset rename

* linreg now uses jaspgraphs for qq

* update test plots

* get rid of glmCommonFUnctions, just use commonglm

* put residual plot in common file

* update qq-plots

* footnote influence when too extreme
  • Loading branch information
JohnnyDoorn authored Apr 11, 2024
1 parent 02c5fd7 commit 183b8ca
Show file tree
Hide file tree
Showing 26 changed files with 1,672 additions and 1,488 deletions.
508 changes: 506 additions & 2 deletions R/commonglm.R

Large diffs are not rendered by default.

43 changes: 26 additions & 17 deletions R/correlation.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ CorrelationInternal <- function(jaspResults, dataset, options){
mainTable$dependOn(c("variables", "partialOutVariables",
"pearson", "spearman", "kendallsTauB", "pairwiseDisplay", "significanceReport",
"significanceFlagged", "sampleSize",
"ci", "ciLevel",
"ci", "ciLevel", "covariance",
"vovkSellke", "alternative", "naAction",
"ciBootstrap", "ciBootstrapSamples", "effectSize"))

Expand Down Expand Up @@ -201,7 +201,14 @@ CorrelationInternal <- function(jaspResults, dataset, options){

if(options$significanceReport)
mainTable$addColumnInfo(name = paste0(test, "_p.value"), title = gettext("p"), type = "pvalue", overtitle = overtitle)


if(options$vovkSellke){
mainTable$addColumnInfo(name = paste0(test, "_vsmpr"), title = gettext("VS-MPR"), type = "number", overtitle = overtitle)

mainTable$addFootnote(message = .corrGetTexts()$footnotes$VSMPR, symbol = "\u2020", colNames = paste0(test, "_vsmpr"))
mainTable$addCitation(.corrGetTexts()$references$Sellke_etal_2001)
}

if(options$ci){
mainTable$addColumnInfo(name = paste0(test, "_lower.ci"),
title = gettextf("Lower %s%% CI", 100*options$ciLevel), type = "number",
Expand All @@ -211,19 +218,16 @@ CorrelationInternal <- function(jaspResults, dataset, options){
overtitle = overtitle)
}

if(options$vovkSellke){
mainTable$addColumnInfo(name = paste0(test, "_vsmpr"), title = gettext("VS-MPR"), type = "number", overtitle = overtitle)

mainTable$addFootnote(message = .corrGetTexts()$footnotes$VSMPR, symbol = "\u2020", colNames = paste0(test, "_vsmpr"))
mainTable$addCitation(.corrGetTexts()$references$Sellke_etal_2001)
}

if(options$effectSize){
mainTable$addColumnInfo(name = paste0(test, "_effect.size"), title = gettext("Effect size (Fisher's z)"), type = "number", overtitle = overtitle)
mainTable$addColumnInfo(name = paste0(test, "_se.effect.size"), title = gettext("SE Effect size"), type = "number", overtitle = overtitle)
}
}
}

if(options$covariance){
mainTable$addColumnInfo(name = "covariance", title = gettext("Covariance"), type = "number")
}
}

.corrTitlerer <- function(test, nTests){
Expand Down Expand Up @@ -252,13 +256,19 @@ CorrelationInternal <- function(jaspResults, dataset, options){
overtitle <- paste(vi, variables[vi], sep = ". ")

if(options$sampleSize) {
mainTable$addColumnInfo(name = paste(.v(variables[vi]), "sample.size", sep = "_"), title = "n",
mainTable$addColumnInfo(name = paste(variables[vi], "sample.size", sep = "_"), title = "n",
type = "integer", overtitle = overtitle)
}

for(ti in seq_along(tests)){
.corrInitCorrelationTableRowAsColumn(mainTable, options, variables[vi], testsTitles[ti], tests[ti], overtitle)
}

if(options$covariance) {
mainTable$addColumnInfo(name = paste(variables[vi], "covariance", sep = "_"), gettextf("Covariance"),
type = "number", overtitle = overtitle)
}

mainTable$setRowName(vi, .v(variables[vi]))
}
}
Expand Down Expand Up @@ -290,11 +300,11 @@ CorrelationInternal <- function(jaspResults, dataset, options){
title = gettextf("Upper %s%% CI", 100*options$ciLevel),
type = "number", overtitle = overtitle)
}

if(options$effectSize){
mainTable$addColumnInfo(name = sprintf(name, "effect.size"), title = gettextf("Effect size (Fisher's z)"), type = "number", overtitle = overtitle)
mainTable$addColumnInfo(name = sprintf(name, "se.effect.size"), title = gettext("SE Effect size"), type = "number", overtitle = overtitle)
}
}
}

### Compute results ----
Expand Down Expand Up @@ -343,7 +353,9 @@ CorrelationInternal <- function(jaspResults, dataset, options){
currentResults <- list()
testErrors <- list()
currentResults[['sample.size']] <- nrow(data)

if (isFALSE(errors))
currentResults[['covariance']] <- cov(x = data[,1], y = data[,2])

# even if we do not want the specific tests results
# we still want the output as NaN - to fill the jaspTables correctly
# so we still loop over all tests - .corr.test() returns empty lists if isFALSE(compute)
Expand Down Expand Up @@ -466,7 +478,6 @@ CorrelationInternal <- function(jaspResults, dataset, options){
statsNames <- c("estimate", "p.value", "lower.ci", "upper.ci", "vsmpr", "effect.size", "se.effect.size")
alternative <- match.arg(alternative)


if(isFALSE(compute)){
result <- rep(NaN, length(statsNames))
names(result) <- statsNames
Expand Down Expand Up @@ -513,9 +524,6 @@ CorrelationInternal <- function(jaspResults, dataset, options){
result$se.effect.size <- sqrt((2 / (n * (n - 1))) * (1 - (4 * (s1^2 / pi^2)) + (2 * (n - 2) * ((1/9) - (4 * (s2^2 / pi^2))))))
}




result <- unlist(result[stats], use.names = FALSE)
names(result) <- statsNames
}
Expand Down Expand Up @@ -705,6 +713,7 @@ CorrelationInternal <- function(jaspResults, dataset, options){
testErrors <- lapply(corrResults, function(x) x[['testErrors']])

mainTable[['sample.size']] <- sapply(results, function(x) x[['sample.size']])
mainTable[['covariance']] <- sapply(results, function(x) x[['covariance']])

# would be nice to be able to fill table cell-wise, i.e., mainTable[[row, col]] <- value
colNames <- character() # this is for error footnotes
Expand Down
162 changes: 8 additions & 154 deletions R/generalizedlinearmodel.R
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,14 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options,
.glmOutlierTable(jaspResults, dataset, options, ready, position = 8, residType = "standardized deviance")
.glmOutlierTable(jaspResults, dataset, options, ready, position = 8, residType = "studentized deviance")

.glmInfluenceTable(jaspResults, dataset, options, ready, position = 9)
.glmInfluenceTable(jaspResults[["diagnosticsContainer"]],
jaspResults[["glmModels"]][["object"]][["fullModel"]],
dataset, options, ready, position = 9)

.regressionExportResiduals(jaspResults,
jaspResults[["glmModels"]][["object"]][["fullModel"]],
dataset, options, ready)

.glmMulticolliTable(jaspResults, dataset, options, ready, position = 10)

return()
Expand Down Expand Up @@ -639,53 +646,6 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options,
}



# Plots: Residuals Q-Q
.glmPlotResQQ <- function(jaspResults, dataset, options, ready, position) {

plotNames <- c("devianceResidualQqPlot", "pearsonResidualQqPlot", "quantileResidualQqPlot")
if (!ready || !any(unlist(options[plotNames])))
return()

residNames <- c("deviance", "Pearson", "quantile")

glmPlotResQQContainer <- createJaspContainer(gettext("Normal Q-Q Plots: Standardized Residuals"))
glmPlotResQQContainer$dependOn(optionsFromObject = jaspResults[["modelSummary"]],
options = c(plotNames, "seed", "setSeed"))
glmPlotResQQContainer$position <- position
jaspResults[["diagnosticsContainer"]][["glmPlotResQQ"]] <- glmPlotResQQContainer


if (!is.null(jaspResults[["glmModels"]])) {
glmFullModel <- jaspResults[["glmModels"]][["object"]][["fullModel"]]
for (i in 1:length(plotNames)) {
if (options[[plotNames[[i]]]]) {
.glmCreatePlotPlaceholder(glmPlotResQQContainer,
index = plotNames[[i]],
title = gettextf("Normal Q-Q plot: Standardized %1s residuals", residNames[[i]]))

.glmInsertPlot(glmPlotResQQContainer[[plotNames[[i]]]],
.glmFillPlotResQQ,
residType = residNames[[i]],
model = glmFullModel,
family = options[["family"]])
}
}
}
return()
}

.glmFillPlotResQQ <- function(residType, model, family) {

# compute residuals
stdResid <- .glmStdResidCompute(model = model, residType = residType, options = options)

thePlot <- jaspGraphs::plotQQnorm(stdResid, ablineColor = "blue")

return(thePlot)
}


# Plots: Partial residuals
.glmPlotResPartial <- function(jaspResults, dataset, options, ready, position) {
if (!ready)
Expand Down Expand Up @@ -894,112 +854,6 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options,
}
}


# Table: Influential cases
.glmInfluenceTable <- function(jaspResults, dataset, options, ready, position) {

tableOptionsOn <- c(options[["dfbetas"]],
options[["dffits"]],
options[["covarianceRatio"]],
options[["cooksDistance"]],
options[["leverage"]])


if (!ready | !any(tableOptionsOn))
return()


tableOptions <- c("dfbetas", "dffits", "covarianceRatio", "cooksDistance", "leverage")
tableOptionsClicked <- tableOptions[tableOptionsOn]

if (is.null(jaspResults[["diagnosticsContainer"]][["influenceTable"]])) {
influenceTable <- createJaspTable(gettext("Table: Influential Cases"))
influenceTable$dependOn(optionsFromObject = jaspResults[["modelSummary"]],
options = tableOptions)
influenceTable$position <- position
influenceTable$showSpecifiedColumnsOnly <- TRUE
jaspResults[["diagnosticsContainer"]][["influenceTable"]] <- influenceTable
}

tableOptionToColName <- function(x) {
switch(x,
"dfbetas" = "DFBETAS",
"dffits" = "DFFITS",
"covarianceRatio" = "Covariance Ratio",
"cooksDistance" = "Cook's Distance",
"leverage" = "Leverage")
}

if (is.null(jaspResults[["glmModels"]])) {
for (option in tableOptionsClicked) {
colTitle <- tableOptionToColName(option)
jaspResults[["influenceTable"]]$addColumnInfo(name = option, title = gettext(colTitle), type = "number")
}
} else {
glmFullModel <- jaspResults[["glmModels"]][["object"]][["fullModel"]]
colNameList <- c()
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addColumnInfo(name = "caseN", title = "Case Number", type = "integer")
for (option in tableOptionsClicked) {
if (option == "dfbetas") {
predictors <- names(glmFullModel$coefficients)
for (predictor in predictors) {
dfbetasName <- gettextf("DFBETAS_%1s", predictor)
colNameList <- c(colNameList, dfbetasName)
if (predictor == "(Intercept)")
dfbetasTitle <- gettext("DFBETAS:Intercept")
else
dfbetasTitle <- gettextf("DFBETAS:%1s", gsub(":", "*", predictor))
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addColumnInfo(name = dfbetasName, title = dfbetasTitle, type = "number")
}
} else {
colNameList <- c(colNameList, option)
colTitle <- tableOptionToColName(option)
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addColumnInfo(name = option, title = gettext(colTitle), type = "number")
}
}
.glmInfluenceTableFill(jaspResults, dataset, options, ready, model = glmFullModel, influenceMeasures = tableOptionsClicked, colNames = colNameList)
}
}

.glmInfluenceTableFill <- function(jaspResults, dataset, options, ready, model, influenceMeasures, colNames) {
influenceRes <- influence.measures(model)
nDFBETAS <- length(names(model$coefficients))

optionToColInd <- function(x, nDFBETAS) {
switch(x,
"dfbetas" = 1:nDFBETAS,
"dffits" = (nDFBETAS+1),
"covarianceRatio" = (nDFBETAS+2),
"cooksDistance" = (nDFBETAS+3),
"leverage" = (nDFBETAS+4))}

colInd <- c()
for (measure in influenceMeasures) {
colInd <- c(colInd, optionToColInd(measure, nDFBETAS))
}

influenceResData <- as.data.frame(influenceRes[["infmat"]][, colInd])
names(influenceResData) <- colNames
caseN <- seq.int(nrow(influenceResData))
influenceResData <- cbind(caseN, influenceResData)

influenceResSig <- influenceRes[["is.inf"]][, colInd]

if (length(colInd) > 1) {
influenceResDataFinal <- influenceResData[rowSums(influenceResSig) > 0, , drop = FALSE]
} else {
influenceResDataFinal <- influenceResData[influenceResSig > 0, , drop = FALSE]
}

nRowInfluential <- nrow(influenceResDataFinal)

if (nRowInfluential == 0)
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addFootnote(gettext("No influential cases found."))
else {
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$setData(influenceResDataFinal)
}
}

# Table: Multicollinearity
.glmMulticolliTable <- function(jaspResults, dataset, options, ready, position) {

Expand Down
Loading

0 comments on commit 183b8ca

Please sign in to comment.