Skip to content

Commit

Permalink
consistent influence table + covariance
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnnyDoorn committed Mar 27, 2024
1 parent 6db0e69 commit 6ecf63c
Show file tree
Hide file tree
Showing 13 changed files with 481 additions and 255 deletions.
43 changes: 26 additions & 17 deletions R/correlation.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ CorrelationInternal <- function(jaspResults, dataset, options){
mainTable$dependOn(c("variables", "partialOutVariables",
"pearson", "spearman", "kendallsTauB", "pairwiseDisplay", "significanceReport",
"significanceFlagged", "sampleSize",
"ci", "ciLevel",
"ci", "ciLevel", "covariance",
"vovkSellke", "alternative", "naAction",
"ciBootstrap", "ciBootstrapSamples", "effectSize"))

Expand Down Expand Up @@ -201,7 +201,14 @@ CorrelationInternal <- function(jaspResults, dataset, options){

if(options$significanceReport)
mainTable$addColumnInfo(name = paste0(test, "_p.value"), title = gettext("p"), type = "pvalue", overtitle = overtitle)


if(options$vovkSellke){
mainTable$addColumnInfo(name = paste0(test, "_vsmpr"), title = gettext("VS-MPR"), type = "number", overtitle = overtitle)

mainTable$addFootnote(message = .corrGetTexts()$footnotes$VSMPR, symbol = "\u2020", colNames = paste0(test, "_vsmpr"))
mainTable$addCitation(.corrGetTexts()$references$Sellke_etal_2001)
}

if(options$ci){
mainTable$addColumnInfo(name = paste0(test, "_lower.ci"),
title = gettextf("Lower %s%% CI", 100*options$ciLevel), type = "number",
Expand All @@ -211,19 +218,16 @@ CorrelationInternal <- function(jaspResults, dataset, options){
overtitle = overtitle)
}

if(options$vovkSellke){
mainTable$addColumnInfo(name = paste0(test, "_vsmpr"), title = gettext("VS-MPR"), type = "number", overtitle = overtitle)

mainTable$addFootnote(message = .corrGetTexts()$footnotes$VSMPR, symbol = "\u2020", colNames = paste0(test, "_vsmpr"))
mainTable$addCitation(.corrGetTexts()$references$Sellke_etal_2001)
}

if(options$effectSize){
mainTable$addColumnInfo(name = paste0(test, "_effect.size"), title = gettext("Effect size (Fisher's z)"), type = "number", overtitle = overtitle)
mainTable$addColumnInfo(name = paste0(test, "_se.effect.size"), title = gettext("SE Effect size"), type = "number", overtitle = overtitle)
}
}
}

if(options$covariance){
mainTable$addColumnInfo(name = "covariance", title = gettext("Covariance"), type = "number")
}
}

.corrTitlerer <- function(test, nTests){
Expand Down Expand Up @@ -252,13 +256,19 @@ CorrelationInternal <- function(jaspResults, dataset, options){
overtitle <- paste(vi, variables[vi], sep = ". ")

if(options$sampleSize) {
mainTable$addColumnInfo(name = paste(.v(variables[vi]), "sample.size", sep = "_"), title = "n",
mainTable$addColumnInfo(name = paste(variables[vi], "sample.size", sep = "_"), title = "n",
type = "integer", overtitle = overtitle)
}

for(ti in seq_along(tests)){
.corrInitCorrelationTableRowAsColumn(mainTable, options, variables[vi], testsTitles[ti], tests[ti], overtitle)
}

if(options$covariance) {
mainTable$addColumnInfo(name = paste(variables[vi], "covariance", sep = "_"), gettextf("Covariance"),
type = "number", overtitle = overtitle)
}

mainTable$setRowName(vi, .v(variables[vi]))
}
}
Expand Down Expand Up @@ -290,11 +300,11 @@ CorrelationInternal <- function(jaspResults, dataset, options){
title = gettextf("Lower %s%% CI", 100*options$ciLevel),
type = "number", overtitle = overtitle)
}

if(options$effectSize){
mainTable$addColumnInfo(name = sprintf(name, "effect.size"), title = gettextf("Effect size (Fisher's z)"), type = "number", overtitle = overtitle)
mainTable$addColumnInfo(name = sprintf(name, "se.effect.size"), title = gettext("SE Effect size"), type = "number", overtitle = overtitle)
}
}
}

### Compute results ----
Expand Down Expand Up @@ -343,7 +353,9 @@ CorrelationInternal <- function(jaspResults, dataset, options){
currentResults <- list()
testErrors <- list()
currentResults[['sample.size']] <- nrow(data)

if (isFALSE(errors))
currentResults[['covariance']] <- cov(x = data[,1], y = data[,2])

# even if we do not want the specific tests results
# we still want the output as NaN - to fill the jaspTables correctly
# so we still loop over all tests - .corr.test() returns empty lists if isFALSE(compute)
Expand Down Expand Up @@ -466,7 +478,6 @@ CorrelationInternal <- function(jaspResults, dataset, options){
statsNames <- c("estimate", "p.value", "lower.ci", "upper.ci", "vsmpr", "effect.size", "se.effect.size")
alternative <- match.arg(alternative)


if(isFALSE(compute)){
result <- rep(NaN, length(statsNames))
names(result) <- statsNames
Expand Down Expand Up @@ -513,9 +524,6 @@ CorrelationInternal <- function(jaspResults, dataset, options){
result$se.effect.size <- sqrt((2 / (n * (n - 1))) * (1 - (4 * (s1^2 / pi^2)) + (2 * (n - 2) * ((1/9) - (4 * (s2^2 / pi^2))))))
}




result <- unlist(result[stats], use.names = FALSE)
names(result) <- statsNames
}
Expand Down Expand Up @@ -705,6 +713,7 @@ CorrelationInternal <- function(jaspResults, dataset, options){
testErrors <- lapply(corrResults, function(x) x[['testErrors']])

mainTable[['sample.size']] <- sapply(results, function(x) x[['sample.size']])
mainTable[['covariance']] <- sapply(results, function(x) x[['covariance']])

# would be nice to be able to fill table cell-wise, i.e., mainTable[[row, col]] <- value
colNames <- character() # this is for error footnotes
Expand Down
108 changes: 73 additions & 35 deletions R/generalizedlinearmodel.R
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,14 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options,
.glmOutlierTable(jaspResults, dataset, options, ready, position = 8, residType = "standardized deviance")
.glmOutlierTable(jaspResults, dataset, options, ready, position = 8, residType = "studentized deviance")

.glmInfluenceTable(jaspResults, dataset, options, ready, position = 9)
.glmInfluenceTable(jaspResults[["diagnosticsContainer"]],
jaspResults[["glmModels"]][["object"]][["fullModel"]],
dataset, options, ready, position = 9)

.regressionExportResiduals(jaspResults,
jaspResults[["glmModels"]][["object"]][["fullModel"]],
dataset, options, ready)

.glmMulticolliTable(jaspResults, dataset, options, ready, position = 10)

return()
Expand Down Expand Up @@ -896,29 +903,31 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options,


# Table: Influential cases
.glmInfluenceTable <- function(jaspResults, dataset, options, ready, position) {
.glmInfluenceTable <- function(jaspResults, model, dataset, options, ready, position, linRegAnalysis = FALSE) {

tableOptionsOn <- c(options[["dfbetas"]],
options[["dffits"]],
options[["covarianceRatio"]],
options[["cooksDistance"]],
# options[["cooksDistance"]],
options[["leverage"]])


if (!ready | !any(tableOptionsOn))
if (!ready | !options[["residualCasewiseDiagnostic"]])
return()


tableOptions <- c("dfbetas", "dffits", "covarianceRatio", "cooksDistance", "leverage")
tableOptions <- c("dfbetas", "dffits", "covarianceRatio", "leverage")
tableOptionsClicked <- tableOptions[tableOptionsOn]
tableOptionsClicked <- c("cooksDistance", tableOptionsClicked)

if (is.null(jaspResults[["diagnosticsContainer"]][["influenceTable"]])) {
if (is.null(jaspResults[["influenceTable"]])) {
influenceTable <- createJaspTable(gettext("Table: Influential Cases"))
influenceTable$dependOn(optionsFromObject = jaspResults[["modelSummary"]],
options = tableOptions)
influenceTable$dependOn(c("residualCasewiseDiagnostic", "residualCasewiseDiagnosticType",
"residualCasewiseDiagnosticZThreshold", "residualCasewiseDiagnosticCooksDistanceThreshold"))
influenceTable$position <- position
influenceTable$showSpecifiedColumnsOnly <- TRUE
jaspResults[["diagnosticsContainer"]][["influenceTable"]] <- influenceTable
jaspResults[["influenceTable"]] <- influenceTable
}

tableOptionToColName <- function(x) {
Expand All @@ -930,38 +939,48 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options,
"leverage" = "Leverage")
}

if (is.null(jaspResults[["glmModels"]])) {
if (is.null(model)) {
for (option in tableOptionsClicked) {
colTitle <- tableOptionToColName(option)
jaspResults[["influenceTable"]]$addColumnInfo(name = option, title = gettext(colTitle), type = "number")
influenceTable$addColumnInfo(name = option, title = gettext(colTitle), type = "number")
}
} else {
glmFullModel <- jaspResults[["glmModels"]][["object"]][["fullModel"]]

colNameList <- c()
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addColumnInfo(name = "caseN", title = "Case Number", type = "integer")
influenceTable$addColumnInfo(name = "caseN", title = "Case Number", type = "integer")
influenceTable$addColumnInfo(name = "stdResidual", title = gettext("Std. Residual"), type = "number", format = "dp:3")
influenceTable$addColumnInfo(name = "dependent", title = options$dependent, type = "number")
influenceTable$addColumnInfo(name = "predicted", title = gettext("Predicted Value"), type = "number")
influenceTable$addColumnInfo(name = "residual", title = gettext("Residual"), type = "number", format = "dp:3")

alwaysPresent <- c("caseN", "stdResidual", "dependent", "predicted", "residual")
for (option in tableOptionsClicked) {
if (option == "dfbetas") {
predictors <- names(glmFullModel$coefficients)
predictors <- names(model$coefficients)
for (predictor in predictors) {
dfbetasName <- gettextf("DFBETAS_%1s", predictor)
colNameList <- c(colNameList, dfbetasName)
if (predictor == "(Intercept)")
dfbetasTitle <- gettext("DFBETAS:Intercept")
else
dfbetasTitle <- gettextf("DFBETAS:%1s", gsub(":", "*", predictor))
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addColumnInfo(name = dfbetasName, title = dfbetasTitle, type = "number")
influenceTable$addColumnInfo(name = dfbetasName, title = dfbetasTitle, type = "number")
}
} else {
colNameList <- c(colNameList, option)
colTitle <- tableOptionToColName(option)
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addColumnInfo(name = option, title = gettext(colTitle), type = "number")
influenceTable$addColumnInfo(name = option, title = gettext(colTitle), type = "number")
}
}
.glmInfluenceTableFill(jaspResults, dataset, options, ready, model = glmFullModel, influenceMeasures = tableOptionsClicked, colNames = colNameList)
.glmInfluenceTableFill(influenceTable, dataset, options, ready,
model = model,
influenceMeasures = tableOptionsClicked,
colNames = c(colNameList, alwaysPresent))
}
}

.glmInfluenceTableFill <- function(jaspResults, dataset, options, ready, model, influenceMeasures, colNames) {
.glmInfluenceTableFill <- function(influenceTable, dataset, options, ready, model, influenceMeasures, colNames) {

influenceRes <- influence.measures(model)
nDFBETAS <- length(names(model$coefficients))

Expand All @@ -978,25 +997,44 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options,
colInd <- c(colInd, optionToColInd(measure, nDFBETAS))
}

influenceResData <- as.data.frame(influenceRes[["infmat"]][, colInd])
names(influenceResData) <- colNames
caseN <- seq.int(nrow(influenceResData))
influenceResData <- cbind(caseN, influenceResData)

influenceResSig <- influenceRes[["is.inf"]][, colInd]

if (length(colInd) > 1) {
influenceResDataFinal <- influenceResData[rowSums(influenceResSig) > 0, , drop = FALSE]
} else {
influenceResDataFinal <- influenceResData[influenceResSig > 0, , drop = FALSE]
}

nRowInfluential <- nrow(influenceResDataFinal)

if (nRowInfluential == 0)
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addFootnote(gettext("No influential cases found."))
influenceResData <- as.data.frame(influenceRes[["infmat"]][, colInd])
influenceResData[["caseN"]] <- seq.int(nrow(influenceResData))
influenceResData[["stdResidual"]] <- rstandard(model)
influenceResData[["dependent"]] <- model.frame(model)[[options$dependent]]
influenceResData[["predicted"]] <- model$fitted.values
influenceResData[["residual"]] <- model$residual


if (options$residualCasewiseDiagnosticType == "cooksDistance")
index <- which(abs(influenceResData[["cook.d"]]) > options$residualCasewiseDiagnosticCooksDistanceThreshold)
else if (options$residualCasewiseDiagnosticType == "outliersOutside")
index <- which(abs(influenceResData[["stdResidual"]]) > options$residualCasewiseDiagnosticZThreshold)
else # all
index <- seq.int(nrow(influenceResData))

# funky statement to ensure a df even if only 1 row
influenceResSig <- subset(influenceRes[["is.inf"]], 1:nrow(influenceResData) %in% index, select = colInd)
colnames(influenceResSig) <- colNames[1:length(colInd)]

influenceResData <- influenceResData[index, ]
colnames(influenceResData) <- colNames

if (length(index) == 0)
influenceTable$addFootnote(gettext("No influential cases found."))
else {
jaspResults[["diagnosticsContainer"]][["influenceTable"]]$setData(influenceResDataFinal)
influenceTable$setData(influenceResData)
# if any other metrix show influence, add footnotes:
if (sum(influenceResSig) > 0) {
for (thisCol in colnames(influenceResSig)) {
if (sum(influenceResSig[, thisCol]) > 0)
influenceTable$addFootnote(
gettext("Potentially influential case, according to the selected influence measure."),
colNames = thisCol,
rowNames = rownames(influenceResData)[influenceResSig[, thisCol]],
symbol = "*"
)
}
}
}
}

Expand Down
17 changes: 17 additions & 0 deletions R/glmCommonFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,23 @@
}
}

.regressionExportResiduals <- function(container, model, dataset, options, ready) {

if (isFALSE(options[["residualsSavedToData"]]))
return()

if (is.null(container[["residualsSavedToDataColumn"]]) && options[["residualsSavedToDataColumn"]] != "") {

residuals <- model[["residuals"]] # extract residuals

container[["residualsSavedToDataColumn"]] <- createJaspColumn(columnName = options[["residualsSavedToDataColumn"]])
container[["residualsSavedToDataColumn"]]$dependOn(options = c("residualsSavedToDataColumn", "residualsSavedToData"))
container[["residualsSavedToDataColumn"]]$setScale(residuals)

}

}

.constInfoTransform <- function(family, x) {
switch(family,
"bernoulli" = 1/(sin(sqrt(x))),
Expand Down
Loading

0 comments on commit 6ecf63c

Please sign in to comment.