diff --git a/R/correlation.R b/R/correlation.R index 0fe942f1..f3144b98 100644 --- a/R/correlation.R +++ b/R/correlation.R @@ -115,7 +115,7 @@ CorrelationInternal <- function(jaspResults, dataset, options){ mainTable$dependOn(c("variables", "partialOutVariables", "pearson", "spearman", "kendallsTauB", "pairwiseDisplay", "significanceReport", "significanceFlagged", "sampleSize", - "ci", "ciLevel", + "ci", "ciLevel", "covariance", "vovkSellke", "alternative", "naAction", "ciBootstrap", "ciBootstrapSamples", "effectSize")) @@ -201,7 +201,14 @@ CorrelationInternal <- function(jaspResults, dataset, options){ if(options$significanceReport) mainTable$addColumnInfo(name = paste0(test, "_p.value"), title = gettext("p"), type = "pvalue", overtitle = overtitle) - + + if(options$vovkSellke){ + mainTable$addColumnInfo(name = paste0(test, "_vsmpr"), title = gettext("VS-MPR"), type = "number", overtitle = overtitle) + + mainTable$addFootnote(message = .corrGetTexts()$footnotes$VSMPR, symbol = "\u2020", colNames = paste0(test, "_vsmpr")) + mainTable$addCitation(.corrGetTexts()$references$Sellke_etal_2001) + } + if(options$ci){ mainTable$addColumnInfo(name = paste0(test, "_lower.ci"), title = gettextf("Lower %s%% CI", 100*options$ciLevel), type = "number", @@ -211,19 +218,16 @@ CorrelationInternal <- function(jaspResults, dataset, options){ overtitle = overtitle) } - if(options$vovkSellke){ - mainTable$addColumnInfo(name = paste0(test, "_vsmpr"), title = gettext("VS-MPR"), type = "number", overtitle = overtitle) - - mainTable$addFootnote(message = .corrGetTexts()$footnotes$VSMPR, symbol = "\u2020", colNames = paste0(test, "_vsmpr")) - mainTable$addCitation(.corrGetTexts()$references$Sellke_etal_2001) - } - if(options$effectSize){ mainTable$addColumnInfo(name = paste0(test, "_effect.size"), title = gettext("Effect size (Fisher's z)"), type = "number", overtitle = overtitle) mainTable$addColumnInfo(name = paste0(test, "_se.effect.size"), title = gettext("SE Effect size"), type = "number", overtitle = overtitle) } } } + + if(options$covariance){ + mainTable$addColumnInfo(name = "covariance", title = gettext("Covariance"), type = "number") + } } .corrTitlerer <- function(test, nTests){ @@ -252,13 +256,19 @@ CorrelationInternal <- function(jaspResults, dataset, options){ overtitle <- paste(vi, variables[vi], sep = ". ") if(options$sampleSize) { - mainTable$addColumnInfo(name = paste(.v(variables[vi]), "sample.size", sep = "_"), title = "n", + mainTable$addColumnInfo(name = paste(variables[vi], "sample.size", sep = "_"), title = "n", type = "integer", overtitle = overtitle) } for(ti in seq_along(tests)){ .corrInitCorrelationTableRowAsColumn(mainTable, options, variables[vi], testsTitles[ti], tests[ti], overtitle) } + + if(options$covariance) { + mainTable$addColumnInfo(name = paste(variables[vi], "covariance", sep = "_"), gettextf("Covariance"), + type = "number", overtitle = overtitle) + } + mainTable$setRowName(vi, .v(variables[vi])) } } @@ -290,11 +300,11 @@ CorrelationInternal <- function(jaspResults, dataset, options){ title = gettextf("Lower %s%% CI", 100*options$ciLevel), type = "number", overtitle = overtitle) } - + if(options$effectSize){ mainTable$addColumnInfo(name = sprintf(name, "effect.size"), title = gettextf("Effect size (Fisher's z)"), type = "number", overtitle = overtitle) mainTable$addColumnInfo(name = sprintf(name, "se.effect.size"), title = gettext("SE Effect size"), type = "number", overtitle = overtitle) - } + } } ### Compute results ---- @@ -343,7 +353,9 @@ CorrelationInternal <- function(jaspResults, dataset, options){ currentResults <- list() testErrors <- list() currentResults[['sample.size']] <- nrow(data) - + if (isFALSE(errors)) + currentResults[['covariance']] <- cov(x = data[,1], y = data[,2]) + # even if we do not want the specific tests results # we still want the output as NaN - to fill the jaspTables correctly # so we still loop over all tests - .corr.test() returns empty lists if isFALSE(compute) @@ -466,7 +478,6 @@ CorrelationInternal <- function(jaspResults, dataset, options){ statsNames <- c("estimate", "p.value", "lower.ci", "upper.ci", "vsmpr", "effect.size", "se.effect.size") alternative <- match.arg(alternative) - if(isFALSE(compute)){ result <- rep(NaN, length(statsNames)) names(result) <- statsNames @@ -513,9 +524,6 @@ CorrelationInternal <- function(jaspResults, dataset, options){ result$se.effect.size <- sqrt((2 / (n * (n - 1))) * (1 - (4 * (s1^2 / pi^2)) + (2 * (n - 2) * ((1/9) - (4 * (s2^2 / pi^2)))))) } - - - result <- unlist(result[stats], use.names = FALSE) names(result) <- statsNames } @@ -705,6 +713,7 @@ CorrelationInternal <- function(jaspResults, dataset, options){ testErrors <- lapply(corrResults, function(x) x[['testErrors']]) mainTable[['sample.size']] <- sapply(results, function(x) x[['sample.size']]) + mainTable[['covariance']] <- sapply(results, function(x) x[['covariance']]) # would be nice to be able to fill table cell-wise, i.e., mainTable[[row, col]] <- value colNames <- character() # this is for error footnotes diff --git a/R/generalizedlinearmodel.R b/R/generalizedlinearmodel.R index 161d35b3..735cea6b 100644 --- a/R/generalizedlinearmodel.R +++ b/R/generalizedlinearmodel.R @@ -470,7 +470,14 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options, .glmOutlierTable(jaspResults, dataset, options, ready, position = 8, residType = "standardized deviance") .glmOutlierTable(jaspResults, dataset, options, ready, position = 8, residType = "studentized deviance") - .glmInfluenceTable(jaspResults, dataset, options, ready, position = 9) + .glmInfluenceTable(jaspResults[["diagnosticsContainer"]], + jaspResults[["glmModels"]][["object"]][["fullModel"]], + dataset, options, ready, position = 9) + + .regressionExportResiduals(jaspResults, + jaspResults[["glmModels"]][["object"]][["fullModel"]], + dataset, options, ready) + .glmMulticolliTable(jaspResults, dataset, options, ready, position = 10) return() @@ -896,29 +903,31 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options, # Table: Influential cases -.glmInfluenceTable <- function(jaspResults, dataset, options, ready, position) { +.glmInfluenceTable <- function(jaspResults, model, dataset, options, ready, position, linRegAnalysis = FALSE) { tableOptionsOn <- c(options[["dfbetas"]], options[["dffits"]], options[["covarianceRatio"]], - options[["cooksDistance"]], - options[["leverage"]]) + options[["leverage"]], + options[["mahalanobis"]]) - - if (!ready | !any(tableOptionsOn)) + if (!ready | !options[["residualCasewiseDiagnostic"]]) return() - tableOptions <- c("dfbetas", "dffits", "covarianceRatio", "cooksDistance", "leverage") + tableOptions <- c("dfbetas", "dffits", "covarianceRatio", "leverage", "mahalanobis") tableOptionsClicked <- tableOptions[tableOptionsOn] + tableOptionsClicked <- c("cooksDistance", tableOptionsClicked) - if (is.null(jaspResults[["diagnosticsContainer"]][["influenceTable"]])) { + if (is.null(jaspResults[["influenceTable"]])) { influenceTable <- createJaspTable(gettext("Table: Influential Cases")) influenceTable$dependOn(optionsFromObject = jaspResults[["modelSummary"]], options = tableOptions) + influenceTable$dependOn(c("residualCasewiseDiagnostic", "residualCasewiseDiagnosticType", + "residualCasewiseDiagnosticZThreshold", "residualCasewiseDiagnosticCooksDistanceThreshold")) influenceTable$position <- position influenceTable$showSpecifiedColumnsOnly <- TRUE - jaspResults[["diagnosticsContainer"]][["influenceTable"]] <- influenceTable + jaspResults[["influenceTable"]] <- influenceTable } tableOptionToColName <- function(x) { @@ -927,21 +936,28 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options, "dffits" = "DFFITS", "covarianceRatio" = "Covariance Ratio", "cooksDistance" = "Cook's Distance", - "leverage" = "Leverage") + "leverage" = "Leverage", + "mahalanobis" = "Mahalanobis") } - if (is.null(jaspResults[["glmModels"]])) { + if (is.null(model)) { for (option in tableOptionsClicked) { colTitle <- tableOptionToColName(option) - jaspResults[["influenceTable"]]$addColumnInfo(name = option, title = gettext(colTitle), type = "number") + influenceTable$addColumnInfo(name = option, title = gettext(colTitle), type = "number") } } else { - glmFullModel <- jaspResults[["glmModels"]][["object"]][["fullModel"]] + colNameList <- c() - jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addColumnInfo(name = "caseN", title = "Case Number", type = "integer") + influenceTable$addColumnInfo(name = "caseN", title = "Case Number", type = "integer") + influenceTable$addColumnInfo(name = "stdResidual", title = gettext("Std. Residual"), type = "number", format = "dp:3") + influenceTable$addColumnInfo(name = "dependent", title = options$dependent, type = "number") + influenceTable$addColumnInfo(name = "predicted", title = gettext("Predicted Value"), type = "number") + influenceTable$addColumnInfo(name = "residual", title = gettext("Residual"), type = "number", format = "dp:3") + + alwaysPresent <- c("caseN", "stdResidual", "dependent", "predicted", "residual") for (option in tableOptionsClicked) { if (option == "dfbetas") { - predictors <- names(glmFullModel$coefficients) + predictors <- names(model$coefficients) for (predictor in predictors) { dfbetasName <- gettextf("DFBETAS_%1s", predictor) colNameList <- c(colNameList, dfbetasName) @@ -949,19 +965,23 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options, dfbetasTitle <- gettext("DFBETAS:Intercept") else dfbetasTitle <- gettextf("DFBETAS:%1s", gsub(":", "*", predictor)) - jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addColumnInfo(name = dfbetasName, title = dfbetasTitle, type = "number") + influenceTable$addColumnInfo(name = dfbetasName, title = dfbetasTitle, type = "number") } } else { colNameList <- c(colNameList, option) colTitle <- tableOptionToColName(option) - jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addColumnInfo(name = option, title = gettext(colTitle), type = "number") + influenceTable$addColumnInfo(name = option, title = gettext(colTitle), type = "number") } } - .glmInfluenceTableFill(jaspResults, dataset, options, ready, model = glmFullModel, influenceMeasures = tableOptionsClicked, colNames = colNameList) + .glmInfluenceTableFill(influenceTable, dataset, options, ready, + model = model, + influenceMeasures = tableOptionsClicked, + colNames = c(colNameList, alwaysPresent)) } } -.glmInfluenceTableFill <- function(jaspResults, dataset, options, ready, model, influenceMeasures, colNames) { +.glmInfluenceTableFill <- function(influenceTable, dataset, options, ready, model, influenceMeasures, colNames) { + influenceRes <- influence.measures(model) nDFBETAS <- length(names(model$coefficients)) @@ -978,25 +998,48 @@ GeneralizedLinearModelInternal <- function(jaspResults, dataset = NULL, options, colInd <- c(colInd, optionToColInd(measure, nDFBETAS)) } - influenceResData <- as.data.frame(influenceRes[["infmat"]][, colInd]) - names(influenceResData) <- colNames - caseN <- seq.int(nrow(influenceResData)) - influenceResData <- cbind(caseN, influenceResData) - - influenceResSig <- influenceRes[["is.inf"]][, colInd] - - if (length(colInd) > 1) { - influenceResDataFinal <- influenceResData[rowSums(influenceResSig) > 0, , drop = FALSE] - } else { - influenceResDataFinal <- influenceResData[influenceResSig > 0, , drop = FALSE] - } - - nRowInfluential <- nrow(influenceResDataFinal) - - if (nRowInfluential == 0) - jaspResults[["diagnosticsContainer"]][["influenceTable"]]$addFootnote(gettext("No influential cases found.")) + influenceResData <- as.data.frame(influenceRes[["infmat"]][, colInd]) + colnames(influenceResData)[1:length(colInd)] <- colNames[1:length(colInd)] + + influenceResData[["caseN"]] <- seq.int(nrow(influenceResData)) + influenceResData[["stdResidual"]] <- rstandard(model) + influenceResData[["dependent"]] <- model.frame(model)[[options$dependent]] + influenceResData[["predicted"]] <- model$fitted.values + influenceResData[["residual"]] <- model$residual +# browser() + modelMatrix <- as.data.frame(model.matrix(model)) + modelMatrix <- modelMatrix[colnames(modelMatrix) != "(Intercept)"] + influenceResData[["mahalanobis"]] <- mahalanobis(modelMatrix, center = colMeans(modelMatrix), cov = cov(modelMatrix)) + + if (options$residualCasewiseDiagnosticType == "cooksDistance") + index <- which(abs(influenceResData[["cooksDistance"]]) > options$residualCasewiseDiagnosticCooksDistanceThreshold) + else if (options$residualCasewiseDiagnosticType == "outliersOutside") + index <- which(abs(influenceResData[["stdResidual"]]) > options$residualCasewiseDiagnosticZThreshold) + else # all + index <- seq.int(nrow(influenceResData)) + + # funky statement to ensure a df even if only 1 row + influenceResSig <- subset(influenceRes[["is.inf"]], 1:nrow(influenceResData) %in% index, select = colInd) + colnames(influenceResSig) <- colNames[1:length(colInd)] + + influenceResData <- influenceResData[index, ] + + if (length(index) == 0) + influenceTable$addFootnote(gettext("No influential cases found.")) else { - jaspResults[["diagnosticsContainer"]][["influenceTable"]]$setData(influenceResDataFinal) + influenceTable$setData(influenceResData) + # if any other metrix show influence, add footnotes: + if (sum(influenceResSig) > 0) { + for (thisCol in colnames(influenceResSig)) { + if (sum(influenceResSig[, thisCol]) > 0) + influenceTable$addFootnote( + gettext("Potentially influential case, according to the selected influence measure."), + colNames = thisCol, + rowNames = rownames(influenceResData)[influenceResSig[, thisCol]], + symbol = "*" + ) + } + } } } diff --git a/R/glmCommonFunctions.R b/R/glmCommonFunctions.R index 6525f28b..d87379a4 100644 --- a/R/glmCommonFunctions.R +++ b/R/glmCommonFunctions.R @@ -198,6 +198,23 @@ } } +.regressionExportResiduals <- function(container, model, dataset, options, ready) { + + if (isFALSE(options[["residualsSavedToData"]])) + return() + + if (is.null(container[["residualsSavedToDataColumn"]]) && options[["residualsSavedToDataColumn"]] != "") { + + residuals <- model[["residuals"]] # extract residuals + + container[["residualsSavedToDataColumn"]] <- createJaspColumn(columnName = options[["residualsSavedToDataColumn"]]) + container[["residualsSavedToDataColumn"]]$dependOn(options = c("residualsSavedToDataColumn", "residualsSavedToData")) + container[["residualsSavedToDataColumn"]]$setScale(residuals) + + } + +} + .constInfoTransform <- function(family, x) { switch(family, "bernoulli" = 1/(sin(sqrt(x))), diff --git a/R/regressionlinear.R b/R/regressionlinear.R index f33ac7f8..a548df2c 100755 --- a/R/regressionlinear.R +++ b/R/regressionlinear.R @@ -51,9 +51,11 @@ RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) { # these output elements show statistics of the "final model" (lm fit with all predictors in enter method and last lm fit in stepping methods) finalModel <- model[[length(model)]] - if (options$residualCasewiseDiagnostic && is.null(modelContainer[["casewiseTable"]])) - .linregCreateCasewiseDiagnosticsTable(modelContainer, finalModel, options, position = 9) - + if (options$residualCasewiseDiagnostic && is.null(modelContainer[["influenceTable"]])) + .glmInfluenceTable(modelContainer, finalModel$fit, dataset, options, ready = TRUE, position = 9, linRegAnalysis = TRUE) + .regressionExportResiduals(modelContainer, finalModel$fit, dataset, options, ready = TRUE) + + if (options$residualStatistic && is.null(modelContainer[["residualsTable"]])) .linregCreateResidualsTable(modelContainer, finalModel, options, position = 10) @@ -191,7 +193,7 @@ RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) { else summaryTable <- createJaspTable(gettextf("Model Summary - %s", options[['dependent']])) - summaryTable$dependOn(c("residualDurbinWatson", "rSquaredChange")) + summaryTable$dependOn(c("residualDurbinWatson", "rSquaredChange", "fChange", "modelAICBIC")) summaryTable$position <- position summaryTable$showSpecifiedColumnsOnly <- TRUE @@ -201,9 +203,16 @@ RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) { summaryTable$addColumnInfo(name = "adjR2", title = gettextf("Adjusted R%s", "\u00B2"), type = "number", format = "dp:3") summaryTable$addColumnInfo(name = "RMSE", title = gettext("RMSE"), type = "number") - if (options$rSquaredChange) { - summaryTable$addColumnInfo(name = "R2c", title = gettextf("R%s Change", "\u00B2"), type = "number", format = "dp:3") - summaryTable$addColumnInfo(name = "Fc", title = gettext("F Change"), type = "number") + if (options$modelAICBIC) { + summaryTable$addColumnInfo(name = "AIC", title = gettext("AIC"), type = "number", format = "dp:3") + summaryTable$addColumnInfo(name = "BIC", title = gettext("BIC"), type = "number", format = "dp:3") + } + + if (options$rSquaredChange || options$fChange) { + if (options$rSquaredChange) + summaryTable$addColumnInfo(name = "R2c", title = gettextf("R%s Change", "\u00B2"), type = "number", format = "dp:3") + if (options$fChange) + summaryTable$addColumnInfo(name = "Fc", title = gettext("F Change"), type = "number") summaryTable$addColumnInfo(name = "df1", title = gettext("df1"), type = "integer") summaryTable$addColumnInfo(name = "df2", title = gettext("df2"), type = "integer") summaryTable$addColumnInfo(name = "p", title = gettext("p"), type = "pvalue") @@ -242,6 +251,8 @@ RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) { R = as.numeric(sqrt(lmSummary$r.squared)), R2 = as.numeric(lmSummary$r.squared), adjR2 = as.numeric(lmSummary$adj.r.squared), + AIC = as.numeric(AIC(model[[i]][["fit"]])), + BIC = as.numeric(BIC(model[[i]][["fit"]])), RMSE = as.numeric(lmSummary$sigma), R2c = rSquareChange$R2c, Fc = rSquareChange$Fc, @@ -299,7 +310,7 @@ RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) { coeffTable <- createJaspTable(gettext("Coefficients")) coeffTable$dependOn(c("coefficientEstimate", "coefficientCi", "coefficientCiLevel", - "collinearityDiagnostic", "vovkSellke")) + "collinearityStatistic", "vovkSellke")) coeffTable$position <- position coeffTable$showSpecifiedColumnsOnly <- TRUE @@ -319,7 +330,7 @@ RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) { coeffTable$addColumnInfo(name = "upper", title = gettext("Upper"), type = "number", overtitle = overtitle) } - if (options$collinearityDiagnostic) { + if (options$collinearityStatistic) { overtitle <- gettext("Collinearity Statistics") coeffTable$addColumnInfo(name = "tolerance", title = gettext("Tolerance"), type = "number", format = "dp:3", overtitle = overtitle) coeffTable$addColumnInfo(name = "VIF", title = gettext("VIF"), type = "number", overtitle = overtitle) @@ -593,10 +604,28 @@ RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) { caseDiagTable$addColumnInfo(name = "cooksD", title = gettext("Cook's Distance"), type = "number", format = "dp:3") if (!is.null(finalModel)) { - caseDiagData <- .linregGetCasewiseDiagnostics(finalModel$fit, options) - caseDiagTable$setData(caseDiagData) + + stdResidualsAll <- rstandard(finalModel$fit) + # stdResidualsAll <- statmod::qresid(fit) + # stdResidualsAll <- rstudent(fit + cooksDAll <- cooks.distance(finalModel$fit) + + if (options$residualCasewiseDiagnosticType == "cooksDistance") + index <- which(abs(cooksDAll) > options$residualCasewiseDiagnosticCooksDistanceThreshold) + else if (options$residualCasewiseDiagnosticType == "outliersOutside") + index <- which(abs(stdResidualsAll) > options$residualCasewiseDiagnosticZThreshold) + else # all + index <- seq_along(predictedValuesAll) + + # browser() + diagnosticsContainer <- createJaspContainer(title = gettext("Diagnostics")) + modelContainer[["diagnosticsContainer"]] <- diagnosticsContainer + + .glmInfluenceTable(modelContainer, dataset[index, ], options, ready = TRUE, + position = position, linRegAnalysis = TRUE) + - if (length(caseDiagData) == 0) { + if (sum(index) == 0) { message <- switch( options[["residualCasewiseDiagnosticType"]], cooksDistance = gettextf("No cases where |Cook's distance| > %s", options[["residualCasewiseDiagnosticCooksDistanceThreshold"]]), @@ -1504,6 +1533,8 @@ RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) { residualsAll <- residuals(fit) stdPredictedValuesAll <- (predictedValuesAll - mean(predictedValuesAll)) / sd(predictedValuesAll) stdResidualsAll <- rstandard(fit) + # stdResidualsAll <- statmod::qresid(fit) + # stdResidualsAll <- rstudent(fit cooksDAll <- cooks.distance(fit) if (options$residualCasewiseDiagnosticType == "cooksDistance") diff --git a/inst/help/Correlation.md b/inst/help/Correlation.md index 11f44767..c0e5d65d 100755 --- a/inst/help/Correlation.md +++ b/inst/help/Correlation.md @@ -43,6 +43,7 @@ The Correlation analysis allows estimation of the population correlation, as wel - Vovk-Selke maximum p-ratio: The bound 1/(-e p log(p)) is derived from the shape of the p-value distribution. Under the null hypothesis (H0) it is uniform (0,1), and under the alternative (H1) it is decreasing in p, e.g., a beta (α, 1) distribution, where 0 < α < 1. The Vovk-Sellke MPR is obtained by choosing the shape α of the distribution under H1 such that the obtained p-value is maximally diagnostic. The value is then the ratio of the densities at point p under H0 and H1. For example, if the two-sided p-value equals .05, the Vovk-Sellke MPR equals 2.46, indicating that this p-value is at most 2.46 times more likely to occur under H1 than under H0. - Effect size (Fisher's z): The Fisher transformed effect size with standard error. - Sample size: The number of complete observations for a given pair of variables. +- Covariance: The covariance between each pair of variables. #### Plots - Scatter plots: Display a scatter plots for each possible combination of the selected variables. In a matrix format, these are placed above the diagonal. diff --git a/inst/help/GeneralizedLinearModel.md b/inst/help/GeneralizedLinearModel.md index 41f108c2..b3b5235c 100644 --- a/inst/help/GeneralizedLinearModel.md +++ b/inst/help/GeneralizedLinearModel.md @@ -79,12 +79,18 @@ The following table summarized the available distributions (also called families - Top n standardized quantile residuals - Top n standardized deviance residuals - Top n studentized deviance residuals -- Show influential cases: A table showing influential observations (i.e. outliers with high leverage) according to the selected measure(s). The column "Case Number" refers to the row number of the observation in the data set. The cut-offs for determining whether a case is influential are listed as follows. - - DFBETAS: When the absolute value of DFBETAS is greater than 1. - - DFFITS: When the absolute value of DFFITS is greater than 3 * sqrt(k/(n-k)) where k refers to the number of parameters in the model and n refers to the sample size. - - Covariance ratio: When the covariance ratio is greater than 3 * k/(n-k). - - Cook's distance: When Cook's distance exceeds the 50th percentile of the F distribution with (k, n-k) degrees of freedom. - - Leverages: When the leverages are greater than 3 * k/n. +- Residuals: + - Casewise diagnostic: Casewise and summarized diagnostics for the residuals. + - Standard residual > 3: Outliers outside x standard deviations: Display diagnostics for cases where the absolute value of the standardized residual is larger than x; default is x=3. + - Cook's distance > 1 : Display diagnostics for cases where the value of Cook’s distance is larger than x; default is x = 1. + - All cases: Display diagnostics for all cases. + - Cases are marked as influential in the table, according to the following thresholds: + - DFBETAS: When the absolute value of DFBETAS is greater than 1. + - DFFITS: When the absolute value of DFFITS is greater than 3 * sqrt(k/(n-k)) where k refers to the number of parameters in the model and n refers to the sample size. + - Covariance ratio: When the covariance ratio is greater than 3 * k/(n-k). + - Cook's distance: When Cook's distance exceeds the 50th percentile of the F distribution with (k, n-k) degrees of freedom. + - Leverages: When the leverages are greater than 3 * k/n. + - Append residuals to data: Save the residuals of the most complex model as a new column in the data file. - Multicollinearity: A table showing multicollinearity diagnostics of the model. The choices of measures are as follows. - Tolerance - VIF: Variance Inflation Factor. diff --git a/inst/help/RegressionLinear.md b/inst/help/RegressionLinear.md index 1e68b090..442b7fa2 100755 --- a/inst/help/RegressionLinear.md +++ b/inst/help/RegressionLinear.md @@ -39,22 +39,38 @@ Linear regression allows the user to model a linear relationship between one or - Estimates: Unstandardized and standardized coefficient estimates, standard errors, t-values, and their corresponding p-values. - From `...` bootstraps: By selecting this option, bootstrapped estimation is applied. By default, the number of replications is set to 1000. This can be changed into the desired number. - Confidence Intervals: By selecting this option, confidence intervals for the estimated mean difference will be included. By default the confidence level is set to 95%. This can be changed into the desired percentage. - - Covariance matrix: Display the covariance matrix of the predictor variables, per model. + - Tolerance and VIF: Display Tolerance and Variance Inflation Factor for each predictor in the model to assess multicollinearity. - Vovk-Sellke Maximum *p*-Ratio: The bound 1/(-e *p* log(*p*)) is derived from the shape of the *p*-value distribution. Under the null hypothesis (H0) it is uniform(0,1), and under the alternative (H1) it is decreasing in *p*, e.g., a beta(α, 1) distribution, where 0 < α < 1. The Vovk-Sellke MPR is obtained by choosing the shape α of the distribution under H1 such that the obtained *p*-value is *maximally diagnostic*. The value is then the ratio of the densities at point *p* under H0 and H1. For example, if the two-sided *p*-value equals .05, the Vovk-Sellke MPR equals 2.46, indicating that this *p*-value is at most 2.46 times more likely to occur under H1 than under H0. + +- Model Summary: + - R squared change: Change in R squared between the different steps in Backward, Forward, and Stepwise regression, with corresponding significance test (i.e., df1, df2, p-value). + - F change: Change in F between the different steps in Backward, Forward, and Stepwise regression, with corresponding significance test (i.e., df1, df2, p-value). + - AIC and BIC: Display Akaike Information Criterion and Bayesian Information Criterion. + - Durbin-Watson: Durbin-Watson statistic to test the autocorrelation of the residuals. + +- Display: - Model fit: Separate ANOVA table for each model (i.e., each step in Backward, Forward, and Stepwise regression). - - R squared change: Change in R squared between the different steps in Backward, Forward, and Stepwise regression, with corresponding significance test (i.e., F change value, df1, df2, p-value). - Descriptives: Samples size, sample mean, sample standard deviation, and standard error of the mean. - Part and partial correlations: Semipartial and partial correlations. + - Coefficient covariance matrix: Display the covariance matrix of the predictor variables, per model. - Collinearity diagnostics: Collinearity statistics, eigenvalues, condition indices, and variance proportions. - Residuals: - Statistics: Display descriptive statistics of the residuals and predicted values. - - Durbin-Watson: Durbin-Watson statistic to test the autocorrelation of the residuals. - Casewise diagnostic: Casewise and summarized diagnostics for the residuals. - Standard residual > 3: Outliers outside x standard deviations: Display diagnostics for cases where the absolute value of the standardized residual is larger than x; default is x=3. - Cook's distance > 1 : Display diagnostics for cases where the value of Cook’s distance is larger than x; default is x = 1. - All cases: Display diagnostics for all cases. + - Cases are marked as influential in the table, according to the following thresholds: + - DFBETAS: When the absolute value of DFBETAS is greater than 1. + - DFFITS: When the absolute value of DFFITS is greater than 3 * sqrt(k/(n-k)) where k refers to the number of parameters in the model and n refers to the sample size. + - Covariance ratio: When the covariance ratio is greater than 3 * k/(n-k). + - Cook's distance: When Cook's distance exceeds the 50th percentile of the F distribution with (k, n-k) degrees of freedom. + - Leverages: When the leverages are greater than 3 * k/n. + - Append residuals to data: Save the residuals of the most complex model as a new column in the data file. + + ### Method Specification diff --git a/inst/qml/Correlation.qml b/inst/qml/Correlation.qml index 3e449873..e07afdd7 100644 --- a/inst/qml/Correlation.qml +++ b/inst/qml/Correlation.qml @@ -68,6 +68,8 @@ Form CheckBox { name: "vovkSellke"; label: qsTr("Vovk-Sellke maximum p-ratio") } CheckBox { name: "effectSize"; label: qsTr("Effect size (Fisher's z)") } CheckBox { name: "sampleSize"; label: qsTr("Sample size") } + CheckBox { name: "covariance"; label: qsTr("Covariance") } + } diff --git a/inst/qml/GeneralizedLinearModel.qml b/inst/qml/GeneralizedLinearModel.qml index f21b2094..090d68ac 100644 --- a/inst/qml/GeneralizedLinearModel.qml +++ b/inst/qml/GeneralizedLinearModel.qml @@ -20,11 +20,15 @@ import QtQuick import QtQuick.Layouts import JASP import JASP.Controls -import "./common" as GLM +import "./common" as Common // All Analysis forms must be built with the From QML item Form { + id: form + property int analysis: Common.Type.Analysis.GLM + property int framework: Common.Type.Framework.Classical + Formula { lhs: "dependent" @@ -32,7 +36,7 @@ Form userMustSpecify: "covariates" } - GLM.GlmInputComponent { id: input} + Common.GlmInputComponent {} Section { @@ -94,7 +98,9 @@ Form title: qsTr("Diagnostics") enabled: input.otherFamilyNotSelected - GLM.GlmResidualAnalysisPlotsComponent {} + Common.OutlierComponent { id: outlierComponentt} + + Common.GlmResidualAnalysisPlotsComponent {} Group { @@ -102,7 +108,7 @@ Form CheckBox { name: "quantileResidualOutlierTable" - label: qsTr("Standardized quantile residuals: top") + label: qsTr("Standardized quantile residuals : top") childrenOnSameRow: true IntegerField { name: "quantileResidualOutlierTableTopN"; defaultValue: 3 } } @@ -122,15 +128,7 @@ Form } } - Group - { - title: qsTr("Show Influential Cases") - CheckBox { name: "dfbetas"; label: qsTr("DFBETAS") } - CheckBox { name: "dffits"; label: qsTr("DFFITS") } - CheckBox { name: "covarianceRatio"; label: qsTr("Covariance ratio") } - CheckBox { name: "cooksDistance"; label: qsTr("Cook's distance") } - CheckBox { name: "leverage"; label: qsTr("Leverages") } - } + Group { @@ -140,7 +138,7 @@ Form } } - GLM.EmmComponent { enabled: input.otherFamilyNotSelected } + Common.EmmComponent { enabled: input.otherFamilyNotSelected } Section { diff --git a/inst/qml/RegressionLinear.qml b/inst/qml/RegressionLinear.qml index 9f82fec6..33bc9704 100644 --- a/inst/qml/RegressionLinear.qml +++ b/inst/qml/RegressionLinear.qml @@ -19,9 +19,14 @@ import QtQuick import QtQuick.Layouts import JASP import JASP.Controls +import "./common" as Common Form { + id: form + property int analysis: Common.Type.Analysis.LinearRegression + property int framework: Common.Type.Framework.Classical + Formula { lhs: "dependent" @@ -74,84 +79,68 @@ Form { title: qsTr("Statistics") + columns: 2 + Group + { + title: qsTr("Model Summary") + CheckBox { name: "rSquaredChange"; label: qsTr("R squared change") } + CheckBox { name: "fChange"; label: qsTr("F change") } + CheckBox { name: "modelAICBIC"; label: qsTr("AIC and BIC") } + CheckBox { name: "residualDurbinWatson"; label: qsTr("Durbin-Watson") } + + } + Group { title: qsTr("Coefficients") - columns: 2 - Layout.columnSpan: 2 - Group + CheckBox { + name: "coefficientEstimate" + label: qsTr("Estimates") + checked: true + onClicked: { if (!checked && bootstrapping.checked) bootstrapping.click() } CheckBox { - name: "coefficientEstimate" - label: qsTr("Estimates") - checked: true - onClicked: { if (!checked && bootstrapping.checked) bootstrapping.click() } - CheckBox + id: bootstrapping + name: "coefficientBootstrap" + label: qsTr("From") + childrenOnSameRow: true + IntegerField { - id: bootstrapping - name: "coefficientBootstrap" - label: qsTr("From") - childrenOnSameRow: true - IntegerField - { - name: "coefficientBootstrapSamples" - defaultValue: 5000 - fieldWidth: 50 - min: 100 - afterLabel: qsTr("bootstraps") - } + name: "coefficientBootstrapSamples" + defaultValue: 5000 + fieldWidth: 50 + min: 100 + afterLabel: qsTr("bootstraps") } } - - CheckBox - { - name: "coefficientCi"; label: qsTr("Confidence intervals") - childrenOnSameRow: true - CIField { name: "coefficientCiLevel" } - } - CheckBox { name: "covarianceMatrix"; label: qsTr("Covariance matrix") } - CheckBox { name: "vovkSellke"; label: qsTr("Vovk-Sellke maximum p-ratio") } } - Group + CheckBox { - CheckBox { name: "modelFit"; label: qsTr("Model fit"); checked: true } - CheckBox { name: "rSquaredChange"; label: qsTr("R squared change") } - CheckBox { name: "descriptives"; label: qsTr("Descriptives") } - CheckBox { name: "partAndPartialCorrelation"; label: qsTr("Part and partial correlations") } - CheckBox { name: "collinearityDiagnostic"; label: qsTr("Collinearity diagnostics") } + name: "coefficientCi"; label: qsTr("Confidence intervals") + childrenOnSameRow: true + CIField { name: "coefficientCiLevel" } } + CheckBox { name: "collinearityStatistic"; label: qsTr("Tolerance and VIF") } + CheckBox { name: "vovkSellke"; label: qsTr("Vovk-Sellke maximum p-ratio") } } Group { - title: qsTr("Residuals") - CheckBox { name: "residualStatistic"; label: qsTr("Statistics") } - CheckBox { name: "residualDurbinWatson"; label: qsTr("Durbin-Watson") } - CheckBox - { - name: "residualCasewiseDiagnostic"; label: qsTr("Casewise diagnostics") - RadioButtonGroup - { - name: "residualCasewiseDiagnosticType" - RadioButton - { - value: "outliersOutside"; label: qsTr("Standard residual >"); checked: true - childrenOnSameRow: true - DoubleField { name: "residualCasewiseDiagnosticZThreshold"; defaultValue: 3 } - } - RadioButton - { - value: "cooksDistance"; label: qsTr("Cook's distance >") - childrenOnSameRow: true - DoubleField { name: "residualCasewiseDiagnosticCooksDistanceThreshold"; defaultValue: 1 } - } - RadioButton { value: "allCases"; label: qsTr("All") } - } - } + title: qsTr("Display") + CheckBox { name: "modelFit"; label: qsTr("Model fit"); checked: true } + CheckBox { name: "descriptives"; label: qsTr("Descriptives") } + CheckBox { name: "partAndPartialCorrelation"; label: qsTr("Part and partial correlations") } + CheckBox { name: "covarianceMatrix"; label: qsTr("Coefficients covariance matrix") } + CheckBox { name: "collinearityDiagnostic"; label: qsTr("Collinearity diagnostics") } + } + + + Common.OutlierComponent { id: outlierComponentt} + } Section diff --git a/inst/qml/RegressionLogistic.qml b/inst/qml/RegressionLogistic.qml index dabde8c8..22be9a43 100644 --- a/inst/qml/RegressionLogistic.qml +++ b/inst/qml/RegressionLogistic.qml @@ -123,7 +123,7 @@ Form CheckBox { name: "vovkSellke"; label: qsTr("Vovk-Sellke maximum p-ratio") } } - CheckBox { name: "multicollinearity"; label: qsTr("Multicollinearity Diagnostics") } + CheckBox { name: "multicollinearity"; label: qsTr("Multicollinearity diagnostics") } } Group diff --git a/inst/qml/common/OutlierComponent.qml b/inst/qml/common/OutlierComponent.qml new file mode 100644 index 00000000..560318e8 --- /dev/null +++ b/inst/qml/common/OutlierComponent.qml @@ -0,0 +1,84 @@ +// +// Copyright (C) 2013-2018 University of Amsterdam +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public +// License along with this program. If not, see +// . +// + +import QtQuick +import QtQuick.Layouts +import JASP +import JASP.Controls + +// All Analysis forms must be built with the From QML item +Group +{ + title: qsTr("Residuals") + CheckBox + { + name: "residualStatistic" + label: qsTr("Statistics") + visible: analysis === Type.Analysis.LinearRegression + } + + CheckBox + { + name: "residualCasewiseDiagnostic"; label: qsTr("Casewise diagnostics") + columns: 2 + RadioButtonGroup + { + name: "residualCasewiseDiagnosticType" + RadioButton + { + value: "outliersOutside"; label: qsTr("Std. residual >"); checked: true + childrenOnSameRow: true + DoubleField { name: "residualCasewiseDiagnosticZThreshold"; defaultValue: 3 } + } + RadioButton + { + value: "cooksDistance"; label: qsTr("Cook's dist >") + childrenOnSameRow: true + DoubleField { name: "residualCasewiseDiagnosticCooksDistanceThreshold"; defaultValue: 1 } + } + RadioButton { value: "allCases"; label: qsTr("All") } + } + + Group + { + CheckBox { name: "dfbetas"; label: qsTr("DFBETAS") } + CheckBox { name: "dffits"; label: qsTr("DFFITS") } + CheckBox { name: "covarianceRatio"; label: qsTr("Cov ratio") } + CheckBox { name: "leverage"; label: qsTr("Leverage") } + CheckBox { name: "mahalanobis"; label: qsTr("Mahalanobis") } + + } + + } + + CheckBox + { + id: residualsSavedToData + name: "residualsSavedToData" + text: qsTr("Append residuals to data") + + ComputedColumnField + { + name: "residualsSavedToDataColumn" + text: qsTr("Column name") + placeholderText: qsTr("e.g., residuals") + fieldWidth: 120 + enabled: residualsSavedToData.checked + } + } +} \ No newline at end of file diff --git a/inst/qml/common/Type.qml b/inst/qml/common/Type.qml new file mode 100644 index 00000000..8f0314ef --- /dev/null +++ b/inst/qml/common/Type.qml @@ -0,0 +1,34 @@ +// +// Copyright (C) 2013-2022 University of Amsterdam +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public +// License along with this program. If not, see +// . +// + +import QtQuick + +Item +{ + enum Framework + { + Classical, + Bayesian + } + + enum Analysis + { + LinearRegression, + GLM + } +} diff --git a/tests/testthat/test-correlation.R b/tests/testthat/test-correlation.R index fdc09a65..57d5bf30 100644 --- a/tests/testthat/test-correlation.R +++ b/tests/testthat/test-correlation.R @@ -15,6 +15,7 @@ options$scatterPlotStatistic <- TRUE options$sampleSize <- TRUE options$spearman <- TRUE options$effectSize <- TRUE +options$covariance <- TRUE options$variables <- list("contNormal", "contGamma", "contcor1", "debMiss30") set.seed(1) results <- jaspTools::runAnalysis("Correlation", "debug.csv", options) @@ -65,63 +66,66 @@ test_that("Spearman's rho heatmap matches", { test_that("Correlation Table results match", { table <- results[["results"]][["mainTable"]][["data"]] jaspTools::expect_equal_tables(table, - list(-0.0266729903526464, -0.0266666666666667, -0.15041383947394, 0.694237192757787, - 0.0677819401868667, 0.097080506140607, 1, -0.0592696913271387, - -0.0592003859505642, -0.252680329590477, 0.558497687623534, - 0.101534616513362, 0.138832075039338, 1, 100, -0.0341927371158639, - -0.0341794179417942, -0.229059752837501, 0.73526094223706, 0.101287863086583, - 0.163335243866025, 1, "", "", "", + list(-0.0960185736017089, -0.0266729903526464, -0.0266666666666667, + -0.15041383947394, 0.694237192757787, 0.0677819401868667, 0.097080506140607, + 1, -0.0592696913271387, -0.0592003859505643, -0.252680329590477, + 0.558497687623534, 0.101534616513362, 0.138832075039338, 1, + 100, -0.0341927371158639, -0.0341794179417942, -0.229059752837501, + 0.73526094223706, 0.101287863086583, 0.163335243866025, 1, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", - "", "", "", "", 0.0960518800942078, + "", "", "", "", "", + "", "", 0.17245614334619, 0.0960518800942078, 0.0957575757575758, -0.0373731390706183, 0.15805971278439, 0.0671525377474683, 0.22888829058577, 1.26165085338952, 0.16244591532518, 0.161031927910319, - -0.0365419981231702, 0.109479317429059, 0.101534616513362, 0.346490687832583, + -0.03654199812317, 0.109479317429059, 0.101534616513362, 0.346490687832583, 1.51909334263147, 100, 0.143821784353644, 0.142838283828383, -0.0551264633902869, 0.156055917429528, 0.102156998059743, 0.329997969616898, - 1.26907384634445, -0.142995727486937, -0.142028985507246, -0.302753498566225, - 0.0820536231540238, 0.0798902375559328, 0.0186955275517326, - 1.7930869050848, -0.165268370300722, -0.163779936728643, -0.383976976749411, - 0.175488795918533, 0.122169444356305, 0.0740435803355283, 1.20465290217953, - 70, -0.208206182304557, -0.20524888461202, -0.419968595404043, - 0.0883143492445961, 0.1232177656224, 0.0312313683562874, 1.71644871351761, - "contNormal", "", "", "", "", + 1.26907384634445, -4.54234111641073, -0.142995727486937, -0.142028985507246, + -0.302753498566225, 0.0820536231540238, 0.0798902375559328, + 0.0186955275517326, 1.7930869050848, -0.165268370300722, -0.163779936728643, + -0.383976976749411, 0.175488795918533, 0.122169444356305, 0.0740435803355283, + 1.20465290217953, 70, -0.208206182304557, -0.20524888461202, + -0.419968595404043, 0.0883143492445961, 0.1232177656224, 0.0312313683562874, + 1.71644871351761, "contNormal", "", "", "", + "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", - "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", - "", -0.12919895977801, -0.128484848484848, -0.265695191109086, - 0.0582140897855729, 0.0666069384089473, 0.00872549413938944, - 2.22231002833737, -0.157853482232319, -0.156555303374674, -0.342443190888167, - 0.119832226549265, 0.101534616513362, 0.041127497099264, 1.44695679291394, - 100, -0.185861370097632, -0.183750375037504, -0.3669254548718, - 0.0673279518522942, 0.102488331218907, 0.0131420647686214, 2.02506621791795, - 0.150610965569096, 0.149482401656315, -0.0220394444690113, 0.0672280148907629, + "", "", "", "", "", "", "", "", -0.242747263345942, -0.12919895977801, + -0.128484848484848, -0.265695191109086, 0.0582140897855729, + 0.0666069384089473, 0.00872549413938947, 2.22231002833737, -0.157853482232319, + -0.156555303374674, -0.342443190888167, 0.119832226549265, 0.101534616513362, + 0.0411274970992641, 1.44695679291394, 100, -0.185861370097632, + -0.183750375037504, -0.3669254548718, 0.0673279518522942, 0.102488331218907, + 0.0131420647686214, 2.02506621791795, 6.82842148086829, 0.150610965569096, + 0.149482401656315, -0.0220394444690113, 0.0672280148907629, 0.0796979487434949, 0.321004247781641, 2.02696064848969, 0.173519134850064, - 0.171798366528544, -0.0658332206699671, 0.155001605969274, 0.122169444356305, + 0.171798366528544, -0.065833220669967, 0.155001605969273, 0.122169444356305, 0.39098888887008, 1.27306010334954, 70, 0.214387923136248, 0.211162627941562, -0.0250545433406204, 0.0793767652827101, 0.123275189177231, 0.425046791840888, 1.82929064467251, "contGamma", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", - "", "", "", "", "", "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", - -0.058451570593472, -0.0583850931677019, -0.226151894979294, - 0.474719152682399, 0.0813754587012183, 0.109381708643891, 1, - -0.0906702415181397, -0.0904225863977578, -0.318626758463425, - 0.456613508199801, 0.122169444356305, 0.147689385556226, 1, - 70, -0.102978167463976, -0.10261569416499, -0.329641395147143, - 0.3970672317383, 0.122236141506121, 0.135628607517475, 1, "contcor1", + "", "", -2.14900575670369, -0.058451570593472, + -0.0583850931677019, -0.226151894979294, 0.474719152682399, + 0.0813754587012183, 0.109381708643891, 1, -0.0906702415181398, + -0.0904225863977578, -0.318626758463425, 0.456613508199801, + 0.122169444356305, 0.147689385556226, 1, 70, -0.102978167463976, + -0.10261569416499, -0.329641395147143, 0.3970672317383, 0.122236141506121, + 0.135628607517475, 1, "contcor1", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", - "", "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", diff --git a/tests/testthat/test-generalizedlinearmodel.R b/tests/testthat/test-generalizedlinearmodel.R index 8bb0532d..eafc88c2 100644 --- a/tests/testthat/test-generalizedlinearmodel.R +++ b/tests/testthat/test-generalizedlinearmodel.R @@ -14,7 +14,8 @@ addCommonQmlOptions <- function(options) { options, jaspTools:::readQML(file.path(root, "GlmInputComponent.qml")), jaspTools:::readQML(file.path(root, "GlmResidualAnalysisPlotsComponent.qml")), - jaspTools:::readQML(file.path(root, "EmmComponent.qml")) + jaspTools:::readQML(file.path(root, "EmmComponent.qml")), + jaspTools:::readQML(file.path(root, "OutlierComponent.qml")) ) } @@ -94,10 +95,14 @@ options$standardizedResidualOutlierTableTopN <- 3 options$studentizedResidualOutlierTable <- TRUE options$studentizedResidualOutlierTableTopN <- 3 +options$residualCasewiseDiagnostic <- TRUE +options$residualCasewiseDiagnosticType <- "outliersOutside" +options$residualCasewiseDiagnosticZThreshold <- 1 + + options$dfbetas <- TRUE options$dffits <- TRUE options$covarianceRatio <- TRUE -options$cooksDistance <- TRUE options$leverage <- TRUE options$setSeed <- TRUE @@ -224,9 +229,18 @@ test_that("Outlier table based on studentized deviance residuals matches", { test_that("Influential cases table matches", { table <- results[["results"]][["diagnosticsContainer"]][["collection"]][["diagnosticsContainer_influenceTable"]][["data"]] jaspTools::expect_equal_tables(table, - list(4, -0.1795, 0.146, -0.1918, 1.693, 0.0228, 0.2708, - 9, -0.290, 0.750, 1.439, 0.3537, 0.6321, 0.1902, - 10, 0.1632, -0.2761,-0.3971, 1.687, 0.0982, 0.3111))}) + list(-0.607898874134593, 0.562520030505165, 1, 0.0910041875476176, + 0.811136811632276, 0, -0.608545530898244, 0.122014731899716, + 0.0286397537025298, -1.02948417315995, -1.60670300282295, -0.182578461118574, + 0.0432371780205531, 7, 0.0954834805489629, 1.14396823117325, + 0.214285714285714, -0.421414814442793, 0.141203925314399, 0.283760332058426, + -0.341834673218477, -1.10858029927343, 0, -0.290005764938072, + 0.750048649952999, 9, 0.632130380240399, 0.353736621049185, + 0.647058823529412, 1.43918415935164, 0.190243757445277, 0.468418331001346, + 0.717424210957795, 2.32732272718569, 0.484997819985084, -0.707619585051628, + 11, 0.447283433716663, 1.44554646273235, 0.583333333333333, + -0.886667016095175, 0.36304481969478, 0.662151029321139, -0.352325499848782, + -1.2326217135089))}) # multicollinearity table diff --git a/tests/testthat/test-regressionlinear.R b/tests/testthat/test-regressionlinear.R index c6aa20e3..1926d253 100644 --- a/tests/testthat/test-regressionlinear.R +++ b/tests/testthat/test-regressionlinear.R @@ -7,16 +7,32 @@ context("Linear Regression") # - stepwise methods (currently gives an error if I set p entry too high) # - plots handle errors -test_that("Main table results match", { - options <- jaspTools::analysisOptions("RegressionLinear") +initOptsLinReg <- function() { + options <- jaspTools::analysisOptions("RegressionLinear") + options$dependent <- "contNormal" options$covariates <- "contGamma" - options$weights <- "facFifty" options$modelTerms <- list( list(components="contGamma", isNuisance=FALSE) ) + options$rSquaredChange <- TRUE + options$fChange <- TRUE + options$residualDurbinWatson <- FALSE + options$residualCasewiseDiagnostic <- FALSE + options$residualsSavedToData <- FALSE + options$residualsSavedToDataColumn <- FALSE + options$residualStatistic <- FALSE + + return(options) +} + +test_that("Main table results match", { + options <- initOptsLinReg() + + options$weights <- "facFifty" options$residualDurbinWatson <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_summaryTable"]][["data"]] jaspTools::expect_equal_tables(table, @@ -29,17 +45,15 @@ test_that("Main table results match", { }) test_that("Coefficients table results match", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" - options$covariates <- "contGamma" - options$modelTerms <- list( - list(components="contGamma", isNuisance=FALSE) - ) + options <- initOptsLinReg() + options$coefficientEstimate <- TRUE options$coefficientCi <- TRUE options$coefficientCiLevel <- 0.9 options$collinearityDiagnostic <- TRUE + options$collinearityStatistic <- TRUE options$vovkSellke <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_coeffTable"]][["data"]] jaspTools::expect_equal_tables(table, @@ -55,14 +69,12 @@ test_that("Coefficients table results match", { }) test_that("ANOVA table results match", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() + options$dependent <- "debCollin1" - options$covariates <- "contGamma" - options$modelTerms <- list( - list(components="contGamma", isNuisance=FALSE) - ) options$modelFit <- TRUE options$vovkSellke <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_anovaTable"]][["data"]] jaspTools::expect_equal_tables(table, @@ -73,8 +85,7 @@ test_that("ANOVA table results match", { }) test_that("Coefficients Covariance table results match", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" + options <- initOptsLinReg() options$covariates <- c("contGamma", "contcor1") options$modelTerms <- list( list(components="contGamma", isNuisance=FALSE), @@ -92,13 +103,10 @@ test_that("Coefficients Covariance table results match", { }) test_that("Descriptive table results match", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" - options$covariates <- "contGamma" - options$modelTerms <- list( - list(components="contGamma", isNuisance=FALSE) - ) + options <- initOptsLinReg() + options$descriptives <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_descriptivesTable"]][["data"]] jaspTools::expect_equal_tables(table, @@ -108,14 +116,15 @@ test_that("Descriptive table results match", { }) test_that("Part and Partial Correlations table results match", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" + options <- initOptsLinReg() + options$covariates <- c("debCollin2", "contGamma") options$modelTerms <- list( list(components="debCollin2", isNuisance=FALSE), list(components="contGamma", isNuisance=FALSE) ) options$partAndPartialCorrelation <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_partialCorTable"]][["data"]] jaspTools::expect_equal_tables(table, @@ -125,8 +134,8 @@ test_that("Part and Partial Correlations table results match", { }) test_that("Collinearity Diagonistic table results match", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" + options <- initOptsLinReg() + options$covariates <- "contcor1" options$modelTerms <- list( list(components="contcor1", isNuisance=FALSE) @@ -142,7 +151,7 @@ test_that("Collinearity Diagonistic table results match", { }) test_that("Residuals Statistics table results match", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "contNormal" options$covariates <- "contcor1" options$modelTerms <- list( @@ -162,7 +171,7 @@ test_that("Residuals Statistics table results match", { }) test_that("Casewise Diagnostics table results match", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "contNormal" options$covariates <- "contOutlier" options$modelTerms <- list( @@ -172,22 +181,19 @@ test_that("Casewise Diagnostics table results match", { options$residualCasewiseDiagnosticType <- "outliersOutside" options$residualCasewiseDiagnosticZThreshold <- 3 results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) - table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_casewiseTable"]][["data"]] + table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_influenceTable"]][["data"]] jaspTools::expect_equal_tables(table, - list(55, 3.34810934796608, 3.356094448, -0.187237305306683, 3.54333175330668, - 0.0577123260439598, 83, 3.22377600371253, 2.958797116, -0.143545494526366, - 3.10234261052637, 1.15289593050314) + list(55, 0.0577123260439598, 3.356094448, -0.187237305306683, 3.54333175330668, + 3.34810934796609, 0, 83, 1.15289593050314, 2.958797116, -0.143545494526367, + 3.10234261052637, 3.22377600371253) ) }) test_that("Residuals vs. Dependent plot matches", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" - options$covariates <- "contGamma" - options$modelTerms <- list( - list(components="contGamma", isNuisance=FALSE) - ) + options <- initOptsLinReg() + options$residualVsDependentPlot <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) testPlot <- results[["state"]][["figures"]][[1]][["obj"]] @@ -195,52 +201,39 @@ test_that("Residuals vs. Dependent plot matches", { }) test_that("Residuals vs. Covariates plot matches", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" - options$covariates <- "contGamma" - options$modelTerms <- list( - list(components="contGamma", isNuisance=FALSE) - ) + options <- initOptsLinReg() + options$residualVsCovariatePlot <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) testPlot <- results[["state"]][["figures"]][[1]][["obj"]] jaspTools::expect_equal_plots(testPlot, "residuals-covariates") }) test_that("Residuals vs. Predicted plot matches", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" - options$covariates <- "contGamma" - options$modelTerms <- list( - list(components="contGamma", isNuisance=FALSE) - ) + options <- initOptsLinReg() + options$residualVsFittedPlot <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) testPlot <- results[["state"]][["figures"]][[1]][["obj"]] jaspTools::expect_equal_plots(testPlot, "residuals-predicted") }) test_that("Standardized Residuals Histogram matches", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" - options$covariates <- "contGamma" - options$modelTerms <- list( - list(components="contGamma", isNuisance=FALSE) - ) + options <- initOptsLinReg() + options$residualHistogramPlot <- TRUE options$residualHistogramStandardizedPlot <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) testPlot <- results[["state"]][["figures"]][[1]][["obj"]] jaspTools::expect_equal_plots(testPlot, "residuals-histogram") }) test_that("Q-Q Plot Standardized Residuals matches", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" - options$covariates <- "contGamma" - options$modelTerms <- list( - list(components="contGamma", isNuisance=FALSE) - ) + options <- initOptsLinReg() + options$residualQqPlot <- TRUE results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) testPlot <- results[["state"]][["figures"]][[1]][["obj"]] @@ -248,8 +241,8 @@ test_that("Q-Q Plot Standardized Residuals matches", { }) test_that("Marginal effects plot matches", { - options <- jaspTools::analysisOptions("RegressionLinear") - options$dependent <- "contNormal" + options <- initOptsLinReg() + options$covariates <- "contcor1" options$modelTerms <- list( list(components="contcor1", isNuisance=FALSE) @@ -266,8 +259,8 @@ test_that("Marginal effects plot matches", { }) test_that("Analysis handles errors", { - options <- jaspTools::analysisOptions("RegressionLinear") - + options <- initOptsLinReg() + options$dependent <- "debInf" options$covariates <- "contGamma" options$modelTerms <- list(list(components="contGamma", isNuisance=FALSE)) @@ -339,7 +332,7 @@ test_that("Analysis handles errors", { }) test_that("Analysis handles categorical predictors in model summary table", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "contNormal" options$covariates <- "contcor1" options$factors <- "facFive" @@ -348,6 +341,8 @@ test_that("Analysis handles categorical predictors in model summary table", { list(components="facFive", isNuisance=FALSE) ) options$rSquaredChange <- TRUE + options$fChange <- TRUE + results <- jaspTools::runAnalysis("RegressionLinear", "test.csv", options) table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_summaryTable"]][["data"]] @@ -363,7 +358,7 @@ test_that("Analysis handles categorical predictors in model summary table", { test_that("Part And Partial Correlations table results match", { # Part and partial correlations, including categorical predictors, verified with SPSS, # see pdf doc in https://github.com/jasp-stats/jasp-issues/issues/1638 - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$covariates <- c("education", "prestige") options$dependent <- "income" options$factors <- "occup_type" @@ -383,7 +378,7 @@ test_that("Part And Partial Correlations table results match", { }) test_that("Bootstrapping runs", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "contNormal" options$covariates <- "contcor1" options$factors <- "facFive" @@ -423,7 +418,7 @@ test_that("Bootstrapping runs", { }) test_that("Marginal effects plots works with interactions", { - options <- analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$.meta <- list(covariates = list(shouldEncode = TRUE), dependent = list( shouldEncode = TRUE), factors = list(shouldEncode = TRUE), modelTerms = list(shouldEncode = TRUE), weights = list(shouldEncode = TRUE)) @@ -451,12 +446,15 @@ test_that("Marginal effects plots works with interactions", { # Chapter 1 test_that("Fields Book - Chapter 1 results match", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "sales" options$covariates <- "adverts" options$modelTerms <- list( list(components="adverts", isNuisance=FALSE) ) + options$fChange <- FALSE + options$rSquaredChange <- FALSE + results <- jaspTools::runAnalysis("RegressionLinear", dataset = "Album Sales.csv", options) output1 <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_summaryTable"]][["data"]] jaspTools::expect_equal_tables(output1, @@ -521,7 +519,7 @@ test_that("Fields Book - Chapter 1 results match", { # Chapter 2 test_that("Fields Book - Chapter 2 results match", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "sales" options$covariates <- c("adverts", "airplay", "attract") options$modelTerms <- list( @@ -530,6 +528,8 @@ test_that("Fields Book - Chapter 2 results match", { list(components="attract", isNuisance=FALSE) ) options$rSquaredChange <- TRUE + options$fChange <- TRUE + options$coefficientCi <- TRUE results <- jaspTools::runAnalysis("RegressionLinear", dataset = "Album Sales.csv", options) output4 <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_summaryTable"]][["data"]] @@ -551,7 +551,7 @@ test_that("Fields Book - Chapter 2 results match", { # Chapter 3 test_that("Fields Book - Chapter 3 results match", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "sales" options$covariates <- c("adverts", "airplay", "attract") options$modelTerms <- list( @@ -581,7 +581,7 @@ test_that("Fields Book - Chapter 3 results match", { jaspTools::expect_equal_plots(figure5a, "field-residuals-histogram") figure5b <- results[["state"]][["figures"]][[3]][["obj"]] # Q-Q-Plot jaspTools::expect_equal_plots(figure5b, "field-qq") - output1 <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_casewiseTable"]][["data"]] + output1 <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_influenceTable"]][["data"]] jaspTools::expect_equal_tables(output1, list(1, 2.177404, 330, 229.9203, 100.0797, 0.05870388, 2, -2.323083, 120, 228.949, -108.949, 0.01088943, @@ -597,7 +597,7 @@ test_that("Fields Book - Chapter 3 results match", { 200, -2.088044, 110, 207.2061, -97.20606, 0.02513455) ) - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "sales" options$covariates <- c("adverts", "airplay", "attract") options$modelTerms <- list( @@ -612,7 +612,7 @@ test_that("Fields Book - Chapter 3 results match", { set.seed(1) # For Bootstrapping Unit Tests options$coefficientCi <- TRUE results <- jaspTools::runAnalysis("RegressionLinear", dataset = "Album Sales.csv", options) - figure10 <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_casewiseTable"]][["data"]] + figure10 <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_influenceTable"]][["data"]] figure10 <- list(figure10[[1]]$cooksD, figure10[[2]]$cooksD, figure10[[3]]$cooksD, figure10[[4]]$cooksD, figure10[[5]]$cooksD, figure10[[6]]$cooksD, figure10[[7]]$cooksD, figure10[[8]]$cooksD, figure10[[9]]$cooksD, figure10[[10]]$cooksD, figure10[[11]]$cooksD, figure10[[12]]$cooksD, @@ -637,7 +637,7 @@ test_that("Fields Book - Chapter 3 results match", { # "", "attract", -0.2110928, 11.08634, 2.234141, 6.502079, 15.13605) # ) - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "spai" options$covariates <- c("tosca", "obq") options$modelTerms <- list( @@ -662,7 +662,7 @@ test_that("Fields Book - Chapter 3 results match", { # Chapter 4 test_that("Fields Book - Chapter 4 results match", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "Happiness" options$covariates <- c("dummy1", "dummy2") options$modelTerms <- list( @@ -690,7 +690,7 @@ test_that("Fields Book - Chapter 4 results match", { # Chapter 5 test_that("Fields Book - Chapter 5 results match", { - options <- jaspTools::analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$dependent <- "Happiness" options$covariates <- c("Dummy1", "Dummy2") options$modelTerms <- list( @@ -719,8 +719,9 @@ test_that("VIF is correct when the model contains factors", { # test issue reported in https://forum.cogsci.nl/discussion/comment/27675 # previously, we computed the variance inflation factor (vif) in an incorrect manner when categorical variables were in play. - options <- analysisOptions("RegressionLinear") + options <- initOptsLinReg() options$collinearityDiagnostic <- TRUE + options$collinearityStatistic <- TRUE options$covariates <- c("contcor1", "contcor2", "contGamma") options$dependent <- "contNormal" options$factors <- c("contBinom", "facFive") diff --git a/tests/testthat/test-regressionlinearVerification.R b/tests/testthat/test-regressionlinearVerification.R index 0a7298eb..18bc9a73 100644 --- a/tests/testthat/test-regressionlinearVerification.R +++ b/tests/testthat/test-regressionlinearVerification.R @@ -17,6 +17,11 @@ options$modelTerms <- list( list(components="MeanCenteredX", isNuisance=FALSE) ) options$descriptives <- TRUE +options$residualDurbinWatson <- FALSE +options$residualCasewiseDiagnostic <- FALSE +options$residualsSavedToData <- FALSE +options$residualsSavedToDataColumn <- FALSE +options$residualStatistic <- FALSE results <- jaspTools::runAnalysis("RegressionLinear", "Regression.csv", options)