Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Covariance matrix input #239

Merged
merged 5 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rprofile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
source("renv/activate.R")
16 changes: 16 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# jaspFactor (development version)

## [Pull Request #235](https://github.com/jasp-stats/jaspFactor/pull/235):
- CFA:
- Major changes:
- add more estimators to allow robust estimation (ordinal data should be fully supported now)

- Minor changes CFA:
- Standardized output is restructured
- Standardization now also for bootstrapped estimates

## [Pull Request #239](https://github.com/jasp-stats/jaspFactor/pull/239):
- add covariance matrix input for CFA, EFA, and PCA



197 changes: 143 additions & 54 deletions R/confirmatoryfactoranalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#



confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ...) {
jaspResults$addCitation("Rosseel, Y. (2012). lavaan: An R Package for Structural Equation Modeling. Journal of Statistical Software, 48(2), 1-36. URL http://www.jstatsoft.org/v48/i02/")

Expand All @@ -29,23 +28,27 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..
# Error checking
errors <- .cfaCheckErrors(dataset, options)

# covariance matrix
dataset <- .cfaDataCovariance(dataset, options)

# Main table / model
cfaResult <- .cfaComputeResults(jaspResults, dataset, options, errors)

# Output tables
.cfaContainerMain( jaspResults, options, cfaResult) # Main table container
.cfaTableMain( jaspResults, options, cfaResult) # Main table with fit info
.cfaTableFitMeasures(jaspResults, options, cfaResult) # Additional fit indices
.cfaTableKMO( jaspResults, options, cfaResult) # Kaiser-Meyer-Olkin test.
.cfaTableBartlett( jaspResults, options, cfaResult) # Bartlett's test of sphericity
.cfaTableRsquared( jaspResults, options, cfaResult) # R-squared of indicators
.cfaTableParEst( jaspResults, options, cfaResult) # Parameter estimates tables
.cfaTableModIndices( jaspResults, options, cfaResult) # Modification Indices
.cfaTableImpliedCov( jaspResults, options, cfaResult) # Implied Covariance matrix
.cfaTableResCov( jaspResults, options, cfaResult) # Residual Covariance Matrix
.cfaTableAve( jaspResults, options, cfaResult) # Average variance explained table
.cfaTableHtmt( jaspResults, options, cfaResult) # Heterotrait monotrait
.cfaTableReliability(jaspResults, options, cfaResult) # Reliability
.cfaContainerMain( jaspResults, options, cfaResult ) # Main table container
.cfaTableMain( jaspResults, options, cfaResult ) # Main table with fit info
.cfaTableFitMeasures(jaspResults, options, cfaResult ) # Additional fit indices
.cfaTableKMO( jaspResults, options, cfaResult ) # Kaiser-Meyer-Olkin test.
.cfaTableBartlett( jaspResults, options, cfaResult ) # Bartlett's test of sphericity
.cfaTableRsquared( jaspResults, options, cfaResult ) # R-squared of indicators
.cfaTableParEst( jaspResults, options, cfaResult ) # Parameter estimates tables
.cfaTableModIndices( jaspResults, options, cfaResult ) # Modification Indices
.cfaTableImpliedCov( jaspResults, options, cfaResult ) # Implied Covariance matrix
.cfaTableResCov( jaspResults, options, cfaResult ) # Residual Covariance Matrix
.cfaTableAve( jaspResults, options, cfaResult ) # Average variance explained table
.cfaTableHtmt( jaspResults, options, cfaResult, dataset ) # Heterotrait monotrait
# weirdly enough I cannot find the exact sample cov matrix in the lavaan output
.cfaTableReliability(jaspResults, options, cfaResult ) # Reliability


# Output plots
Expand All @@ -61,29 +64,32 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..

# Preprocessing functions ----
.cfaReadData <- function(dataset, options) {

if (!is.null(dataset)) return(dataset)

# NOTE: The GUI does not yet allow for putting the same variable in different factors.
# if the same variable is used twice but with a different type then this would
# crash the R code. However, since this is not possible yet, this should be okay for now
vars <- unlist(lapply(options[["factors"]], `[[`, "indicators"), use.names = FALSE)
types <- unlist(lapply(options[["factors"]], `[[`, "types"), use.names = FALSE)

if (length(vars) == 0)
return(data.frame())

duplicateVars <- duplicated(vars)
vars <- vars[!duplicateVars]
types <- types[!duplicateVars]

splitVars <- split(vars, types)
groupVar <- if (options[["group"]] == "") NULL else options[["group"]]
if (options[["dataType"]] == "raw") {
# make sure on the qml side that groupVar is indeed a nominal variable
groupVar <- if (options[["group"]] == "") NULL else options[["group"]]
dataset <- .readDataSetToEnd(columns = c(vars, groupVar))

return(.readDataSetToEnd(
columns.as.numeric = splitVars[["scale"]],
columns.as.ordinal = splitVars[["ordinal"]],
columns.as.factor = groupVar
))
} else {

dataset <- .readDataSetToEnd(all.columns = TRUE)
FBartos marked this conversation as resolved.
Show resolved Hide resolved
}


return(dataset)

}

Expand All @@ -100,9 +106,8 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..
return(options)
}

.cfaCheckErrors <- function(dataset, options) {

# TODO (vankesteren) content error checks, e.g., posdef covmat
.cfaCheckErrors <- function(dataset, options) {

# Number of variables in the factors
nVarsPerFactor <- unlist(lapply(options$factors, function(x) setNames(length(x$indicators), x$title)))
Expand All @@ -117,33 +122,84 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..

vars <- unique(unlist(lapply(options$factors, function(x) x$indicators)))

if (options$group == "") {
if (options[["dataType"]] == "raw") {

.hasErrors(dataset[, vars], type = 'varCovData', exitAnalysisIfErrors = TRUE,
varCovData.corFun = stats::cov)
# possible cov matrix:
if (ncol(dataset) > 0 && !nrow(dataset) > ncol(dataset)) {
.quitAnalysis(gettext("Not more cases than number of variables. Is your data a variance-covariance matrix?"))
}

} else {
if (options$group == "") {

.hasErrors(dataset, type = "factorLevels", factorLevels.target = options$group,
factorLevels.amount = '< 2', exitAnalysisIfErrors = TRUE)

for (group in levels(dataset[[options$group]])) {
.hasErrors(dataset[, vars], type = 'varCovData', exitAnalysisIfErrors = TRUE,
varCovData.corFun = stats::cov)

idx <- dataset[[options$group]] == group
} else {

.hasErrors(dataset, type = "factorLevels", factorLevels.target = options$group,
factorLevels.amount = '< 2', exitAnalysisIfErrors = TRUE)

for (group in levels(dataset[[options$group]])) {

if (any(sapply(dataset[, vars], is.ordered))) {
.hasErrors(dataset[idx, vars], type = 'varCovData', exitAnalysisIfErrors = TRUE,
varCovData.corFun = lavaan::lavCor)
} else {
.hasErrors(dataset[idx, vars], type = 'varCovData', exitAnalysisIfErrors = TRUE,
varCovData.corFun = stats::cov)
idx <- dataset[[options$group]] == group

if (any(sapply(dataset[, vars], is.ordered))) {
.hasErrors(dataset[idx, vars], type = 'varCovData', exitAnalysisIfErrors = TRUE,
varCovData.corFun = lavaan::lavCor)
} else {
.hasErrors(dataset[idx, vars], type = 'varCovData', exitAnalysisIfErrors = TRUE,
varCovData.corFun = stats::cov)
}
}
}
}

return(NULL)
}

.cfaDataCovariance <- function(dataset, options) {

if (options[["dataType"]] == "raw") {
return(dataset)
}

# possible data matrix?
if ((nrow(dataset) != ncol(dataset)) && !all(dt[lower.tri(dt)] == t(dt)[lower.tri(dt)])) {
.quitAnalysis(gettext("Input data does not seem to be a symmetric matrix! Please check the format of the input data."))
}

vars <- unlist(lapply(options[["factors"]], `[[`, "indicators"), use.names = FALSE)

if (length(vars) == 0)
return(data.frame())

duplicateVars <- duplicated(vars)
usedvars <- vars[!duplicateVars]
var_idx <- match(usedvars, colnames(dataset))
mat <- try(as.matrix(dataset[var_idx, var_idx]))
if (inherits(mat, "try-error"))
.quitAnalysis(gettext("All cells must be numeric."))

if (options[["group"]] != "") .quitAnalysis(gettext("Grouping variable not supported for covariance matrix input"))

if (options[["meanStructure"]]) .quitAnalysis(gettext("Mean structure not supported for covariance matrix input"))

.hasErrors(mat, type = "varCovMatrix", message='default', exitAnalysisIfErrors = TRUE)

colnames(mat) <- rownames(mat) <- colnames(dataset)[var_idx]

if (anyNA(mat)) {
inds <- which(is.na(mat))
mat <- mat[-inds, -inds]
if (ncol(mat) < 3) {
.quitAnalysis("Not enough valid columns to run this analysis")
}
}
return(mat)
}


.translateFactorNames <- function(factor_name, options, back = FALSE) {
# make dictionary
fac_names <- vapply(options$factors, function(x) x$name, "name")
Expand Down Expand Up @@ -192,17 +248,38 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..

if (anyNA(dataset)) {
naAction <- switch(
options$naAction,
options$naAction,
"twoStageRobust" = "robust.two.stage",
"twoStage" = "two.stage",
options$naAction)
} else {
naAction <- "listwise"
}

# define estimator from options
estimator = switch(options[["estimator"]],
"default" = "default",
"maximumLikelihood" = "ML",
"generalizedLeastSquares" = "GLS",
"weightedLeastSquares" = "WLS",
"unweightedLeastSquares" = "ULS",
"diagonallyWeightedLeastSquares" = "DWLS"
)

if (options[["dataType"]] == "raw") {
dt <- dataset
sampCov <- NULL
sampCovN <- NULL
} else {
dt <- NULL
sampCov <- dataset
sampCovN <- options[["sampleSize"]]
}
cfaResult[["lav"]] <- try(lavaan::lavaan(
model = mod,
data = dataset,
data = dt,
sample.cov = sampCov,
sample.nobs = sampCovN,
group = grp,
group.equal = geq,
meanstructure = options$meanStructure,
Expand Down Expand Up @@ -273,20 +350,20 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..
"all" = "std.all",
"latentVariables" = "std.lv",
"noExogenousCovariates" = "std.nox")
# change this once jaspSem is merged
cfaResult[["lav"]] <- lavBootstrap(cfaResult[["lav"]], options$bootstrapSamples,

if (options[["dataType"]] == "varianceCovariance") {
.quitAnalysis(gettext("Bootstrapping is not available for variance-covariance matrix input."))
}
cfaResult[["lav"]] <- jaspSem::lavBootstrap(cfaResult[["lav"]], options$bootstrapSamples,
standard = options[["standardized"]] != "none", typeStd = type)
# cfaResult[["lav"]] <- jaspSem::lavBootstrap(cfaResult[["lav"]], options$bootstrapSamples,
# standard = options[["standardized"]] != "none", typeStd = type)
}

# Save cfaResult as state so it's available even when opts don't change
jaspResults[["stateCFAResult"]] <- createJaspState(cfaResult)
jaspResults[["stateCFAResult"]]$dependOn(c(
"factors", "secondOrder", "residualsCovarying", "meanStructure", "modelIdentification",
"factorsUncorrelated", "packageMimiced", "estimator", "naAction", "seType", "bootstrapSamples",
"group", "invarianceTesting", "interceptsFixedToZero", "standardized"

"group", "invarianceTesting", "interceptsFixedToZero", "standardized", "dataType", "sampleSize"
))

return(cfaResult)
Expand Down Expand Up @@ -314,6 +391,9 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..
spec$bootstrap <- TRUE
} else {
if (options$seType == "robust") {
if (options[["dataType"]] == "varianceCovariance") {
.quitAnalysis(gettext("Robust standard errors are not available for variance-covariance matrix input."))
}
spec$se <- "robust.sem"
} else {
spec$se <- options$seType
Expand Down Expand Up @@ -422,7 +502,7 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..
jaspResults[["maincontainer"]]$dependOn(c(
"factors", "secondOrder", "residualsCovarying", "meanStructure", "modelIdentification",
"factorsUncorrelated", "packageMimiced", "estimator", "naAction", "seType", "bootstrapSamples",
"group", "invarianceTesting", "interceptsFixedToZero"
"group", "invarianceTesting", "interceptsFixedToZero", "dataType", "sampleSize"
))
}

Expand Down Expand Up @@ -1369,7 +1449,7 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..

}

.cfaTableHtmt <- function(jaspResults, options, cfaResult) {
.cfaTableHtmt <- function(jaspResults, options, cfaResult, dataset) {
#### this has an ordering argument that still needs to be implemented once the categorical data stuff is done

if (is.null(cfaResult) || !options[["htmt"]] || !is.null(jaspResults[["resHtmtTable"]])) return()
Expand Down Expand Up @@ -1406,14 +1486,22 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..
htmtTable$setData(tmp_dat)

} else {
# get the dataset
dataset <- as.data.frame(lavaan::inspect(cfaResult[["lav"]], what = "data"))
colnames(dataset) <- cfaResult[["lav"]]@[email protected][[1]]
if (options[["dataType"]] == "raw") {
# get the dataset
dataset <- as.data.frame(lavaan::inspect(cfaResult[["lav"]], what = "data"))
colnames(dataset) <- cfaResult[["lav"]]@[email protected][[1]]
sampCov <- NULL
} else {
sampCov <- dataset
colnames(sampCov) <- rownames(sampCov) <- cfaResult[["lav"]]@[email protected][[1]]
dataset <- NULL
}

if (is.null(cfaResult[["spec"]][["soIndics"]])) {
htmt_result <- semTools::htmt(model = cfaResult[["model"]], data = dataset,
htmt_result <- semTools::htmt(model = cfaResult[["model"]], data = dataset, sample.cov = sampCov,
missing = cfaResult[["lav"]]@Options[["missing"]])
} else { # the htmt does not allow a second order factor, so we take the model syntax without the seco
htmt_result <- semTools::htmt(model = cfaResult[["model_simple"]], data = dataset,
htmt_result <- semTools::htmt(model = cfaResult[["model_simple"]], data = dataset, sample.cov = sampCov,
missing = cfaResult[["lav"]]@Options[["missing"]])
}

Expand All @@ -1425,6 +1513,7 @@ confirmatoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ..
}
jaspResults[["resHtmtTable"]] <- htmtTable

return()
}


Expand Down
Loading
Loading