Skip to content

Commit

Permalink
Analyses for review patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
cjvanlissa committed Jan 5, 2022
1 parent a5675b9 commit a5c37a6
Show file tree
Hide file tree
Showing 8 changed files with 638 additions and 156 deletions.
Binary file added country_variables.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added country_variables_byday.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified date_distribution.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
312 changes: 156 additions & 156 deletions date_distribution.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
98 changes: 98 additions & 0 deletions reviews_patterns.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# VIFS
df_training <- read.csv("df_training_imputed.csv", stringsAsFactors = FALSE)

df_training[which(sapply(df_training, is.character))] <-
lapply(df_training[which(sapply(df_training, is.character))], factor)

df_training[which(lapply(sapply(df_training, unique), length) <= 5)] <-
lapply(df_training[which(lapply(sapply(df_training, unique), length) <= 5)], factor)

tmp <- df_training[sapply(df_training, is.factor)]

df2 <- df_training

desc <- read.csv("descriptives_itemscales.csv", stringsAsFactors = FALSE)
df2 <- df2[, names(df2) %in% desc$name]

df2[names(tmp)] <- NULL
df2[["date"]] <- NULL
df2[["countryiso3"]] <- NULL

res <- lm(c19perbeh ~.,data = df2)
vifs <- car::vif(res)

range(vifs)


library(ranger)
source("scripts/varimpplot_lasso.R")
source("scripts/model_accuracy.R")

df_training <- read.csv("df_training_imputed.csv", stringsAsFactors = FALSE)
df_testing <- read.csv("df_testing_imputed.csv", stringsAsFactors = FALSE)

remove <- c("c19normshould",
"c19proso",
"isoimmi_inperson",
"houseleave",
"isoothppl_inperson",
"c19normdo",
"ecoproso")

df_training[remove] <- NULL
df_testing[remove] <- NULL
y = "c19perbeh"
X <- df_training[, !names(df_training) == y]
X_las <- model.matrix(~., X)[, -1]
Y <- df_training[[y]]

X_test <- df_testing[, !names(df_testing) == y]
X_las_test <- model.matrix(~., X_test)[, -1]
Y_test <- df_testing[[y]]

set.seed(953007)
res <- ranger(c19perbeh~., data = df_training, num.trees = 1000,
min.node.size = 6,
mtry = 31,
importance = "permutation")


fits <- c(
model_accuracy(res,
olddata = df_training,
observed = Y,
ymean = mean(Y, na.rm = TRUE)),
model_accuracy(res,
newdata = df_testing,
observed = Y_test,
ymean = mean(Y, na.rm = TRUE)))

names(fits) <- c(paste0("train_", c("r2", "mse", "r_actual_pred")), paste0("test_", c("r2", "mse", "r_actual_pred")))
fits


# Separate PDP ------------------------------------------------------------

p <- readRDS("results/pdp_c19perbeh.RData")
df_vars <- read.csv("scripts/df_training_labs.csv", stringsAsFactors = F)
var_rename <- tolower(df_vars$lab)
names(var_rename) <- df_vars$X

p <- for(i in 1:length(p)){
thisp <- p[[i]]+facet_grid(.~Variable, labeller = labeller(
Variable = setNames(paste0(i, ". ", var_rename[vars[i]]), var_rename[vars[i]])
))
})
# Reduce font size
p <- lapply(p, function(x){ x + theme(strip.text.x = element_text(size = 5),
axis.text.x = element_text(size = 5),
axis.text.y = element_text(size = 5))})
if("countryiso3" %in% vars){
p[[which(vars == "countryiso3")]] <- p[[which(vars == "countryiso3")]] + theme(axis.text.x = element_text(angle=90, size = 3))
}
p <- metaforest:::merge_plots(p)
ggsave(
filename = paste0("results/rf_partialdependence_", gsub(".+_(.+)\\.RData", "\\1", thisfile), ".png"),
p,
device = "png")

83 changes: 83 additions & 0 deletions scripts/conspiracy_analyses.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
df_vars <- read.csv("scripts/df_training_labs.csv", stringsAsFactors = F)
df_vars$lab <- tolower(df_vars$lab)
df_vars <- df_vars[grepl("^c_", df_vars$lab), ]
df_vars <- df_vars[df_vars$lab %in% c("c_political stability", "c_govt. effectiveness",
"c_doctors per 10k", "c_control corruption", "c_govt. response",
"c_accountability", "c_containment health index",
"c_tourism expenditures", "c_rule of law"), ]


tmp <- df_training[, c("countryiso3", df_vars$X)]

names(tmp)[-1] <- df_vars$lab[match(names(tmp)[-1], df_vars$X)]
tmp <- tmp[!duplicated(tmp$countryiso3), ]

df_plot <- do.call(rbind, lapply(names(tmp)[-1], function(x){
data.frame(Country = tmp[["countryiso3"]], Variable = x, Value = tmp[[x]])
}))

library(ggplot2)

p <- ggplot(df_plot, aes(x = Value, y = 1)) + ggrepel::geom_text_repel(aes(label = Country), angle = 90, size = 3, max.overlaps = 30, direction = "y") + facet_wrap(~Variable, nrow = 3, scales = "free") + theme_bw() + labs(x = NULL, y = NULL) + scale_y_continuous(labels = NULL, breaks = NULL)
ggsave("country_variables.png", p, "png", width = 8, height = 8)


df_plot <- do.call(rbind, lapply(names(tmp)[-1], function(x){
data.frame(Country = tmp[["countryiso3"]], Variable = x, Value = tmp[[x]])
}))

library(ggplot2)

# df_training should be a data.table

df_plot <- lapply(c("deaths", "confirmed", "recovered", "governmentresponseindex", "stringencyindex", "closepublictransport_flag"), function(x){
tmp <- df_training[, list(mean=mean(get(x), na.rm = TRUE), sd=sd(get(x), na.rm = TRUE)), by=countryiso3]
tmp[, "variable" := x]
})
df_plot <- rbindlist(df_plot)
df_plot$variable <- df_vars$lab[match(df_plot$variable, df_vars$X)]

p <- ggplot(df_plot, aes(y = countryiso3)) +
geom_errorbarh(aes(xmin = mean-sd, xmax = mean+sd)) +
geom_point(aes(x = mean))+
facet_wrap(~variable, nrow = 3, scales = "free") + theme_bw() +
ylab("Country")
ggsave("country_variables_byday.png", p, "png", width = 8, height = 10)



# Check measurement invariance --------------------------------------------
library(lavaan)
df_inv <- df$consp01
df_inv <- df[, c("coded_country", grep("^consp0", names(df), value = T))]

df_tmp <- df_inv
df_tmp <- df_tmp[!rowSums(is.na(df_tmp)) == 3, ]
df_tmp <- df_tmp[!df_tmp$coded_country %in% c("Malaysia", "Philippines"), ]
mod <- paste0('F =~ ', paste0(names(df_tmp[-1]), collapse = " + "))
# configural invariance
# configural invariance
fit <- cfa(mod, df_tmp)

fit1 <- cfa(mod, data = df_tmp, group = "coded_country")
# metric invariance
fit2 <- cfa(mod, data = df_tmp, group = "coded_country",
group.equal = "loadings")
c(fitmeasures(fit1)[c("chisq", "df", "npar", "bic", "cfi", "tli", "rmsea")],
fitmeasures(fit2)[c("chisq", "df", "npar", "bic", "cfi", "tli", "rmsea")],
unlist(lavTestLRT(fit1, fit2)[2, 7]))
anova(fit1, fit2)


df_tmp <- df_inv
names(df_tmp)[-1] <- paste0("consp_", 1:3)
library(tidySEM)
out <- sapply(unique(df_tmp$coded_country), function(i){
#i = df_tmp$coded_country[1]
tmp <- df_tmp[df_tmp$coded_country == i, -1]
tmp <- tidy_sem(tmp)
tmp <- create_scales(tmp)
c(Country = i, unlist(tmp$descriptives))
})
desc <- data.frame(t(out))
write.csv(desc, "conspiracy_reliability_by_country.csv", row.names = F)
7 changes: 7 additions & 0 deletions scripts/model_accuracy.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ model_accuracy <-
predicted <- predict(fit, data = newdata)$predictions
}
},
lm = {
if (is.null(newdata)) {
predicted <- fit$fitted.values
} else {
predicted <- predict(fit, newdata = newdata)
}
},
ranger = {
if (is.null(newdata)) {
predicted <- fit$predictions
Expand Down
Loading

0 comments on commit a5c37a6

Please sign in to comment.