-
Notifications
You must be signed in to change notification settings - Fork 0
/
wilcoxon_features.R
45 lines (39 loc) · 1.81 KB
/
wilcoxon_features.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
library(stats)
significance_level <- 0.05
collective_results_folder <- file.path(
"results_statistical_analysis", "run",
"collective_tests")
prepost_filepaths <- list.files(path=collective_results_folder, pattern="\\.csv$")
# There should be 61 CSV files:
# - 1 CSV file for the pre/post comparison on the weekly rates of tweets posted (full volume)
# - 60 CSV files for the different features:
# - Emotions: 12 files
# - Topics: 10 files
# - URL categories: 26 files (including Undefined)
# - Symptoms: 12 files
names_columns <- c("feature", "pseudomedian", "lb_ci", "ub_ci", "pvalue", "pre_post")
stats_results_df <- data.frame(matrix(ncol = length(names_columns), nrow = 0,
dimnames = list(NULL, names_columns)))
for (fpath in prepost_filepaths){
feature <- gsub("\\.csv$", "", basename(fpath))
feature <- gsub("_pre_post_fractions$", "", feature)
feature <- gsub("_pre_post_rates$", "", feature)
print(feature)
pre_post_df <- read.csv(file.path(collective_results_folder, fpath))
# Apply two-sided Wilcoxon signed-rank test
res <- wilcox.test(pre_post_df$avg_post, pre_post_df$avg_pre, paired=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
if (res$p.value < significance_level){
if (res$estimate < 0) {
change_test <- "decrease"
} else {
change_test <- "increase"
}
} else {
change_test <- "no_change"
}
stats_results_df[nrow(stats_results_df)+1,] <- c(feature, res$estimate,
res$conf.int[1], res$conf.int[2],
res$p.value, change_test)
}
write.xlsx(stats_results_df, file = file.path(collective_results_folder, "wilcoxon_results_all_weeks_before_adjustment.xlsx"),
row.names=FALSE)