-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 78f397e
Showing
141 changed files
with
373,215 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
Amazon Turk Study for Multivariate Network Visualization Techniques | ||
|
||
=================== | ||
|
||
|
||
Data | ||
--- | ||
The raw data from our experiments are stored in the [data](data) folder. | ||
|
||
Analysis | ||
--- | ||
The R Markdown files we used to compute statistics are in the [analysis](analysis) folder. | ||
|
||
Experiment | ||
--- | ||
The code for reproducing our web-based experiment is in the [experiments](experiments) folder. | ||
|
||
|
||
# Config Files | ||
Config files determine how the visualization is rendered along with the task that is used in that trial. | ||
|
||
Config files are written in .hjson to allow for comments. If you're updating an existing config or generating a new one, follow these steps to produce a .json file that JavaScript can parse: | ||
|
||
-- to install the tool that converts between .hjson and .json (on a mac) | ||
GET=https://github.com/hjson/hjson-go/releases/download/v3.0.0/darwin_amd64.tar.gz | ||
curl -sSL $GET | sudo tar -xz -C /usr/local/bin | ||
|
||
|
||
|
||
-- to generate a .json from an .hjson | ||
hjson -j CONFIG.hjson > CONFIG.json | ||
|
||
|
||
-- to generate an .hjson from a .json | ||
hjson CONFIG.json > CONFIG.hjson |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,324 @@ | ||
library(ggplot2) | ||
library(coin) | ||
library(pwr) | ||
library(shiny) | ||
library(miniUI) | ||
library(boot) | ||
library(tidyr) | ||
library(irr) | ||
library(lazyeval) | ||
library(doBy) | ||
library(gtable) | ||
library(grid) | ||
library(scales) | ||
library(plyr) | ||
library(dplyr) | ||
library(bootES) | ||
library(extrafont) | ||
|
||
loadfonts() | ||
|
||
# Reporting the basic summary of a group | ||
# Print a one-line descriptive summary of a single column:
# N, mean, standard deviation, median and MAD, each rounded to one decimal.
#   data: data frame (or list) that holds the column
#   attr: name of the column to summarise
# The summary is written with cat() and is not followed by a newline.
report <- function(data, attr) {
  values <- data[[attr]]
  # Apply one summary function to the column and round the result.
  rounded <- function(stat) round(stat(values), 1)

  cat(
    "N=", rounded(length), ",",
    "M=", rounded(mean), ",",
    "sd=", rounded(sd), ",",
    "Mdn=", rounded(median), ",",
    "mad=", rounded(mad),
    sep = ""
  )
}
|
||
# Print the mean of one column with a bootstrapped 95% BCa confidence
# interval, formatted as "M=<mean>~[<lower>,<upper>]" (one decimal each).
#   data: data frame (or list) that holds the column
#   attr: name of the column to summarise
# The output is written with cat() and is not followed by a newline.
reportCI <- function(data, attr) {
  values <- data[[attr]]

  # Bootstrap the mean with 1000 ordinary resamples.
  resamples <- boot(data = values, statistic = mean.fun, R = 1000,
                    sim = "ordinary")

  # Only the BCa interval is reported, so request just that type. The
  # default (type = "all") also attempts studentized intervals and warns
  # because no bootstrap variances are available.
  ci <- boot.ci(resamples, type = "bca")

  cat("M=", round(mean(values), 1), "~",
      "[", round(ci$bca[, 4], 1), ",",
      round(ci$bca[, 5], 1), "]",
      sep = "")
}
|
||
# Print a bootstrapped Cohen's d (with its interval bounds) for the
# two-level contrast associated with a known grouping column, formatted as
# "d=<estimate>~[<lower>,<upper>]" (two decimals each).
#   data:  data frame passed on to bootES()
#   attr:  name of the measured column (bootES data.col)
#   group: name of the grouping column (bootES group.col); one of
#          "used_search", "condition" or "search_state"
# Any other `group` value prints nothing and returns NULL invisibly.
reportES <- function(data, attr, group) {
  # Contrast weights (+1 / -1) for each supported grouping column.
  contrast_by_group <- list(
    used_search = c("search" = 1, "non-search" = -1),
    condition = c("foresight" = 1, "control" = -1),
    search_state = c("using_box" = 1, "not_using_box" = -1)
  )

  if (!(group %in% names(contrast_by_group))) {
    return(invisible(NULL))
  }

  b <- bootES(data,
              data.col = attr,
              group.col = group,
              contrast = contrast_by_group[[group]],
              effect.type = "cohens.d")

  cat("d=", round(b$t0, 2), "~",
      "[", round(b$bounds[1], 2), ",",
      round(b$bounds[2], 2), "]",
      sep = "")
}
|
||
# Statistic used when bootstrapping a 95% CI for the mean.
#   D: the full data vector
#   d: indices selected for the current bootstrap replicate
# Returns the mean of the selected observations.
mean.fun <- function(D, d) {
  resampled <- D[d]
  mean(resampled)
}
|
||
########################## proportional test ########################## | ||
# Two-sample test of proportions on a dichotomised column.
#   data:         data frame holding the columns named by `x` and `group`
#   x:            name of the column to dichotomise
#   compareValue: rows where `x` equals this value are coded 0, all others 1
#   group:        name of the grouping column
# Prints the group-by-code contingency table, the prop.test() result (with
# continuity correction) and the difference between the first two estimated
# proportions. The value of the final print() call is returned.
proportionalTest <- function(data, x, compareValue, group) {
  is_match <- data[x] == compareValue
  data["compareVar_"] <- ifelse(is_match, 0, 1)
  data["group_"] <- data[group]

  # Keep the name `tbl`: prop.test() echoes it in the "data:" line it prints.
  tbl <- table(data$group_, data$compareVar_)
  print(tbl)

  result <- prop.test(tbl, correct = TRUE)
  print(result)

  estimates <- result$estimate
  print(paste("Diff=", estimates[1] - estimates[2]))
}
|
||
# Plot, per group, the proportion of rows whose `x` equals `compareValue`,
# together with the confidence interval reported by prop.test().
#   data:         data frame holding the columns named by `x` and `group`
#   x:            name of the column to dichotomise
#   compareValue: rows where `x` equals this value count as hits (coded 1)
#   group:        name of the grouping column
#   xRange:       accepted for signature compatibility; not used
#   yRange:       values the proportion axis must include
# Returns the ggplot object (axes flipped, y-axis text hidden). The manual
# colour scale supplies two colours.
proportionalTestCIPlot <- function(data, x, compareValue, group,
                                   xRange = 0, yRange = 0) {

  data["compareVar_"] <- ifelse(data[x] == compareValue, 1, 0)
  data["group_"] <- data[group]

  # One prop.test() per group; its interval supplies both bounds.
  df <- plyr::ddply(data, "group_", function(rows) {
    hits <- sum(rows$compareVar_)
    total <- length(rows$compareVar_)
    interval <- prop.test(hits, total)$conf.int
    data.frame(prop = hits / total,
               lower = interval[1],
               upper = interval[2])
  })

  # The plot-level mapping previously referred to a non-existent `low`
  # column; it now names the `lower` column computed above.
  p <- ggplot(df, aes(group_, y = prop, ymin = lower, ymax = upper,
                      colour = group_))
  p <- p + scale_color_manual(values = c("#998EC3", "#fa9fb5"))
  p <- p + geom_pointrange(aes(ymin = lower, ymax = upper))
  p <- p + expand_limits(y = yRange)
  p <- p + ylab("Percentage")
  p <- p + xlab("")
  p <- p + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1)
  p <- p + coord_flip()
  # theme_bw() is a complete theme and replaces any theme() settings added
  # before it, so it goes first and the tweaks follow.
  p <- p + theme_bw()
  p <- p + theme(axis.title = element_text(size = 20),
                 axis.text = element_text(size = 18))
  p <- p + theme(plot.title = element_text(hjust = 0))
  p <- p + theme(panel.border = element_blank())
  p <- p + theme(panel.grid.minor = element_blank())
  p <- p + theme(axis.ticks = element_blank())
  p <- p + theme(legend.key = element_rect(color = "white"))
  p <- p + theme(axis.text.y = element_blank())
  p <- p + scale_y_continuous(labels = percent)
  # "none" replaces the deprecated guides(colour = FALSE).
  p <- p + guides(colour = "none")
  p
}
|
||
# Plot each group's mean with a bootstrapped (BCa) confidence interval.
#   data:   data frame holding the columns named by `x` and `y`
#   y:      name of the numeric column to summarise
#   x:      name of the grouping column (converted to a factor)
#   yRange: values the y axis must include; when at least two values are
#           given, five evenly spaced breaks are drawn from yRange[1] to
#           yRange[2]
#   xRange: accepted for signature compatibility; not used
#   colors: fallback colours, used when `x` has no preset palette below
# Returns the ggplot object (axes flipped, y-axis text hidden).
ciplot <- function(data, y, x, yRange = 0, xRange = 0,
                   colors = c("#998EC3", "#F1A340")) {

  data["x_"] <- data[x]
  data["y_"] <- data[y]
  data[["x_"]] <- factor(data[["x_"]])

  # Lower and upper BCa bounds for the mean, taken from ONE bootstrap run so
  # that both ends of the interval describe the same set of resamples.
  bca_bounds <- function(values) {
    resamples <- boot(values, statistic = mean.fun, R = 1000,
                      sim = "ordinary")
    unname(boot.ci(resamples, type = "bca")$bca[, 4:5])
  }

  groupedData <- dplyr::summarise(
    dplyr::group_by(data, x_),
    mean = mean(y_),
    ci = list(bca_bounds(y_))
  )

  df <- data.frame(
    trt = factor(groupedData[["x_"]]),
    resp = groupedData[["mean"]],
    group = factor(groupedData[["x_"]]),
    upper = vapply(groupedData[["ci"]], `[`, numeric(1), 2),
    lower = vapply(groupedData[["ci"]], `[`, numeric(1), 1)
  )

  # CI bar colours: preset palettes for the known grouping columns.
  if (x == "used_search") {
    colors <- c("#998EC3", "#F26A4D")
  } else if (x == "condition") {
    colors <- c("#998EC3", "#fa9fb5")
  } else if (x == "search_state") {
    colors <- c("#F1A340", "#F1A340")
  }

  p <- ggplot(df, aes(trt, resp, colour = group))
  p <- p + scale_color_manual(values = colors)
  p <- p + geom_pointrange(aes(ymin = lower, ymax = upper))
  p <- p + expand_limits(y = yRange)
  # Explicit breaks need both ends of the range; with the default
  # yRange = 0 there is no yRange[2], so ggplot picks the breaks.
  if (length(yRange) >= 2) {
    p <- p + scale_y_continuous(
      breaks = seq(yRange[1], yRange[2], length.out = 5)
    )
  }
  p <- p + ylab(y)
  p <- p + xlab("")
  p <- p + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1)
  p <- p + coord_flip()
  # theme_bw() is a complete theme and replaces any theme() settings added
  # before it, so it goes first and the tweaks follow.
  p <- p + theme_bw()
  p <- p + theme(axis.title = element_text(size = 20),
                 axis.text = element_text(size = 18))
  p <- p + theme(plot.title = element_text(hjust = 0))
  p <- p + theme(panel.border = element_blank())
  p <- p + theme(panel.grid.minor = element_blank())
  p <- p + theme(axis.ticks = element_blank())
  p <- p + theme(axis.text.y = element_blank())
  # "none" replaces the deprecated guides(colour = FALSE).
  p <- p + guides(colour = "none")
  p
}
|
||
# Draw one ciplot() per measured column.
#   data:   data frame passed on to ciplot()
#   yCol:   vector of column names; one plot is drawn for each
#   x:      name of the grouping column
#   yRange: passed on to ciplot()
#   xRange: passed on to ciplot()
# Each plot is printed explicitly, because a plot returned inside a loop or
# function body is not auto-printed. The plots are also returned invisibly
# as a list with one element per entry of `yCol`.
ciplotMulti <- function(data, yCol, x, yRange = 0, xRange = 0) {
  plots <- lapply(yCol, function(y) {
    p <- ciplot(data, y, x, yRange, xRange)
    print(p)
    p
  })
  invisible(plots)
}
|
||
# Plot each group's mean with a normal-approximation 95% confidence
# interval (mean +/- qnorm(0.975) * standard error) and print the plot.
#   data:   data frame holding the columns named by `x` and `y`
#   y:      name of the numeric column to summarise
#   x:      name of the grouping column (converted to a factor)
#   yRange: values the y axis must include
#   xRange: values the x axis must include
# The manual colour scale supplies two colours.
ciplotManual <- function(data, y, x, yRange = 0, xRange = 0) {

  data["x_"] <- data[x]
  data["y_"] <- data[y]
  data[["x_"]] <- factor(data[["x_"]])

  # Per-group n, mean and sd, then the standard error and interval bounds.
  # The dplyr verbs are namespace-qualified so the result does not depend on
  # whether plyr or dplyr was attached last.
  df2 <- data %>%
    dplyr::group_by(x_) %>%
    dplyr::summarise(n = dplyr::n(), resp = mean(y_), sd = sd(y_)) %>%
    dplyr::mutate(se = sd / sqrt(n),
                  lower = resp + qnorm(0.025) * se,
                  upper = resp + qnorm(0.975) * se)
  df2["trt"] <- factor(df2[["x_"]])

  # Plot
  p <- ggplot(df2, aes(trt, resp, color = trt))
  p <- p + scale_color_manual(values = c("#998EC3", "#F1A340"))
  p <- p + theme(axis.title = element_text(size = 20),
                 axis.text = element_text(size = 18))
  p <- p + geom_pointrange(aes(ymin = lower, ymax = upper))
  p <- p + expand_limits(y = yRange, x = xRange)
  p <- p + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1)
  p <- p + labs(y = y,
                x = x,
                title = paste("CI Plot: ", y, " ~ ", x))
  print(p)
}
## Gives count, mean, standard deviation, standard error of the mean, and
## confidence interval half-width (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be
##     summarized
##   groupvars: a vector containing names of columns that contain grouping
##     variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default
##     is 95%)
##   .drop: passed on to plyr::ddply()
## Returns a data frame with the grouping columns plus N, a column named
## after `measurevar` (the group mean), sd, se and ci.
summarySE <- function(data = NULL, measurevar, groupvars = NULL,
                      na.rm = FALSE, conf.interval = .95, .drop = TRUE) {

  # Version of length() which can handle NA's: if na.rm is TRUE, don't
  # count them.
  length2 <- function(x, na.rm = FALSE) {
    if (na.rm) sum(!is.na(x)) else length(x)
  }

  # This does the summary. For each group's data frame, return a vector
  # with N, mean, and sd. plyr is called through its namespace instead of
  # attaching it from inside the function.
  datac <- plyr::ddply(data, groupvars, .drop = .drop,
    .fun = function(xx, col) {
      c(N = length2(xx[[col]], na.rm = na.rm),
        mean = mean(xx[[col]], na.rm = na.rm),
        sd = sd(xx[[col]], na.rm = na.rm))
    },
    measurevar
  )

  # Rename the "mean" column to the measured variable's name. This must be
  # plyr::rename (old = new): when dplyr is attached after plyr, a bare
  # rename() resolves to dplyr::rename, which reads the same vector as
  # new = old and fails.
  datac <- plyr::rename(datac, c("mean" = measurevar))

  # Standard error of the mean.
  datac$se <- datac$sd / sqrt(datac$N)

  # Confidence interval multiplier for the standard error: the t quantile,
  # e.g. if conf.interval is .95, use .975 (above/below) with df = N - 1.
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult

  return(datac)
}
|
||
# Bar chart of group means with confidence-interval error bars.
#   data:    data frame holding the columns named by `measure` and `group`
#   measure: name of the numeric column to summarise
#   group:   name of the grouping column
# The data are summarised with summarySE() (95% interval, NAs kept); bars
# show the group mean and the error bars span mean -/+ ci.
# Returns the ggplot object.
ciBar <- function(data, measure, group) {
  data <- summarySE(data, measurevar = measure, groupvars = group,
                    na.rm = FALSE, conf.interval = .95)

  # Columns are looked up by name through the .data pronoun. Mapping to a
  # local variable that merely holds a symbol does not reach the column,
  # and the error bars must follow `measure` rather than a fixed name.
  ggplot(data, aes(x = .data[[group]], y = .data[[measure]])) +
    geom_bar(position = position_dodge(), stat = "identity") +
    geom_errorbar(aes(ymin = .data[[measure]] - ci,
                      ymax = .data[[measure]] + ci),
                  width = .2, # Width of the error bars
                  position = position_dodge(.9)) +
    labs(x = group, y = measure)
}
|
||
# Full two-group report for one measure: descriptive summary and
# bootstrapped CI of the mean for each level, a Wilcoxon test, a
# bootstrapped effect size, and finally the CI plot.
#   data:   data frame holding the columns named by `y` and `group`
#   y:      name of the numeric column to analyse
#   group:  name of the grouping column; one of "search_state",
#           "used_search" or "condition"
#   yRange: passed on to ciplot()
#   paired: whether the Wilcoxon test is paired
# Returns the ggplot object produced by ciplot(). Stops with an error when
# `group` is not one of the supported grouping columns.
fullReport <- function(data, y, group, yRange = 0, paired = FALSE) {

  data["group_"] <- data[group]
  data["y_"] <- data[y]

  # The two levels compared for each supported grouping column.
  levels_by_group <- list(
    search_state = c("using_box", "not_using_box"),
    used_search = c("search", "non-search"),
    condition = c("foresight", "control")
  )
  if (!is.character(group) || length(group) != 1 ||
      !(group %in% names(levels_by_group))) {
    stop("`group` must be one of: ",
         paste(names(levels_by_group), collapse = ", "),
         call. = FALSE)
  }
  lv <- levels_by_group[[group]]
  print(lv)

  # Summary and bootstrapped CI per level. report() and reportCI() do not
  # end their output with a newline, so one is added after each.
  for (level in lv) {
    rows <- data[which(data[["group_"]] == level), , drop = FALSE]
    report(rows, y)
    cat("\n")
    reportCI(rows, y)
    cat("\n")
  }

  # Recent R releases reject `paired` in the formula interface of
  # wilcox.test(), so the paired test is run on the two samples directly.
  # split() orders them by factor level, as the formula interface does.
  if (isTRUE(paired)) {
    samples <- split(data[["y_"]], factor(data[["group_"]]))
    wt <- wilcox.test(samples[[1]], samples[[2]],
                      conf.int = TRUE, paired = TRUE)
  } else {
    wt <- wilcox.test(y_ ~ group_, data = data, conf.int = TRUE)
  }
  print(wt)

  reportES(data, y, group)
  cat("\n")

  # fancy ci plots
  ciplot(data, y, group, yRange)
}
|
Oops, something went wrong.