Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
JackWilb committed Aug 30, 2019
0 parents commit 78f397e
Show file tree
Hide file tree
Showing 141 changed files with 373,215 additions and 0 deletions.
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Amazon Turk Study for Multivariate Network Visualization Techniques

===================


Data
---
The raw data from our experiments are stored in the [data](data) folder.

Analysis
---
The R Markdown files we used to compute statistics are in the [analysis](analysis) folder.

Experiment
---
The code for reproducing our web-based experiment is in the [experiments](experiments) folder.


# Config Files
Config files determine how the visualization is rendered along with the task that is used in that trial.

Config files are written in .hjson to allow for comments, so if you're updating an existing config or generating a new one, follow these steps to ensure you end up with a .json file that javascript can parse out:

-- to install the tool that converts between .hjson and .json (on a mac)
GET=https://github.com/hjson/hjson-go/releases/download/v3.0.0/darwin_amd64.tar.gz
curl -sSL $GET | sudo tar -xz -C /usr/local/bin



-- to generate an .hjson from a .json
hjson CONFIG.json > CONFIG.hjson


-- to generate a .json from an .hjson (the -j flag makes the tool emit JSON)
hjson -j CONFIG.hjson > CONFIG.json
324 changes: 324 additions & 0 deletions analysis/analysis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,324 @@
library(ggplot2)
library(coin)
library(pwr)
library(shiny)
library(miniUI)
library(boot)
library(tidyr)
library(irr)
library(lazyeval)
library(doBy)
library(gtable)
library(grid)
library(scales)
library(plyr)
library(dplyr)
library(bootES)
library(extrafont)

loadfonts()

# Print a one-line numeric summary of a single column.
#   data: data frame (or list) holding the measurements
#   attr: name of the column to summarise, given as a string
# Writes N, mean, sd, median and MAD, each rounded to one decimal, to the
# console with no trailing newline.
report <- function(data, attr) {
  values <- data[[attr]]
  rounded <- function(stat) round(stat(values), 1)

  cat(
    "N=", rounded(length), ",",
    "M=", rounded(mean), ",",
    "sd=", rounded(sd), ",",
    "Mdn=", rounded(median), ",",
    "mad=", rounded(mad),
    sep = ""
  )
}

# Print the mean of one column together with its bootstrapped BCa interval.
#   data: data frame (or list) holding the measurements
#   attr: name of the column to summarise, given as a string
# The interval is taken from columns 4 and 5 of the `bca` entry returned by
# boot.ci(), based on 1000 ordinary bootstrap replications of the mean.
reportCI <- function(data, attr) {
  values <- data[[attr]]

  resamples <- boot(
    data = values, statistic = mean.fun, R = 1000, sim = "ordinary"
  )
  ci <- boot.ci(resamples)
  bounds <- round(ci$bca[, 4:5], 1)

  cat(
    "M=", round(mean(values), 1), "~",
    "[", bounds[1], ",", bounds[2], "]",
    sep = ""
  )
}

# Print a bootstrapped Cohen's d (with its interval) for a two-level grouping.
#   data:  data frame with the measurements
#   attr:  name of the measurement column
#   group: name of the grouping column; only "used_search", "condition" and
#          "search_state" are recognised, any other value prints nothing
reportES <- function(data, attr, group) {
  # Contrast (+1 / -1 level pair) used for each supported grouping column.
  contrasts_by_group <- list(
    used_search = c("search" = 1, "non-search" = -1),
    condition = c("foresight" = 1, "control" = -1),
    search_state = c("using_box" = 1, "not_using_box" = -1)
  )

  if (!(group %in% names(contrasts_by_group))) {
    return(invisible(NULL))
  }

  b <- bootES(
    data,
    data.col = attr,
    group.col = group,
    contrast = contrasts_by_group[[group]],
    effect.type = "cohens.d"
  )

  cat(
    "d=", round(b$t0, 2), "~",
    "[", round(b$bounds[1], 2), ",",
    round(b$bounds[2], 2), "]",
    sep = ""
  )
}

# Statistic callback for boot(): mean of the resampled observations.
#   D: the full data vector
#   d: indices selecting the current bootstrap sample from D
mean.fun <- function(D, d) {
  mean(D[d])
}

########################## proportional test ##########################
# Two-sample test of proportions for how often column `x` equals
# `compareValue` in each level of `group`.
#   data:         data frame with the observations
#   x:            name of the column that is compared against compareValue
#   compareValue: value counted as a match
#   group:        name of the grouping column
# Prints the contingency table, the prop.test() result and the difference
# between the two estimated proportions.
proportionalTest <- function(data, x, compareValue, group) {
  # Matches are coded 0 so that they land in the first table column, which
  # prop.test() treats as the success count.
  is_match <- data[x] == compareValue
  data["compareVar_"] <- ifelse(is_match, 0, 1)
  data["group_"] <- data[group]

  tbl <- table(data[["group_"]], data[["compareVar_"]])
  print(tbl)

  result <- prop.test(tbl, correct = TRUE)
  print(result)

  difference <- result$estimate[1] - result$estimate[2]
  print(paste("Diff=", difference))
}

# Plot, per level of `group`, the proportion of rows where column `x` equals
# `compareValue`, with the confidence interval reported by prop.test().
#   data:         data frame with the observations
#   x:            name of the column that is compared against compareValue
#   compareValue: value counted as a match
#   group:        name of the grouping column
#   xRange:       accepted for signature compatibility; not used
#   yRange:       values the proportion axis must include
# Returns the ggplot object.
proportionalTestCIPlot <- function(data, x, compareValue, group,
                                   xRange = 0, yRange = 0) {
  data["compareVar_"] <- ifelse(data[x] == compareValue, 1, 0)
  data["group_"] <- data[group]

  df <- plyr::ddply(
    data, plyr::.(group_), plyr::summarise,
    prop = sum(compareVar_) / length(compareVar_),
    lower = prop.test(sum(compareVar_), length(compareVar_))$conf.int[1],
    upper = prop.test(sum(compareVar_), length(compareVar_))$conf.int[2]
  )

  # The interval column is `lower`; the previous mapping named a
  # non-existent `low` column.
  p <- ggplot(
    df,
    aes(group_, y = prop, ymin = lower, ymax = upper, colour = group_)
  )
  p <- p + scale_color_manual(values = c("#998EC3", "#fa9fb5"))
  p <- p + geom_pointrange()
  p <- p + geom_errorbar(width = 0.1)
  p <- p + expand_limits(y = yRange)
  p <- p + scale_y_continuous(labels = percent)
  p <- p + ylab("Percentage")
  p <- p + xlab("")
  p <- p + coord_flip()
  # theme_bw() is a complete theme and replaces every earlier theme() call,
  # so it has to come before the individual adjustments.
  p <- p + theme_bw()
  p <- p + theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 18),
    plot.title = element_text(hjust = 0),
    panel.border = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks = element_blank(),
    legend.key = element_rect(color = "white"),
    axis.text.y = element_blank()
  )
  p <- p + guides(colour = "none")
  p
}

# Plot the mean of column `y` for each level of column `x`, with a
# bootstrapped BCa confidence interval (1000 replications) per level.
#   data:   data frame with the observations
#   y:      name of the numeric measurement column
#   x:      name of the grouping column (converted to a factor)
#   yRange: values the measurement axis must include; when at least two
#           values are given, five evenly spaced breaks are drawn between
#           the first and the second
#   xRange: accepted for signature compatibility; not used
#   colors: point/bar colours; replaced by a fixed palette when x is
#           "used_search", "condition" or "search_state"
# Returns the ggplot object.
ciplot <- function(data, y, x, yRange = 0, xRange = 0,
                   colors = c("#998EC3", "#F1A340")) {
  by_group <- split(data[[y]], factor(data[[x]]))

  # One bootstrap per group, so both interval bounds come from the same
  # set of resamples.
  bounds <- vapply(
    by_group,
    function(values) {
      resamples <- boot(
        values, statistic = mean.fun, R = 1000, sim = "ordinary"
      )
      as.numeric(boot.ci(resamples, type = "bca")$bca[, 4:5])
    },
    numeric(2)
  )

  levels_in_order <- names(by_group)
  df <- data.frame(
    trt = factor(levels_in_order, levels = levels_in_order),
    resp = vapply(by_group, mean, numeric(1)),
    group = factor(levels_in_order, levels = levels_in_order),
    upper = bounds[2, ],
    lower = bounds[1, ],
    row.names = NULL
  )

  # ci bar colors
  if (x == "used_search") {
    colors <- c("#998EC3", "#F26A4D")
  } else if (x == "condition") {
    colors <- c("#998EC3", "#fa9fb5")
  } else if (x == "search_state") {
    colors <- c("#F1A340", "#F1A340")
  }

  p <- ggplot(df, aes(trt, resp, colour = group))
  p <- p + scale_color_manual(values = colors)
  p <- p + geom_pointrange(aes(ymin = lower, ymax = upper))
  p <- p + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1)
  p <- p + expand_limits(y = yRange)
  # With the default yRange = 0 there is no second value to build a break
  # sequence from, so explicit breaks are only set for a real range.
  if (length(yRange) >= 2) {
    p <- p + scale_y_continuous(
      breaks = seq(yRange[1], yRange[2], length.out = 5)
    )
  }
  p <- p + ylab(y)
  p <- p + xlab("")
  p <- p + coord_flip()
  # theme_bw() is a complete theme and replaces every earlier theme() call,
  # so it has to come before the individual adjustments.
  p <- p + theme_bw()
  p <- p + theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 18),
    plot.title = element_text(hjust = 0),
    panel.border = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  )
  p <- p + guides(colour = "none")
  p
}

# Draw one ciplot() per measurement column.
#   data:   data frame with the observations
#   yCol:   vector of measurement column names
#   x:      name of the grouping column
#   yRange: passed through to ciplot()
#   xRange: passed through to ciplot()
# Each plot is printed explicitly (a value produced inside a loop is never
# auto-printed). The plots are also returned invisibly as a list named by
# column.
ciplotMulti <- function(data, yCol, x, yRange = 0, xRange = 0) {
  plots <- lapply(yCol, function(y) ciplot(data, y, x, yRange, xRange))
  names(plots) <- as.character(yCol)

  for (p in plots) {
    print(p)
  }

  invisible(plots)
}

# Plot the mean of column `y` for each level of column `x` with a
# normal-approximation 95% interval (mean +/- qnorm(0.975) * standard error).
# Unlike ciplot(), no bootstrapping is involved.
#   data:   data frame with the observations
#   y:      name of the numeric measurement column
#   x:      name of the grouping column (converted to a factor)
#   yRange: values the measurement axis must include
#   xRange: values the group axis must include
# Prints the plot; the printed ggplot object is returned invisibly.
ciplotManual <- function(data, y, x, yRange = 0, xRange = 0) {
  data["x_"] <- data[x]
  data["y_"] <- data[y]
  data[["x_"]] <- factor(data[["x_"]])

  # dplyr verbs are namespace-qualified so that the result does not depend
  # on whether plyr or dplyr was attached last.
  df2 <- data %>%
    dplyr::group_by(x_) %>%
    dplyr::summarize(n = dplyr::n(), resp = mean(y_), sd = sd(y_)) %>%
    dplyr::mutate(
      se = sd / sqrt(n),
      lower = resp + qnorm(0.025) * se,
      upper = resp + qnorm(0.975) * se
    )
  df2[["trt"]] <- factor(df2[["x_"]])

  # Plot
  p <- ggplot(df2, aes(trt, resp, color = trt))
  p <- p + scale_color_manual(values = c("#998EC3", "#F1A340"))
  p <- p + theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 18)
  )
  p <- p + geom_pointrange(aes(ymin = lower, ymax = upper))
  p <- p + expand_limits(y = yRange, x = xRange)
  p <- p + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1)
  p <- p + labs(
    y = y,
    x = x,
    title = paste("CI Plot: ", y, " ~ ", x)
  )
  print(p)
}
## Gives count, mean, standard deviation, standard error of the mean, and
## confidence interval half-width (default 95%) for each group.
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be
##     summarized
##   groupvars: a vector containing names of columns that contain grouping
##     variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval
##     (default is 95%)
##   .drop: passed to plyr::ddply()
## Returns a data frame with the grouping columns plus N, a column named
## after `measurevar` holding the group mean, sd, se and ci.
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE,
                      conf.interval = .95, .drop = TRUE) {
  # Version of length() which can handle NA's: if na.rm is TRUE, don't
  # count them.
  length2 <- function(x, na.rm = FALSE) {
    if (na.rm) {
      sum(!is.na(x))
    } else {
      length(x)
    }
  }

  # For each group's data frame, return a vector with N, mean, and sd.
  datac <- plyr::ddply(
    data, groupvars,
    .drop = .drop,
    .fun = function(xx, col) {
      c(
        N = length2(xx[[col]], na.rm = na.rm),
        mean = mean(xx[[col]], na.rm = na.rm),
        sd = sd(xx[[col]], na.rm = na.rm)
      )
    },
    measurevar
  )

  # Rename the "mean" column. plyr::rename is called explicitly: when dplyr
  # is attached after plyr, an unqualified rename() resolves to
  # dplyr::rename, which reads the argument as new = old and fails.
  datac <- plyr::rename(datac, c("mean" = measurevar))

  # Standard error of the mean.
  datac$se <- datac$sd / sqrt(datac$N)

  # Confidence interval multiplier for the standard error, from the
  # t-distribution: e.g. if conf.interval is .95, use the .975 quantile
  # with df = N - 1.
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult

  datac
}

#with error
# Bar chart of the group means of `measure`, with 95% confidence-interval
# error bars computed by summarySE().
#   data:    data frame with the observations
#   measure: name of the numeric measurement column
#   group:   name of the (single) grouping column
# Returns the ggplot object.
ciBar <- function(data, measure, group) {
  data <- summarySE(
    data,
    measurevar = measure, groupvars = group,
    na.rm = FALSE, conf.interval = .95
  )

  # Columns are addressed through the .data pronoun because their names
  # arrive as strings. After summarySE() the column named `measure` holds
  # the group mean and `ci` the interval half-width, so the error bars
  # follow whichever measure was requested.
  ggplot(data, aes(x = .data[[group]], y = .data[[measure]])) +
    geom_bar(position = position_dodge(), stat = "identity") +
    geom_errorbar(
      aes(
        ymin = .data[[measure]] - .data[["ci"]],
        ymax = .data[[measure]] + .data[["ci"]]
      ),
      width = .2, # Width of the error bars
      position = position_dodge(.9)
    ) +
    labs(x = group, y = measure)
}

# Print the complete comparison of measurement `y` between the two levels of
# `group`: per-level summary and bootstrapped mean CI, a Wilcoxon test, the
# bootstrapped effect size, and finally the CI plot.
#   data:   data frame with the observations
#   y:      name of the numeric measurement column
#   group:  grouping column; must be "search_state", "used_search" or
#           "condition"
#   yRange: passed to ciplot()
#   paired: passed to wilcox.test()
# Returns the ggplot object produced by ciplot().
fullReport <- function(data, y, group, yRange = 0, paired = FALSE) {
  data["group_"] <- data[group]
  data["y_"] <- data[y]

  # The two levels compared for each supported grouping column. Anything
  # else is rejected here with a clear message rather than failing later on
  # an undefined `lv`.
  lv <- switch(group,
    search_state = c("using_box", "not_using_box"),
    used_search = c("search", "non-search"),
    condition = c("foresight", "control"),
    stop(
      "Unsupported group '", group,
      "': expected 'search_state', 'used_search' or 'condition'.",
      call. = FALSE
    )
  )
  print(lv)

  # report() and reportCI() write with cat() and no trailing newline, so
  # each piece is terminated explicitly to keep the output readable.
  for (level in lv) {
    level_data <- dplyr::filter(data, group_ == level)
    report(level_data, y)
    cat("\n")
    reportCI(level_data, y)
    cat("\n")
  }

  wt <- wilcox.test(y_ ~ group_, data, conf.int = TRUE, paired = paired)
  print(wt)

  reportES(data, y, group)
  cat("\n")

  # fancy ci plots
  ciplot(data, y, group, yRange)
}

Loading

0 comments on commit 78f397e

Please sign in to comment.