Skip to content

Commit 1c3543e

Browse files
create figures and overview tables
1 parent 6af33e2 commit 1c3543e

25 files changed

+930
-0
lines changed

.Rhistory

Whitespace-only changes.

figures/.Rhistory

+512
Large diffs are not rendered by default.

figures/acca-sigmoid-left.pdf

5.46 KB
Binary file not shown.

figures/acca-sigmoid-mid.pdf

5.44 KB
Binary file not shown.

figures/acca-sigmoid-right.pdf

5.48 KB
Binary file not shown.

figures/acca-sigmoid-tail.pdf

5.39 KB
Binary file not shown.

figures/accb-sigmoid-left.pdf

5.52 KB
Binary file not shown.

figures/accb-sigmoid-mid.pdf

5.44 KB
Binary file not shown.

figures/accb-sigmoid-right.pdf

5.43 KB
Binary file not shown.

figures/accb-sigmoid-tail.pdf

5.41 KB
Binary file not shown.

figures/bias-combined.pdf

5.05 KB
Binary file not shown.

figures/bias-sigmoid-left.pdf

4.38 KB
Binary file not shown.

figures/bias-sigmoid-mid.pdf

4.37 KB
Binary file not shown.

figures/bias-sigmoid-right.pdf

4.42 KB
Binary file not shown.

figures/bias-sigmoid-tail.pdf

4.38 KB
Binary file not shown.

figures/figures_manuscript_rwdata.R

+289
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
library(ggplot2)
2+
library(readxl)
3+
library(tidyverse)
4+
library(RColorBrewer)
5+
library(mice)
6+
library(xtable)
7+
8+
setwd("C:/Users/20200059/Documents/Github/missingness-effect-complete-dataset/figures/")
9+
10+
#### Simulation results and figures for finalized manuscript ####
11+
12+
datam <- read_csv("../csvs/final/averages.csv")
13+
datas <- read_csv("../csvs/final/stds.csv")
14+
dataa <- read_csv("../csvs/final/values_all.csv")
15+
16+
datam <- datam %>%
17+
mutate(MCAR = ordered(factor(MCAR), levels = c(0,10,20))) %>%
18+
mutate(MAR = ordered(factor(MAR), levels = c(0,10,20))) %>%
19+
mutate(MNAR = ordered(factor(MNAR), levels = c(0,10,20)))
20+
21+
datas <- datas %>%
22+
mutate(MCAR = ordered(factor(MCAR), levels = c(0,10,20))) %>%
23+
mutate(MAR = ordered(factor(MAR), levels = c(0,10,20))) %>%
24+
mutate(MNAR = ordered(factor(MNAR), levels = c(0,10,20)))
25+
26+
datam <- cbind(datam,datas)
27+
colnames(datam) <- c(names(datam[1:18]), paste0('std_', names(datam[1:18]), sep = "", collapse = NULL))
28+
29+
# bias for types
30+
types = c('sigmoid-right', 'sigmoid-left', 'sigmoid-tail', 'sigmoid-mid')
31+
for(i in 1:length(types)){
32+
stype = types[i]
33+
34+
plot_data <- datam %>%
35+
filter(type == stype) %>%
36+
filter(MCAR == '10')
37+
38+
bias <- plot_data %>%
39+
ggplot(aes(x = MNAR, y = bias, fill = MAR)) +
40+
geom_bar(stat = "identity", position = position_dodge()) +
41+
labs(title = paste(stype, sep = " ", collapse = NULL),
42+
fill = 'MAR') +
43+
xlab('MNAR') +
44+
ylab('Mean shift') +
45+
guides(fill = guide_legend(direction = "horizontal")) +
46+
theme(legend.position="top",
47+
legend.justification="right",
48+
plot.title = element_text(vjust=-4),
49+
legend.box.margin = margin(-1,0,0,0, "line"),
50+
#axis.title.y = element_text(),
51+
panel.grid.major.x = element_blank(),
52+
panel.grid.minor.x = element_blank(),
53+
panel.grid.major.y = element_blank(),
54+
panel.grid.minor.y = element_blank()) +
55+
scale_fill_manual(values=c("#deebf7", "#9ecae1", "#3182bd"))
56+
57+
name <- paste('bias-', stype, '.pdf', sep = "", collapse = NULL)
58+
#name <- paste('bias-', stype, '.eps', sep = "", collapse = NULL)
59+
ggsave(name, width = 20, height = 24, units = "cm")
60+
}
61+
62+
# std for types
63+
#types = c('sigmoid-right')
64+
types = c('sigmoid-right', 'sigmoid-left', 'sigmoid-tail', 'sigmoid-mid')
65+
for(i in 1:length(types)){
66+
stype = types[i]
67+
68+
plot_data <- datam %>%
69+
filter(type == stype) %>%
70+
filter(MCAR == '10')
71+
72+
std <- plot_data %>%
73+
ggplot(aes(x = MNAR, y = bias_std, fill = MAR)) +
74+
geom_bar(stat = "identity", position = position_dodge()) +
75+
labs(title = paste(stype, sep = " ", collapse = NULL),
76+
fill = 'MAR') +
77+
xlab('MNAR') +
78+
ylab('Standard deviation shift') +
79+
guides(fill = guide_legend(direction = "horizontal")) +
80+
theme(legend.position="top",
81+
legend.justification="right",
82+
plot.title = element_text(vjust=-4),
83+
legend.box.margin = margin(-1,0,0,0, "line"),
84+
#axis.title.y = element_text(),
85+
panel.grid.major.x = element_blank(),
86+
panel.grid.minor.x = element_blank(),
87+
panel.grid.major.y = element_blank(),
88+
panel.grid.minor.y = element_blank()) +
89+
scale_fill_manual(values=c("#deebf7", "#9ecae1", "#3182bd")) +
90+
scale_y_continuous(limits = c(-0.015, 0.015))
91+
92+
name <- paste('std-', stype, '.pdf', sep = "", collapse = NULL)
93+
#name <- paste('std-', stype, '.eps', sep = "", collapse = NULL)
94+
ggsave(name, width = 20, height = 24, units = "cm")
95+
}
96+
97+
# combination bias
98+
plot_data <- datam %>%
99+
filter(type %in% c('sigmoid-tail', 'sigmoid-right')) %>%
100+
filter(MCAR == '10')
101+
102+
bias <- plot_data %>%
103+
ggplot(aes(x = MNAR, y = bias, alpha = MAR, fill = type)) +
104+
geom_bar(stat = "identity", position = position_dodge(), colour='black') +
105+
labs(title = 'Mean shift') +
106+
xlab('MNAR') +
107+
ylab('') +
108+
guides(fill = guide_legend(direction = "horizontal")) +
109+
theme(legend.position="top",
110+
legend.justification="right",
111+
plot.title = element_text(vjust=-4),
112+
legend.box.margin = margin(-1,0,0,0, "line"),
113+
#axis.title.y = element_text(),
114+
#panel.grid.major.x = element_blank(),
115+
#panel.grid.minor.x = element_blank(),
116+
#panel.grid.major.y = element_blank(),
117+
#panel.grid.minor.y = element_blank()
118+
) +
119+
scale_fill_manual(values=c("#fdb863", "#b2abd2"))
120+
121+
bias
122+
123+
name <- 'bias-combined.pdf'
124+
ggsave(name, width = 20, height = 12, units = "cm")
125+
126+
# combination std
127+
plot_data <- datam %>%
128+
filter(type %in% c('sigmoid-mid', 'sigmoid-left')) %>%
129+
filter(MCAR == '10')
130+
131+
std <- plot_data %>%
132+
ggplot(aes(x = MNAR, y = bias_std, alpha = MAR, fill = type)) +
133+
geom_bar(stat = "identity", position = position_dodge(), colour='black') +
134+
labs(title = 'Standard deviation shift') +
135+
xlab('MNAR') +
136+
ylab('') +
137+
guides(fill = guide_legend(direction = "horizontal")) +
138+
theme(legend.position="top",
139+
legend.justification="right",
140+
plot.title = element_text(vjust=-4),
141+
legend.box.margin = margin(-1,0,0,0, "line"),
142+
#axis.title.y = element_text(),
143+
#panel.grid.major.x = element_blank(),
144+
#panel.grid.minor.x = element_blank(),
145+
#panel.grid.major.y = element_blank(),
146+
#panel.grid.minor.y = element_blank())
147+
)+
148+
scale_fill_manual(values=c("#fdb863","#b2abd2")) +
149+
scale_y_continuous(limits = c(-0.00025, 0.0015))
150+
151+
std
152+
153+
name <- 'std-combined.pdf'
154+
#name <- paste('std-', stype, '.eps', sep = "", collapse = NULL)
155+
ggsave(name, width = 20, height = 12, units = "cm")
156+
157+
### plots for accuracy with bias ###
158+
159+
types <- c('sigmoid-right', 'sigmoid-left', 'sigmoid-tail', 'sigmoid-mid')
160+
for(i in 1:length(types)){
161+
stype <- types[i]
162+
163+
plot_data <- datam %>%
164+
filter(type == stype) %>%
165+
filter(MCAR == '0')
166+
167+
acca <- plot_data %>%
168+
ggplot(aes(x=accuracy_a, y=bias, color=MAR, shape=MNAR)) +
169+
geom_point(size = 3) +
170+
geom_errorbarh(aes(xmin=accuracy_a - (std_accuracy_a/1000),
171+
xmax=accuracy_a + (std_accuracy_a/1000))) +
172+
geom_errorbar(aes(ymin=bias - (bias/1000),
173+
ymax=bias + (bias/1000))) +
174+
labs(title = 'Relation between mean shift and accuracy type a') +
175+
xlab('accuracy type a') +
176+
ylab('') +
177+
guides(fill = guide_legend(direction = "horizontal")) +
178+
theme(legend.position="top",
179+
legend.justification="right",
180+
plot.title = element_text(vjust=-4),
181+
legend.box.margin = margin(-1,0,0,0, "line"),
182+
#axis.title.y = element_text(),
183+
#panel.grid.major.x = element_blank(),
184+
#panel.grid.minor.x = element_blank(),
185+
#panel.grid.major.y = element_blank(),
186+
#panel.grid.minor.y = element_blank()
187+
) +
188+
scale_colour_manual(values = c("#e66101", "#92c5de", "#a6d96a")) +
189+
scale_shape_manual(values=c(22,23,24))
190+
#+ scale_x_continuous(limits = c(0.9175,0.926))
191+
192+
acca
193+
194+
name <- paste('acca-', stype, '.pdf', sep = "", collapse = NULL)
195+
ggsave(name, width = 16, height = 12, units = "cm")
196+
}
197+
198+
# type b
199+
200+
types <- c('sigmoid-right', 'sigmoid-left', 'sigmoid-tail', 'sigmoid-mid')
201+
for(i in 1:length(types)){
202+
stype <- types[i]
203+
204+
plot_data <- datam %>%
205+
filter(type == stype) %>%
206+
filter(MCAR == '0')
207+
208+
acca <- plot_data %>%
209+
ggplot(aes(x=accuracy_b, y=bias, color=MAR, shape=MNAR)) +
210+
geom_point(size = 3) +
211+
geom_errorbarh(aes(xmin=accuracy_b - (std_accuracy_b/1000),
212+
xmax=accuracy_b + (std_accuracy_b/1000))) +
213+
geom_errorbar(aes(ymin=bias - (bias/1000),
214+
ymax=bias + (bias/1000))) +
215+
labs(title = 'Relation between mean shift and accuracy type b') +
216+
xlab('accuracy type b') +
217+
ylab('') +
218+
guides(fill = guide_legend(direction = "horizontal")) +
219+
theme(legend.position="top",
220+
legend.justification="right",
221+
plot.title = element_text(vjust=-4),
222+
legend.box.margin = margin(-1,0,0,0, "line"),
223+
#axis.title.y = element_text(),
224+
#panel.grid.major.x = element_blank(),
225+
#panel.grid.minor.x = element_blank(),
226+
#panel.grid.major.y = element_blank(),
227+
#panel.grid.minor.y = element_blank()
228+
) +
229+
scale_colour_manual(values = c("#e66101", "#92c5de", "#a6d96a")) +
230+
scale_shape_manual(values=c(22,23,24))
231+
#+ scale_x_continuous(limits = c(0.9175,0.926))
232+
233+
accb
234+
235+
name <- paste('accb-', stype, '.pdf', sep = "", collapse = NULL)
236+
ggsave(name, width = 16, height = 12, units = "cm")
237+
}
238+
239+
values <- dataa %>%
240+
mutate(bias = abs(as.numeric(bias)),
241+
accuracy_a = as.numeric(accuracy_a),
242+
accuracy_b = as.numeric(accuracy_b),
243+
std = as.numeric(std)) %>%
244+
group_by(MCAR,MAR,MNAR,type) %>%
245+
summarize(abias = mean(bias),
246+
astd = mean(bias_std),
247+
n=n())
248+
249+
values
250+
251+
corsa <- dataa %>%
252+
mutate(bias = abs(as.numeric(bias)),
253+
accuracy_a = as.numeric(accuracy_a),
254+
accuracy_b = as.numeric(accuracy_b),
255+
bias_std = abs(as.numeric(bias_std))) %>%
256+
filter(MCAR == '0') %>%
257+
group_by(type) %>%
258+
summarize(corbiasa = cor(bias, accuracy_a),
259+
corbiasb = cor(bias, accuracy_b),
260+
corstda = cor(bias_std, accuracy_a),
261+
corstdb = cor(bias_std, accuracy_b),
262+
n=n()) %>%
263+
mutate(corbiasa_se = sqrt((1-corbiasa^2)/(n-2)),
264+
corbiasb_se = sqrt((1-corbiasb^2)/(n-2)),
265+
corstda_se = sqrt((1-corstda^2)/(n-2)),
266+
corstdb_se = sqrt((1-corstdb^2)/(n-2)))
267+
corsa
268+
269+
corsb <- dataa %>%
270+
mutate(bias = as.numeric(bias),
271+
accuracy_a = as.numeric(accuracy_a),
272+
accuracy_b = as.numeric(accuracy_b),
273+
bias_std = as.numeric(bias_std)) %>%
274+
filter(MCAR == '0') %>%
275+
group_by(type) %>%
276+
summarize(corbiasa = cor(bias, accuracy_a),
277+
corbiasb = cor(bias, accuracy_b),
278+
corstda = cor(bias_std, accuracy_a),
279+
corstdb = cor(bias_std, accuracy_b),
280+
n=n()) %>%
281+
mutate(corbiasa_se = sqrt((1-corbiasa^2)/(n-2)),
282+
corbiasb_se = sqrt((1-corbiasb^2)/(n-2)),
283+
corstda_se = sqrt((1-corstda^2)/(n-2)),
284+
corstdb_se = sqrt((1-corstdb^2)/(n-2)))
285+
corsb
286+
287+
pt(0.0145/0.011, df = 8000, lower.tail = FALSE)
288+
289+
+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
library(ggplot2)
2+
library(tidyverse)
3+
4+
setwd("C:/Users/20200059/Documents/Github/missingness-effect-complete-dataset/figures/")
5+
6+
result <- read_csv("../csvs/final/synthetic.csv")
7+
head(result)
8+
9+
result <- result %>%
10+
mutate(type = ordered(factor(type),
11+
levels = c('MCAR','LEFT','MID','TAIL','RIGHT')))
12+
13+
skews <- result %>%
14+
group_by(mech, type, skew) %>%
15+
summarize(am = mean(amount)) %>%
16+
filter(skew == 'yes')
17+
skews
18+
19+
# bias mean
20+
plot <- result %>%
21+
filter(skew != 'diff') %>%
22+
ggplot(aes(y=mean, x=mech, fill=skew)) +
23+
geom_boxplot(outlier.shape=NA)+
24+
geom_boxplot(aes(color=skew),outlier.shape = NA) +
25+
facet_grid(. ~ type, labeller = label_both, scales = "free") +
26+
labs(title = 'Mean shift',
27+
fill = 'skew') +
28+
xlab('mechanism') +
29+
ylab('') +
30+
guides(fill = guide_legend(direction = "horizontal")) +
31+
theme(legend.position="top",
32+
legend.justification="right",
33+
plot.title = element_text(vjust=-10)) +
34+
scale_fill_manual(values=c("#e34a33", "#1a9641")) +
35+
scale_color_manual(values=c("#e34a33", "#1a9641"))
36+
37+
plot
38+
name <- paste('synthetic-bias.pdf', sep = "", collapse = NULL)
39+
ggsave(name, width = 20, height = 12, units = "cm")
40+
41+
result %>%
42+
filter(skew == 'diff') %>%
43+
ggplot(aes(y=mean, x=type, colour=mech)) +
44+
geom_boxplot(outlier.shape = NA)
45+
#+ scale_y_continuous(limits = quantile(result$mean, c(0.1,0.9)))
46+
47+
# bias sd
48+
plot <- result %>%
49+
filter(skew != 'diff') %>%
50+
ggplot(aes(y=sd, x=mech, fill=skew)) +
51+
geom_boxplot(outlier.shape=NA)+
52+
geom_boxplot(aes(color=skew),outlier.shape = NA) +
53+
facet_grid(. ~ type, labeller = label_both, scales = "free") +
54+
labs(title = 'Standard deviation shift',
55+
fill = 'skew') +
56+
xlab('mechanism') +
57+
ylab('') +
58+
guides(fill = guide_legend(direction = "horizontal")) +
59+
theme(legend.position="top",
60+
legend.justification="right",
61+
plot.title = element_text(vjust=-10)) +
62+
scale_fill_manual(values=c("#e34a33", "#1a9641")) +
63+
scale_color_manual(values=c("#e34a33", "#1a9641"))
64+
65+
plot
66+
name <- paste('synthetic-std.pdf', sep = "", collapse = NULL)
67+
ggsave(name, width = 20, height = 12, units = "cm")
68+
69+
result %>%
70+
filter(skew == 'diff') %>%
71+
ggplot(aes(y=sd, x=type, colour=mech)) +
72+
geom_boxplot(outlier.shape = NA)
73+
+ scale_y_continuous(limits = quantile(result$sd, c(0.1,0.9)))
74+
75+
# bias median
76+
result %>%
77+
filter(skew != 'diff') %>%
78+
ggplot(aes(y=median, x=mech, colour=skew)) +
79+
geom_boxplot(outlier.shape = NA) +
80+
facet_grid(. ~ type, labeller = label_both)
81+
+ scale_y_continuous(limits = quantile(result$median, c(0.1,0.9)))
82+
83+
result %>%
84+
filter(skew == 'diff') %>%
85+
ggplot(aes(y=median, x=type, colour=mech)) +
86+
geom_boxplot(outlier.shape = NA)
87+
+ scale_y_continuous(limits = quantile(result$median, c(0.1,0.9)))
88+
89+
# iqr
90+
result %>%
91+
filter(skew != 'diff') %>%
92+
ggplot(aes(y=iqr, x=mech, colour=skew)) +
93+
geom_boxplot(outlier.shape = NA) +
94+
facet_grid(. ~ type, labeller = label_both)
95+
+ scale_y_continuous(limits = quantile(result$iqr, c(0.1,0.9)))
96+
97+
result %>%
98+
filter(skew == 'diff') %>%
99+
ggplot(aes(y=iqr, x=type, colour=mech)) +
100+
geom_boxplot(outlier.shape = NA)
101+
+ scale_y_continuous(limits = quantile(result$iqr, c(0.1,0.9)))
102+
103+
# skew
104+
result %>%
105+
filter(skew != 'diff') %>%
106+
ggplot(aes(y=amount, x=mech, colour=skew)) +
107+
geom_boxplot(outlier.shape = NA) +
108+
facet_grid(. ~ type, labeller = label_both) +
109+
scale_y_continuous(limits = quantile(result$amount, c(0.1,0.9)))

0 commit comments

Comments
 (0)