-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSSFA_3_Plots.Rmd
386 lines (289 loc) · 12 KB
/
SSFA_3_Plots.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
---
title: "Plots of SSFA variables"
author: "Ivan Calandra"
date: "`r Sys.time()`"
output:
github_document:
toc: true
toc_depth: 2
html_preview: false
html_document:
toc: true
toc_depth: 2
toc_float: true
theme: cerulean
highlight: pygments
number_sections: true
knit: (function(inputFile, encoding) {
rmarkdown::render(inputFile, encoding = encoding, output_format = "all", knit_root_dir = rprojroot::find_rstudio_root_file()) })
---
```{r Knitr Options, include = FALSE}
knitr::opts_chunk$set(comment = NA, message = FALSE, indent = "", error = TRUE)
```
---
# Goal of the script
The script plots all SSFA variables for each dataset to compare between the ConfoMap and Toothfrax analyses.
```{r}
dir_in <- "R_analysis/derived_data"
dir_out <- "R_analysis/plots"
```
Input Rbin data file must be located in "`r paste0("~/", dir_in)`".
Plots will be saved in "`r paste0("~/", dir_out)`".
The knit directory for this script is the project directory.
---
# Load packages
```{r}
pack_to_load <- c("R.utils", "ggplot2", "tidyverse", "ggh4x", "patchwork")
sapply(pack_to_load, library, character.only = TRUE, logical.return = TRUE)
```
---
# Read in data
## Get name and path of input file
```{r}
info_in <- list.files(dir_in, pattern = "\\.Rbin$", full.names = TRUE)
info_in
```
## Read in Rbin file
```{r}
all_data <- loadObject(info_in)
str(all_data)
```
---
# Define variables
Here we define which columns are used for X and Y axes on the plots.
```{r}
# x-axis (grouping)
x_var_GP <- x_var_sheep <- "Diet"
x_var_lith <- "Before.after"
# y-axis
y_var <- c("Asfc", "Smfc", "HAsfc9", "HAsfc81", "epLsar", "NewEplsar", "Rsquared")
# colors
grp_colors <- "Software"
# shapes
grp_shapes <- "NMP_cat"
# subplots Lithics dataset
facet_lith <- "Treatment"
```
The following variables will be used:
```{r}
x_var_GP
x_var_sheep
x_var_lith
grp_colors
facet_lith
y_var
```
---
# Calculate y-scales
The range of the y-scales on the plots should be the same for the guinea pig and sheep datasets. Lithics are not comparable at all so this dataset is plotted with appropriate y-scales for this dataset alone.
```{r}
# Select guinea pig and sheep datasets
GP_sheep <- filter(all_data, Dataset %in% c("GuineaPigs", "Sheeps"))
# Create a named empty list to store the ranges of each parameter
yscales <- vector(mode = "list", length = length(y_var))
names(yscales) <- y_var
# Calculate the range of each parameter
for (i in y_var) {
yscales[[i]] <- scale_y_continuous(limits = range(GP_sheep[[i]], na.rm = TRUE))
}
```
---
# Number of diets
In order to have the same width of the boxes for the sheep and guinea pig datasets, we need to calculate how many diets there are in each dataset.
```{r}
diet_GP <- length(unique(all_data[all_data$Dataset == "GuineaPigs", "Diet"]))
diet_sheep <- length(unique(all_data[all_data$Dataset == "Sheeps", "Diet"]))
ratio_diet <- diet_GP / diet_sheep
```
---
# Exclude surfaces with NMP >= 20%
```{r}
data_nmp0_20 <- filter(all_data, NMP_cat != "20-100%")
data_nmp0_20$NMP_cat <- factor(data_nmp0_20$NMP_cat)
str(data_nmp0_20)
```
---
# Plot each set of the selected numeric variables
## Define plotting function
```{r}
boxes_points <- function(dat, x_var, y_var,
group_col, group_shape,
box_width = 0.9, box_size = 0.25, med_size = 3, point_size = 0.8, shape_scale,
point_jitter = 0.6, point_dodge = 0.9, jitter_seed = 123,
wrap_grid, facet_var, facet_ncol = 2, facet_yscales,
grid_row, grid_col,
xlab = NULL, ylab = NULL) {
# package 'ggh4x' is required for the function facetted_pos_scales()
require(ggh4x)
# Supply x, y and color variables as strings for the plotting
p_out <- ggplot(dat, aes_string(x = x_var, y = y_var, color = group_col)) +
# Boxplots:
# hide outliers (all points are shown with geom_point() below)
# and adjust proportions of elements
geom_boxplot(outlier.shape = NA, width = box_width, size = box_size, fatten = med_size,
position = position_dodge2(preserve = "single")) +
# Points:
# Add a layer of shapes for points using the variable 'group_shape' ('group' necessary for dodging)
# Define jitter for points (jitter) within boxplots (dodge)
# Set seed for repeatability
geom_point(mapping = aes_string(shape = group_shape, group = group_col),
size = point_size,
position = position_jitterdodge(jitter.width = point_jitter,
dodge.width = point_dodge,
seed = jitter_seed)) +
# Remove axis labels
labs(x = xlab, y = ylab) +
# Adjust values for shapes
scale_shape_manual(values = shape_scale, drop = FALSE) +
# Choose a light theme
theme_classic() +
# Move legend to bottom and remove legend title
theme(legend.position = "bottom", legend.title = element_blank())
if (wrap_grid == "wrap") {
# Wrap around parameters with free y-scales
p_out <- p_out + facet_wrap(as.formula(paste0("~", facet_var)),
scales = "free_y", ncol = facet_ncol) +
# Use custom y-scales if argument provided
if (!missing(facet_yscales)) facetted_pos_scales(y = facet_yscales)
}
if (wrap_grid == "grid") {
# Plot a grid of subplots, with variables in rows and treatments in columns
p_out <- p_out + facet_grid(as.formula(paste0(grid_row, "~", grid_col)), scales = "free_y")
}
return(p_out)
}
```
## Guinea Pigs
```{r}
# Filter data for guinea pigs and by NMP (0-5%, 5-10% and 10-20%)
GP <- filter(data_nmp0_20, Dataset == "GuineaPigs")
# Change from wide to long format for easier plotting
GP_plot <- pivot_longer(GP[c(x_var_GP, grp_colors, grp_shapes, y_var)], all_of(y_var))
# Re-order factor levels to fit order of plots on facet
GP_plot$name <- factor(GP_plot$name, levels = y_var)
# Plot all variables at once using facet_wrap()
# The factor 'ratio_diet' ensures that the boxes have the same width as for the sheep dataset
p_GP <- boxes_points(dat = GP_plot, x_var = x_var_GP, y_var = "value",
group_col = grp_colors, group_shape = grp_shapes,
box_width = 0.9*ratio_diet,
point_jitter = 0.6, point_dodge = 0.9*ratio_diet,
wrap_grid = "wrap", facet_var = "name", facet_yscales = yscales,
shape_scale = c(19, 3, 4))
# Print and save resulting plot
print(p_GP)
```
Warnings are due to the missing values for Toothfrax - NewEplsar.
## Sheeps
Note that for compactness, comments to explain the code are given only in the section about [Guinea Pigs](#guinea-pigs). .
```{r}
sheep <- filter(data_nmp0_20, Dataset == "Sheeps")
sheep_plot <- pivot_longer(sheep[c(x_var_sheep, grp_colors, grp_shapes, y_var)], all_of(y_var))
sheep_plot$name <- factor(sheep_plot$name, levels = y_var)
p_sheep <- boxes_points(dat = sheep_plot, x_var = x_var_sheep, y_var = "value",
group_col = grp_colors, group_shape = grp_shapes,
box_width = 0.9,
point_jitter = 0.9, point_dodge = 0.9,
wrap_grid = "wrap", facet_var = "name", facet_yscales = yscales,
shape_scale = c(19, 3, 4))
print(p_sheep)
```
## Lithics
Note that for compactness, comments to explain the code are given only in the section about [Guinea Pigs](#guinea-pigs).
There is one difference though: here, three columns are used for the grouping ("Software", "Treatment", and "Before.after").
Software is still shown with colors.
"Before.after" is plotted on the x-axis and the variables are on the y-axes.
`facet_grid()` is used to plot a grid of subplots, with variables in rows and treatments in columns.
```{r}
lith <- filter(data_nmp0_20, Dataset == "Lithics")
lith_plot <- pivot_longer(lith[c(x_var_lith, grp_colors, grp_shapes, facet_lith, y_var)],
all_of(y_var))
lith_plot$name <- factor(lith_plot$name, levels = y_var)
p_lith <- boxes_points(dat = lith_plot, x_var = x_var_lith, y_var = "value",
group_col = grp_colors, group_shape = grp_shapes,
box_width = 0.9,
point_jitter = 0.9, point_dodge = 0.9,
wrap_grid = "grid", grid_row = "name", grid_col = facet_lith,
shape_scale = c(19, 3, 4))
print(p_lith)
```
## Zoom in for Smfc
The previous plots for the parameter Smfc show some extreme values, especially for the sheep dataset.
In order to better visualize the differences in software, we need zoomed-in plots excluding these extreme values (> 5).
```{r}
y_Smfc <- "Smfc"
ext_val <- 5
# Guinea pig dataset
GP_plot_Smfc <- filter(GP_plot, name == y_Smfc)
GP_plot_Smfc_filt <- filter(GP_plot_Smfc, value <= ext_val)
p_GP_Smfc <- boxes_points(dat = GP_plot_Smfc_filt, x_var = x_var_GP, y_var = "value",
group_col = grp_colors, group_shape = grp_shapes,
box_width = 0.9*ratio_diet,
point_jitter = 0.9, point_dodge = 0.9*ratio_diet, point_size = 1,
ylab = y_Smfc, shape_scale = c(19, 3, 4), wrap_grid = "none") +
ggtitle("Guinea Pigs")
# Sheep dataset
sheep_plot_Smfc <- filter(sheep_plot, name == y_Smfc)
sheep_plot_Smfc_filt <- filter(sheep_plot_Smfc, value <= ext_val)
p_sheep_Smfc <- boxes_points(dat = sheep_plot_Smfc_filt, x_var = x_var_sheep, y_var = "value",
group_col = grp_colors, group_shape = grp_shapes,
box_width = 0.9,
point_jitter = 0.9, point_dodge = 0.9, point_size = 1,
ylab = y_Smfc, shape_scale = c(19, 3, 4), wrap_grid = "none") +
ggtitle("Sheep")
# Lithic dataset
lith_plot_Smfc <- filter(lith_plot, name == y_Smfc)
lith_plot_Smfc_filt <- filter(lith_plot_Smfc, value <= ext_val)
p_lith_Smfc <- boxes_points(dat = lith_plot_Smfc_filt, x_var = x_var_lith, y_var = "value",
group_col = grp_colors, group_shape = grp_shapes,
box_width = 0.9,
point_jitter = 0.9, point_dodge = 0.9, point_size = 1,
ylab = y_Smfc, shape_scale = c(19, 3, 4),
wrap_grid = "wrap", facet_var = "Treatment") +
ggtitle("Lithics")
# Combine all three plots
p_Smfc <- p_GP_Smfc / p_sheep_Smfc / p_lith_Smfc +
plot_layout(heights = c(1,1,2), guides = "collect") & theme(legend.position='bottom')
print(p_Smfc)
```
These plots of Smfc do not show all data points.
For the guinea pigs plot, these points are outside of the y-range shown and are therefore excluded from the plot:
```{r}
data.frame(GP_plot_Smfc[GP_plot_Smfc$value > ext_val, ])
```
For the sheep plot, these points are outside of the y-range shown and are therefore excluded from the plot:
```{r}
data.frame(sheep_plot_Smfc[sheep_plot_Smfc$value > ext_val, ])
```
For the lithic plots, these points are outside of the y-ranges shown and were therefore excluded from the plots:
```{r}
data.frame(lith_plot_Smfc[lith_plot_Smfc$value > ext_val, ])
```
## Save plots
```{r}
# Define filenames
plot_files <- c("SSFA_GuineaPigs_plot.pdf", "SSFA_Sheeps_plot.pdf", "SSFA_Lithics_plot.pdf",
"SSFA_plots-Smfc.pdf")
# Add names to 'plot_files' from object names
names(plot_files) <- c("p_GP", "p_sheep", "p_lith", "p_Smfc")
# Save plots as PDF
for (i in seq_along(plot_files)) {
ggsave(plot = get(names(plot_files)[i]), filename = plot_files[i], path = dir_out,
width = 190, height = 240, units = "mm")
}
```
---
# sessionInfo() and RStudio version
```{r}
sessionInfo()
```
RStudio version `r readLines("R_analysis/scripts/SSFA_0_RStudioVersion.txt", n = 1)`.
---
# Cite R packages used
```{r, echo = FALSE, warning = FALSE}
for (i in pack_to_load) {
cat(i, "\n")
cat(citation(i)$textVersion, "\n", "\n")
}
```
---
END OF SCRIPT