---
title: "Data Preparation"
author: "Simon Kucharsky"
date: "`r Sys.Date()`"
output:
  rmarkdown::github_document:
    # pandoc_args: --webtex
bibliography: "`r here::here('bibliography.bib')`"
csl: "`r here::here('apa.csl')`"
---
## Eye movement data
The eye movement data comes from @renswoude2019object_familiarity [https://osf.io/dp245](https://osf.io/dp245), and is based on the stimulus materials provided in @xu2014beyond.
First, we clean the data and prepare the relevant indexes (i.e., participant IDs need to be recoded from factors to integers for Stan).
NB. All data are in a coordinate system with the origin (0, 0) in the top-left corner of the picture (i.e., compared to the classic Cartesian coordinate system, the y-axis is inverted). This is standard in eye tracking and image processing, but we mention it here to avoid confusion.
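For example (purely illustrative and not used anywhere below; it assumes the 600-pixel image height of these stimuli), a fixation near the top edge of a picture has a small y value, and flipping it to a conventional Cartesian y-axis would look like this:
```{r coordinates_example}
# Illustration only: convert the eye-tracking y coordinate (origin at the top-left)
# to a Cartesian y coordinate (origin at the bottom-left) for a 600 px tall image.
flip_y <- function(y, img_height = 600) img_height - y
flip_y(50) # a fixation 50 px below the top edge lies 550 px above the bottom edge
```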
```{r echo=TRUE, message=FALSE, warning=FALSE}
library(tidyverse)
library(imager)
library(here)
source(here::here("R", "load_image.R"))
ggplot2::theme_set(ggplot2::theme_classic())
# load data
df <- read.csv(here::here("data", "object_familiarity", "Fixations_all.csv"), sep = ";", dec = ",")
# get relevant variables
df_clean <- dplyr::select(df, PP, Trial, Order, image, mean_x, mean_y, Duration, AGE)
names(df_clean) <- c("ppt", "trial", "order", "image", "x", "y", "duration (ms)", "age")
# convert duration from ms to sec
df_clean$duration <- df_clean[['duration (ms)']]/1000
# convert indexes to integers
df_clean$id_ppt <- as.integer(df_clean$ppt)
df_clean$id_img <- as.integer(as.factor(df_clean$image))
df_clean$image <- as.character(df_clean$image)
# arrange by ppt, img, and order of fixations
df_clean <- dplyr::arrange(df_clean, id_ppt, id_img, order)
# reorder variables
df_clean <- dplyr::select(df_clean, id_ppt, id_img, order, ppt, image, age, trial, x, y, duration)
# save
readr::write_csv(df_clean, path = here::here("data", "object_familiarity", "fixations.csv"))
rm(df, df_clean)
# load again (check whether it can be loaded correctly)
cols_spec <- readr::cols(
id_ppt = readr::col_integer(),
id_img = readr::col_integer(),
order = readr::col_integer(),
ppt = readr::col_character(),
image = readr::col_character(),
age = readr::col_double(),
trial = readr::col_integer(),
x = readr::col_double(),
y = readr::col_double(),
duration = readr::col_double()
)
df <- readr::read_csv(file = here::here("data", "object_familiarity", "fixations.csv"), col_types = cols_spec)
```
Here, we assign each trial to either the sample used for model fitting or the sample used for model validation. We want each participant and each image to be represented in both samples.
```{r n_fixations, out.width='100%', fig.align='center'}
n_fixations <- df %>%
dplyr::group_by(id_ppt, id_img) %>%
dplyr::summarise(n = dplyr::n()) %>%
dplyr::ungroup() %>%
tidyr::complete(id_ppt, id_img, fill = list(n = 0)) %>%
dplyr::mutate(is_na = n == 0, is_not_na = n != 0)
n_fixations %>%
ggplot2::ggplot(ggplot2::aes(x = as.factor(id_ppt), y = as.factor(id_img), alpha = n)) +
ggplot2::geom_tile(color = "white") +
ggplot2::geom_point(data = n_fixations %>% subset(is_na), ggplot2::aes(x = id_ppt, y = id_img), shape = 4, size = 2, alpha = 1) +
ggplot2::coord_equal() +
ggplot2::xlab("Participant") +
ggplot2::ylab("Image") +
ggplot2::ggtitle("Number of fixations")
```
First, we see that some participants (`r n_fixations %>% dplyr::group_by(id_ppt) %>% summarise(lesshalf = sum(is_not_na) < dplyr::n()/2) %>% filter(lesshalf) %>% .$id_ppt %>% paste(collapse=", ")`) completed fewer than half of the trials, so we group the data by participant and split each participant's trials into two subsets with roughly equal numbers of trials.
```{r assign_train}
set.seed(2020)
df %<>% plyr::ddply(.variables = "id_ppt", .fun = function(d){
img <- unique(d$id_img)
n_img <- length(img)
n_train <- floor(n_img / 2)
train <- sample(x = c(rep(TRUE, n_train), rep(FALSE, n_img - n_train)), size = n_img, replace = FALSE)
d$train <- logical(length = nrow(d))
for(i in 1:nrow(d)) d$train[i] <- train[which(img == d$id_img[i])]
d
})
```
```{r plot_train, out.width='100%', fig.align='center'}
trains <- df %>%
dplyr::group_by(id_ppt, id_img) %>%
dplyr::summarise(train = all(train))
trains %>%
ggplot2::ggplot(ggplot2::aes(x = as.factor(id_ppt), y = as.factor(id_img), fill = as.factor(train))) +
ggplot2::geom_tile() +
ggplot2::geom_point(data = n_fixations %>% subset(is_na), ggplot2::aes(x = id_ppt, y = id_img), shape = 4, size = 2, alpha = 1, inherit.aes = FALSE) +
ggplot2::coord_equal() +
ggplot2::xlab("Participant") +
ggplot2::ylab("Image") +
ggplot2::ggtitle("Used for estimating parameters") +
ggplot2::scale_fill_manual(values = c("#E69F00", "#999999"),
name = NULL,
breaks = c("TRUE", "FALSE"),
labels = c("Yes", "No")
)
```
```{r n_per_img, out.width='100%', fig.align='center'}
par(mfrow=c(1, 2))
barplot(df %>% subset(train) %>% .$id_img %>% table(), main = "Fixations in training set", xlab = "Image", ylab = "Count")
barplot(trains %>% subset(train) %>% .$id_img %>% table(), main = "Trials in training set", xlab = "Image", ylab = "Count")
```
```{r n_per_ppt, out.width='100%', fig.align='center'}
par(mfrow=c(1, 2))
barplot(df %>% subset(train) %>% .$id_ppt %>% table(), main = "Fixations in training set", xlab = "Participant", ylab = "Count")
barplot(trains %>% subset(train) %>% .$id_ppt %>% table(), main = "Trials in training set", xlab = "Participant", ylab = "Count")
```
## Saliency data
We already preprocessed the stimulus images using the Itti and Koch algorithm. The processed images are in [`/data/saliency/`](/data/saliency/). A `Python 3.7` script is available at [`get_saliency.py`](/data/saliency/get_saliency.py); it requires cloning the Itti and Koch saliency repository from [https://github.com/tamanobi/saliency-map](https://github.com/tamanobi/saliency-map), originally created by Mayo Yamasaki.
```{r down_pars, echo = FALSE}
down_pars <- list(factor = 40, sigma = 20, range = 40)
down_pars$new_width <- 800 / down_pars$factor
down_pars$new_height <- 600 / down_pars$factor
down_pars$area <- 800/down_pars$new_width * 600/down_pars$new_height
```
We downsample the image saliency by a factor of `r down_pars$factor` in each dimension. First, we apply a Gaussian blur with SD = `r down_pars$sigma` and a range of `r down_pars$range`, and then take every `r down_pars$factor`^th^ row and column. That means that a picture of 800 by 600 pixels will have a downsampled saliency map of `r down_pars$new_width` by `r down_pars$new_height` aggregated pixels. Each aggregated pixel then covers an area of `r down_pars$area` original pixels.
```{r downsample_saliency}
library(imager)
library(OpenImageR)
source(here::here("R", "load_image.R"))
img <- unique(df$image)
img_names <- paste0(img, ".jpg")
# load images that are used in the experiment
saliency <- lapply(img_names, load_image, folder = here::here("data", "saliency"))
# downsample
saliency_downsampled <- lapply(saliency, function(s) imager::as.cimg(OpenImageR::down_sample_image(as.matrix(s), down_pars$factor, TRUE, down_pars$sigma, down_pars$range)))
names(saliency) <- names(saliency_downsampled) <- img
# save downsampled images
for(i in img){
imager::save.image(im = saliency_downsampled[[i]],
file = here::here("data", "saliency", sprintf("%s_downsampled.jpg", i)),
quality = 1)
}
```
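As a quick sanity check (a minimal sketch; it assumes `load_image()` returns the saliency maps as `cimg` objects at the original 800 × 600 pixel resolution), the downsampled maps should be `r down_pars$new_width` aggregated pixels wide and `r down_pars$new_height` aggregated pixels tall:
```{r check_downsampled_dims}
# expected: 20 aggregated pixels wide and 15 tall (800 / 40 by 600 / 40)
imager::width(saliency_downsampled[[1]])
imager::height(saliency_downsampled[[1]])
```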
Here, we normalize each saliency map so that it sums to 1, compute the corresponding log density per original pixel, and reshape the maps into long format for convenience.
```{r saliency_data}
# convert to long format
saliency_normalized <- lapply(saliency_downsampled, as.data.frame)
image_key <- dplyr::select(df, id_img, image) %>% unique()
for(i in img){
s <- saliency_normalized[[i]]
# normalize map
s$value_normalized <- s$value / sum(s$value)
# keep info about index of pixel
s$row <- s$x
s$col <- s$y
s$idx <- seq_len(nrow(s))
# rescale to original coordinates
resc <- function(x, factor = 2) {(factor*(x-1) + 1 + factor*x)/2}
s$x <- resc(s$x, down_pars$factor)
s$y <- resc(s$y, down_pars$factor)
s$image <- i
s$id_img <- image_key$id_img [image_key$image == i]
# compute the log of the normalized saliency map (discrete)
s$saliency_log <- log(s$value_normalized)
# compute the log of the density saliency map (standardize by area of pixels)
s$log_lik_saliency <- s$saliency_log - log(down_pars$area)
saliency_normalized[[i]] <- select(s, id_img, image, row, col, idx, x, y, value, value_normalized, saliency_log, log_lik_saliency)
}
saliency_normalized <- dplyr::bind_rows(saliency_normalized)
readr::write_csv(saliency_normalized, path = here::here("data", "saliency.csv"))
```
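In other words (notation introduced here only for exposition), writing $s_k$ for the raw downsampled saliency of aggregated pixel $k$ of an image and $A$ = `r down_pars$area` for the number of original pixels covered by one aggregated pixel, the last two columns computed above are
$$\log p_k = \log \frac{s_k}{\sum_j s_j}, \qquad \log f_k = \log p_k - \log A,$$
i.e., `saliency_log` is the log probability mass per aggregated pixel and `log_lik_saliency` the corresponding log density per original pixel.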
## Combination of saliency and eye movement data
```{r radius, echo=FALSE}
# parameters for subsetting saliency within the radius of foveal vision around the fixation position
dist_screen <- 60 # distance from the screen (cm)
theta <- 5 # radius of foveal vision (degrees of visual angle)
width_cm <- 51 # width of the screen (cm)
width_pix <- 800 # number of pixels in horizontal location
size_pix <- width_cm / width_pix # size of one pixel
radius <- tan(theta * pi / 180)*dist_screen / size_pix
```
For models that include saliency among the explanatory variables, we copy the log-likelihood of the saliency at the fixated location into the fixation data set for the model of where. For the model of when, we consider only aggregated pixels within a certain distance from the current location. This distance (`r radius` pixels) is calculated by converting the radius of foveal vision (`r theta` degrees of visual angle) into pixels, given the distance from the screen (`r dist_screen` cm), the width of the monitor (`r width_cm` cm), and the number of pixels in the horizontal direction (`r width_pix`).
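Written out with the values defined in the (hidden) chunk above, the conversion is
$$\text{radius} = \frac{\tan(\theta \cdot \pi / 180) \cdot \text{distance}}{\text{width}_{\text{cm}} / \text{width}_{\text{pix}}} = \frac{\tan(5 \cdot \pi / 180) \cdot 60}{51 / 800} \approx 82.3 \text{ pixels}.$$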
```{r log_lik_saliency}
df$log_lik_saliency <- numeric(length = nrow(df))
df$n_neighbors <- integer(length = nrow(df)) # initialize; filled per fixation below
saliency_log_list <- list()
pb <- dplyr::progress_estimated(nrow(df))
for(i in seq_len(nrow(df))){
pb$tick()$print()
x <- df$x[i]
y <- df$y[i]
current_img <- df$id_img[[i]]
s <- subset(saliency_normalized, id_img == current_img) # get saliency of the correct image
distances <- sqrt((x-s$x)^2 + (y-s$y)^2)
mean_sq_distances <- distances^2 / 2
which_closest <- which.min(distances) # get index of the aggregated pixel the fixation location is inside of
df$log_lik_saliency[i] <- s$log_lik_saliency[which_closest] # copy the corresponding log normalized saliency into the dataset
df$n_neighbors[i] <- sum(distances < radius)
saliency_log_list[[i]] <- data.frame(
distances = distances[distances < radius],
mean_sq_distances = mean_sq_distances[distances < radius],
saliency_log = s$saliency_log[distances < radius]
)
}
```
Because the number of neighboring aggregated pixels differs between fixations, we pad the resulting ragged structures into rectangular matrices (999 marks unused cells) so that they can be passed to Stan.
```{r}
# prepare the elements of saliency_log_list as rectangular structures passable to Stan
# (one row per fixation; columns beyond a fixation's number of neighbors are padded with 999)
max_neighbors <- max(df$n_neighbors)
mean_sq_distances <- matrix(999, ncol = max_neighbors, nrow = nrow(df))
saliency_log <- matrix(999, ncol = max_neighbors, nrow = nrow(df))
for(i in seq_len(nrow(df))){
mean_sq_distances[i, 1:df$n_neighbors[i]] <- saliency_log_list[[i]]$mean_sq_distances
saliency_log [i, 1:df$n_neighbors[i]] <- saliency_log_list[[i]]$saliency_log
}
```
## Prepare objects data
We also extract information about the objects in the scenes.
```{r objects}
obj <- read.csv(here::here("data", "object_familiarity", "cen700.txt"), sep = "", dec = ".")
obj$image <- as.character(1000 + obj$image)
obj <- subset(obj, image %in% image_key$image)
objects <- dplyr::tibble(id_img = image_key$id_img[sapply(obj$image, function(i) which(i == image_key$image))],
image = obj$image,
x = obj$x,
y = obj$y,
width = obj$max_x - obj$min_x,
height = obj$max_y - obj$min_y,
min_x = obj$min_x,
min_y = obj$min_y,
max_x = obj$max_x,
max_y = obj$max_y)
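# for each image: the number of objects and its (from, to) row range in `objects` arranged by id_img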
objects_in_images <- objects %>%
dplyr::arrange (id_img) %>%
dplyr::group_by (id_img) %>%
dplyr::summarise(n = dplyr::n()) %>%
dplyr::mutate (to = cumsum(n)) %>%
dplyr::mutate (from = to - n + 1) %>%
dplyr::select (id_img, n, from, to)
```
```{r save, eval=TRUE, include=TRUE}
readr::write_csv(df, path = here::here("data", "fixations.csv"))
readr::write_csv(df %>% subset(train), path = here::here("data", "fixations_train.csv"))
readr::write_csv(df %>% subset(!train), path = here::here("data", "fixations_validate.csv"))
readr::write_csv(as.data.frame(mean_sq_distances), path = here::here("data", "mean_sq_distances.csv"))
readr::write_csv(as.data.frame(saliency_log), path = here::here("data", "saliency_log.csv"))
readr::write_csv(objects, path = here::here("data", "objects.csv"))
readr::write_csv(objects_in_images, path = here::here("data", "objects_in_images.csv"))
save(df, mean_sq_distances, saliency_log, saliency_normalized, image_key, objects, objects_in_images,
file = here::here("data", "cleaned_data.Rdata"))
```
## References