-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcjfas-2023-0093supplb.Rmd
256 lines (210 loc) · 13.1 KB
/
cjfas-2023-0093supplb.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
---
title: "Assessing small pelagic fish trends in space and time using piscivore stomach contents: Supplement b, Model selection"
author: "Sarah Gaichas, James Gartland, Brian Smith, Anthony Wood, Elizabeth Ng, Michael Celestino,
Katie Drew, Abigail Tyrell, and James Thorson"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
bookdown::word_document2:
toc: false
bookdown::html_document2:
toc: true
toc_float: true
bookdown::pdf_document2:
includes:
in_header: latex/header1.tex
keep_tex: true
link-citations: yes
csl: "canadian-journal-of-fisheries-and-aquatic-sciences.csl"
bibliography: FishDiet_EcoIndicators.bib
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, # no code blocks in word doc
message = FALSE,
warning = FALSE,
ft.arraystretch = 1)
library(tidyverse)
theme_set(theme_bw())
library(here)
```
# VAST model selection {-}
Comparisons of AIC are presented for both the first (spatial and spatio-temporal random effects, Table Sb\@ref(tab:modsel1)) and second (catchability covariates, Table Sb\@ref(tab:modsel2)) steps of model selection.
```{r}
# from each output folder in pyindex,
outdir <- here::here("pyindex_modsel1")
moddirs <- list.dirs(outdir)
moddirs <- moddirs[-1]
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)
# function to apply extracting info
getmodinfo <- function(d.name){
# read settings
modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
modname <- modpath[length(modpath)]
settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
fill = TRUE, header = FALSE)
n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
#FieldConfig
if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
}
if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
beta1 <- "IID"
beta2 <- "IID"
}
#RhoConfig
rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
# read parameter estimates, object is called parameter_Estimates
load(file.path(d.name, "parameter_estimates.RData"))
AIC <- parameter_estimates$AIC[1]
converged <- parameter_estimates$Convergence_check[1]
fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
#index <- read.csv(file.path(d.name, "Index.csv"))
# return model attributes as a dataframe
out <- data.frame(modname = modname,
n_x = n_x,
grid_size_km = grid_size_km,
max_cells = max_cells,
use_anisotropy = use_anisotropy,
fine_scale = fine_scale,
bias.correct = bias.correct,
omega1 = omega1,
omega2 = omega2,
epsilon1 = epsilon1,
epsilon2 = epsilon2,
beta1 = beta1,
beta2 = beta2,
rho_epsilon1 = rho_epsilon1,
rho_epsilon2 = rho_epsilon2,
rho_beta1 = rho_beta1,
rho_beta2 = rho_beta2,
AIC = AIC,
converged = converged,
fixedcoeff = fixedcoeff,
randomcoeff = randomcoeff#,
#index = index
)
return(out)
}
modcompare <- purrr::map_dfr(moddirs, getmodinfo)
```
Models compared using REML are identified by model name ("modname" in Table Sb\@ref(tab:modsel1)) which always includes all prey aggregated, season ("all" for annual models of months 1-12, "fall" for models of months 7-12, and "spring" for models of months 1-6), number of knots (500 for all models), and which fixed and random spatial and spatio-temporal effects were included in which linear predictor (1 or 2). The names for model options and associated VAST model settings are:
Model selection 1 (spatial, spatio-temporal effects, no covariates) options (following @ng_predator_2021) and naming:
* All models set Use_REML = TRUE in fit_model specifications.
* Modeled effects, model name suffix, and VAST settings by model:
1. "_alleffectson" = Spatial and spatio-temporal random effects plus anisotropy in both linear predictors; FieldConfig default (all IID)
1. "_noaniso" = Spatial and spatio-temporal random effects in both linear predictors with anisotopy turned off; FieldConfig default (all IID) and `use_anisotopy = FALSE`
1. "_noomeps2" = Spatial and spatio-temporal random effects plus anisotropy only in linear predictor 1; FieldConfig = 0 for Omega2, Epsilon2
1. "_noomeps2_noaniso" = Spatial and spatio-temporal random effects only in linear predictor 1 with anisotopy turned off; FieldConfig = 0 for Omega2, Epsilon2 and `use_anisotopy = FALSE`
1. "_noomeps2_noeps1" = Spatial random effects plus anisotropy only in linear predictor 1; FieldConfig = 0 for Omega2, Epsilon2, Epsilon1
1. "_noomeps2_noeps1_noaniso" = Spatial random effects only in linear predictor 1 with anisotopy turned off; FieldConfig = 0 for Omega2, Epsilon2, Epsilon1 and `use_anisotopy = FALSE`
1. "_noomeps12" = Anisotropy, but no spatial or spatio-temporal random effects in either linear predictor; FieldConfig = 0 for both Omega, Epsilon
1. "_noomeps12_noaniso" = No spatial or spatio-temporal random effects in either linear predictor; FieldConfig = 0 for both Omega, Epsilon and `use_anisotopy = FALSE`
```{r modsel1}
modselect <- modcompare %>%
dplyr::mutate(season = case_when(str_detect(modname, "_fall_") ~ "Fall",
str_detect(modname, "spring") ~ "Spring",
str_detect(modname, "_all_") ~ "Annual",
TRUE ~ as.character(NA))) %>%
dplyr::mutate(converged2 = case_when(str_detect(converged, "no evidence") ~ "likely",
str_detect(converged, "is likely not") ~ "unlikely",
TRUE ~ as.character(NA))) %>%
dplyr::mutate(modname = str_extract(modname, '(?<=allagg_).*')) |>
dplyr::group_by(season) %>%
dplyr::mutate(deltaAIC = AIC-min(AIC)) %>%
dplyr::select(modname, season, deltaAIC, fixedcoeff,
randomcoeff, use_anisotropy,
omega1, omega2, epsilon1, epsilon2,
beta1, beta2, AIC, converged2) %>%
dplyr::arrange(season, AIC)
# DT::datatable(modselect, rownames = FALSE,
# options= list(pageLength = 25, scrollX = TRUE),
# caption = "Comparison of delta AIC values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures. See text for model descriptions.")
flextable::flextable(modselect %>%
dplyr::select(-c(use_anisotropy,
omega1, omega2, epsilon1, epsilon2,
beta1, beta2))) %>%
flextable::set_header_labels(modname = "Model name",
season = "Season",
deltaAIC = "dAIC",
fixedcoeff = "N fixed",
randomcoeff = "N random",
converged2 = "Convergence") |>
flextable::set_caption("Comparison of delta AIC (dAIC) values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures, with number of fixed (N fixed) and random (N random) coefficients. See text for model descriptions associated with each model name.") %>%
flextable::fontsize(size = 9, part = "all") %>%
flextable::colformat_double(digits = 2) |>
flextable::set_table_properties(layout = "autofit", width = 1)
```
Using REML, models including spatial and spatio-temporal random effects as well as anisotropy were best supported by the data. This was true for the spring dataset, the fall dataset, and the annual (seasons combined) dataset.
For the second step of model selection with different combinations of vessel effects and or catchability covariates, "modname" in Table Sb\@ref(tab:modsel2) follows a similar pattern as above, with all prey aggregated ("allagg" for all models), season ("all" for annual models of months 1-12, "fall" for models of months 7-12, and "spring" for models of months 1-6), number of knots (500 for all models), and which vessel effects or covariates were included. The names for model options and associated VAST model settings are:
Model selection 2 (covariates) options, FieldConfig default (all IID), with anisotropy:
1. "_base" = No vessel overdispersion or length/number covariates
1. "_len" = Predator mean length covariate
1. "_num" = Number of predator species covariate
1. "_lennum" = Predator mean length and number of predator species covariates
1. "_sst" = Combined in situ and OISST covariate
1. "_lensst" = Predator mean length and SST covariates
1. "_numsst" = Number of predator species and SST covariates
1. "_lennumsst" = Predator mean length, number of predator species, and SST covariates
1. "_eta10" = Overdispersion (vessel effect) in first linear predictor (prey encounter)
1. "_eta11" = Overdispersion (vessel effect) in both linear predictors (prey encounter and weight)
```{r modsel2}
# from each output folder in pyindex,
outdir <- here("pyindex_modsel2")
moddirs <- list.dirs(outdir)
moddirs <- moddirs[-1]
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)
modcompare2 <- purrr::map_dfr(moddirs, getmodinfo)
modselect2 <- modcompare2 %>%
dplyr::mutate(season = case_when(str_detect(modname, "_fall_") ~ "Fall",
str_detect(modname, "spring") ~ "Spring",
str_detect(modname, "_all_") ~ "Annual",
TRUE ~ as.character(NA))) %>%
dplyr::mutate(converged2 = case_when(str_detect(converged, "no evidence") ~ "likely",
str_detect(converged, "is likely not") ~ "unlikely",
TRUE ~ as.character(NA))) %>%
dplyr::group_by(season) %>%
dplyr::mutate(deltaAIC = AIC-min(AIC)) %>%
dplyr::select(modname, season, deltaAIC, fixedcoeff,
randomcoeff, use_anisotropy,
omega1, omega2, epsilon1, epsilon2,
beta1, beta2, AIC, converged2) %>%
dplyr::arrange(season, AIC)
# DT::datatable(modselect2, rownames = FALSE,
# options= list(pageLength = 50, scrollX = TRUE),
# caption = "Comparison of delta AIC values for alternative vessel effects and catchability covariates. See text for model descriptions.")
flextable::flextable(modselect2%>%
dplyr::select(-c(use_anisotropy,
omega1, omega2, epsilon1, epsilon2,
beta1, beta2))) %>%
flextable::set_header_labels(modname = "Model name",
season = "Season",
deltaAIC = "dAIC",
fixedcoeff = "N fixed coefficients",
randomcoeff = "N random coefficients",
converged2 = "Convergence") |>
flextable::set_caption("Comparison of delta AIC (dAIC) values for alternative vessel effects and catchability covariates. See text for model descriptions associated with each model name.") %>%
flextable::fontsize(size = 9, part = "all") %>%
flextable::colformat_double(digits = 2) |>
flextable::set_table_properties(layout = "autofit", width = .9)
```
Catchability covariates were better supported by the data than vessel effects. Model comparisons above led us to the best model fit using mean predator length, number of predator species, and SST at a survey station as catchability covariates.
# References