-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathSimulation_EM_Albacore.R
284 lines (210 loc) · 12.2 KB
/
Simulation_EM_Albacore.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
#############################################################
###### Location^3 Species Distribution Modelling ######
###### 'Albacore' Archetype ######
###### April 2020; built with R v3.6.3 ######
#############################################################
# --- Notes ---
# * All delta models are fit to normally distributed abundance
# * Performance is measured using a data subset (500 obs per year), but testing shows this is enough to accurately estimate RMSE etc
# * Take care comparing fitted responses to the specified suitability functions, bc they may not represent the fundamental niche when there are multiple enviro drivers
# * It appears that the rate of degregation of the forecast is less about which model, and more about the (non-linear?) rate of environmental change, and
# ... whether the fitting period covers a broad range of enviro conditions likely to be encountered by the species
# ... In the albacore case, it seems the N-ward shift in COG during 1985-2020 continued at same rate into future, making forecasting quite successful
# * The observations are still quite 'knife-edge' in terms of presence, and we could explore other options for the 'convertToPA' function in VirtualSpecies
# * Adding lat, lon or year often has little impact for albacore, bc there is not much spatially- or temporally-structured residual information
# ... which encouarges comparing models with diff numbers of covariates (e.g. full model vs temp only)
# --- To Do ---
# * Consider comparison of:
# ... 1) rate of change of warming (and change rate from historical period)
# ... 2) number of covariates, and adding 'unimportant' ones that vary in collinearity with important ones
# ... 3) duration of fitting period, or remove warmest temps from sampled observations
#################################
## Load Libraries ##
#################################
library(mgcv)
library(gbm)
library(dismo)
library(BBmisc)
library(neuralnet)
# devtools::install_github("pbs-assess/sdmTMB")
# library(sdmTMB)
library(sp)
library(dplyr)
library(foreach)
library(doParallel, quietly = TRUE)
#################################
## Data Simulation ##
#################################
# load ROMS simulation function
source('~/PROJECTS/WRAP Location/WRAP_Location_CaseStudy/SimulatedWorld_ROMS_Albacore_Function.R')
#Three ESM directories
# had <- "~/Dropbox/WRAP Location^3/Rasters_2d_Spring/had/"
# ipsl <- "~/Dropbox/WRAP Location^3/Rasters_2d_Spring/ipsl/"
gfdl <- "~/Dropbox/WRAP Location^3/Rasters_2d_Spring/gfdl/"
# create abundance data - choose ESM directory
dir_name <- "gfdl" #of either "had", "ipsl", "gfdl"
dat <- SimulateWorld_ROMS_Albacore(dir = gfdl)
# name changes
names(dat)[names(dat) == 'sst'] <- 'temp'
names(dat)[names(dat) == 'chla_surface'] <- 'chla'
# add scaled and 'range normalised' variables for GLMs and MLPs
dat$temp_s = scale(dat$temp)
dat$chla_s = scale(dat$chla)
dat$mld_s = scale(dat$mld)
dat$temp_n <- BBmisc::normalize(dat$temp, method = "range", range = c(0, 1)) #covariates must be normalised for MLP
dat$chla_n <- BBmisc::normalize(dat$chla, method = "range", range = c(0, 1))
dat$mld_n <- BBmisc::normalize(dat$mld, method = "range", range = c(0, 1))
dat$lat_n <- BBmisc::normalize(dat$lat, method = "range", range = c(0, 1))
dat$lon_n <- BBmisc::normalize(dat$lon, method = "range", range = c(0, 1))
dat$year_n <- BBmisc::normalize(dat$year, method = "range", range = c(0, 1))
# transform and add factor
dat$log_abundance <- log(dat$abundance) #Must use log abundance to get some EMs to run
dat$fYear <- as.factor(dat$year)
# remove less certain ROMS years
dat <- dat[dat$year >= 1985,]
# create data subset for model fitting and evaluation (500 locations per year [*random, but same every year])
dat_sub <- dat[dat$sample == 1,]
# # save
saveRDS(dat, paste0("~/PROJECTS/WRAP Location/Albacore EMs/",dir_name,"/dat_albacore_all.rds"))
saveRDS(dat_sub, paste0("~/PROJECTS/WRAP Location/Albacore EMs/",dir_name,"/dat_albacore_sub.rds"))
#################################
## Model Fitting ##
#################################
rm(list = ls()) #remove all objects and re-load necessary ones; consider restarting R too
# load data subset for model fitting
dir_name <- "ipsl" #of either "had", "ipsl", "gfdl"
dat <- readRDS(paste0("~/PROJECTS/WRAP Location/Albacore EMs/",dir_name,"/dat_albacore_sub.rds"))
# year_fcast <- 2010
year_fcast <- 2040
dat_hist <- dat[dat$year <= year_fcast,] #subset for model fitting
dat_fcast <- dat[dat$year > year_fcast,] #subset for forecast evaluation
# #Test extrapolation:
# dat_hist$sample <- ifelse(dat_hist$lat<=43 & dat_hist$lat>=35 & dat_hist$lat<=year_fcast,dat_hist$sample==1,dat_hist$sample==0)
# dat_hist <- dat_hist[dat_hist$sample==1,]
# dat <- rbind(dat_hist,dat_fcast)
# --- GAMS ---
# * fitting ECor will take 1-2 hours
type <- "delta" #delta or tweedie
covs <- c("E","S","ES","EST","ECor") #covariate combinations, options: c("E","S","ES","EST","ECor")
env_formula <- "s(temp) + s(chla) + s(mld)" #full: "s(temp) + s(chla) + s(mld)"
# env_formula <- "s(temp)"
source("~/PROJECTS/WRAP Location/WRAP_Location_CaseStudy/Fitting_GAMs.R")
# --- GLMs ---
# * fitting Sr and ESr will take 2-6 hours, and if data are changed may not fit w/o changing model structure
# * warnings are ok, but if there are model failures... seek help
# * all GLMs assume at least one quadratic (i.e. specify no intercept and have quadratic_roots=T)
type <- "delta" #delta or tweedie
covs <- c("E", "ESt", "Sr", "ESr") #options: c("E", "Sr", "ESt", "ESr")
env_formula <- "temp_s + I(temp_s^2) + mld_s + I(mld_s^2) + chla_s + I(chla_s^2)" #for tweedie models and presence part of delta; the '_s' indicates scaled variables
env_formula_deltaN <- "temp_s + I(temp_s^2) + mld_s + I(mld_s^2) + chla_s" #no quadratic for chl in abundance part of delta
# env_formula <- "temp_s + I(temp_s^2)" #temp only model
# env_formula_deltaN <- "temp_s + I(temp_s^2)" #temp only model
t1 <- Sys.time()
source("~/PROJECTS/WRAP Location/WRAP_Location_CaseStudy/Fitting_GLMs.R")
t2 <- Sys.time()
t2 - t1
# --- BRTs & MLPs ---
# * check MLP response curves - if some are flat (constant), run them again... then seek help
type <- "delta" #only delta at this stage
covs <- c("E","ES","EST") #options: c("E","S","ES","EST")
env_formula_BRT <- c("temp", "chla", "mld") #full: c("temp", "chla", "mld")
env_formula_MLP <- "temp_n + chla_n + mld_n" #full: "temp_n + chla_n + mld_n"; the '_n' indicates range normalised variables
# env_formula_BRT <- c("temp")
# env_formula_MLP <- "temp_n" #full: "temp_n + chla_n + mld_n"; the '_n' indicates range normalised variables
t1 <- Sys.time()
source("~/PROJECTS/WRAP Location/WRAP_Location_CaseStudy/Fitting_BRTs.R")
source("~/PROJECTS/WRAP Location/WRAP_Location_CaseStudy/Fitting_MLPs.R")
t2 <- Sys.time()
t2 - t1
# --- Save Results and Models ---
# * take care not to delete some models by overwriting with a model subset (change saved names if doing testing)
setwd(paste0('~/PROJECTS/WRAP Location/Albacore EMs 2040train/',dir_name,'/'))
saveRDS(dat_hist, "dat_hist_results_full.rds") # * 'full' [full models] or 'temp' [temp-only models]
saveRDS(dat_fcast, "dat_fcast_results_full.rds")
all_mods <- c("gam_E","gam_S","gam_ES","gam_EST","gam_ECor",
"brt_E","brt_S","brt_ES","brt_EST",
"mlp_E","mlp_S","mlp_ES","mlp_EST",
"glm_E","glm_Sr","glm_ESt","glm_ESr")
save(list = ls(pattern = paste0(all_mods, collapse="|")),
file = "saved_models_full.RData") # * 'full' or 'temp'; this will save all models of these names, even when outdated, if they're in the environment
#hist_save <- dat_hist
#fcast_save <- dat_fcast
dat_fcast <- fcast_save
dat_fcast <- dat_fcast[,c(1:32,34:38)]
dat_hist <- dat_hist[,c(1:22,27:31,23:26,32:37)]
###################################
## Model Performance ##
###################################
rm(list = ls()) #remove all objects and re-load necessary ones
# --- load functions, data, and model objects ---
dir_name <- "had"
setwd('~/PROJECTS/WRAP Location/WRAP_Location_CaseStudy/')
source("Performance_rmse_fun.R")
source("Performance_cog_fun.R")
source("Performance_eao_fun.R")
# * load these, for full models
dat_hist <- readRDS(paste0("~/PROJECTS/WRAP Location/Albacore EMs TempOnly/",dir_name,"/dat_hist_results_full.rds"))
dat_fcast <- readRDS(paste0("~/PROJECTS/WRAP Location/Albacore EMs TempOnly/",dir_name,"/dat_fcast_results_full.rds"))
load(paste0("~/PROJECTS/WRAP Location/Albacore EMs TempOnly/",dir_name,"/saved_models_full.RData")) #full models
# * note that the glm_ESr full model has a different structure to other glms (abundance part of delta has no quadratics - this was done to speed up fitting)
# * or load these, for temp-only models
# dat_hist <- readRDS("dat_hist_results_temp.rds") #temp-only models
# dat_fcast <- readRDS("dat_fcast_results_temp.rds")
# load("saved_models_temp.RData") #temp-only models
# --- RMSE ---
ylim=c(1000,8000)
rmse_results <- sdm_rmse(dat_hist = dat_hist,
dat_fcast = dat_fcast)
# * space-only models are flat lines, bc the subset of locations sampled each year are identical
# RMSE of all observations (500 per year)
rmse_results$rmse_abund_all[order(rmse_results$rmse_abund_all$rmse_hist),]
#^ Hist: ordered best -> worst for historical
rmse_results$rmse_abund_all[order(rmse_results$rmse_abund_all$rmse_fcast),]
#^ Fcast: ordered best -> worst for forecast
# RMSE of annual total abundance (sum of 500 cells per year)
rmse_results$rmse_abund_annual[order(rmse_results$rmse_abund_annual$rmse_hist),]
#^ Hist: ordered best -> worst for historical
rmse_results$rmse_abund_annual[order(rmse_results$rmse_abund_annual$rmse_fcast),]
#^ Fcast: ordered best -> worst for forecast
# Annual RMSE for all observations (plotted)
# rmse_results$rmse_abund_all_year
# --- COG Latitude ---
cog_results <- sdm_cog(dat_hist = dat_hist,
dat_fcast = dat_fcast)
cog_results$rmse_cog[order(cog_results$rmse_cog$rmse_hist),]
#Hist: glm_Sr best, then glm_ESr, brt_EST, brt_ES, gam_EST ...
cog_results$rmse_cog[order(cog_results$rmse_cog$rmse_fcast),]
#Fcast: lm_ESr best, then glm_ESt, brt_E, glm_E, brt_ES ...
# Difference in cog over time (plotted)
#cog_results$cog_lat_fcast_y #not often a trend for enviro+ SDMs
# --- Effective Area Occupied ---
eao_results <- sdm_eao(dat_hist = dat_hist,
dat_fcast = dat_fcast)
eao_results$eao_hist_y[]
eao_results$eao_fcast_y
# * Most models are positively biased - may be due to lack of zeros in modelled data?
# --- Maps of Spatial Distribution ---
# load full data
dat_all <- readRDS(paste0("~/PROJECTS/WRAP Location/Albacore EMs/",dir_name,"/dat_albacore_all.rds"))
source("Performance_plotMaps.R")
performance_plotMaps_function(om ="hms") #om options are "hms","cps","grf". Needed for GLMpredict function within maps
#-----summary plots of OM----
# # #Make some quick plots to explore the data
# # #All Years
par(mfrow=c(2,3))
dat_all <- dat
dat_sub <- dat[dat$sample==1,]
plot(aggregate(suitability~year,dat,FUN="mean"),type="l", lwd=2, main="Mean Suitability",col="dark grey", ylab="")
lines(aggregate(suitability~year,dat[dat$year<=2010,],FUN="mean"),col="blue")
plot(aggregate(pres~year,dat,FUN="mean"),type="l", lwd=2, main="Prevalence",col="dark grey", ylab="")
lines(aggregate(pres~year,dat[dat$year<=2010,],FUN="mean"),col="blue")
plot(aggregate(abundance~year,dat_sub,FUN="sum"),type="l", lwd=2,main="Total Biomass", col="dark grey", ylab="")
lines(aggregate(abundance~year,dat_sub[dat_sub$year<=2010,],FUN="sum"),col="blue")
plot(aggregate(abundance~year,dat_all,FUN="sum"),type="l", lwd=2,main="Total Biomass", col="dark grey", ylab="")
lines(aggregate(abundance~year,dat_all[dat_all$year<=2010,],FUN="sum"),col="blue")
plot(aggregate(temp~year,dat,FUN="mean"),type="l",main="Mean Temperature", col="dark grey", ylab="")
lines(aggregate(temp~year,dat[dat$year<=2010,],FUN="mean"),col="blue")
plot(aggregate(mld~year,dat,FUN="mean"),type="l",main="Mean MLD",col="dark grey")
lines(aggregate(mld~year,dat[dat$year<=2010,],FUN="mean"),col="blue")
plot(aggregate(zoo_200~year,dat,FUN="mean"),type="l",main="Mean Zooplankton",col="dark grey", ylab="")
lines(aggregate(zoo_200~year,dat[dat$year<=2010,],FUN="mean"),col="blue")