-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy path02_run_models.R
129 lines (97 loc) · 5.71 KB
/
02_run_models.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Combined effects of hydrometeorological hazards and urbanisation on dengue risk in Brazil: a spatiotemporal modelling study
# Rachel Lowe (2021)
# https://github.com/drrachellowe/hydromet_dengue
# R script to run INLA models of increasing complexity
# WARNING: the script may take over a day to run
# Step 0: load packages and pre-processed data
# Step 1: formulate a baseline model including spatiotemporal random effects and test different combinations of DLNM climate indicators
# Step 2: using best fitting model in Step 1 to test interactions between PDSI-DLNM and
# 1) % urban population from highly urbanised to more rural
# 2) frequency of reported water supply shortages from high to low frequency
# centred at high (upper quartile), medium (median) and low (lower quartile)
# Step 0: load packages and pre-processed data
source("00_load_packages_data.R")
# run models of increasing complexity in INLA
# Step 1: fit a baseline model including spatiotemporal random effects
# formulate a base model including:
# state-specific monthly random effects to account for variation in seasonality between states (random walk cyclic prior)
# year-specific spatial random effects to account for interannual variation in spatial overdisperson and dependency structures (modified Besag-York-Mollie prior bym2)
# baseline model
baseformula <- Y ~ 1 + f(T1, replicate = S2, model = "rw1", cyclic = TRUE, constr = TRUE,
scale.model = TRUE, hyper = precision.prior) +
f(S1, model = "bym2", replicate = T2, graph = "output/map.graph",
scale.model = TRUE, hyper = precision.prior)
# test baseline model with Poisson distribution
# model <- mymodel(baseformula, family = "poisson")
# model$dic$dic
# define formulas by updating the baseline formula with different combinations of Tmin, Tmax and PDSI cross-basis functions
formula0.1 <- update.formula(baseformula, ~. + basis_tmin)
formula0.2 <- update.formula(baseformula, ~. + basis_tmax)
formula0.3 <- update.formula(baseformula, ~. + basis_pdsi)
formula0.4 <- update.formula(baseformula, ~. + basis_tmin + basis_pdsi)
formula0.5 <- update.formula(baseformula, ~. + basis_tmax + basis_pdsi)
# create a list of formulas
formulas <- list(baseformula, formula0.1, formula0.2, formula0.3, formula0.4, formula0.5)
# create model label string
lab <- c("basemodel", "model0.1", "model0.2", "model0.3", "model0.4", "model0.5")
# create a function to run a model for each formula in the list and save the model output to file
# WARNING: this may take a long time to run
models <- lapply(1:length(formulas),
function(i) {
model <- mymodel(formulas[[i]], df)
save(model, file = paste0("output/", lab[i],".RData"))})
# create table to store DIC and select best model
table0 <- data.table(Model = c("base", "tmin", "tmax", "pdsi", "tmin + pdsi", "tmax + pdsi"),
DIC = NA)
for(i in 1:length(formulas))
{
load(paste0("output/",lab[i],".RData"))
table0$DIC[i] <- round(model$dic$dic, 0)
}
# view table
table0
# define position of best fitting model
best.fit <- which.min(table0$DIC)
# Step 3: use best fitting model in Step 2 to test interactions betweeen
# PDSI-DLNM and % population living in urban areas and PDSI-DLNM and water supply shortage frequency
# centred on high (model1.1), medium (model1.2) and low (model1.3) levels of urbanisation and
# centred on high (model1.4), medium (model1.5) and low (model1.6) frequency of water supply shortages
# assign formula for best fitting model to the new baseformula
# baseformula <- formulas[[best.fit]]
# redefine baseformula as best fitting model from above
baseformula <- Y ~ 1 + f(T1, replicate = S2, model = "rw1", cyclic = TRUE, constr = TRUE,
scale.model = TRUE, hyper = precision.prior) +
f(S1, model = "bym2", replicate = T2, graph = "output/map.graph",
scale.model = TRUE, hyper = precision.prior) + basis_tmin + basis_pdsi
# define formulas by updating the best fitting model0 formula with interactions between pdsi cross-basis and socio-economic indicators
# best fitting model0 + interaction between pdsi cross-basis and urbanisation
formula1.1 <- update.formula(baseformula, ~. + urban_basis1_pdsi + Vu)
formula1.2 <- update.formula(baseformula, ~. + urban_basis2_pdsi + Vu)
formula1.3 <- update.formula(baseformula, ~. + urban_basis3_pdsi + Vu)
# best fitting model0 + interaction between pdsi cross-basis and water supply shortage
formula1.4 <- update.formula(baseformula, ~. + water_basis1_pdsi + Vw)
formula1.5 <- update.formula(baseformula, ~. + water_basis2_pdsi + Vw)
formula1.6 <- update.formula(baseformula, ~. + water_basis3_pdsi + Vw)
# create a list of formulas
formulas <- list(formula1.1, formula1.2, formula1.3, formula1.4, formula1.5, formula1.6)
# create model label string
lab <- c("model1.1_urban", "model1.2_urban", "model1.3_urban",
"model1.1_water", "model1.2_water", "model1.3_water")
# create a function to run a model for each formula in the list and save the model output to file
# WARNING: this may take a long time to run
models <- lapply(1:length(formulas),
function(i) {
model <- mymodel(formulas[[i]], df)
save(model, file = paste0("output/", lab[i],".RData"))})
# create table to store DIC
table1 <- data.frame(Model = c("high urban", "intermediate urban", "low urban",
"high water shortage", "intermediate water shortage", "low water shortage"),
DIC = NA)
for(i in 1:length(formulas))
{
load(paste0("output/",lab[i],".RData"))
table1$DIC[i] <- round(model$dic$dic, 0)
}
# view table
# note: high, intermediate and low urban (water) models are different parametrisations of the same urban (water) interaction model
table1