-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path2a_map_spp_str_intsx.Rmd
264 lines (207 loc) · 9.59 KB
/
2a_map_spp_str_intsx.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
---
title: 'Map species stressor intersections'
author: "*Compiled on `r date()` by `r Sys.info()['user']`*"
output:
html_document:
code_folding: hide
toc: true
toc_depth: 3
toc_float: yes
number_sections: true
theme: cerulean
highlight: haddock
includes:
in_header: '~/github/src/templates/ohara_hdr.html'
pdf_document:
toc: true
---
``` {r setup, echo = TRUE, message = FALSE, warning = FALSE}
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/',
echo = TRUE, message = FALSE, warning = FALSE)
library(raster)
source('https://raw.githubusercontent.com/oharac/src/master/R/common.R')
source(here('common_fxns.R'))
dir_str_95vc <- file.path(dir_bd_anx, 'layers/stressors_10km_95vc')
dir_spp_maps <- file.path(dir_bd_anx, 'spp_rasts_mol_2020')
```
# Summary
Here we will calculate, for each species, a map of impacted area for each of the stressors to which the species is sensitive; stressor ranges may change over time as well. These maps will be saved as a .csv of `cell_id` and `str_year`. The stressor values are dropped here to save on resulting file size; simply mapping present or not present. Stressor values can be recovered from the stressor maps themselves.
Maps (.csvs) will be saved for each species in a folder for each stressor. For non-impacted species, a place-holder will be saved, perhaps with `cell_id = NA`.
The matrix method would work well for calculating cumulative impact, but we will skip that for now to enable more resolution at the species and stressor level.
## Methods summary
Briefly, the method will loop over each stressor and species; if the species has a non-zero sensitivity to the stressor, the overlap of species range and stressor range will be mapped. If the species is not sensitive, it will still be logged with a placeholder, for process checking...
Note that the intersection cells include only those cells where the species AND stressor are both present in a given year for a given stressor. Range cells outside the stressor area are dropped and are assumed to be unimpacted (refugia); stressor cells outside the range area are dropped and assumed to have no impact on this species.
# Methods
## Get the species sensitivities
The species sensitivity dataframe is created in script 1: `1d_combine_spp_stressor_sensitivities.Rmd`. Load this and use it to identify included species and stressors; load the maps file to determine which of these species have range maps as well. Note, the sensitivity file includes non-threatened species, so these will be included in the overall set of intersection maps.
```{r}
spp_incl_all <- get_incl_spp() %>%
# filter(!is.na(stressor)) %>% ### make placeholders for non-impacted spp
select(-code) %>%
distinct()
spp_ids <- spp_incl_all$iucn_sid %>% unique() %>% sort()
str_names <- spp_incl_all$stressor %>% unique() %>% sort()
spp_str_df <- spp_incl_all %>%
select(iucn_sid, stressor) %>%
distinct()
### spp to check for sensitivity to OA and SST: wide-ranging spp sensitive to
### wide-ranging stressors
### 2477, 8005, 22698209, 22694740, 22697870, 22698375, 8153, 13583, 6336
# check_ids <- c(2477, 8005, 22698209, 22694740, 22697870, 22698375, 8153, 13583, 6336)
# check_spp_str <- spp_str_df %>%
# filter(iucn_sid %in% check_ids)
### none are sensitive to OA; 6336, 8153, 22694740, 22698375 are sensitive to SST
str_w_maps <- list.files(dir_str_95vc) %>%
str_replace('_[0-9].*', '') %>%
unique()
if(any(!str_names %in% str_w_maps)) {
message('NOTE: some listed stressors do not have maps! \n')
}
```
## Loop over stressors
Let's create a function to take a stressor name and use that to pull all the stressor layers (all years) into a raster stack. Let's also create a function to turn the raster stack into a dataframe of cell IDs and filter to non-NA cells.
```{r}
build_str_stack <- function(str_name, dir_str = dir_str_95vc) {
### str_name <- str_names[1]
str_rast_files <- list.files(dir_str, pattern = str_name, full.names = TRUE) %>%
sort()
years <- str_extract(basename(str_rast_files), '[0-9]{4}')
str_stack <- raster::stack(str_rast_files)
names(str_stack) <- sprintf('%s_%s', str_name, years)
return(str_stack)
}
stack_to_df <- function(str_stack) {
rast_df_list <- parallel::mclapply(names(str_stack), mc.cores = 20,
FUN = function(x) {
### x <- names(str_stack)[1]
rast <- str_stack[[x]]
df <- data.frame(cell_id = 1:ncell(rast),
str_val = values(rast)) %>%
filter(!is.na(str_val)) %>%
select(-str_val)
})
names(rast_df_list) <- names(str_stack)
rast_df <- bind_rows(rast_df_list, .id = 'str_year') %>%
mutate(stressor = str_replace(str_year, '_[0-9]{4}', ''),
year = as.integer(str_extract(str_year, '[0-9]{4}'))) %>%
select(-str_year)
return(rast_df)
}
get_spp_map <- function(spp_id) {
spp_map_file <- file.path(dir_spp_maps,
sprintf('iucn_sid_%s.csv', spp_id))
if(!file.exists(spp_map_file)) {
stop('Map does not exist: ', basename(spp_map_file))
# return(NULL)
}
spp_map <- read_csv(spp_map_file, col_types = cols(.default = 'i'))
if(nrow(spp_map) == 0) {
warning('Species map for ', spp_id, ' is zero length')
}
return(spp_map)
}
calc_intsx <- function(spp_id, str_df) {
spp_map <- get_spp_map(spp_id) %>%
filter(presence > 0 & !is.na(presence) & presence != 5) %>%
select(-presence)
spp_str_intsx <- spp_map %>%
inner_join(str_df, by = 'cell_id') %>%
select(-stressor)
if(nrow(spp_str_intsx) == 0) {
### no intersection; warn and create empty df
message('No intersection found for species ', spp_id, ' and stressor ', str_name)
spp_str_intsx <- data.frame(
cell_id = NA,
year = str_df$year %>% unique())
}
return(spp_str_intsx)
}
```
Using this function, loop through each of the included stressors, build the stack, convert to df for easier handling, and then loop over all species to create a map of cell ID and year for all stressor occurrences on the species range. The stressor name will be included in the filename, so no need to include a "stressor" column here... this should save significantly on file size.
```{r}
reload <- FALSE
### to gut the directory for a fresh start:
# x <- list.files(file.path(dir_bd_anx, 'spp_str_rasts'),
# full.names = TRUE, recursive = TRUE)
# unlink(x)
for(str_name in str_names) {
### str_name <- str_names[13]
message('Processing ', str_name)
dir_spp_str_rast <- file.path(dir_bd_anx, 'spp_str_rasts', str_name)
if(!dir.exists(dir_spp_str_rast)) {
dir.create(dir_spp_str_rast)
}
str_stack <- build_str_stack(str_name)
str_df <- stack_to_df(str_stack)
### Determine which species are sensitive to this stressor
spp_str_sens <- spp_str_df %>%
filter(stressor == str_name)
### Stressors with large maps (e.g. OA, SST) take up a lot of memory -
### esp if spp also have large maps. Limit cores such that the
### cores * stack_df_size ~ 25% of Mazu memory (119 GB)
stack_df_size <- object.size(str_df)
num_cores <- min(as.integer(119e9 / stack_df_size * .25), 16)
# for(spp_id in spp_ids) {
tmp <- parallel::mclapply(spp_ids, mc.cores = num_cores,
FUN = function(spp_id) {
### spp_id <- spp_ids[1]
### spp_id <- spp_not_done[1]
# message('species: ', spp_id, '; stressor: ', str_name,
# '; calculating spp/str intersection')
spp_str_file <- file.path(dir_spp_str_rast,
sprintf('spp_intsx_%s_%s.csv',
str_name, spp_id))
if(!file.exists(spp_str_file) | reload) {
if(spp_id %in% spp_str_sens$iucn_sid) {
### species is sensitive; create intersection
spp_str_intsx <- calc_intsx(spp_id, str_df)
} else {
### species is not sensitive; create dummy intersection
# message('Spp ', spp_id, ' is not sensitive to ', str_name)
spp_str_intsx <- data.frame(
cell_id = NA,
year = str_df$year %>% unique()
)
}
write_csv(spp_str_intsx, spp_str_file)
} else {
# message(basename(spp_str_file), ' exists: skipping!')
}
return(TRUE)
}) ### end of mclapply
message('For stressor ', str_name, ' found ', sum(unlist(tmp)), ' instances')
}
```
# Check results
Make sure all spp are represented in all stressors (even if an empty csv as a placeholder). Using file size as a proxy for the map size, we can get an impression of how widely distributed the stressor/species interactions are.
```{r}
spp_mapped <- list.files(file.path(dir_bd_anx, 'spp_str_rasts'),
pattern = '.csv',
recursive = TRUE, full.names = TRUE)
filesize_mb <- file.size(spp_mapped) / 1e6
chk_df <- data.frame(spp_mapped,
filesize_mb) %>%
mutate(str = dirname(spp_mapped) %>% basename(),
spp = basename(spp_mapped) %>%
str_extract('[0-9]+') %>%
as.integer()) %>%
group_by(str) %>%
mutate(n_spp = n_distinct(spp)) %>%
ungroup()
spp_done <- chk_df %>%
filter(str == 'art_fish') %>%
.$spp
spp_not_done <- spp_ids[!spp_ids %in% spp_done]
chk_sum <- chk_df %>%
select(str, n_spp) %>%
distinct()
knitr::kable(chk_sum)
### plot file size densities for each stressor, dropping the
### empty maps (file size 101 B or .000101 MB)
ggplot(chk_df %>% filter(filesize_mb > .000102), aes(x = filesize_mb)) +
ggtheme_plot() +
geom_density(fill = 'slateblue', alpha = .4) +
scale_x_log10() +
theme(axis.text.y = element_blank()) +
facet_wrap(~ str, scales = 'free_y')
```