3_calc_impacts_and_trends_per_spp.Rmd

---
title: 'Calculate species stressor impacts and intensification'
author: "*Compiled on `r date()` by `r Sys.info()['user']`*"
output: 
  html_document:
    code_folding: hide
    toc: true
    toc_depth: 3
    toc_float: yes
    number_sections: true
    theme: cerulean
    highlight: haddock
    includes: 
      in_header: '~/github/src/templates/ohara_hdr.html'
  pdf_document:
    toc: true
---

``` {r setup, echo = TRUE, message = FALSE, warning = FALSE}

### knitr chunk defaults: figure size and output folder for figures; echo
### code, but keep messages and warnings out of the rendered document
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/',
                      echo = TRUE, message = FALSE, warning = FALSE)
### silence dplyr's summarise() regrouping message.  Spelled FALSE rather
### than F, since F is an ordinary variable that can be reassigned.
options(dplyr.summarise.inform = FALSE)

library(raster)
source('https://raw.githubusercontent.com/oharac/src/master/R/common.R')

source(here('common_fxns.R'))

### annex folders, all under dir_bd_anx (not defined in this file;
### presumably set by the scripts sourced above): species-stressor
### intersection files, species range files, and stressor trend rasters
dir_spp_intsx <- file.path(dir_bd_anx, 'spp_str_rasts')
dir_spp_maps  <- file.path(dir_bd_anx, 'spp_rasts_mol_2020')
dir_str_trend <- file.path(dir_bd_anx, 'layers', 'stressor_trends')
```


# Summary

For each species, pull in all associated stressor intersections and stressor trends, as well as species range.

For individual stressors, for each year:

* Range impacted (km^2^) 
* Impacted range that is intensifying or deintensifying (km^2^)

For cumulative stressor groups (and across all stressors):

* Total range impacted (km^2^) by count of stressors
* Total impacted range intensifying (km^2^) by count of stressors

For each year, also calculate the number of impacts per cell and save as a map of species cumulative impact.

This will be done for each species' entire range, as well as for the portion of its range that falls within each EEZ.

# Methods

## Create list of species for mapping cumulative impacts

Here we will identify all species with sensitivity to one or more stressor, and a range map.

```{r setup file names and species ids for processing}
### species included in the analysis; keep the unique, sorted species ids
spp_incl <- get_incl_spp()

spp_ids <- spp_incl$iucn_sid %>%
  unique() %>% sort()

### rasters of ocean proportion per cell and EEZ id per cell
ocean_area_rast <- raster(here('_spatial/ocean_area_mol.tif'))
eez_rast <- raster(here('_spatial/eez_mol.tif'))

### area of a single cell: squared x-resolution divided by 1e6
### NOTE(review): assumes square cells with resolution in meters, so the
### result is km^2 - confirm against the raster projection
cell_area_km2 <- (res(ocean_area_rast)[1]^2) / 1e6

### one row per cell with a non-NA ocean proportion; cell_id is the raster
### cell index.  seq_len() is used rather than 1:n so that a zero-cell
### raster yields an empty id vector instead of c(1, 0).
ocean_area_df <- data.frame(ocean_prop = values(ocean_area_rast),
                            eez = values(eez_rast),
                            cell_id = seq_len(ncell(ocean_area_rast))) %>%
  filter(!is.na(ocean_prop)) %>%
  mutate(ocean_area_km2 = ocean_prop * cell_area_km2)

### stressor lookup table (used here and below for its stressor and
### category columns) and the vector of stressor names to loop over
str_cats  <- get_str_cats()
stressors <- str_cats$stressor
```

## Create stressor "pace" dataframe

To represent significantly intensifying/increasing and deintensifying/decreasing trends of stressors in a location, we can consider the "pace" (from pace of change paper), focusing on significant high and low paces of change at the stressor level.

### Examine stressor trend/significance to test thresholds of intensification

This table examines the trend in each stressor map and classifies each cell's stressor intensity as increasing ("incr") or decreasing ("decr"), binned by the magnitude of the trend.  For each "incr" and "decr" class, what proportion of cells show a statistically significant trend ($p < 0.05$), and what proportion of total ocean area does that class cover?

```{r examine significance}

### cached summary of trend class and significance for each stressor
pace_sum_file <- here('int/pace_summary.csv')

### number of ocean cells: the denominator for "proportion of ocean area"
cells_ocean <- nrow(ocean_area_df)

### trend classes, ordered from strongest increase to strongest decrease;
### 'no_change' is deliberately absent so it becomes NA as a factor
range_lvls <- c('incr3', 'incr2', 'incr1', 'decr1', 'decr2', 'decr3')

if(!file.exists(pace_sum_file)) {
  
  ### file name templates do not change between stressors, so build once
  trend_r_file <- file.path(dir_str_trend,
                            'stressor_trend_%s_value.tif')
  p_val_r_file <- file.path(dir_str_trend,
                            'stressor_trend_%s_p_val.tif')
  
  check_list <- vector('list', length = length(stressors))
  for(i in seq_along(stressors)) {
    # i <- 1
    str <- stressors[i]
    message('Calculating pace summary for ', str)
    
    trend_r <- raster(sprintf(trend_r_file, str))
    p_val_r <- raster(sprintf(p_val_r_file, str))
    
    ### one row per cell with the trend (slope) and its p value
    df <- rast_to_map(trend_r, cell_val = 'trend') %>%
      full_join(rast_to_map(p_val_r, cell_val = 'p'), by = 'cell_id')
    
    ### bin each cell by trend sign and magnitude (case_when takes the
    ### first matching condition, so the strongest bins are tested first),
    ### then for each bin calculate the share of cells with p < 0.05 and
    ### the bin's cell count relative to the count of ocean cells
    df2 <- df %>%
      mutate(range = case_when(trend >   0.01 ~ 'incr3',
                               trend <  -0.01 ~ 'decr3',
                               trend >  0.001 ~ 'incr2',
                               trend < -0.001 ~ 'decr2',
                               trend > 0 ~ 'incr1', 
                               trend < 0 ~ 'decr1',
                               TRUE ~ 'no_change'),
             sig = p < 0.05) %>%
      group_by(range) %>%
      summarize(pct_sig = sum(sig, na.rm = TRUE) / n(),
                pct_area = n() / cells_ocean) %>%
      ungroup() %>%
      mutate(stressor = str)
    check_list[[i]] <- df2
  }
  
  ### share of ocean area in each class that is also significant; the
  ### 'no_change' rows become NA in the factor and are dropped
  check_df <- bind_rows(check_list) %>%
    mutate(sig_pct_area = pct_area * pct_sig) %>%
    mutate(range = factor(range, levels = range_lvls)) %>%
    filter(!is.na(range))
  
  ### NOTE: the wide-format sig_df/area_df tables built here previously
  ### were never used or saved, and could stop this chunk before the
  ### summary was written if a trend class was missing from the data;
  ### they have been removed.
  
  write_csv(check_df, pace_sum_file)
}
  
### always read from the cached file, so fresh and cached runs match
check_df <- read_csv(pace_sum_file) %>%
  mutate(range = factor(range, levels = range_lvls))

knitr::kable(check_df, caption = '% total ocean area by significant trend')

### one panel per stressor; bars ordered incr3 ... decr3, with a vertical
### line separating the increasing from the decreasing classes
ggplot(check_df, aes(x = range, y = sig_pct_area, fill = range)) +
  ggtheme_plot() +
  geom_col(width = 1) + 
  geom_vline(xintercept = 3.5, color = 'grey30', size = .25) +
  geom_hline(yintercept = 0, color = 'grey30', size = .25) +
  scale_fill_brewer(palette = 'RdYlGn') +
  theme(axis.title = element_blank(),
        axis.text.x = element_blank(),
        legend.position = c(.75, .10),
        legend.direction = 'horizontal',
        legend.title = element_blank()) +
  labs(title = '% Ocean Area by significant trend') +
  facet_wrap(~ stressor, scales = 'free_y')
```

Reclassify trend values into pace

* For each cell, reclassify trend $T$ (linear model slope) into pace $P$ (categorical):

\begin{align*}
  P = \begin{cases} 
    +3 &\text{if } (T > 0.01) \wedge (p < 0.05)\\
    +2 &\text{if } (0.001 < T \leq 0.01) \wedge (p < 0.05)\\
    +1 &\text{if } (0 < T \leq 0.001) \wedge (p < 0.05)\\
    -1 &\text{if } (-0.001 \leq T < 0) \wedge (p < 0.05)\\
    -2 &\text{if } (-0.01 \leq T < -0.001) \wedge (p < 0.05)\\
    -3 &\text{if } (T < -0.01) \wedge (p < 0.05)\\
    NA &\text{else}
  \end{cases}
\end{align*}

Cells that are neither intensifying nor deintensifying will be dropped here, for a smaller and more nimble .csv.

``` {r}
### input templates: trend (slope) and p value rasters for each stressor
trend_fstem <- file.path(dir_str_trend, 'stressor_trend_%s_value.tif')
p_val_fstem <- file.path(dir_str_trend, 'stressor_trend_%s_p_val.tif')

### output template: one pace .csv per stressor
pace_fstem <- file.path(dir_bd_anx, 'layers/stressor_pace/stressor_pace_%s.csv')
# to clear: unlink(file.path(dirname(pace_fstem), '*.*'))

for(str in stressors) {
  ### str <- stressors[1]
  pace_file <- sprintf(pace_fstem, str)
  
  ### skip stressors whose pace file has already been written
  if(file.exists(pace_file)) next
  
  message('Determining pace raster for ', str)
  t_r <- raster(sprintf(trend_fstem, str))
  p_r <- raster(sprintf(p_val_fstem, str))
  
  trend_df <- rast_to_map(t_r, cell_val = 'trend')
  p_val_df <- rast_to_map(p_r, cell_val = 'p')
  
  ### pace = sign of the trend times its magnitude class (1, 2 or 3), for
  ### cells with a significant trend (p < 0.05) only; cells with a missing
  ### p value, a missing trend, or a zero trend get NA and are dropped
  pace_df <- full_join(trend_df, p_val_df, by = 'cell_id') %>%
    mutate(is_sig    = !is.na(p) & p < 0.05,
           magnitude = case_when(abs(trend) > 0.01  ~ 3,
                                 abs(trend) > 0.001 ~ 2,
                                 abs(trend) > 0     ~ 1,
                                 TRUE               ~ NA_real_),
           pace      = ifelse(is_sig, sign(trend) * magnitude, NA_real_),
           stressor  = str) %>%
    filter(!is.na(pace)) %>%
    select(cell_id, pace, stressor)
  
  write_csv(pace_df, pace_file)
}
```

```{r}
### Read all the stressor pace .csv files from the annex and stack them
### into a single dataframe.
###   n_cores: number of forked workers passed to parallel::mclapply; the
###            default, 14, is the value that used to be hard-coded.
### Returns a dataframe with columns cell_id (integer), pace (integer) and
### stressor (character).  Stops with an error if any file fails to read.
### NOTE: uses the global dir_bd_anx to locate the pace folder.
get_pace_df <- function(n_cores = 14) {
  ### only pick up .csv files, in case anything else lives in this folder
  pace_fs <- list.files(file.path(dir_bd_anx, 'layers/stressor_pace'), 
                        pattern = '\\.csv$', full.names = TRUE)
  pace_list <- parallel::mclapply(pace_fs, FUN = function(f) {
        read_csv(f, col_types = c(cell_id = 'i', pace = 'i', stressor = 'c'))
      }, mc.cores = n_cores)
  
  ### mclapply does not stop on a worker error: the failed element comes
  ### back as a 'try-error' object.  Report which files failed instead of
  ### letting bind_rows() fail with an unrelated message.
  failed <- vapply(pace_list, inherits, logical(1), what = 'try-error')
  if(any(failed)) {
    stop('Could not read pace file(s): ',
         paste(basename(pace_fs[failed]), collapse = ', '))
  }
  
  bind_rows(pace_list)
}
```

## loop over species by ID

For each species, create a dataframe summarizing cumulative impacts:

* calculate individual stressor impacts range for global and eez
* calculate cumulative impact for global and eez
* bind 'em together, join with ranges, and save .csv to annex, `spp_impacts` folder.

### set up functions

```{r cumulative impact functions}

get_intsx_files <- function(spp_id, impact_cat) {
  ### Return the intersection files for one species, optionally limited to
  ### the stressors in a single impact category.
  ###   spp_id:     species id, matched against the global intsx_id_vec
  ###   impact_cat: a category from str_cats, or 'all' for every stressor
  ### NOTE: relies on the globals intsx_files_all, intsx_id_vec and
  ### str_cats, so that the intersection directory is listed only once.
  
  # impact_cat <- 'fishing'
  # spp_id <- spp_ids[1]
  
  ### every intersection file whose id matches this species
  spp_files <- intsx_files_all[intsx_id_vec == spp_id]
  
  ### nothing to subset when working across all stressors
  if(impact_cat == 'all') {
    return(spp_files)
  }
  
  ### stressors in the requested category; each file's parent folder name
  ### is compared against this list
  cat_strs  <- str_cats$stressor[str_cats$category %in% impact_cat]
  file_strs <- basename(dirname(spp_files))
  
  return(spp_files[file_strs %in% cat_strs])
}

summarize_impacts <- function(spp_intsx_df, range_df, eez = FALSE) {
  ### Summarize impacted area (km^2) for each stressor and year.
  ###   spp_intsx_df: one row per impacted cell/year/stressor; the columns
  ###                 used here are stressor, year, ocean_area_km2, pace,
  ###                 and (when eez = TRUE) eez
  ###   range_df:     species range dataframe; only its eez column is used,
  ###                 and only when eez = TRUE
  ###   eez:          FALSE to summarize over the whole range (rows are
  ###                 tagged eez = 0); TRUE to summarize within each EEZ
  ### Returns one row per group with impact_km2 plus the impacted area at
  ### each pace threshold: incr1 = pace 1 to 3, incr2 = pace 2 to 3,
  ### incr3 = pace 3, and likewise decr1/decr2/decr3 for negative paces.
  if(eez) {
    gps <- c('stressor', 'year', 'eez')
    ### inside filter(), eez is the column of range_df, not the argument
    eez_vec <- range_df %>% filter(eez > 0) %>% .$eez %>% unique()
  } else {
    gps <- c('stressor', 'year')
  }
  df <- spp_intsx_df %>%
    group_by_at(vars(all_of(gps))) %>%
    ### na.rm = TRUE on impact_km2 matches the other sums: a cell with no
    ### ocean area (NA after the join) would otherwise turn the whole
    ### group total into NA
    summarize(impact_km2 = sum(ocean_area_km2, na.rm = TRUE),
              incr1_km2   = sum((pace %in% 1:3) * ocean_area_km2, na.rm = TRUE), 
              incr2_km2   = sum((pace %in% 2:3) * ocean_area_km2, na.rm = TRUE), 
              incr3_km2   = sum((pace %in% 3) * ocean_area_km2, na.rm = TRUE), 
              decr1_km2   = sum((pace %in% -1:-3) * ocean_area_km2, na.rm = TRUE),
              decr2_km2   = sum((pace %in% -2:-3) * ocean_area_km2, na.rm = TRUE),
              decr3_km2   = sum((pace %in% -3) * ocean_area_km2, na.rm = TRUE)) %>%
    ungroup()
  if(eez) {
    ### if any cells with no EEZ (or region), they'll be dropped here;
    ### then, within each stressor, add a zero-impact row for any EEZ in
    ### the species range that has no impacted cells
    df <- df %>%
      group_by(stressor) %>%
      filter(!is.na(eez)) %>%
      complete(eez = eez_vec, fill = list(impact_km2 = 0)) %>%
      ungroup()
  } else {
    ### not EEZ, so global analysis - add in a zero EEZ
    df <- df %>% mutate(eez = 0)
  }
  df <- df %>% filter(!is.na(stressor))
  return(df)
}

summarize_cum_impacts <- function(spp_intsx_df, range_df, eez = FALSE) {
  ### Summarize cumulative (across-stressor) impacted area (km^2) by year.
  ###   spp_intsx_df: one row per impacted cell/year/stressor; the columns
  ###                 used here are cell_id, year, ocean_area_km2, pace,
  ###                 and (when eez = TRUE) eez
  ###   range_df:     species range dataframe; only its eez column is used,
  ###                 and only when eez = TRUE
  ###   eez:          FALSE to summarize over the whole range (rows are
  ###                 tagged eez = 0); TRUE to summarize within each EEZ
  ### Returns rows with stressor = 'cumulative' holding the area impacted
  ### by one or more, two or more, and three or more stressors, plus the
  ### area flagged as intensifying/deintensifying at each pace threshold.
  if(eez) {
    gps_1 <- c('cell_id', 'year', 'ocean_area_km2', 'eez')
    gps_2 <- c('year', 'eez')
    ### inside filter(), eez is the column of range_df, not the argument
    eez_vec <- range_df %>% filter(eez > 0) %>% .$eez %>% unique()
  } else {
    gps_1 <- c('cell_id', 'year', 'ocean_area_km2')
    gps_2 <- c('year')
  }
  df <- spp_intsx_df %>%
    # select(cell_id, year, ocean_area_km2, stressor) %>%
    distinct() %>%
    ### step 1: per cell and year, count the rows (n_impact) and flag the
    ### cell as increasing only if some stressor has a positive pace at
    ### the threshold and none has a negative pace (vice versa for decr)
    group_by_at(vars(all_of(gps_1))) %>%
    summarize(incr1 = any(pace %in% 1:3, na.rm = TRUE) & !any(pace < 0, na.rm = TRUE),
              incr2 = any(pace %in% 2:3, na.rm = TRUE) & !any(pace < 0, na.rm = TRUE),
              incr3 = any(pace %in% 3, na.rm = TRUE) & !any(pace < 0, na.rm = TRUE),
              decr1 = any(pace %in% -1:-3, na.rm = TRUE) & !any(pace > 0, na.rm = TRUE),
              decr2 = any(pace %in% -2:-3, na.rm = TRUE) & !any(pace > 0, na.rm = TRUE),
              decr3 = any(pace %in% -3, na.rm = TRUE) & !any(pace > 0, na.rm = TRUE),
              n_impact = n()) %>%
    ### step 2: add up cell areas per year (and EEZ).  na.rm = TRUE on the
    ### impact sums matches the incr/decr sums: a cell with no ocean area
    ### (NA after the join) would otherwise make the whole total NA
    group_by_at(vars(all_of(gps_2))) %>%
    summarize(impact_km2 = sum(ocean_area_km2, na.rm = TRUE),
              impact_2plus_km2 = sum((n_impact > 1) * ocean_area_km2, na.rm = TRUE),
              impact_3plus_km2 = sum((n_impact > 2) * ocean_area_km2, na.rm = TRUE),
              incr1_km2   = sum(incr1 * ocean_area_km2, na.rm = TRUE), 
              incr2_km2   = sum(incr2 * ocean_area_km2, na.rm = TRUE), 
              incr3_km2   = sum(incr3 * ocean_area_km2, na.rm = TRUE), 
              decr1_km2   = sum(decr1 * ocean_area_km2, na.rm = TRUE),
              decr2_km2   = sum(decr2 * ocean_area_km2, na.rm = TRUE),
              decr3_km2   = sum(decr3 * ocean_area_km2, na.rm = TRUE),
              stressor = 'cumulative') %>%
    ungroup()
  
  ### no impacted cells at all: substitute a single zero-impact
  ### placeholder row with an NA year.  It carries only the columns named
  ### here; the other summary columns are absent.
  if(nrow(df) == 0) {
    df <- data.frame(
      stressor = 'cumulative',
      year = NA,
      impact_km2 = 0, incr1_km2 = 0, decr1_km2 = 0, eez = 0)
  }
  
  if(eez) {
    ### if any cells with no EEZ (or region), they'll be dropped here.
    ### complete() adds rows so each EEZ in the species range is present;
    ### columns not named in complete() (including year) are NA in the
    ### rows it adds
    df <- df %>%
      complete(eez = eez_vec, nesting(impact_km2, incr1_km2, decr1_km2, stressor)) %>%
      filter(eez > 0 & !is.na(eez))
  } else {
    ### not EEZ, so global analysis - add in a zero EEZ
    df <- df %>% mutate(eez = 0)
  }
  

  return(df)
}

calc_cum_impact <- function(spp_id, impact_cat, pace_df, years = 2003:2013) {
  ### Assemble the impact summary for one species and one impact category:
  ### per-stressor and cumulative impacted area, for the whole range
  ### (eez = 0) and for each EEZ, joined to the species range by eez.
  ###   spp_id:     species id
  ###   impact_cat: impact category, or 'all' for every stressor
  ###   pace_df:    stressor pace dataframe (cell_id, pace, stressor)
  ###   years:      years every stressor/EEZ series is completed to, with
  ###               impact_km2 = 0 for years that have no impact rows;
  ###               defaults to the previously hard-coded 2003:2013
  ### dir_spp_maps to point to location of range files;
  ### dir_spp_intsx to point to location of intersection files
  ### ocean_area_df to calc range including coastal (i.e. not 100% ocean) cells
  ### impact_cat allows to select subsets of impacts

  spp_range_df <- get_spp_range(id = spp_id)
  
  intsx_files <- get_intsx_files(spp_id, impact_cat)
  
  if(length(intsx_files) == 0) {
    ### no intersection files for this species/category: binding an empty
    ### list gives a dataframe with no columns, and the cell_id filter
    ### below would fail.  Start from an empty frame with the expected
    ### columns so the zero-impact handling downstream can run.
    spp_intsx_raw <- data.frame(cell_id  = integer(0),
                                year     = integer(0),
                                stressor = character(0),
                                stringsAsFactors = FALSE)
  } else {
    spp_intsx_raw <- lapply(intsx_files,
        FUN = function(x) { # x <- intsx_files[1]
          ### stressor name: file name without the 'spp_intsx_' prefix
          ### and without everything from the first '_<digit>' onward
          str_name <- basename(x) %>%
            str_replace_all('spp_intsx_|_[0-9].*', '')
          
          tmp <- read_csv(x, col_types = cols('cell_id' = 'i',
                                       'year' = 'i')) %>%
            mutate(stressor = str_name)
          return(tmp)
        }) %>%
      bind_rows()
  }
  
  spp_intsx_df <- spp_intsx_raw %>%
    filter(!is.na(cell_id)) %>%
    left_join(ocean_area_df, by = 'cell_id') %>%
    left_join(pace_df, by = c('cell_id', 'stressor'))
      ### left joining here will drop any cells with paces that
      ### do not fall within the footprint for the stressor/year - 
      ### accounts for footprint/trend overlap, no need to crop trends
  
  ### per-stressor impacts: whole range, then by EEZ
  spp_impact_by_stressor <- summarize_impacts(spp_intsx_df, 
                                              range_df = spp_range_df, 
                                              eez = FALSE) %>%
    group_by(stressor) %>%
    complete(year = years, fill = list(impact_km2 = 0, eez = 0))
  spp_impact_by_stressor_eez <- summarize_impacts(spp_intsx_df, 
                                                  range_df = spp_range_df,  
                                                  eez = TRUE) %>%
    group_by(stressor, eez) %>%
    complete(year = years, fill = list(impact_km2 = 0))

  ### cumulative impacts: whole range, then by EEZ
  spp_cum_impact <- summarize_cum_impacts(spp_intsx_df, 
                                          range_df = spp_range_df,  
                                          eez = FALSE) %>%
    group_by(stressor) %>%
    complete(year = years, fill = list(impact_km2 = 0, eez = 0))
  spp_cum_impact_eez <- summarize_cum_impacts(spp_intsx_df, 
                                              range_df = spp_range_df,  
                                              eez = TRUE) %>%
    group_by(stressor, eez) %>%
    complete(year = years, fill = list(impact_km2 = 0))
  
  ### rows with an NA year are placeholders left over from the zero-impact
  ### and EEZ completion steps; drop them before joining to the range
  impact_df <- bind_rows(spp_impact_by_stressor, 
                         spp_impact_by_stressor_eez, 
                         spp_cum_impact, 
                         spp_cum_impact_eez) %>%
    filter(!is.na(year)) %>%
    full_join(spp_range_df, by = 'eez')
  
  return(impact_df)
}
```

### calculate species cumulative impacts by stressor category

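NOTE: This chunk sometimes fails partway through its run, and species that fail leave no output file.  After running, check that the resulting files total 6355 (1271 species * 5 impact categories, counting 'all'); if not, re-run the chunk, which only processes the species/category files that are still missing.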

``` {r calc species cumulative impacts by category}

### set up list of all intersection files and an ID for indexing
### (both are read as globals by get_intsx_files)
intsx_files_all <- list.files(dir_spp_intsx, recursive = TRUE, full.names = TRUE)
intsx_id_vec    <- as.integer(str_extract(basename(intsx_files_all), '[0-9]{3,}'))

### get pace of change maps dataframe
pace_df <- get_pace_df()

### set up categories to loop over
categories <- c(str_cats$category %>% unique(), 'all')

x <- list.files(file.path(dir_bd_anx, 'spp_impacts'), full.names = TRUE)
### to clear old versions:
# # unlink(x)

### to check whether any are missing, look for ids with fewer output
### files than there are categories.  For each id already on disk: n_str
### is the number of files, and str lists the categories they cover.
y <- data.frame(x) %>%
  mutate(id = str_extract(basename(x), '[0-9]+') %>% as.integer(),
         str = str_replace_all(basename(x), '.+[0-9]+_|.csv', '')) %>%
  group_by(id) %>%
  summarize(n_str = n(), str = paste0(str, collapse = ', '))


for(impact_cat in categories) {
  # impact_cat <- categories[5]
  
  ### only consider those that aren't already complete: ids with fewer
  ### files than categories (previously a hard-coded 5) that lack a file
  ### for this category
  spp_ids_incomplete <- y %>%
    filter(n_str < length(categories)) %>%
    filter(!str_detect(str, impact_cat)) %>%
    .$id
  
  spp_ids_to_process <- spp_ids[!spp_ids %in% y$id | spp_ids %in% spp_ids_incomplete]
  
  message('Processing ', length(spp_ids_to_process), ' species for ', impact_cat)

  tmp <- parallel::mclapply(spp_ids_to_process, mc.cores = 20,
    FUN = function(spp_id) {
      ### spp_id <- spp_ids_to_process[1]
      ### spp_id <- 2477 
      ### spp_id <- 907
      ### spp_id <- 10030
      ### spp_id <- 22697963
      spp_impact_fstem <- file.path(dir_bd_anx, 'spp_impacts/spp_impacts_%s_%s.csv')
      spp_impact_file <- sprintf(spp_impact_fstem, spp_id, impact_cat)
      
      if(!file.exists(spp_impact_file)) {
        message('Processing spp ', spp_id, '; ', impact_cat)
        ### Calculate total species range, accounting for partial ocean cells
        impact_df <- calc_cum_impact(spp_id, 
                                     impact_cat = impact_cat,
                                     pace_df = pace_df)
        write_csv(impact_df, spp_impact_file)
      } else {
        # message('Impact df file exists: ', basename(spp_impact_file), '... skipping!')
      }
      return(paste('success:', spp_id))
    }
  )
  
  ### mclapply does not stop when a worker fails: an error comes back as
  ### a 'try-error' element, and a worker that died comes back as NULL.
  ### Report those species here rather than discarding the results.
  failed <- vapply(tmp, 
                   FUN = function(res) is.null(res) || inherits(res, 'try-error'),
                   FUN.VALUE = logical(1))
  if(any(failed)) {
    warning('Impact calculation failed for ', sum(failed), ' species in ',
            impact_cat, ': ', 
            paste(spp_ids_to_process[failed], collapse = ', '))
  }
}

### final check: one output file per species per category
z <- list.files(file.path(dir_bd_anx, 'spp_impacts'), full.names = TRUE)
if(length(z) < length(spp_ids) * length(categories)) {
  stop('Not all species impacts were calculated: ', length(z),
       ' out of ', length(spp_ids) * length(categories))
}
```

## Calculate maps of cumulative impacts

For each species, create a map of cumulative impacts:

* `left_join` each stressor intersection to the species range by `cell_id`
* summarize number of stressors in each cell
* write out as .csv to annex, `spp_impact_rasts`

Each of these will be performed for each stressor category.  Full cumulative can be found from simply adding 'em up.

### map species cumulative impacts by stressor category

``` {r setup functions}
map_cum_impact <- function(spp_id, 
                           dir_spp_maps,
                           impacts = 'all',
                           years = 2003:2013) {
  ### Count the number of stressors impacting each range cell in each
  ### year, for one species.
  ###   spp_id:       species id
  ###   dir_spp_maps: to point to location of range files
  ###   impacts:      impact category, or 'all' for every stressor
  ###   years:        years used for the placeholder rows returned when
  ###                 the species has no impacts in this group; defaults
  ###                 to the previously hard-coded 2003:2013
  ### Returns a dataframe of cell_id, year and n_impacts, holding impacted
  ### cell/years only.

  ### assemble file name and read the range data
  spp_range_file <- file.path(dir_spp_maps,
                              sprintf('iucn_sid_%s.csv', spp_id))
  spp_range_df <- read_csv(spp_range_file, 
                           col_types = cols('cell_id' = 'i', 'presence' = 'i'))
  
  intsx_files <- get_intsx_files(spp_id, impacts)
  
  if(length(intsx_files) == 0) {
    ### no intersection files for this species/impact group: binding an
    ### empty list gives a dataframe with no columns, and the cell_id
    ### filter below would fail before the no-impact fallback at the end
    ### could be reached.  Start from an empty frame with the expected
    ### columns instead.
    spp_intsx_raw <- data.frame(cell_id  = integer(0),
                                year     = integer(0),
                                stressor = character(0),
                                stringsAsFactors = FALSE)
  } else {
    spp_intsx_raw <- lapply(intsx_files,
        FUN = function(x) { # x <- intsx_files[1]
          ### stressor name: file name without the 'spp_intsx_' prefix
          ### and without everything from the first '_<digit>' onward
          str_name <- basename(x) %>%
            str_replace_all('spp_intsx_|_[0-9].*', '')
          
          read_csv(x, col_types = cols('cell_id' = 'i',
                                       'year' = 'i')) %>%
            mutate(stressor = str_name)
        }) %>%
      bind_rows()
  }
  
  spp_intsx_df <- spp_intsx_raw %>%
    filter(!is.na(cell_id)) 

  spp_cum_impact_map <- spp_range_df %>%
    left_join(spp_intsx_df, by = 'cell_id') %>%
      ### left_join keeps totally unimpacted cells
    group_by(cell_id, year) %>%
    summarize(n_impacts = sum(!is.na(stressor))) %>%
    ungroup() %>%
    filter(!is.na(year))
    ### Note: totally unimpacted cells will have an NA for year and
    ### zero for n_impacts; cells with impacts some years and not others
    ### will only have data for impact years...
    ### dropping these observations will make for smaller files; unimpacted
    ### ranges can be added back in later from the range map files.
  
  if(nrow(spp_cum_impact_map) == 0) {
    ### for spp with no impacts in current impact group
    spp_cum_impact_map <- data.frame(cell_id = NA, 
                                     n_impacts = 0, 
                                     year = years)
  }
  
  return(spp_cum_impact_map)
}
```

``` {r map species cumulative impacts, eval = TRUE}
x <- list.files(file.path(dir_bd_anx, 'spp_impact_rasts'),
                full.names = TRUE)
### to clear old versions:
# # unlink(x)

### to check whether any are missing, look for ids with fewer output
### files than there are categories.  For each id already on disk: n_str
### is the number of files, and str lists the categories they cover.
y <- data.frame(x) %>%
  mutate(id = str_extract(basename(x), '[0-9]+') %>% as.integer(),
         str = str_replace_all(basename(x), '.+[0-9]+_|.csv', '')) %>%
  group_by(id) %>%
  summarize(n_str = n(), str = paste0(str, collapse = ', '))


categories <- c(str_cats$category %>% unique(), 'all')

for(impact_cat in categories) {
  # impact_cat <- categories[5]
  # for(spp_id in spp_ids) {
    ### only consider those that aren't already complete: ids with fewer
    ### files than categories (previously a hard-coded 5) that lack a
    ### file for this category
  spp_ids_incomplete <- y %>%
    filter(n_str < length(categories)) %>%
    filter(!str_detect(str, impact_cat)) %>%
    .$id
  
  spp_ids_to_process <- spp_ids[!spp_ids %in% y$id | spp_ids %in% spp_ids_incomplete]

  tmp <- parallel::mclapply(spp_ids_to_process, mc.cores = 4,
    FUN = function(spp_id) {
      ### spp_id <- spp_ids_to_process[2]
      impact_fstem <- file.path(dir_bd_anx, 'spp_impact_rasts/spp_impact_map_%s_%s.csv')
      impact_rast_file <- sprintf(impact_fstem, spp_id, impact_cat)
      if(!file.exists(impact_rast_file)) {
        message('Mapping spp ', spp_id, '; ', impact_cat)
        ### Calculate total species range, accounting for partial ocean cells
        impact_rast <- map_cum_impact(spp_id,
                                      dir_spp_maps,
                                      impacts = impact_cat)
        write_csv(impact_rast, impact_rast_file)
      } else {
        # message('Impact map file exists: ', basename(impact_rast_file), '... skipping!')
      }
      ### return a short status string.  Without it, the worker's value is
      ### the whole impact map (write_csv returns its input invisibly),
      ### which then gets copied back to the parent process for nothing.
      return(paste('success:', spp_id))
    }
  )
  
  ### mclapply does not stop when a worker fails: an error comes back as
  ### a 'try-error' element, and a worker that died comes back as NULL.
  ### Report those species here rather than discarding the results.
  failed <- vapply(tmp, 
                   FUN = function(res) is.null(res) || inherits(res, 'try-error'),
                   FUN.VALUE = logical(1))
  if(any(failed)) {
    warning('Impact mapping failed for ', sum(failed), ' species in ',
            impact_cat, ': ', 
            paste(spp_ids_to_process[failed], collapse = ', '))
  }
}

### final check: one output file per species per category
z <- list.files(file.path(dir_bd_anx, 'spp_impact_rasts'), full.names = TRUE)
if(length(z) < length(spp_ids) * length(categories)) {
  stop('Not all species impact rasters were calculated: ', length(z),
       ' out of ', length(spp_ids) * length(categories))
}

```

## Examine distribution of impacts

Proportion of species falling into these categories.  `incr2` means increasing at greater than 0.1% per year, while `incr3` means increasing at greater than 1.0% per year.

```{r}

### species that have at least one stressor listed
spp_ids <- get_incl_spp() %>%
  filter(!is.na(stressor)) %>%
  .$iucn_sid %>% unique() %>% sort()

### one impact file per species, for the 'all' stressors category
spp_impact_fstem <- file.path(dir_bd_anx, 'spp_impacts/spp_impacts_%s_%s.csv')
spp_impact_files <- sprintf(spp_impact_fstem, spp_ids, 'all')

### read the cumulative rows for the first and last years, and convert
### each area to a proportion of the species range (range_km2); any
### missing values in the double columns are then set to zero
spp_impacts <- parallel::mclapply(spp_impact_files, 
      FUN = function(x) {
        read_csv(x, col_types = cols(.default = 'd', 
                                     stressor = 'c',
                                     iucn_sid = 'i',
                                     eez = 'i', 
                                     year = 'i'))
      }, mc.cores = 24) %>%
  bind_rows() %>%
  filter(year %in% c(2003, 2013) & stressor == 'cumulative') %>%
  mutate(impact_pct = impact_km2 / range_km2,
         incr1_pct   = incr1_km2 / range_km2,
         incr2_pct   = incr2_km2 / range_km2,
         incr3_pct   = incr3_km2 / range_km2,
         decr1_pct   = decr1_km2 / range_km2,
         decr2_pct   = decr2_km2 / range_km2,
         decr3_pct   = decr3_km2 / range_km2,
         imp_2plus_pct = impact_2plus_km2 / range_km2) %>%
  mutate_if(is.double, .funs = function(x) ifelse(is.na(x), 0, x))

### mean and sd of each proportion across species, by year and EEZ
spp_means <- spp_impacts %>%
  group_by(year, eez) %>%
  summarize(mean_impact = mean(impact_pct),    sd_impact = sd(impact_pct),
            mean_incr1   = mean(incr1_pct),    sd_incr1   = sd(incr1_pct),
            mean_incr2   = mean(incr2_pct),    sd_incr2   = sd(incr2_pct),
            mean_incr3   = mean(incr3_pct),    sd_incr3   = sd(incr3_pct),
            mean_decr1   = mean(decr1_pct),    sd_decr1   = sd(decr1_pct),
            mean_decr2   = mean(decr2_pct),    sd_decr2   = sd(decr2_pct),
            mean_decr3   = mean(decr3_pct),    sd_decr3   = sd(decr3_pct),
            mean_imp_2p = mean(imp_2plus_pct), sd_imp_2p = sd(imp_2plus_pct))

### Share of species at or above each threshold.  The "at 100%" columns
### use near() instead of == 1.0: each proportion is a ratio of two
### floating-point area sums, so a fully impacted range can land a hair
### away from exactly 1 and would be missed by an exact comparison.
spp_impact_levels <- spp_impacts %>%
  group_by(year, eez) %>%
  summarize(impact_over_50 = sum(impact_pct >= .5) / n(),
            impact_over_90 = sum(impact_pct >= .9) / n(),
            impact_at_100  = sum(near(impact_pct, 1)) / n())
knitr::kable(spp_impact_levels %>% filter(eez == 0))

spp_incr_levels <- spp_impacts %>%
  group_by(year, eez) %>%
  summarize(incr2_over_50 = sum(incr2_pct >= .5) / n(),
            incr2_over_90 = sum(incr2_pct >= .9) / n(),
            incr2_at_100  = sum(near(incr2_pct, 1)) / n(),
            incr3_over_50 = sum(incr3_pct >= .5) / n(),
            incr3_over_90 = sum(incr3_pct >= .9) / n(),
            incr3_at_100  = sum(near(incr3_pct, 1)) / n())
knitr::kable(spp_incr_levels %>% filter(eez == 0))

spp_imp2plus_levels <- spp_impacts %>%
  group_by(year, eez) %>%
  summarize(imp2pl_over_50 = sum(imp_2plus_pct >= .5) / n(),
            imp2pl_over_90 = sum(imp_2plus_pct >= .9) / n(),
            imp2pl_at_100  = sum(near(imp_2plus_pct, 1)) / n())
knitr::kable(spp_imp2plus_levels %>% filter(eez == 0))

```