MHW_stats_and_figures_supplement.Rmd

---
output: 
  bookdown::pdf_document2:
    toc: false
classoption: landscape
header-includes: 
  - \renewcommand{\figurename}{Supplementary Figure }
  - \renewcommand{\tablename}{Supplementary Table}
  - \makeatletter 
  - \def\fnum@figure{\figurename\thefigure}
  - \makeatother
editor_options: 
  markdown: 
    wrap: 72
---

```{r setup, echo=FALSE, results="hide",message=FALSE, warning=FALSE, include=FALSE}
# Fix the random-number seed so that any code drawing random numbers gives the same result on every knit.
set.seed(42)
# load packages
library(tidyverse)
library(here)
library(lubridate) # for standardizing date format of MHW data
library(sf)
#library(rnaturalearth)
library(kableExtra)
#library(modelsummary)
library(ggpubr)
library(broom)
#library(mcp)
library(lme4)
library(mgcv)
library(cowplot)
library(glmmTMB)

# Make the bare name `select` refer to dplyr's verb.
# NOTE(review): presumably guards against masking by another attached package -- confirm which one.
select <- dplyr::select

# Marine heatwave (MHW) summary tables.
# Naming key for the objects below:
#   oisst / glorys : the two different temperature data sources
#   d / nod        : detrending / no detrending
#   5_day          : MHWs shorter than 5 days were omitted (common threshold)
read_mhw_summary <- function(file_name) {
  read_csv(here("processed-data", file_name))
}

mhw_summary_oisst_d <- read_mhw_summary("MHW_oisst.csv")
mhw_summary_glorys_d <- read_mhw_summary("MHW_glorys.csv")

mhw_summary_oisst_d_5_day <- read_mhw_summary("MHW_oisst_5_day_threshold.csv")
# this is the dataset used in the main analysis
mhw_summary_glorys_d_5_day <- read_mhw_summary("MHW_glorys_5_day_threshold.csv")

mhw_summary_oisst_nod <- read_mhw_summary("MHW_oisst_no_detrending.csv")
mhw_summary_glorys_nod <- read_mhw_summary("MHW_glorys_no_detrending.csv")

mhw_summary_oisst_nod_5_day <- read_mhw_summary("MHW_oisst_5_day_threshold_no_detrending.csv")
mhw_summary_glorys_nod_5_day <- read_mhw_summary("MHW_glorys_5_day_threshold_no_detrending.csv")

mhw_summary_glorys_d_any_summer <- read_mhw_summary("MHW_glorys_summer_only.csv")

# mhw_summary_dhd <- read_csv(here("processed-data","MHW_glorys_dhd_no_detrending.csv"))
mhw_summary_dhd_glorys <- read_mhw_summary("MHW_glorys_dhd_baseline.csv")
# mhw_summary_dhd_oisst <- read_csv(here("processed-data","MHW_oisst_dhd_baseline.csv"))

# raw MHW data
# Both raw files are semicolon-delimited. The column read in as `X` is renamed
# to `dateRaw` and parsed as day-month-year into `date`; `source` tags each
# row with the data product it came from.
read_raw_mhw <- function(file_name, source_label) {
  raw <- read.delim(here("raw-data", file_name), sep = ";")
  raw %>%
    rename("dateRaw" = X) %>%
    mutate(
      date = dmy(dateRaw),
      source = source_label
    )
}

oisst_raw_d <- read_raw_mhw("MHW_95P_surveys_satellite_surf.csv", "OISST")
glorys_raw_d <- read_raw_mhw("MHW_95P_surveys_glorys_surf.csv", "GLORYS")

# survey data
survey_summary <- read_csv(here("processed-data", "survey_biomass_with_CTI.csv"))

# species-level table: use `spp` as the species column name and force the
# log-biomass column to numeric
survey_spp_summary <- read_csv(here("processed-data", "species_biomass_with_CTI.csv")) %>%
  rename(spp = accepted_name) %>%
  mutate(wt_mt_log = as.numeric(wt_mt_log))

survey_start_times <- read_csv(here("processed-data", "survey_start_times.csv"))
coords_dat <- read_csv(here("processed-data", "survey_coordinates.csv"))
haul_info <- read_csv(here("processed-data", "haul_info.csv"))
#biomass_time <- read_csv(here("processed-data","biomass_time.csv"))
#footprint <- read_csv(here("processed-data","spatial_standardization_summary.csv"))

# median haul latitude per survey
med_lat <- haul_info %>%
  group_by(survey) %>%
  summarise(med_lat = median(latitude))

# number of rows per survey that have a match in the main-analysis MHW table
survey_n <- inner_join(survey_summary, mhw_summary_glorys_d_5_day) %>%
  group_by(survey) %>%
  summarise(n = n())

# Abbreviations are assigned by position, so this vector must stay in the same
# row order as survey_names.csv.
survey_abbr <- c(
  "BalS", "BC", "EBS", "FR", "EC", "GoM",
  "GoA", "GSL", "IR", "NeUS", "NI", "BarS",
  "NS", "PO", "SS", "SeUS", "SC", "WUS"
)

survey_names <- read_csv(here("processed-data", "survey_names.csv")) %>%
  left_join(survey_n) %>%
  mutate(
    title = recode(title, "Norway" = "Barents Sea"),
    abbr = survey_abbr
  )

# traits data
# Keep trophic level (tl), feeding mode and habitat per taxon; average tl over
# duplicate records of the same taxon / feeding mode / habitat; then record in
# `n` how many rows each taxon still has. The result stays grouped by taxon.
traits_raw <- read_csv(here("raw-data", "TraitCollectionFishNAtlanticNEPacificContShelf.csv"))

traits <- traits_raw %>%
  select(taxon, tl, feeding.mode, habitat) %>%
  distinct() %>%
  group_by(taxon, feeding.mode, habitat) %>%
  summarise(tl = mean(tl), .groups = "drop_last") %>%
  group_by(taxon) %>%
  mutate(n = n())

# clean up raw MHW data
# Region column names (upper-cased, underscores turned into hyphens) mapped to
# the survey codes used in the rest of the analysis; names not listed here are
# left as they are.
region_to_survey <- c(
  "BALTIC-SEA" = "BITS",
  "BRITISH-COLUMBIA" = "DFO-QCS",
  "EASTERN-BERING-SEA" = "EBS",
  "GULF-OF-MEXICO" = "GMEX",
  "GULF-OF-ALASKA" = "GOA",
  "NOR-BTS" = "Nor-BTS",
  "SCOTIAN-SHELF" = "SCS",
  "SOUTHEAST" = "SEUS",
  "WEST-COAST" = "WCANN"
)

# stack both sources, go from one column per region to one row per
# date x region, then attach the survey display names
rawsourcedat <- bind_rows(oisst_raw_d, glorys_raw_d) %>%
  pivot_longer(cols = baltic_sea:west_coast, names_to = "survey", values_to = "anom") %>%
  mutate(
    survey = toupper(gsub("_", "-", survey, fixed = TRUE)),
    survey = recode(survey, !!!region_to_survey)
  ) %>%
  left_join(survey_names)


# dissimilarity data
beta_div <- read_csv(here("processed-data", "survey_temporal_beta_diversity.csv")) %>%
  left_join(survey_start_times) %>% # add in the ref_yr column
  select(-month_year, -survey_date) %>%
  left_join(mhw_summary_glorys_d_5_day) # add in mhw data

# species biomass with the main-analysis MHW data attached, split three ways
# by trait below

# trophic level: one row per distinct taxon / tl pair, binned into categories
trait_tl <- traits %>%
  select(taxon, tl) %>%
  distinct()

tl <- survey_spp_summary %>%
  inner_join(mhw_summary_glorys_d_5_day) %>%
  inner_join(trait_tl, by = c("spp" = "taxon")) %>%
  mutate(
    tl_cat = ifelse(
      tl > 4,
      "4-5",
      ifelse(tl > 3, "3-4", "2-3")
    )
  )

# feeding mode: keep only taxa with exactly one non-missing feeding mode record
trait_feeding <- traits %>%
  select(taxon, feeding.mode, n) %>%
  distinct() %>%
  filter(!is.na(feeding.mode)) %>%
  group_by(taxon) %>%
  mutate(n = n()) %>%
  filter(n == 1)

feed <- survey_spp_summary %>%
  inner_join(mhw_summary_glorys_d_5_day) %>%
  inner_join(trait_feeding, by = c("spp" = "taxon"))

# habitat: keep only taxa with exactly one non-missing habitat record
trait_habitat <- traits %>%
  select(taxon, habitat, n) %>%
  distinct() %>%
  filter(!is.na(habitat)) %>%
  group_by(taxon) %>%
  mutate(n = n()) %>%
  filter(n == 1)

hab <- survey_spp_summary %>%
  inner_join(mhw_summary_glorys_d_5_day) %>%
  inner_join(trait_habitat, by = c("spp" = "taxon"))

# Center and scale a numeric vector, returning a plain numeric vector rather
# than the one-column matrix that scale() produces.
zscore <- function(x) {
  as.numeric(scale(x, center = TRUE, scale = TRUE))
}

# Main modelling table: survey biomass joined to the main-analysis MHW data,
# with rows missing either the biomass log ratio or the MHW metric removed.
modeldat <- survey_summary %>%
  inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr") %>%
  filter(!is.na(wt_mt_log), !is.na(anom_sev)) %>%
  left_join(med_lat) %>%
  # latitude is scaled across all rows: there is only one value per survey, so
  # scaling within surveys first is not needed
  mutate(med_lat_scale = zscore(med_lat)) %>%
  group_by(survey) %>%
  # everything in this mutate is centered and scaled within each survey
  mutate(
    wt_mt_scale = zscore(wt_mt),
    wt_mt_log_scale = zscore(wt_mt_log),
    cti_diff_scale = zscore(cti_diff),
    anom_sev_scale = zscore(anom_sev),
    depth_wt_scale = zscore(depth_wt)
  ) %>%
  arrange(year) %>%
  # log biomass of the previous row within the same survey, in year order
  mutate(wt_mt_lag = log(lag(wt_mt))) %>%
  ungroup() %>%
  mutate(
    # alternative scaling method--among, not within, regions
    anom_sev_scale_alt = zscore(anom_sev),
    wt_mt_log_scale_abs = abs(wt_mt_log_scale)
  )

# pull in catch data and merge with modeldat

# catch by marine ecoregion (ME)
catch_me <- read_csv(here("raw-data", "MEcatchdataforAlexa-2.csv")) %>%
  rename(year = Year)

# lookup from ME to survey; note that some surveys are matched with more than
# one ME (big coastwide US surveys) and some with zero (small Europe ones)
me_dat <- read_csv(here("raw-data", "ME_to_surveys.csv"))

catchdat <- full_join(catch_me, me_dat, by = "ME") %>%
  left_join(survey_summary %>% select(ref_yr, survey, year), by = c("survey", "year")) %>%
  filter(!is.na(ref_yr)) %>% # keep only survey-years present in survey_summary
  group_by(survey, year, ref_yr) %>%
  summarise(catch = sum(Catch)) %>% # aggregate catches for surveys that cover multiple MEs (WCANN, Nor-BTS and NEUS)
  ungroup() %>%
  arrange(year) %>%
  group_by(survey) %>%
  mutate(
    # mean of the current row and the two preceding rows within each survey.
    # NOTE(review): rows are survey-years, so for surveys that were not run
    # every year this window spans more than three calendar years -- confirm
    # that this is intended.
    catch_3yr = (catch + lag(catch) + lag(catch, n = 2)) / 3,
    # as.numeric() drops the one-column matrix that scale() returns, so this
    # column is a plain numeric vector like the other *_scale columns
    catch_3yr_scale = as.numeric(scale(catch_3yr, center = TRUE, scale = TRUE))
  ) %>%
  ungroup() %>%
  right_join(modeldat)


# lagged effects of MHW cumulative intensity on biomass, for GAMs
# Rows are put in year order, then each lag column takes the scaled MHW value
# from 1 to 4 rows earlier within the same survey. The column order of the
# final select() matters: the lag GAMs index these columns by position.
lags <- modeldat %>%
  arrange(year) %>%
  group_by(survey) %>%
  mutate(
    lag1 = lag(anom_sev_scale, n = 1),
    lag2 = lag(anom_sev_scale, n = 2),
    lag3 = lag(anom_sev_scale, n = 3),
    lag4 = lag(anom_sev_scale, n = 4)
  ) %>%
  ungroup() %>%
  select(
    wt_mt_log_scale,
    anom_sev_scale,
    lag1, lag2, lag3, lag4
  )

# colorblind-friendly palettes for plots of single species, taken from
# http://mkweb.bcgsc.ca/colorblind/palettes.mhtml#page-container

# six-color palette (five hues plus black)
cbpal6 <- c(
  "#2271B2", "#3DB7E9", "#F748A5",
  "#359B73", "#D55E00", "black"
)

# larger palettes, currently unused:
# cbpal12 <- c("#9F0162","#009F81","#FF5AAF","#00FCCF","#8400CD","#008DF9","gray50","#00C2F9","#FFB2FD","#A40122","#E20134","#FFC33B") 
# cbpal16 <- c('#68023F','#008169','#EF0096','#00DCB5','#FFCFE2','#003C86','#9400E6','#009FFA','#FF71FD','grey50','#7CFFFA','#6A0213','#008607','#F60239','#00E307','#FFDC3D')

# for power analysis
# Saved simulation summaries for each data source; the columns are renamed so
# that the OISST and GLORYS versions share the same names.
sim_gamma_cols <- c("exp_gamma", "propsig")
sim_yrs_cols <- c("n_years", "propsig", "n_years_tot")

# OISST
sim_test_summ_gamma_oisst <- readRDS(here("processed-data", "sim_test_summ_gamma_oisst.rds"))
colnames(sim_test_summ_gamma_oisst) <- sim_gamma_cols
sim_test_summ_yrs_oisst <- readRDS(here("processed-data", "sim_test_summ_yrs_oisst.rds"))
colnames(sim_test_summ_yrs_oisst) <- sim_yrs_cols

# GLORYS
sim_test_summ_gamma_glorys <- readRDS(here("processed-data", "sim_test_summ_gamma_glorys.rds"))
colnames(sim_test_summ_gamma_glorys) <- sim_gamma_cols
sim_test_summ_yrs_glorys <- readRDS(here("processed-data", "sim_test_summ_yrs_glorys.rds"))
colnames(sim_test_summ_yrs_glorys) <- sim_yrs_cols
```

```{r opts, include=FALSE}
# Defaults for every later chunk: hide the code, messages and warnings so only
# tables and figures reach the PDF.
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
#knitr::opts_knit$set(eval.after = "fig.cap")
```

```{r tbl-surveys}

# Survey overview table, including the abbreviations used on Figure 1.
surveys_colnames <- c("FISHGLOB Code", "Survey", "Number of Survey-Years", "Abbreviation")
surveys_caption <- "Survey names and sample sizes used in the main analysis, and corresponding abbreviations used in figures and tables. The survey codes from FISHGLOB are also listed for comparison to other data from the FISHGLOB Consortium."

surveys_kbl <- kbl(survey_names, booktabs = TRUE, col.names = surveys_colnames, caption = surveys_caption)
kable_styling(surveys_kbl, font_size = 8, latex_options = c("striped"), full_width = TRUE)
```

\clearpage

```{r tbl-models-mhws}
# Candidate models of the within-survey scaled biomass log ratio as a function
# of within-survey scaled MHW cumulative intensity.
null.wt <- lm(wt_mt_log_scale ~ 1, data = modeldat)
lm.wt <- lm(wt_mt_log_scale ~ anom_sev_scale, data = modeldat)
lm.fixed.wt <- lm(wt_mt_log_scale ~ anom_sev_scale + factor(survey), data = modeldat)
gam.wt <- gam(wt_mt_log_scale ~ s(anom_sev_scale), data = modeldat)
# the "re" basis needs survey to be a factor
gam.re.wt <- gam(wt_mt_log_scale ~ s(anom_sev_scale) + s(survey, bs = "re"), data = modeldat  %>% mutate(survey = as.factor(survey)))

formulas.wt <- c('Biomass LR* $\\sim$ 1','Biomass LR* $\\sim$ MHW CInt*','Biomass LR* $\\sim$ MHW CInt* + Survey[fixed]','Biomass LR* $\\sim$ s(MHW CInt*)','Biomass LR* $\\sim$ s(MHW CInt*) + Survey[random]')

# Formatting helpers for the table cells.
# fmt3: round to 3 decimals and print at least 2.
# fmt_est: "estimate ± standard error" from columns 1 and 2 of one row of a
# coefficient table.
# Routing every cell through these fixes the previous LM Survey coefficient
# cell, where a misplaced parenthesis rounded the standard error to an integer
# and passed `nsmall=2` to paste0(), appending a literal "2" to the cell.
fmt3 <- function(x) format(round(x, digits = 3), nsmall = 2)
fmt_est <- function(coef_tbl, row) {
  paste0(fmt3(coef_tbl[row, 1]), " ± ", fmt3(coef_tbl[row, 2]))
}

# row 1 of each coefficient table is the intercept
int.wt <- c(
  fmt_est(coef(summary(null.wt)), 1),
  fmt_est(coef(summary(lm.wt)), 1),
  fmt_est(coef(summary(lm.fixed.wt)), 1),
  fmt_est(summary(gam.wt)$p.table, 1),
  fmt_est(summary(gam.re.wt)$p.table, 1)
)
# row 2 of the linear models is the MHW slope; the null model and the GAMs
# have no single slope to report
coef.wt <- c(
  "NA",
  fmt_est(coef(summary(lm.wt)), 2),
  fmt_est(coef(summary(lm.fixed.wt)), 2),
  "NA",
  "NA"
)
# p-value of the MHW slope (linear models) or of the MHW smooth (GAMs; the
# first smooth in the random-effect GAM)
p.wt <- c(
  "NA",
  fmt3(coef(summary(lm.wt))[2, 4]),
  fmt3(coef(summary(lm.fixed.wt))[2, 4]),
  fmt3(summary(gam.wt)$s.pv),
  fmt3(summary(gam.re.wt)$s.pv[1])
)

# formatted as one vector so all five cells share a common width
r2.wt <- fmt3(c(
  summary(null.wt)$r.squared,
  summary(lm.wt)$r.squared,
  summary(lm.fixed.wt)$r.squared,
  summary(gam.wt)$r.sq,
  summary(gam.re.wt)$r.sq
))
aic.wt <- round(c(
  AIC(null.wt),
  AIC(lm.wt),
  AIC(lm.fixed.wt),
  AIC(gam.wt),
  AIC(gam.re.wt)
))
# residual degrees of freedom
df.wt <- round(c(
  summary(null.wt)$df[2],
  summary(lm.wt)$df[2],
  summary(lm.fixed.wt)$df[2],
  gam.wt$df.residual,
  gam.re.wt$df.residual
))


colnames.wt <- c('Null','LM','LM Survey','GAM','GAM Survey')
rownames.wt <- c('Model formula','Intercept', 'MHW coefficient',"Coefficient p-value",'R$^2$',"AIC","Degrees of freedom")

# one column per model, one row per reported quantity
wt.tbl <- data.frame(rbind(formulas.wt, int.wt, coef.wt, p.wt, r2.wt, aic.wt, df.wt), row.names=rownames.wt) 
colnames(wt.tbl) <- colnames.wt

# escape = FALSE so the LaTeX in the formula and R^2 labels is rendered
kbl(wt.tbl, booktabs = TRUE, caption = "Models of biomass log ratio (LR) response to MHW cumulative intensity in °C-days (MHW CInt). MHW CInt was calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Model names correspond to: null (intercept-only) model, linear model, linear model including survey as a fixed effect, generalized additive model (GAM), and GAM including survey as a random effect. * denotes variables that were scaled and centered within surveys and s() denotes a GAM smoother.",
    escape = FALSE) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width=TRUE)
```

\clearpage


```{r tbl-models-byreg}
# One linear model per survey of scaled biomass log ratio on scaled MHW
# cumulative intensity, summarized as one table row per survey.
colnames.byreg <- c('Intercept', 'MHW coefficient',"Coefficient p-value",'R$^2$',"AIC","Degrees of freedom")

# round to 3 decimals and print at least 2
fmt3 <- function(x) format(round(x, digits = 3), nsmall = 2)

# Collect the per-survey rows in a preallocated list and bind them once at the
# end instead of growing the table inside the loop. seq_len() keeps the loop
# from running when survey_names has no rows.
n.surveys <- nrow(survey_names)
byreg.rows <- vector("list", n.surveys)

for (i in seq_len(n.surveys)) {
  surv_i <- survey_names$survey[i]
  tmpmod <- modeldat %>% filter(survey == surv_i)
  lm.i <- lm(wt_mt_log_scale ~ anom_sev_scale, data = tmpmod)
  coefs.i <- coef(summary(lm.i))

  # row 1 is the intercept, row 2 the MHW slope; columns 1, 2 and 4 are the
  # estimate, its standard error and its p-value
  int.wt.i <- paste0(fmt3(coefs.i[1, 1]), " ± ", fmt3(coefs.i[1, 2]))
  coef.wt.i <- paste0(fmt3(coefs.i[2, 1]), " ± ", fmt3(coefs.i[2, 2]))
  p.wt.i <- fmt3(coefs.i[2, 4])
  r2.wt.i <- fmt3(summary(lm.i)$r.squared)
  aic.wt.i <- round(AIC(lm.i))
  df.wt.i <- round(summary(lm.i)$df[2]) # residual degrees of freedom

  # cbind() turns every cell into text, so the row prints exactly as formatted
  wt.tbl.i <- data.frame(cbind(int.wt.i, coef.wt.i, p.wt.i, r2.wt.i, aic.wt.i, df.wt.i), row.names = survey_names$title[i])
  colnames(wt.tbl.i) <- colnames.byreg
  byreg.rows[[i]] <- wt.tbl.i
}

# NULL when there are no surveys, matching the previous initial value
byreg.tbl <- do.call(rbind, byreg.rows)

# escape = FALSE so the LaTeX in the caption and R^2 label is rendered
kbl(byreg.tbl, booktabs = TRUE, caption = "Linear models of biomass log ratio (LR) response to MHW cumulative intensity in °C-days (MHW CInt) in each individual region (model formula: Biomass LR* $\\sim$ MHW CInt*, where * denotes variables that were scaled and centered within surveys). MHW CInt was calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text.",
    escape = FALSE) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width=TRUE) 
```

\clearpage

```{r tbl-models-mhws-abs}
# Same set of candidate models as the main biomass table, but with the
# absolute value of the scaled biomass log ratio as the response.
null.abs <- lm(wt_mt_log_scale_abs ~ 1, data = modeldat)
lm.abs <- lm(wt_mt_log_scale_abs ~ anom_sev_scale, data = modeldat)
lm.fixed.abs <- lm(wt_mt_log_scale_abs ~ anom_sev_scale + factor(survey), data = modeldat)
gam.abs <- gam(wt_mt_log_scale_abs ~ s(anom_sev_scale), data = modeldat)
# the "re" basis needs survey to be a factor
gam.re.abs <- gam(wt_mt_log_scale_abs ~ s(anom_sev_scale) + s(survey, bs = "re"), data = modeldat  %>% mutate(survey = as.factor(survey)))

formulas.abs <- c('Absolute Biomass LR* $\\sim$ 1','Absolute Biomass LR* $\\sim$ MHW CInt*','Absolute Biomass LR* $\\sim$ MHW CInt* + Survey[fixed]','Absolute Biomass LR* $\\sim$ s(MHW CInt*)','Absolute Biomass LR* $\\sim$ s(MHW CInt*) + Survey[random]')

# Formatting helpers for the table cells.
# fmt3: round to 3 decimals and print at least 2.
# fmt_est: "estimate ± standard error" from columns 1 and 2 of one row of a
# coefficient table.
# Routing every cell through these fixes the previous LM Survey coefficient
# cell, where a misplaced parenthesis rounded the standard error to an integer
# and passed `nsmall=2` to paste0(), appending a literal "2" to the cell.
fmt3 <- function(x) format(round(x, digits = 3), nsmall = 2)
fmt_est <- function(coef_tbl, row) {
  paste0(fmt3(coef_tbl[row, 1]), " ± ", fmt3(coef_tbl[row, 2]))
}

# row 1 of each coefficient table is the intercept
int.abs <- c(
  fmt_est(coef(summary(null.abs)), 1),
  fmt_est(coef(summary(lm.abs)), 1),
  fmt_est(coef(summary(lm.fixed.abs)), 1),
  fmt_est(summary(gam.abs)$p.table, 1),
  fmt_est(summary(gam.re.abs)$p.table, 1)
)
# row 2 of the linear models is the MHW slope; the null model and the GAMs
# have no single slope to report
coef.abs <- c(
  "NA",
  fmt_est(coef(summary(lm.abs)), 2),
  fmt_est(coef(summary(lm.fixed.abs)), 2),
  "NA",
  "NA"
)
# p-value of the MHW slope (linear models) or of the MHW smooth (GAMs; the
# first smooth in the random-effect GAM)
p.abs <- c(
  "NA",
  fmt3(coef(summary(lm.abs))[2, 4]),
  fmt3(coef(summary(lm.fixed.abs))[2, 4]),
  fmt3(summary(gam.abs)$s.pv),
  fmt3(summary(gam.re.abs)$s.pv[1])
)

# formatted as one vector so all five cells share a common width
r2.abs <- fmt3(c(
  summary(null.abs)$r.squared,
  summary(lm.abs)$r.squared,
  summary(lm.fixed.abs)$r.squared,
  summary(gam.abs)$r.sq,
  summary(gam.re.abs)$r.sq
))
aic.abs <- round(c(
  AIC(null.abs),
  AIC(lm.abs),
  AIC(lm.fixed.abs),
  AIC(gam.abs),
  AIC(gam.re.abs)
))
# residual degrees of freedom
df.abs <- round(c(
  summary(null.abs)$df[2],
  summary(lm.abs)$df[2],
  summary(lm.fixed.abs)$df[2],
  gam.abs$df.residual,
  gam.re.abs$df.residual
))


colnames.abs <- c('Null','LM','LM Survey','GAM','GAM Survey')
rownames.abs <- c('Model formula','Intercept', 'MHW coefficient',"Coefficient p-value",'R$^2$',"AIC","Degrees of freedom")

# one column per model, one row per reported quantity
# (this reuses, and so overwrites, the name `wt.tbl`)
wt.tbl <- data.frame(rbind(formulas.abs, int.abs, coef.abs, p.abs, r2.abs, aic.abs, df.abs), row.names=rownames.abs) 
colnames(wt.tbl) <- colnames.abs

# escape = FALSE so the LaTeX in the formula and R^2 labels is rendered
kbl(wt.tbl, booktabs = TRUE, caption = "Models of absolute biomass log ratio (LR) response to MHW cumulative intensity in °C-days (MHW CInt). MHW CInt was calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Model names correspond to: null (intercept-only) model, linear model, linear model including survey as a fixed effect, generalized additive model (GAM), and GAM including survey as a random effect. * denotes variables that were scaled and centered within surveys and s() denotes a GAM smoother.",
    escape = FALSE) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width=TRUE)
```

\clearpage

```{r tbl-models-alt}
# Same set of candidate models as the main biomass table, but with MHW
# cumulative intensity centered and scaled across all surveys
# (anom_sev_scale_alt) rather than within each survey.
null.altmhw <- lm(wt_mt_log_scale ~ 1, data = modeldat)
lm.altmhw <- lm(wt_mt_log_scale ~ anom_sev_scale_alt, data = modeldat)
lm.fixed.altmhw <- lm(wt_mt_log_scale ~ anom_sev_scale_alt + factor(survey), data = modeldat)
gam.altmhw <- gam(wt_mt_log_scale ~ s(anom_sev_scale_alt), data = modeldat)
# the "re" basis needs survey to be a factor
gam.re.altmhw <- gam(wt_mt_log_scale ~ s(anom_sev_scale_alt) + s(survey, bs = "re"), data = modeldat  %>% mutate(survey = as.factor(survey)))

formulas.altmhw <- c('Biomass LR* $\\sim$ 1','Biomass LR* $\\sim$ MHW CInt**','Biomass LR* $\\sim$ MHW CInt** + Survey[fixed]','Biomass LR* $\\sim$ s(MHW CInt**)','Biomass LR* $\\sim$ s(MHW CInt**) + Survey[random]')

# Formatting helpers for the table cells.
# fmt3: round to 3 decimals and print at least 2.
# fmt_est: "estimate ± standard error" from columns 1 and 2 of one row of a
# coefficient table.
# Routing every cell through these fixes the previous LM Survey coefficient
# cell, where a misplaced parenthesis rounded the standard error to an integer
# and passed `nsmall=2` to paste0(), appending a literal "2" to the cell.
fmt3 <- function(x) format(round(x, digits = 3), nsmall = 2)
fmt_est <- function(coef_tbl, row) {
  paste0(fmt3(coef_tbl[row, 1]), " ± ", fmt3(coef_tbl[row, 2]))
}

# row 1 of each coefficient table is the intercept
int.altmhw <- c(
  fmt_est(coef(summary(null.altmhw)), 1),
  fmt_est(coef(summary(lm.altmhw)), 1),
  fmt_est(coef(summary(lm.fixed.altmhw)), 1),
  fmt_est(summary(gam.altmhw)$p.table, 1),
  fmt_est(summary(gam.re.altmhw)$p.table, 1)
)
# row 2 of the linear models is the MHW slope; the null model and the GAMs
# have no single slope to report
coef.altmhw <- c(
  "NA",
  fmt_est(coef(summary(lm.altmhw)), 2),
  fmt_est(coef(summary(lm.fixed.altmhw)), 2),
  "NA",
  "NA"
)
# p-value of the MHW slope (linear models) or of the MHW smooth (GAMs; the
# first smooth in the random-effect GAM)
p.altmhw <- c(
  "NA",
  fmt3(coef(summary(lm.altmhw))[2, 4]),
  fmt3(coef(summary(lm.fixed.altmhw))[2, 4]),
  fmt3(summary(gam.altmhw)$s.pv),
  fmt3(summary(gam.re.altmhw)$s.pv[1])
)
# formatted as one vector so all five cells share a common width
r2.altmhw <- fmt3(c(
  summary(null.altmhw)$r.squared,
  summary(lm.altmhw)$r.squared,
  summary(lm.fixed.altmhw)$r.squared,
  summary(gam.altmhw)$r.sq,
  summary(gam.re.altmhw)$r.sq
))
aic.altmhw <- round(c(
  AIC(null.altmhw),
  AIC(lm.altmhw),
  AIC(lm.fixed.altmhw),
  AIC(gam.altmhw),
  AIC(gam.re.altmhw)
))
# residual degrees of freedom
df.altmhw <- round(c(
  summary(null.altmhw)$df[2],
  summary(lm.altmhw)$df[2],
  summary(lm.fixed.altmhw)$df[2],
  gam.altmhw$df.residual,
  gam.re.altmhw$df.residual
))


colnames.altmhw <- c('Null','LM','LM Survey','GAM','GAM Survey')
rownames.altmhw <- c('Model formula','Intercept', 'MHW coefficient',"Coefficient p-value",'R$^2$',"AIC","Degrees of freedom")

# one column per model, one row per reported quantity
alt.tbl <- data.frame(rbind(formulas.altmhw, int.altmhw, coef.altmhw, p.altmhw, r2.altmhw, aic.altmhw, df.altmhw), row.names=rownames.altmhw) 
colnames(alt.tbl) <- colnames.altmhw

# escape = FALSE so the LaTeX in the formula and R^2 labels is rendered
kbl(alt.tbl, booktabs = TRUE, caption = "Models of biomass log ratio (LR) response to MHW cumulative intensity in °C-days (MHW CInt). MHW CInt was calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Model names correspond to: null (intercept-only) model, linear model, linear model including survey as a fixed effect, generalized additive model (GAM), and GAM including survey as a random effect. * denotes variables that were scaled and centered within surveys, ** denotes variables that were scaled and centered across surveys, and s() denotes a GAM smoother. This table is identical to Supp. Tab. 2, except rather than centering and scaling MHW CInt within regions, it is centered and scaled across regions. The approach used in most of our analysis (centering and scaling MHW CInt within regions) assumes that history matters in ecological responses to MHW responses, i.e., that biomass change should be compared to how anomalous a MHW is relative to other MHWs that occurred in the region. Here, we test the hypothesis that absolute MHW CInt matters regardless of the oceanographic history of each region by centering and scaling MHW CInt across regions.",
    escape = FALSE) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width=TRUE)
```

\clearpage

```{r tbl-models-lags}

# GAMs of scaled biomass log ratio on a smoothed matrix of MHW predictors.
# Column 2 of `lags` is the unlagged MHW value and columns 3-6 are lag1-lag4,
# so each successive model adds one more lag column to the matrix.
gam1 <- gam(wt_mt_log_scale ~ s(as.matrix(lags)[,2:3]), data = lags)
gam2 <- gam(wt_mt_log_scale ~ s(as.matrix(lags)[,2:4]), data = lags)
gam3 <- gam(wt_mt_log_scale ~ s(as.matrix(lags)[,2:5]), data = lags)
gam4 <- gam(wt_mt_log_scale ~ s(as.matrix(lags)[,2:6]), data = lags)

lag.gams <- list(gam1, gam2, gam3, gam4)

formulas.lags <- c(
  'Biomass LR* $\\sim$ s(matrix(MHW CInt*[lag1:2]))',
  'Biomass LR* $\\sim$ s(matrix(MHW CInt*[lag1:3]))',
  'Biomass LR* $\\sim$ s(matrix(MHW CInt*[lag1:4]))',
  'Biomass LR* $\\sim$ s(matrix(MHW CInt*[lag1:5]))'
)

# pull one number per model, in model order
lag.r2 <- vapply(lag.gams, function(m) summary(m)$r.sq, numeric(1))
lag.aic <- vapply(lag.gams, function(m) AIC(m), numeric(1))
lag.df <- vapply(lag.gams, function(m) m$df.residual, numeric(1))
# each model has a single smooth term, so a single p-value
lag.p <- vapply(lag.gams, function(m) as.numeric(summary(m)$s.pv), numeric(1))

# round to 3 decimals and print at least 2; AIC and df go to whole numbers
r2.lags <- format(round(lag.r2, digits = 3), nsmall = 2)
aic.lags <- round(lag.aic)
df.lags <- round(lag.df)
p.lags <- format(round(lag.p, digits = 3), nsmall = 2)

colnames.lags <- c('1-2 Years','1-3 Years','1-4 Years','1-5 Years')
rownames.lags <- c('Model formula','p-value','R$^2$',"AIC","Degrees of freedom")

# one column per model, one row per reported quantity
lags.tbl <- data.frame(
  rbind(formulas.lags, p.lags, r2.lags, aic.lags, df.lags),
  row.names = rownames.lags
)
colnames(lags.tbl) <- colnames.lags

lags.caption <- "Models of biomass log ratio (LR) response to lagged MHW cumulative intensity in °C-days (MHW CInt). MHW CInt was calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. These are generalized additive models (GAMs) that use a smoothed predictor matrix containing lagged MHW data for up to five years into the past. Results from the GAM for 0-1 years in the past, i.e., the twelve months preceding a survey, can be found in Supp. Tab. 2. * denotes variables that were scaled and centered within surveys and s() denotes a GAM smoother."

# escape = FALSE so the LaTeX in the formula and R^2 labels is rendered
kbl(lags.tbl, booktabs = TRUE, caption = lags.caption, escape = FALSE) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width = TRUE)
```

\clearpage

```{r tbl-models-autocorr}
# Gompertz-style model: log biomass ratio as a function of survey identity,
# scaled MHW cumulative intensity, and a survey-specific slope on the previous
# year's log biomass, with dispersion allowed to differ among surveys.
gompertz.glmm <- glmmTMB(wt_mt_log ~ factor(survey) + anom_sev_scale + wt_mt_lag:factor(survey), data = modeldat, dispformula = ~survey, family = gaussian())
# intercept-only comparison model with the same dispersion structure
# NOTE(review): wt_mt_lag is missing for the first year of each survey, so the
# full model is presumably fitted to fewer rows than this null model; if so
# their AICs are not directly comparable -- confirm, and consider fitting both
# to the same rows.
gompertz.null <- glmmTMB(wt_mt_log ~ 1, data = modeldat, dispformula = ~survey, family = gaussian())

# The displayed formula now matches the fitted model: the interaction is
# between lagged log biomass and survey, and there is no MHW-by-survey term.
formulas.gompertz <- c('Biomass LR $\\sim$ 1','Biomass LR $\\sim$ MHW CInt* + Survey + Lag(Log Biomass):Survey')

# conditional-model coefficient tables
gompertz.summary <- summary(gompertz.glmm)$coefficients$cond
gompertz.null.summary <- summary(gompertz.null)$coefficients$cond

# round to 3 decimals and print at least 2
fmt3 <- function(x) format(round(x, digits = 3), nsmall = 2)
# "estimate ± standard error" for one named row of a coefficient table
fmt_est <- function(coef_tbl, term) {
  paste0(fmt3(coef_tbl[term, "Estimate"]), " ± ", fmt3(coef_tbl[term, "Std. Error"]))
}

int.gompertz <- c(
  fmt_est(gompertz.null.summary, "(Intercept)"),
  fmt_est(gompertz.summary, "(Intercept)")
)

coef.gompertz <- c('NA', fmt_est(gompertz.summary, "anom_sev_scale"))

p.gompertz <- c('NA', fmt3(gompertz.summary["anom_sev_scale", "Pr(>|z|)"]))

# see performance() documentation -- xu is one of the simpler and more flexible R2 metrics
r2.gompertz <- c(fmt3(unname(performance::r2_xu(gompertz.null))),
                 fmt3(unname(performance::r2_xu(gompertz.glmm))))

aic.gompertz <- round(c(AIC(gompertz.null), AIC(gompertz.glmm)))

# residual degrees of freedom
df.gompertz <- c(unname(summary(gompertz.null)$AICtab["df.resid"]),
                 unname(summary(gompertz.glmm)$AICtab["df.resid"]))


colnames.gompertz <- c('Null model','Gompertz GLM')
rownames.gompertz <- c('Model formula','Intercept', 'MHW coefficient',"Coefficient p-value",'R$^2$',"AIC","Degrees of freedom")

# one column per model, one row per reported quantity
gompertz.tbl <- data.frame(rbind(formulas.gompertz, int.gompertz, coef.gompertz, p.gompertz, r2.gompertz, aic.gompertz, df.gompertz), row.names=rownames.gompertz) 
colnames(gompertz.tbl) <- colnames.gompertz

# escape = FALSE so the LaTeX in the formula and R^2 labels is rendered
kbl(gompertz.tbl, booktabs = TRUE, caption = "Null (intercept-only) model and generalized linear model (GLM) of biomass log ratio (LR) as a function of MHW cumulative intensity in °C-days (MHW CInt) and biomass from the previous time step. * denotes variables that were scaled and centered within surveys and : denotes an interaction term between two variables. This Gompertz model accounts for autoregressive properties of the biomass time-series and tests whether the magnitude of biomass in any given year affected the biomass LR in the following year. MHW Cint was calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Rather than centering and scaling biomass variables, to account for variability and heteroskedasticity among surveys, we included survey identity as a fixed effect and allowed dispersion to vary among surveys.",
    escape = FALSE) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width=TRUE)

```

\clearpage

```{r tbl-models-lat}
# Does latitude modify the biomass response to MHWs? The "null" model here has
# latitude only; the full model adds MHW cumulative intensity and its
# interaction with latitude.
null.lat <- lm(wt_mt_log_scale ~ med_lat_scale, data = modeldat)
lm.lat <- lm(wt_mt_log_scale ~ anom_sev_scale + med_lat_scale + med_lat_scale:anom_sev_scale, data = modeldat)

formulas.lat <- c(
  'Biomass LR* $\\sim$ Latitude**',
  'Biomass LR* $\\sim$ Latitude** + MHW CInt* + Latitude**:MHW CInt*'
)

# Coefficient tables. Rows of the null model: 1 = intercept, 2 = latitude.
# Rows of the full model: 1 = intercept, 2 = MHW, 3 = latitude,
# 4 = interaction.
null.lat.coefs <- coef(summary(null.lat))
lm.lat.coefs <- coef(summary(lm.lat))

# round to 3 decimals and print at least 2
fmt3 <- function(x) format(round(x, digits = 3), nsmall = 2)
# "estimate ± standard error" from columns 1 and 2 of one row
fmt_est <- function(coef_tbl, row) {
  paste0(fmt3(coef_tbl[row, 1]), " ± ", fmt3(coef_tbl[row, 2]))
}

int.lat <- c(fmt_est(null.lat.coefs, 1), fmt_est(lm.lat.coefs, 1))
mhw.coef.lat <- c('NA', fmt_est(lm.lat.coefs, 2))
lat.coef.lat <- c(fmt_est(null.lat.coefs, 2), fmt_est(lm.lat.coefs, 3))
int.coef.lat <- c('NA', fmt_est(lm.lat.coefs, 4))

# p-values (column 4) for the same terms
p.lat.mhw <- c('NA', fmt3(lm.lat.coefs[2, 4]))
p.lat.lat <- c(fmt3(null.lat.coefs[2, 4]), fmt3(lm.lat.coefs[3, 4]))
p.lat.int <- c('NA', fmt3(lm.lat.coefs[4, 4]))

r2.lat <- c(
  fmt3(summary(null.lat)$r.squared),
  fmt3(summary(lm.lat)$r.squared)
)
aic.lat <- round(c(AIC(null.lat), AIC(lm.lat)))
# residual degrees of freedom
df.lat <- round(c(summary(null.lat)$df[2], summary(lm.lat)$df[2]))

colnames.lat <- c('Null model','Linear model')
rownames.lat <- c('Model formula','Intercept', 'MHW coefficient','Latitude coefficient','Interaction coefficient', "MHW coefficient p-value",'Latitude p-value','Interaction p-value','R$^2$',"AIC","Degrees of freedom")

# one column per model, one row per reported quantity
lat.tbl <- data.frame(
  rbind(
    formulas.lat, int.lat,
    mhw.coef.lat, lat.coef.lat, int.coef.lat,
    p.lat.mhw, p.lat.lat, p.lat.int,
    r2.lat, aic.lat, df.lat
  ),
  row.names = rownames.lat
)
colnames(lat.tbl) <- colnames.lat

lat.caption <- "Models of biomass log ratio (LR) response to latitude only (null model) or MHW cumulative intensity in °C-days (MHW CInt) and latitude. Latitude was calculated as the median latitude of each survey. * denotes variables that were scaled and centered within surveys, ** denotes variables that were scaled and centered across surveys, and : denotes an interaction term between two variables. MHW CInt was calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text."

# escape = FALSE so the LaTeX in the formula and R^2 labels is rendered
kbl(lat.tbl, booktabs = TRUE, caption = lat.caption, escape = FALSE) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width = TRUE)
```

\clearpage

```{r tbl-models-depth}
# Depth models: an intercept-only baseline and a linear response to scaled MHW
# cumulative intensity, both fitted to `modeldat`.
null.depth <- lm(depth_wt_scale ~ 1, data = modeldat)
lm.depth <- lm(depth_wt_scale ~ anom_sev_scale, data = modeldat)

# Coefficient tables; columns 1, 2 and 4 are estimate, SE and p-value.
depth.null.coefs <- coef(summary(null.depth))
depth.lm.coefs <- coef(summary(lm.depth))

formulas.depth <- c("Depth* $\\sim$ 1", "Depth* $\\sim$ MHW CInt*")

# Intercepts as "estimate ± SE"; values are rounded to three decimals.
int.depth <- c(
  paste0(
    format(round(depth.null.coefs[1, 1], digits = 3), nsmall = 2),
    " ± ",
    format(round(depth.null.coefs[1, 2], digits = 3), nsmall = 2)
  ),
  paste0(
    format(round(depth.lm.coefs[1, 1], digits = 3), nsmall = 2),
    " ± ",
    format(round(depth.lm.coefs[1, 2], digits = 3), nsmall = 2)
  )
)

# MHW slope and its p-value; the null model has no such term, hence 'NA'.
coef.depth <- c(
  "NA",
  paste0(
    format(round(depth.lm.coefs[2, 1], digits = 3), nsmall = 2),
    " ± ",
    format(round(depth.lm.coefs[2, 2], digits = 3), nsmall = 2)
  )
)
p.depth <- c(
  "NA",
  format(round(depth.lm.coefs[2, 4], digits = 3), nsmall = 2)
)

# Fit statistics: R², AIC (rounded to whole numbers) and residual df.
r2.depth <- c(
  format(round(summary(null.depth)$r.squared, digits = 3), nsmall = 2),
  format(round(summary(lm.depth)$r.squared, digits = 3), nsmall = 2)
)
aic.depth <- round(c(AIC(null.depth), AIC(lm.depth)))
df.depth <- round(c(summary(null.depth)$df[2], summary(lm.depth)$df[2]))

# Assemble the table: one row per statistic, one column per model.
colnames.depth <- c("Null model", "Linear model")
rownames.depth <- c(
  "Model formula", "Intercept", "MHW coefficient", "Coefficient p-value",
  "R$^2$", "AIC", "Degrees of freedom"
)

depth.tbl <- data.frame(
  rbind(
    formulas.depth, int.depth, coef.depth, p.depth,
    r2.depth, aic.depth, df.depth
  ),
  row.names = rownames.depth
)
colnames(depth.tbl) <- colnames.depth

# escape = FALSE lets the LaTeX in the cells (e.g. $\sim$) render as math.
kbl(
  depth.tbl,
  booktabs = TRUE,
  caption = "Null (intercept-only) model and model of depth response to MHW cumulative intensity in °C-days (MHW CInt). Depth was calculated as the  weighted mean depth of the fish assemblage every year. * denotes variables that were scaled and centered within surveys. We calculated MHW CInt from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text.",
  escape = FALSE
) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width = TRUE)

```

\clearpage

```{r tbl-models-catch}
# Catch models fitted to `catchdat`: a catch-only baseline and a model with
# MHW cumulative intensity, catch, and their interaction.
null.catch <- lm(wt_mt_log_scale ~ catch_3yr_scale, data = catchdat)
lm.catch <- lm(
  wt_mt_log_scale ~ anom_sev_scale + catch_3yr_scale +
    catch_3yr_scale:anom_sev_scale,
  data = catchdat
)

# Coefficient tables; columns 1, 2 and 4 are estimate, SE and p-value. Rows
# are addressed by position, so the indices follow the formula term order:
# null model row 2 = catch; full model rows 2-4 = MHW, catch, interaction.
catch.null.coefs <- coef(summary(null.catch))
catch.full.coefs <- coef(summary(lm.catch))

formulas.catch <- c(
  "Biomass LR* $\\sim$ Catch*",
  "Biomass LR* $\\sim$ MHW CInt* + Catch* + MHW CInt*:Catch*"
)

# Coefficients as "estimate ± SE", rounded to three decimals. Terms missing
# from the null model are filled with the string 'NA'.
int.catch <- c(
  paste0(
    format(round(catch.null.coefs[1, 1], digits = 3), nsmall = 2),
    " ± ",
    format(round(catch.null.coefs[1, 2], digits = 3), nsmall = 2)
  ),
  paste0(
    format(round(catch.full.coefs[1, 1], digits = 3), nsmall = 2),
    " ± ",
    format(round(catch.full.coefs[1, 2], digits = 3), nsmall = 2)
  )
)
mhw.coef.catch <- c(
  "NA",
  paste0(
    format(round(catch.full.coefs[2, 1], digits = 3), nsmall = 2),
    " ± ",
    format(round(catch.full.coefs[2, 2], digits = 3), nsmall = 2)
  )
)
catch.coef.catch <- c(
  paste0(
    format(round(catch.null.coefs[2, 1], digits = 3), nsmall = 2),
    " ± ",
    format(round(catch.null.coefs[2, 2], digits = 3), nsmall = 2)
  ),
  paste0(
    format(round(catch.full.coefs[3, 1], digits = 3), nsmall = 2),
    " ± ",
    format(round(catch.full.coefs[3, 2], digits = 3), nsmall = 2)
  )
)
int.coef.catch <- c(
  "NA",
  paste0(
    format(round(catch.full.coefs[4, 1], digits = 3), nsmall = 2),
    " ± ",
    format(round(catch.full.coefs[4, 2], digits = 3), nsmall = 2)
  )
)

# p-values for each term.
p.catch.mhw <- c(
  "NA",
  format(round(catch.full.coefs[2, 4], digits = 3), nsmall = 2)
)
p.catch.catch <- c(
  format(round(catch.null.coefs[2, 4], digits = 3), nsmall = 2),
  format(round(catch.full.coefs[3, 4], digits = 3), nsmall = 2)
)
p.catch.int <- c(
  "NA",
  format(round(catch.full.coefs[4, 4], digits = 3), nsmall = 2)
)

# Fit statistics: R², AIC (rounded to whole numbers) and residual df.
r2.catch <- c(
  format(round(summary(null.catch)$r.squared, digits = 3), nsmall = 2),
  format(round(summary(lm.catch)$r.squared, digits = 3), nsmall = 2)
)
aic.catch <- round(c(AIC(null.catch), AIC(lm.catch)))
df.catch <- round(c(summary(null.catch)$df[2], summary(lm.catch)$df[2]))

# Assemble the table: one row per statistic, one column per model.
colnames.catch <- c("Null model", "Linear model")
rownames.catch <- c(
  "Model formula", "Intercept", "MHW coefficient", "Catch coefficient",
  "Interaction coefficient", "MHW coefficient p-value",
  "Catch coefficient p-value", "Interaction p-value", "R$^2$", "AIC",
  "Degrees of freedom"
)

catch.tbl <- data.frame(
  rbind(
    formulas.catch, int.catch, mhw.coef.catch, catch.coef.catch,
    int.coef.catch, p.catch.mhw, p.catch.catch, p.catch.int,
    r2.catch, aic.catch, df.catch
  ),
  row.names = rownames.catch
)
colnames(catch.tbl) <- colnames.catch

# escape = FALSE lets the LaTeX in the cells (e.g. $\sim$) render as math.
kbl(
  catch.tbl,
  booktabs = TRUE,
  caption = "Models of biomass log ratio (LR) response to catch only (null model) or MHW cumulative intensity in °C-days (MHW CInt) and catch. We matched survey footprints to Marine Ecoregions (MEs) and extracted catch data from the Sea Around Us database (see Methods). Surveys from the English Channel and France did not correspond well to ME boundaries and were omitted. Because catch was available by calendar year and surveys occur midyear, we compared biomass change to the mean of the last three years of catch (i.e., biomass change in a 2010 survey was predicted by mean catch in 2008, 2009, and 2010). * denotes variables that were scaled and centered within surveys and : denotes an interaction term between two variables.  MHW CInt was calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text.",
  escape = FALSE
) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width = TRUE)
```

\clearpage

```{r tbl-models-cti}
# Five candidate models of CTI change (cti_diff_scale) fitted to `modeldat`:
# intercept-only, linear in MHW cumulative intensity, linear with survey as a
# fixed effect, a GAM smoother, and a GAM with survey as a random effect.
null.cti <- lm(cti_diff_scale ~ 1, data = modeldat)
lm.cti <- lm(cti_diff_scale ~ anom_sev_scale, data = modeldat)
lm.fixed.cti <- lm(cti_diff_scale ~ anom_sev_scale + factor(survey), data = modeldat)
gam.cti <- gam(cti_diff_scale ~ s(anom_sev_scale), data = modeldat)
# The random-effect smoother is given `survey` as a factor, converted here.
gam.re.cti <- gam(
  cti_diff_scale ~ s(anom_sev_scale) + s(survey, bs = "re"),
  data = modeldat %>% mutate(survey = as.factor(survey))
)

formulas.cti <- c(
  'CTI Diff* $\\sim$ 1',
  'CTI Diff* $\\sim$ MHW CInt*',
  'CTI Diff* $\\sim$ MHW CInt* + Survey[fixed]',
  'CTI Diff* $\\sim$ s(MHW CInt*)',
  'CTI Diff* $\\sim$ s(MHW CInt*) + Survey[random]'
)

# Intercepts as "estimate ± SE", rounded to three decimals. For the GAMs the
# parametric intercept is read from the `p.table` of the model summary.
int.cti <- c(
  paste0(format(round(coef(summary(null.cti))[1, 1], digits = 3), nsmall = 2), " ± ",
         format(round(coef(summary(null.cti))[1, 2], digits = 3), nsmall = 2)),
  paste0(format(round(coef(summary(lm.cti))[1, 1], digits = 3), nsmall = 2), " ± ",
         format(round(coef(summary(lm.cti))[1, 2], digits = 3), nsmall = 2)),
  paste0(format(round(coef(summary(lm.fixed.cti))[1, 1], digits = 3), nsmall = 2), " ± ",
         format(round(coef(summary(lm.fixed.cti))[1, 2], digits = 3), nsmall = 2)),
  paste0(format(round(summary(gam.cti)$p.table[1, 1], digits = 3), nsmall = 2), " ± ",
         format(round(summary(gam.cti)$p.table[1, 2], digits = 3), nsmall = 2)),
  paste0(format(round(summary(gam.re.cti)$p.table[1, 1], digits = 3), nsmall = 2), " ± ",
         format(round(summary(gam.re.cti)$p.table[1, 2], digits = 3), nsmall = 2))
)

# Linear MHW slope as "estimate ± SE". The null model and the GAMs have no
# single linear MHW coefficient, so those cells hold the string 'NA'.
coef.cti <- c(
  'NA',
  paste0(format(round(coef(summary(lm.cti))[2, 1], digits = 3), nsmall = 2), " ± ",
         format(round(coef(summary(lm.cti))[2, 2], digits = 3), nsmall = 2)),
  # Fix: `digits = 3` belongs to round() and `nsmall = 2` to format(). The
  # misplaced parenthesis used to round the SE to an integer and hand
  # `nsmall = 2` to paste0(), which appended a literal "2" to the cell.
  paste0(format(round(coef(summary(lm.fixed.cti))[2, 1], digits = 3), nsmall = 2), " ± ",
         format(round(coef(summary(lm.fixed.cti))[2, 2], digits = 3), nsmall = 2)),
  "NA",
  "NA"
)

# p-value of the MHW term: the slope's p-value for the linear models and the
# smooth-term p-value (`s.pv`) for the GAMs. The `[1]` picks the first entry,
# since the random-effect GAM has two smooth terms.
p.cti <- c(
  'NA',
  format(round(coef(summary(lm.cti))[2, 4], digits = 3), nsmall = 2),
  format(round(coef(summary(lm.fixed.cti))[2, 4], digits = 3), nsmall = 2),
  format(round(summary(gam.cti)$s.pv, digits = 3), nsmall = 2),
  format(round(summary(gam.re.cti)$s.pv[1], digits = 3), nsmall = 2)
)

# Fit statistics. Each vector is formatted in a single format() call, so all
# five cells of a row share a common number of decimals.
r2.cti <- c(format(round(c(
  summary(null.cti)$r.squared,
  summary(lm.cti)$r.squared,
  summary(lm.fixed.cti)$r.squared,
  summary(gam.cti)$r.sq,
  summary(gam.re.cti)$r.sq
), digits = 3), nsmall = 2))
aic.cti <- format(round(c(
  AIC(null.cti),
  AIC(lm.cti),
  AIC(lm.fixed.cti),
  AIC(gam.cti),
  AIC(gam.re.cti)
), digits = 3), nsmall = 0)
df.cti <- format(round(c(
  summary(null.cti)$df[2],
  summary(lm.cti)$df[2],
  summary(lm.fixed.cti)$df[2],
  gam.cti$df.residual,
  gam.re.cti$df.residual
), digits = 3), nsmall = 0)

# Assemble the table: one row per statistic, one column per model.
colnames.cti <- c('Null', 'LM', 'LM Survey', 'GAM', 'GAM Survey')
rownames.cti <- c('Model formula', 'Intercept', 'MHW coefficient', "Coefficient p-value", 'R$^2$', "AIC", "Degrees of freedom")

cti.tbl <- data.frame(
  rbind(formulas.cti, int.cti, coef.cti, p.cti, r2.cti, aic.cti, df.cti),
  row.names = rownames.cti
)
colnames(cti.tbl) <- colnames.cti

# escape = FALSE lets the LaTeX in the cells (e.g. $\sim$) render as math.
kbl(cti.tbl, booktabs = TRUE, caption = "Models of Community Temperature Index change (measured as year-over-year difference values, CTI Diff) and MHW cumulative intensity in °C-days (MHW CInt). * denotes variables that were scaled and centered within surveys and s() denotes a GAM smoother. MHW CInt was calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Model names correspond to: null (intercept-only) model, linear model, linear model including survey as a fixed effect, generalized additive model (GAM), and GAM including survey as a random effect.",
    escape = FALSE) %>%
  kable_styling(font_size = 8, latex_options = c("striped"), full_width = TRUE)

```

```{r fig-mhw-point-by-reg, fig.cap="Alternate version of Fig. 2 from the main text, showing results by region. MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Points represent log ratios of mean biomass in a survey from one year to the next. The fitted lines are linear regressions. The shaded areas are 95% confidence intervals. Survey names are listed in Supp. Tab. 1.", fig.width=9, fig.height=7}
# Biomass log ratio against MHW cumulative intensity, one panel per `abbr`
# value, each with its own linear fit and R² label.
gg_mhw_biomass_point <- ggplot(
  data = survey_summary %>%
    inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr") %>%
    # No `by` is given, so this joins on whatever columns the tables share.
    left_join(survey_names),
  mapping = aes(x = anom_sev, y = wt_mt_log)
) +
  geom_point() +
  geom_smooth(method = "lm", color = "gray35") +
  facet_wrap(~abbr, ncol = 5) +
  theme_bw() +
  coord_cartesian(clip = "off") +
  labs(
    x = "MHW cumulative intensity (°C-days)",
    y = "Biomass log ratio"
  ) +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 25,
    label.y = -1
  ) +
  NULL
# The plot is only written to disk here; uncomment to render it in the document.
#gg_mhw_biomass_point

ggsave(
  filename = here("extended", "mainplot_by_reg.jpg"),
  plot = gg_mhw_biomass_point,
  dpi = 300,
  width = 10,
  height = 7,
  scale = 0.9
)

```

```{r fig-mhw-point-by-cpue-method, fig.cap="Alternate version of Fig. 2 from the main text using different metrics of biomass change: mean abundance, mean biomass (used in the main text), median abundance, and median biomass. MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Points represent log ratios of each metric in a survey from one year to the next. The fitted lines are linear regressions. The shaded areas are 95% confidence intervals. The Northeast US survey was omitted because it did not have abundance data recorded.", fig.width=9, fig.height=7}
# Compare four log-ratio metrics (mean/median abundance and biomass) against
# MHW cumulative intensity, one facet per metric. NEUS is excluded.
gg_mhw_cpue_metric_point <- ggplot(
  data = survey_summary %>%
    filter(survey != 'NEUS') %>%
    # The median columns arrive as character (cause unknown); coerce them so
    # they can be pivoted together with the numeric mean columns.
    mutate_at(c("num_log_med", "wt_mt_log_med"), as.numeric) %>%
    inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr") %>%
    pivot_longer(
      cols = c(num_log, num_log_med, wt_mt_log, wt_mt_log_med),
      names_to = "method",
      values_to = "lr"
    ) %>%
    # Replace column names with the facet titles.
    mutate(
      method = recode(
        method,
        num_log = "Mean Abundance",
        num_log_med = "Median Abundance",
        wt_mt_log = "Mean Biomass",
        wt_mt_log_med = "Median Biomass"
      )
    ),
  mapping = aes(x = anom_sev, y = lr)
) +
  geom_point() +
  geom_smooth(method = "lm", color = "gray35") +
  theme_bw() +
  coord_cartesian(clip = "off") +
  facet_wrap(~method, ncol = 2) +
  labs(x = "MHW cumulative intensity (°C-days)", y = "Log ratio") +
  # Dashed reference line at zero (no change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  stat_regline_equation(aes(label = ..rr.label..), label.x = 50) +
  NULL
#gg_mhw_cpue_metric_point
ggsave(
  filename = here("extended", "fish_metric.jpg"),
  plot = gg_mhw_cpue_metric_point,
  dpi = 300,
  width = 6,
  height = 7,
  scale = 0.9
)
```

```{r fig-mhw-incidence, fig.cap="MHW cumulative intensity (total anomaly in °C-days) in each survey region with and without detrending the temperature data to remove the signal of secular warming. The main text results are detrended. Here, we plot MHW cumulative intensity based on all SBT anomalies from GLORYS, rather than applying the five-day threshold that was used in the main text, to more clearly show the differences between the two methods."}

# Stack the detrended and non-detrended GLORYS summaries, tagging each row
# with its source so the two series can be coloured separately.
tmpdat <- bind_rows(
  mhw_summary_glorys_d %>% mutate(group = "Detrended"),
  mhw_summary_glorys_nod %>% mutate(group = "Not Detrended")
)

# Yearly MHW cumulative intensity per region, one line per detrending choice.
gg_mhw_incidence <- ggplot(
  data = survey_summary %>%
    inner_join(tmpdat, by = "ref_yr") %>%
    # No `by` is given, so this joins on whatever columns the tables share.
    left_join(survey_names),
  mapping = aes(x = year, y = anom_sev, group = group, color = group)
) +
  scale_color_manual(values = cbpal6[3:4]) +
  # Small horizontal dodge keeps overlapping lines distinguishable.
  geom_line(position = position_dodge(width = 0.15)) +
  facet_wrap(~abbr, scales = "free_y", ncol = 4) +
  theme_bw() +
  labs(x = "Year", y = "MHW cumulative intensity") +
  theme(
    legend.title = element_blank(),
    legend.position = "bottom"
  )

#gg_mhw_incidence
ggsave(
  filename = here("extended", "detrending_by_reg.jpg"),
  plot = gg_mhw_incidence,
  dpi = 300,
  width = 9,
  height = 7,
  scale = 0.9
)
```

```{r fig-mhw-detrending, fig.cap="Alternate version of Fig. 2 from the main text, showing biomass change (log ratio) and MHW cumulative intensity (total anomaly in °C-days, using GLORYS data with the five-day MHW threshold) calculated from non-detrended data. The fitted lines are linear regressions. The shaded areas are 95% confidence intervals."}

# Biomass log ratio against MHW cumulative intensity computed from the
# non-detrended GLORYS data (five-day threshold).
# NOTE(review): scale limits censor out-of-range points before the lm fit, so
# the line and R² exclude any |log ratio| > 1.5; coord_cartesian(ylim = ...)
# would zoom without dropping data. Confirm this is intended.
gg_mhw_detrending <- ggplot(
  data = survey_summary %>%
    inner_join(mhw_summary_glorys_nod_5_day, by = "ref_yr"),
  mapping = aes(x = anom_sev, y = wt_mt_log)
) +
  scale_y_continuous(limits = c(-1.5, 1.5)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm", color = "gray35") +
  labs(
    x = "Non-detrended MHW cumulative intensity (°C-days)",
    y = "Biomass log ratio"
  ) +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 25,
    label.y = 1.25
  ) +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black")

#gg_mhw_detrending
ggsave(
  filename = here("extended", "detrending_scatter.jpg"),
  plot = gg_mhw_detrending,
  dpi = 300,
  width = 4,
  height = 4
)
```

```{r fig-mhw-summer, fig.cap="Alternate version of Fig. 2 from the main text, showing biomass change (log ratio) and MHW cumulative intensity (total anomaly in °C-days, using GLORYS data without the five-day MHW threshold) based on only summer temperature anomalies (June, July, and August in the Northern Hemisphere). The fitted lines are linear regressions. The shaded areas are 95% confidence intervals."}

# Biomass log ratio against MHW cumulative intensity from the summer-only
# GLORYS summary.
# NOTE(review): scale limits censor out-of-range points before the lm fit, so
# the line and R² exclude any |log ratio| > 1.5; coord_cartesian(ylim = ...)
# would zoom without dropping data. Confirm this is intended.
gg_mhw_summer <- ggplot(
  data = survey_summary %>%
    inner_join(mhw_summary_glorys_d_any_summer, by = "ref_yr"),
  mapping = aes(x = anom_sev, y = wt_mt_log)
) +
  geom_point() +
  scale_y_continuous(limits = c(-1.5, 1.5)) +
  theme_bw() +
  geom_smooth(method = "lm", color = "gray35") +
  labs(
    x = "Summer-only MHW cumulative intensity (°C-days)",
    y = "Biomass log ratio"
  ) +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 10,
    label.y = 1.25
  ) +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black")

ggsave(
  filename = here("extended", "summer_scatter.jpg"),
  plot = gg_mhw_summer,
  dpi = 300,
  width = 4,
  height = 4
)

#gg_mhw_summer
```

```{r fig-mhw-sst, fig.cap="Alternate version of Fig. 2 from the main text, showing biomass change (log ratio) and MHW cumulative intensity (total anomaly in °C-days, using detrended OISST sea surface temperature data with the five-day MHW threshold). The fitted lines are linear regressions. The shaded areas are 95% confidence intervals."}

# Biomass log ratio against MHW cumulative intensity from the detrended OISST
# summary (five-day threshold).
# NOTE(review): scale limits censor out-of-range points before the lm fit, so
# the line and R² exclude any |log ratio| > 1.5; coord_cartesian(ylim = ...)
# would zoom without dropping data. Confirm this is intended.
gg_mhw_sst <- ggplot(
  data = survey_summary %>%
    inner_join(mhw_summary_oisst_d_5_day, by = "ref_yr"),
  mapping = aes(x = anom_sev, y = wt_mt_log)
) +
  geom_point() +
  scale_y_continuous(limits = c(-1.5, 1.5)) +
  theme_bw() +
  geom_smooth(method = "lm", color = "gray35") +
  labs(
    x = "SST MHW cumulative intensity (°C-days)",
    y = "Biomass log ratio"
  ) +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 75,
    label.y = 1.25
  ) +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black")

#gg_mhw_sst
ggsave(
  filename = here("extended", "oisst_scatter.jpg"),
  plot = gg_mhw_sst,
  dpi = 300,
  width = 4,
  height = 4
)
```

```{r fig-mhw-dhd, fig.cap="Biomass change (log ratio) and degree heating days (number of days in the year preceding each survey that had anomalies of 1 °C or greater above the average temperatures in the hottest summer month; see Methods). The fitted lines are linear regressions. The shaded areas are 95% confidence intervals."}

# Biomass log ratio against degree heating days (`dhd_days`) from the GLORYS
# DHD summary.
# NOTE(review): scale limits censor out-of-range points before the lm fit, so
# the line and R² exclude any |log ratio| > 1.5; coord_cartesian(ylim = ...)
# would zoom without dropping data. Confirm this is intended.
gg_mhw_dhd <- ggplot(
  data = survey_summary %>%
    inner_join(mhw_summary_dhd_glorys, by = "ref_yr"),
  mapping = aes(x = dhd_days, y = wt_mt_log)
) +
  scale_y_continuous(limits = c(-1.5, 1.5)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm", color = "gray35") +
  labs(x = "Degree heating days", y = "Biomass log ratio") +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 75,
    label.y = 1.25
  ) +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black")


#gg_mhw_dhd
ggsave(
  filename = here("extended", "dhd_scatter.jpg"),
  plot = gg_mhw_dhd,
  dpi = 300,
  width = 4,
  height = 4
)
```


```{r fig-mhw-metric, fig.cap="Biomass change (log ratio) and three alternative metrics of MHW impacts: cumulative intensity calculated from the detrended GLORYS sea bottom temperature data with no five-day cutoff duration (°C-days), and duration (total number of MHW-days) and intensity (cumulative intensity divided by duration) calculated from the detrended GLORYS sea bottom temperature data with a minimum MHW duration of five days. The fitted lines are linear regressions. The shaded areas are 95% confidence intervals." }
# Earlier single-figure (faceted) version, kept for reference; it is replaced
# by the three separate plots below.
# gg_mhw_metric <- survey_summary %>% 
#   inner_join(mhw_summary_glorys_d_5_day, by="ref_yr") %>% 
#   select(ref_yr, anom_days, anom_int, wt_mt_log) %>% 
#   inner_join(mhw_summary_glorys_d %>% select(ref_yr, anom_sev)) %>% 
#   pivot_longer(cols=c(anom_days, anom_int, anom_sev), values_to = "value", names_to = "metric") %>% 
#   mutate(metric = recode(metric, anom_days= 'Duration (days)',anom_int= 'Intensity (°C)', anom_sev = ' Cumulative intensity, no 5-day cutoff (°C-days)')) %>% 
#   ggplot(aes(x=value, y=wt_mt_log)) +
#   geom_point() + 
#   geom_smooth(method="lm", color = "gray35") +
#   facet_wrap(~metric, scales="free_x", labeller = labeller(metric = label_wrap_gen(25))) +
#   stat_regline_equation(aes(label=..rr.label..), label.y=-2) +
#   labs(x="MHW metric",y="Biomass log ratio") +
#   theme_bw() + 
#   NULL
#gg_mhw_metric

# NOTE(review): in all three plots below, scale limits censor out-of-range
# points before the lm fit, so each line and R² exclude any |log ratio| > 1.5;
# coord_cartesian(ylim = ...) would zoom without dropping data. Confirm this
# is intended.

# Alternative metric 1: MHW duration (`anom_days`), five-day threshold data.
gg_mhw_metric_duration <- ggplot(
  data = survey_summary %>%
    inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr") %>%
    select(ref_yr, anom_days, wt_mt_log),
  mapping = aes(x = anom_days, y = wt_mt_log)
) +
  scale_y_continuous(limits = c(-1.5, 1.5)) +
  geom_point() +
  geom_smooth(method = "lm", color = "gray35") +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 50,
    label.y = 1.25
  ) +
  labs(x = "MHW duration (days)", y = "Biomass log ratio") +
  theme_bw() +
  NULL

ggsave(
  filename = here("extended", "duration_scatter.jpg"),
  plot = gg_mhw_metric_duration,
  dpi = 300,
  height = 4,
  width = 4
)


# Alternative metric 2: MHW intensity (`anom_int`), five-day threshold data.
gg_mhw_metric_intensity <- ggplot(
  data = survey_summary %>%
    inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr") %>%
    select(ref_yr, anom_int, wt_mt_log),
  mapping = aes(x = anom_int, y = wt_mt_log)
) +
  scale_y_continuous(limits = c(-1.5, 1.5)) +
  geom_point() +
  geom_smooth(method = "lm", color = "gray35") +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 0.3,
    label.y = 1.25
  ) +
  labs(x = "MHW intensity (°C)", y = "Biomass log ratio") +
  theme_bw() +
  NULL

ggsave(
  filename = here("extended", "intensity_scatter.jpg"),
  plot = gg_mhw_metric_intensity,
  dpi = 300,
  height = 4,
  width = 4
)

# Alternative metric 3: cumulative intensity from the summary without the
# five-day cutoff. The join has no `by`, so it uses the shared column names.
gg_mhw_metric_no5 <- ggplot(
  data = survey_summary %>%
    inner_join(mhw_summary_glorys_d %>% select(ref_yr, anom_sev)),
  mapping = aes(x = anom_sev, y = wt_mt_log)
) +
  geom_point() +
  scale_y_continuous(limits = c(-1.5, 1.5)) +
  geom_smooth(method = "lm", color = "gray35") +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 50,
    label.y = 1.25
  ) +
  labs(
    x = "Cumulative intensity, no 5-day cutoff (°C-days)",
    y = "Biomass log ratio"
  ) +
  theme_bw() +
  NULL

ggsave(
  filename = here("extended", "no5day_scatter.jpg"),
  plot = gg_mhw_metric_no5,
  dpi = 300,
  width = 4,
  height = 4
)
```

```{r fig-mhw-source, fig.cap="Daily 95th percentile anomalies in the two MHW data sources: sea surface temperature from OISST and sea bottom temperature from GLORYS (both detrended). To simplify comparison we plot all anomalies, not just those MHWs that exceeded a five-day threshold. Note that the OISST time-series began in 1982 and GLORYS began in 1993. Region names are listed in Supp. Tab. 1." }

# Daily anomalies from the two temperature sources, one panel per `abbr`
# value, restricted to dates up to the end of 2019.
gg_mhw_source <- rawsourcedat %>%
  filter(date <= "2019-12-31") %>%
  ggplot() +
  # Jittered, small, semi-transparent points reduce overplotting.
  geom_point(
    aes(x = date, y = anom, color = source, fill = source),
    position = "jitter",
    alpha = 0.25,
    size = 0.5
  ) +
  theme_classic() +
  labs(
    x = "Date",
    y = "Detrended temperature anomaly",
    fill = "Dataset",
    color = "Dataset"
  ) +
  facet_wrap(~abbr) +
  scale_color_manual(values = cbpal6[3:4]) +
  scale_fill_manual(values = cbpal6[3:4]) +
  theme(legend.position = "bottom")

#gg_mhw_source
ggsave(
  filename = here("extended", "oisst_vs_glorys_by_reg.jpg"),
  plot = gg_mhw_source,
  dpi = 300,
  width = 10,
  height = 7,
  scale = 0.9
)
```

```{r fig-abs-biomass, fig.cap="MHW cumulative intensity (total anomaly in °C-days) and absolute value of biomass log ratio. MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. The fitted line is a linear regression. The shaded area is its 95% confidence interval."}

# Absolute biomass log ratio against MHW cumulative intensity, with a linear
# fit and R² label. The plot object is created but not saved or printed here.
gg_mhw_biomass_point_abs <- ggplot(
  # Match the MHW summary to surveys by reference year.
  data = survey_summary %>%
    inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr"),
  mapping = aes(x = anom_sev, y = abs(wt_mt_log))
) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm", color = "gray35") +
  labs(
    x = "MHW cumulative intensity (°C-days)",
    y = "Absolute biomass log ratio"
  ) +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 25,
    label.y = 2.25
  ) +
  # Dashed reference line at zero.
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black")
#gg_mhw_biomass_point_abs

```

```{r fig-depth-point, fig.cap="Fish assemblage depth change (log ratio) and MHW cumulative intensity (total anomaly in °C-days). MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. The fitted line is a linear regression. The shaded area is its 95% confidence interval."}
# Depth log ratio against MHW cumulative intensity, with points coloured by
# the biomass log ratio of the same survey-year.
# NOTE(review): `group = mhw_yes_no` is set in the top-level aes, so the
# smoother and the R² label inherit it and are computed per group, whereas the
# figure caption describes a single fitted line. Confirm this is intended.
gg_mhw_biomass_point_depth <- ggplot(
  # Match the MHW summary to surveys by reference year.
  data = survey_summary %>%
    inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr"),
  mapping = aes(
    x = anom_sev,
    y = depth_wt_log,
    fill = wt_mt_log,
    color = wt_mt_log,
    group = mhw_yes_no
  )
) +
  geom_point(position = "jitter") +
  geom_smooth(method = "lm", color = "gray35") +
  theme_bw() +
  coord_cartesian(clip = "off") +
  labs(
    x = "Marine heatwave cumulative intensity (°C-days)",
    y = "Depth log ratio",
    fill = "Biomass log ratio",
    color = "Biomass log ratio"
  ) +
  scale_color_viridis_c() +
  scale_fill_viridis_c() +
  # Dashed reference line at zero (no depth change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.y = 0.5,
    label.x = 25
  )

#gg_mhw_biomass_point_depth # lm estimate is 0.3 with SE 0.16, p = 0.0575

ggsave(
  filename = here("extended", "depthpoint.jpg"),
  plot = gg_mhw_biomass_point_depth,
  dpi = 600,
  width = 6,
  height = 4,
  scale = 0.9
)
```

```{r fig-depth-hist, fig.cap="Fish assemblage depth change (log ratio) and MHW occurrence. MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text."}

# Frequency polygons of the depth log ratio, drawn separately for survey-years
# with and without a MHW.
gg_mhw_biomass_hist_depth <- ggplot(
  data = survey_summary %>%
    # Match the MHW summary to surveys by reference year.
    inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr") %>%
    # Relabel the yes/no flag with the legend text.
    mutate(mhw_yes_no = recode(mhw_yes_no, no = "No MHW", yes = "MHW")),
  mapping = aes(
    x = depth_wt_log,
    group = mhw_yes_no,
    fill = mhw_yes_no,
    color = mhw_yes_no
  )
) +
  geom_freqpoly(binwidth = 0.1, alpha = 0.8, size = 2) +
  scale_color_manual(values = c("#E31A1C", "#1F78B4")) +
  scale_fill_manual(values = c("#E31A1C", "#1F78B4")) +
  theme_bw() +
  labs(x = "Depth log ratio", y = "Frequency") +
  theme(
    legend.position = "right",
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

#gg_mhw_biomass_hist_depth
ggsave(
  filename = here("extended", "depthhist.jpg"),
  plot = gg_mhw_biomass_hist_depth,
  dpi = 600,
  width = 6,
  height = 4,
  scale = 0.9
)

```

```{r fig-lat-point, fig.cap="Biomass change (log ratio) and MHW cumulative intensity (total anomaly in °C-days), color-coded by median latitude of each survey region. MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. The fitted line is a linear regression. The shaded area is its 95% confidence interval."}
# Median haul latitude of each survey, used to colour the points below.
med_lat <- summarise(
  group_by(haul_info, survey),
  med_lat = median(latitude)
)

# Biomass log ratio against MHW cumulative intensity, coloured by the survey's
# median latitude. The plot object is created but not saved or printed here.
gg_mhw_biomass_point_latitude <- ggplot(
  data = survey_summary %>%
    # Match the MHW summary to surveys by reference year.
    inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr") %>%
    # No `by` is given, so this joins on whatever columns the tables share.
    left_join(med_lat),
  mapping = aes(x = anom_sev, y = wt_mt_log, fill = med_lat, color = med_lat)
) +
  geom_point(position = "jitter") +
  theme_bw() +
  coord_cartesian(clip = "off") +
  labs(
    x = "Marine heatwave cumulative intensity (°C-days)",
    y = "Biomass log ratio",
    fill = 'Median latitude',
    color = 'Median latitude'
  ) +
  scale_color_viridis_c() +
  scale_fill_viridis_c() +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  geom_smooth(method = "lm", color = "gray35") +
  stat_regline_equation(aes(label = ..rr.label..))

#gg_mhw_biomass_point_latitude

```

```{r fig-lag-point, fig.cap="Biomass change (log ratio) and MHW cumulative intensity (total anomaly in °C-days) calculated the preceding year as in the main text (0-12 months), a one-year lag (12-24 months), and a two-year lag (24-36 months). MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Fitted lines are linear regressions. Shaded areas are 95% confidence intervals."}

# Biomass log ratio against MHW cumulative intensity at three lags, one facet
# per lag window. AI and GOA are excluded.
# NOTE(review): lag() shifts by row within each survey after sorting by year,
# so the lags equal 1 and 2 calendar years only where a survey has no missing
# years — presumably why AI and GOA are dropped; confirm.
gg_mhw_biomass_point_lags <- survey_summary %>%
  inner_join(mhw_summary_glorys_d_5_day, by = "ref_yr") %>%
  filter(!survey %in% c('AI', 'GOA')) %>%
  group_by(survey) %>%
  arrange(year) %>%
  mutate(
    anom_sev_lag1 = lag(anom_sev, n = 1),
    anom_sev_lag2 = lag(anom_sev, n = 2)
  ) %>%
  select(survey, wt_mt_log, anom_sev, anom_sev_lag1, anom_sev_lag2) %>%
  # One row per survey-year and lag window.
  pivot_longer(
    cols = c(anom_sev, anom_sev_lag1, anom_sev_lag2),
    names_to = "lag",
    values_to = "anomaly_days"
  ) %>%
  # Facet labels give the lag window in months.
  mutate(
    lag = recode(
      lag,
      anom_sev = "0_12",
      anom_sev_lag1 = "12_24",
      anom_sev_lag2 = "24_36"
    )
  ) %>%
  ggplot(aes(x = anomaly_days, y = wt_mt_log)) +
  geom_point() +
  geom_smooth(method = "lm", color = "gray35") +
  theme_bw() +
  coord_cartesian(clip = "off") +
  labs(x = "MHW cumulative intensity (°C-days)", y = "Biomass log ratio") +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  facet_wrap(~lag) +
  stat_regline_equation(aes(label = ..rr.label..), label.x = 40)

#gg_mhw_biomass_point_lags
```

```{r fig-spp-tl, fig.cap="Biomass log ratio and MHW cumulative intensity (total anomaly in °C-days) grouped by trophic level of each taxon. Trophic levels are binned (2-3, 3-4, and 4-5). MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Fitted lines are linear regressions. Shaded areas are 95% confidence intervals."}

# Biomass log ratio against MHW cumulative intensity, stacked by trophic-level
# category (`tl_cat`). Rows with an infinite log ratio are removed first.
ggtl <- tl %>%
  filter(wt_mt_log > -Inf & wt_mt_log < Inf) %>%
  ggplot(aes(x = anom_sev, y = wt_mt_log)) +
  # Small, jittered, semi-transparent points reduce overplotting.
  geom_point(
    size = 0.5,
    fill = "grey70",
    color = "grey30",
    alpha = 0.5,
    position = "jitter"
  ) +
  facet_wrap(~tl_cat, ncol = 1) +
  geom_smooth(method = "lm", color = "gray35") +
  theme_bw() +
  labs(x = "MHW cumulative intensity (°C-days)", y = "Biomass log ratio") +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  stat_regline_equation(aes(label = ..rr.label..), label.x = 40)

#ggtl
ggsave(
  filename = here("extended", "trophic.jpg"),
  plot = ggtl,
  dpi = 300,
  height = 9,
  width = 3.6,
  scale = 0.8
)
```

```{r fig-spp-feed, fig.cap="Biomass log ratio and MHW cumulative intensity (total anomaly in °C-days) grouped by feeding mode of each taxon. MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Fitted lines are linear regressions. Shaded areas are 95% confidence intervals."}

# Biomass log ratio against MHW cumulative intensity, stacked by feeding mode
# (`feeding.mode`). Rows with an infinite log ratio are removed first.
ggfeed <- feed %>%
  filter(wt_mt_log > -Inf & wt_mt_log < Inf) %>%
  ggplot(aes(x = anom_sev, y = wt_mt_log)) +
  # Small, jittered, semi-transparent points reduce overplotting.
  geom_point(
    size = 0.5,
    fill = "grey70",
    color = "grey30",
    alpha = 0.5,
    position = "jitter"
  ) +
  facet_wrap(~feeding.mode, ncol = 1) +
  geom_smooth(method = "lm", color = "gray35") +
  theme_bw() +
  labs(x = "MHW cumulative intensity (°C-days)", y = "Biomass log ratio") +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 40,
    label.y = 8
  )

#ggfeed 

ggsave(
  filename = here("extended", "feeding.jpg"),
  plot = ggfeed,
  dpi = 300,
  width = 3.7,
  height = 9,
  scale = 0.8
)
```

```{r fig-spp-hab, fig.cap="Biomass log ratio and MHW cumulative intensity (total anomaly in °C-days) grouped by habitat preference of each taxon. MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Fitted lines are linear regressions. Shaded areas are 95% confidence intervals."}

# Biomass log ratio against MHW cumulative intensity, faceted by habitat
# (`habitat`). Rows with an infinite log ratio are removed first.
gghab <- hab %>%
  filter(wt_mt_log > -Inf & wt_mt_log < Inf) %>%
  ggplot(aes(x = anom_sev, y = wt_mt_log)) +
  # Small, jittered, semi-transparent points reduce overplotting.
  geom_point(
    size = 0.5,
    fill = "grey70",
    color = "grey30",
    alpha = 0.5,
    position = "jitter"
  ) +
  facet_wrap(~habitat, ncol = 2) +
  geom_smooth(method = "lm", color = "gray35") +
  theme_bw() +
  labs(x = "MHW cumulative intensity (°C-days)", y = "Biomass log ratio") +
  # Dashed reference line at zero (no biomass change).
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  stat_regline_equation(
    aes(label = ..rr.label..),
    label.x = 40,
    label.y = 8
  )

#gghab
ggsave(
  filename = here("extended", "habitat.jpg"),
  plot = gghab,
  dpi = 300,
  width = 6,
  height = 8,
  scale = 0.8
)
```

```{r fig-dissim-mhw, fig.cap="Box-and-whisker plots of temporal community dissimilarity and MHW incidence for partitioned occurrence-based beta diversity metrics of substitution and subset (Jaccard turnover and nestedness, respectively; top) and partitioned biomass-based beta diversity metrics of substitution and subset (Bray-Curtis balanced variation and biomass gradient, respectively; bottom). MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text."}

#convert mhw_yes_no to factor
beta_div$mhw_yes_no <- factor(beta_div$mhw_yes_no)

# Box-and-whisker plot of one dissimilarity component by MHW incidence.
# All four panels below share this recipe, so it lives in one place.
#   component: name (string) of the column drawn on the y axis
#   y_label:   y-axis title
#   data:      data frame holding mhw_yes_no and the component column;
#              defaults to beta_div as it stands when the helper is called
# Rows with a missing mhw_yes_no are dropped before plotting.
# Returns the ggplot object.
plot_dissimilarity_by_mhw <- function(component, y_label, data = beta_div) {
  ggplot(
    data %>% filter(complete.cases(mhw_yes_no)),
    # .data[[component]] looks the y column up by its string name
    aes(x = mhw_yes_no, y = .data[[component]])
  ) +
    geom_boxplot(outlier.size = 0.5, outlier.fill = "grey70", outlier.color = "grey30", outlier.alpha = 0.5) +
    theme_bw() +
    labs(x = "Marine heatwave", y = y_label) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
}

#Turnover Component
MHW_jaccard_dissimilarity_turnover <- plot_dissimilarity_by_mhw(
  "jaccard_dissimilarity_turnover",
  "Turnover component\nJaccard dissimilarity"
)

#Nestedness Component
MHW_jaccard_dissimilarity_nestedness <- plot_dissimilarity_by_mhw(
  "jaccard_dissimilarity_nestedness",
  "Nestedness component\nJaccard dissimilarity"
)

#Balanced Variation Component (we use 'turnover' as variable name for easier parallel with Jaccard metrics)
MHW_bray_dissimilarity_turnover <- plot_dissimilarity_by_mhw(
  "bray_dissimilarity_turnover",
  "Balanced variation component\nBray-Curtis dissimilarity"
)

#Biomass Gradient Component (we use 'nestedness' as variable name for easier parallel with Jaccard metrics)
MHW_bray_dissimilarity_nestedness <- plot_dissimilarity_by_mhw(
  "bray_dissimilarity_nestedness",
  "Biomass gradient component\nBray-Curtis dissimilarity"
)

# 2 x 2 grid: Jaccard components on top (x-axis title removed, since the
# bottom row carries it), Bray-Curtis components below
ggcomm <- plot_grid(MHW_jaccard_dissimilarity_turnover + theme(axis.title.x = element_blank()),
                    MHW_jaccard_dissimilarity_nestedness  + theme(axis.title.x = element_blank()),
                    MHW_bray_dissimilarity_turnover,
                    MHW_bray_dissimilarity_nestedness,
                    ncol = 2, nrow = 2,
                    align = "hv")


ggsave(ggcomm, filename=here("extended","community.jpg"), dpi=300, width=6, height=7, scale=0.8)
```


```{r fig-power-yrs-glorys, fig.cap="Results from a power analysis applying our methods to a simulated dataset in which MHWs reduced biomass by 6% and study duration was varied. The sample sizes plotted are total survey-years across all regions. Dashed vertical line shows the sample size of our actual dataset. Dashed horizontal line denotes one conventionally accepted threshold for power (0.8). MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Simulations were run to 200 years per survey (3600 total survey-years across the 18 regions) but truncated in this figure after power saturated at 1.0."}
# Power versus total survey-years for the GLORYS simulations. Only runs with
# n_years <= 110 are drawn (the figure caption notes the curve is truncated
# once power has saturated).
gg_power_yrs_glorys <- ggplot(
  data = sim_test_summ_yrs_glorys %>% filter(n_years <= 110),
  mapping = aes(x = n_years_tot, y = propsig)
) +
  geom_area(color = "grey30", fill = "grey30", alpha = 0.7) +
  # horizontal reference at power = 0.8
  geom_hline(aes(yintercept = 0.8), linetype = "dashed", color = "black") +
  # vertical reference: row count of survey_summary joined to the GLORYS
  # five-day MHW summary on ref_yr
  geom_vline(
    aes(xintercept = nrow(inner_join(survey_summary, mhw_summary_glorys_d_5_day, by = "ref_yr"))),
    linetype = "dashed", color = "black"
  ) +
  labs(x = "Sample size (survey-years)", y = "Power", title = "GLORYS") +
  theme_bw() +
  scale_y_continuous(limits = c(0, 1))
#gg_power_yrs_glorys
ggsave(
  filename = here("extended", "power_yrs_glorys.jpg"),
  plot = gg_power_yrs_glorys,
  width = 5, height = 4, scale = 0.8, dpi = 300
)

```

```{r fig-power-yrs-oisst, fig.cap="Results from a power analysis applying our methods to a simulated dataset in which MHWs reduced biomass by 6% and study duration was varied. The sample sizes plotted are total survey-years across all regions. Dashed vertical line shows the sample size of our actual dataset. Dashed horizontal line denotes one conventionally accepted threshold for power (0.8). MHWs were calculated from the detrended OISST sea surface temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. Simulations were run to 200 years per survey (3600 total survey-years across the 18 regions) but truncated in this figure after power saturated at 1.0."}
# Power versus total survey-years for the OISST simulations. Only runs with
# n_years <= 110 are drawn (the figure caption notes the curve is truncated
# once power has saturated).
gg_power_yrs_oisst <- sim_test_summ_yrs_oisst %>% 
  filter(n_years <= 110) %>% 
  ggplot(aes(x=n_years_tot, y=propsig)) +
  geom_area(fill="grey30", alpha=0.7, color="grey30") +
  # horizontal reference at power = 0.8
  geom_hline(aes(yintercept=0.8), color="black", linetype='dashed') +
  # vertical reference: sample size of the actual OISST dataset, i.e. the row
  # count of survey_summary joined to the OISST (not GLORYS) five-day MHW
  # summary on ref_yr. The two products cover different numbers of
  # survey-years, so the GLORYS count would misplace this line.
  geom_vline(aes(xintercept=nrow(survey_summary %>%
                                   inner_join(mhw_summary_oisst_d_5_day, by="ref_yr"))),
             color="black", linetype='dashed') +
  labs(x="Sample size (survey-years)", y="Power", title="OISST")+
  theme_bw() +
  scale_y_continuous(limits=c(0, 1))
#gg_power_yrs_oisst
ggsave(gg_power_yrs_oisst, filename=here("extended","power_yrs_oisst.jpg"), dpi=300, width=5, height=4, scale=0.8)
```


```{r fig-power-gamma-glorys, fig.cap="Results from a power analysis applying our methods to a simulated dataset that varied the MHW effect on biomass over the true number of survey-years in our dataset (*n* = 369 total for GLORYS). Dashed horizontal line denotes one conventionally accepted threshold for power (0.8). MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text."}
# Power versus simulated MHW effect size for the GLORYS simulations.
gg_power_gamma_glorys <- ggplot(
  # x axis is (1 - exp_gamma) expressed as a percentage
  # NOTE(review): presumably exp_gamma is the multiplicative effect of a MHW on
  # biomass, so pct is the percent reduction -- confirm against the simulation code
  data = sim_test_summ_gamma_glorys %>% mutate(pct = 100 * (1 - exp_gamma)),
  mapping = aes(x = pct, y = propsig)
) +
  geom_area(color = "grey30", fill = "grey30", alpha = 0.7) +
  # horizontal reference at power = 0.8
  geom_hline(aes(yintercept = 0.8), linetype = "dashed", color = "black") +
  labs(x = "MHW effect on biomass (%)", y = "Power", title = "GLORYS") +
  theme_bw() +
  scale_y_continuous(limits = c(0, 1))
#gg_power_gamma_glorys
ggsave(
  filename = here("extended", "power_gamma_glorys.jpg"),
  plot = gg_power_gamma_glorys,
  width = 5, height = 4, scale = 0.8, dpi = 300
)

```


```{r fig-power-gamma-oisst, fig.cap="Results from a power analysis applying our methods to a simulated dataset that varied the MHW effect on biomass over the true number of survey-years in our dataset (*n* = 441 total for OISST). The sample sizes plotted are total survey-years across all regions. Dashed horizontal line denotes one conventionally accepted threshold for power (0.8). MHWs were calculated from the detrended OISST sea surface temperature data with a five-day minimum duration threshold for MHWs, as used in the main text."}
# Power versus simulated MHW effect size for the OISST simulations.
gg_power_gamma_oisst <- ggplot(
  # x axis is (1 - exp_gamma) expressed as a percentage
  # NOTE(review): presumably exp_gamma is the multiplicative effect of a MHW on
  # biomass, so pct is the percent reduction -- confirm against the simulation code
  data = sim_test_summ_gamma_oisst %>% mutate(pct = 100 * (1 - exp_gamma)),
  mapping = aes(x = pct, y = propsig)
) +
  geom_area(color = "grey30", fill = "grey30", alpha = 0.7) +
  # horizontal reference at power = 0.8
  geom_hline(aes(yintercept = 0.8), linetype = "dashed", color = "black") +
  labs(x = "MHW effect on biomass (%)", y = "Power", title = "OISST") +
  theme_bw() +
  scale_y_continuous(limits = c(0, 1))
#gg_power_gamma_oisst
ggsave(
  filename = here("extended", "power_gamma_oisst.jpg"),
  plot = gg_power_gamma_oisst,
  width = 5, height = 4, scale = 0.8, dpi = 300
)
```

```{r fig-spp, fig.keep="all", fig.height=4, fig.cap=paste0("Biomass trends and historical MHWs in a single region. MHWs were calculated from the detrended GLORYS sea bottom temperature data with a five-day minimum duration threshold for MHWs, as used in the main text. The top five taxa by biomass are highlighted. Shaded grey rectangles denote when any MHWs occurred in the preceding survey-year."), message = F, echo = F, warning = F}

# Figure title per survey: the title followed by the abbreviation in parentheses
survey_names <- mutate(
  survey_names,
  plot_title = paste0(title, " (", abbr, ")")
)

# generate_title <- function(x){
#     plot_title_placeholder <<- survey_names[survey_names$survey==x,]$title
# }

# Plot stacked biomass through time for one survey and save it as a JPEG.
#
# The five taxa with the largest summed wt_mt in the survey are drawn
# individually and all remaining taxa are pooled into "Others". Grey bars
# reaching the height of the tallest yearly total mark the years whose
# mhw_yes_no value is "yes" in mhw_summary_glorys_d_5_day.
#
# x: a single survey identifier, matched against the `survey` column of
#    survey_spp_summary and survey_names.
#
# Reads survey_spp_summary, mhw_summary_glorys_d_5_day, survey_names (including
# its plot_title column) and cbpal6 from the enclosing environment.
# Side effect: writes extended/top_spp_biomass_<x>.jpg.
# Returns the value of ggsave().
plot_spp <- function(x){
  
  # Rows for this survey only. Filtering on the scalar x (rather than on a
  # column of the top-taxa table) avoids relying on vector recycling and still
  # works when the survey has no rows.
  survey_biomass <- survey_spp_summary %>% 
    filter(survey == x)
  
  # five taxa with the largest total biomass in this survey
  topspp <- survey_biomass %>% 
    group_by(spp, survey) %>% 
    summarise(tot = sum(wt_mt)) %>% 
    ungroup() %>% 
    arrange(-tot) %>% 
    slice(1:5) 
  
  # yearly biomass of each top taxon
  top_biomass <- survey_biomass %>% 
    filter(spp %in% topspp$spp) %>% 
    group_by(year, spp, ref_yr, survey) %>% 
    summarise(wt_mt = sum(wt_mt)) %>% 
    ungroup()
  
  # yearly biomass of every other taxon, pooled
  other_biomass <- survey_biomass %>% 
    filter(!spp %in% topspp$spp) %>% 
    group_by(year, ref_yr, survey) %>% 
    summarise(wt_mt = sum(wt_mt)) %>% 
    ungroup() %>% 
    mutate(spp="Others")
  
  plot_prep <- top_biomass %>% 
    bind_rows(other_biomass) %>% 
    inner_join(mhw_summary_glorys_d_5_day, by="ref_yr") %>% 
    group_by(survey, year) %>% 
    mutate(tot = sum(wt_mt)) %>% # set up height for MHW rectangle
    group_by(survey) %>% 
    mutate(maxtot = max(tot)) %>% 
    ungroup() %>% 
    mutate(bar1 = ifelse(mhw_yes_no == "yes", maxtot, 0)) %>% 
    left_join(survey_names) %>% 
    mutate(spp = factor(spp, levels=c(topspp$spp, "Others"))) # legend order: top taxa by rank, then "Others"
  
  # one bar per year: full height in MHW years, zero otherwise
  mhw_bars <- plot_prep %>% select(year, bar1) %>% distinct()
  
  reg_plot <- ggplot() +
    geom_area(data=plot_prep, aes(x=year, y=wt_mt, color=spp, fill=spp, group=spp)) + 
    geom_col(data=mhw_bars, aes(x=year, y=bar1), color="transparent", fill="grey70", alpha=0.5) +
    scale_color_manual(values=cbpal6) +
    scale_fill_manual(values=cbpal6) +
    theme_bw() + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          legend.position="right",
          legend.title = element_blank()) + 
    labs(x="Year",y="Biomass (mt)", title=survey_names[survey_names$survey==x,]$plot_title)
  
  if (x == "NIGFS") { # special case to force x-axis labels 
    reg_plot <- reg_plot + scale_x_continuous(breaks=c(2010, 2014, 2018))
  }
  
  ggsave(reg_plot, filename=here("extended",paste0("top_spp_biomass_",x,".jpg")), height=2, width=3, scale=1.7, dpi=300)
  #print plot
  #cat('\n\n') 
  #print(reg_plot)
  #space
  #cat('\n\n') 
  
}

# Build and save the biomass figure for every survey listed in survey_names
walk(survey_names$survey, plot_spp)
```