diff --git a/.gitignore b/.gitignore index cba43cc..d48db9f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,8 @@ .Ruserdata /.quarto/ + +data/*.rds +/ex1_cache/ +/ex2_cache/ +/ex3_cache/ \ No newline at end of file diff --git a/HV_ex1_centroid_distance.R b/HV_ex1_centroid_distance.R new file mode 100644 index 0000000..e69de29 diff --git a/HV_ex1_sgState.R b/HV_ex1_sgState.R new file mode 100644 index 0000000..e9579b5 --- /dev/null +++ b/HV_ex1_sgState.R @@ -0,0 +1,271 @@ +#' """ FL Bay SAV state and stability using centroid distance +#' @author: W. Ryan James +#' date: 6/21/24""" + +# load libraries +library(tidyverse) +library(hypervolume) + +# load sav monitoring data +# BASIN = Basin sampled +# YEAR = year of monitoring +# STATION = monitoring station +# TT = Thalassia testudium percent cover +# HW = Halodule wrightii percent cover +# SF = Syringodium filliforme percent cover +# TMA = total macroalgae percent cover +# TDR = total drift algae percent cover +# sg_rich = seagrass species richness + +df = read_csv('data/FLbay_SAV.csv') +head(df) + +# z-score and nest data to make hypervolume +set.seed(14) +df = df |> + # z score data across all sites and years + mutate(across(c(TT:sg_rich), scale), + # add tiny amount so when all values the same can make hv + across(c(TT:sg_rich), + ~map_dbl(., ~. + rnorm(1, mean = 0, sd = 0.0001)))) |> + # remove station from dataset + select(-STATION) |> + # nest data by basin and year + group_by(BASIN, YEAR) |> + nest() +head(df) + +# generate hypervolumes +df = df |> + mutate(hv = map(data, \(data) hypervolume_gaussian(data, name = paste(BASIN,YEAR,sep = '_'), + samples.per.point = 1000, + kde.bandwidth = estimate_bandwidth(data), + sd.count = 3, + quantile.requested = 0.95, + quantile.requested.type = "probability", + chunk.size = 1000, + verbose = F)), + centroid = map(hv, \(hv) get_centroid(hv))) +# do not try to open dataframe with hv column it will hang because it is too big +head(df) + +# save output as .rds +#saveRDS(df, 'data/SAV_hvs.rds') +# df = readRDS('data/hvAll.rds') + +# plot hypervolumes +hvj = hypervolume_join(df$hv[[1]], df$hv[[2]]) + +plot(hvj, pairplot = T, colors=c('goldenrod','blue'), + show.3d=FALSE,plot.3d.axes.id=NULL, + show.axes=TRUE, show.frame=TRUE, + show.random=T, show.density=TRUE,show.data=F, + show.legend=T, limits=c(-5,5), + show.contour=F, contour.lwd= 2, + contour.type='alphahull', + contour.alphahull.alpha=0.25, + contour.ball.radius.factor=1, + contour.kde.level=0.01, + contour.raster.resolution=100, + show.centroid=TRUE, cex.centroid=2, + point.alpha.min=0.2, point.dark.factor=0.5, + cex.random=0.5,cex.data=1,cex.axis=1.5,cex.names=2,cex.legend=2, + num.points.max.data = 100000, num.points.max.random = 200000, reshuffle=TRUE, + plot.function.additional=NULL, + verbose=FALSE) + +# comparison of across each year +df_y= tibble(y1 = unique(df$YEAR), + y2 = unique(df$YEAR)) |> + expand(y1,y2) + +# make all unique year comparisons +df_y = df_y[!duplicated(t(apply(df_y,1,sort))),] %>% + filter(!(y1 == y2)) + +# make two df to join all unique comparisons +df1 = df |> + select(BASIN, y1 = YEAR, hv1 = hv, cent1 = centroid) + +df2 = df |> + select(BASIN, y2 = YEAR, hv2 = hv, cent2 = centroid) + + +# create data frame of all data and make yearly comparisons +df_cd = tibble(BASIN = rep(unique(df$BASIN), + each = nrow(df_y)), + y1 = rep(df_y$y1, times = length(unique(df$BASIN))), + y2 = rep(df_y$y2, times = length(unique(df$BASIN)))) |> + inner_join(df1, by = c('BASIN', 'y1')) |> + inner_join(df2, by = c('BASIN', 'y2')) |> 
+ mutate(ychange = y2-y1, + # join hypervolumees in a set for centroid distance + set = map2(hv1,hv2, \(hv1, hv2) hypervolume_set(hv1, hv2, check.memory = F, verbose = F)), + # calculate centroid distance + dist_cent = map2_dbl(hv1, hv2, \(hv1,hv2) hypervolume_distance(hv1, hv2, type = 'centroid', check.memory=F)), + # calculate the difference of centroid of each axis + dif = map2(cent1, cent2, \(cent1,cent2) cent2 - cent1)) |> + #unnest centroid differences + unnest_wider(dif) |> + # select only metrics of interest + select(BASIN, y1, y2, ychange, + dist_cent, TT, HW, SF, sg_rich, TMA, TDR) +df_cd + +# save output +write_csv(df_cd, 'data/SAV_centDist.csv') + +# plot centroid distance +df_cd = read_csv('data/SAV_centDist.csv') |> + mutate(BASIN = factor(BASIN, levels = + c('JON', 'RKB', 'RAN', 'EAG'))) + +ggplot(df_cd, aes(BASIN, dist_cent, fill = BASIN))+ + geom_hline(aes(yintercept = 1), linetype = 'dashed', linewidth = 1)+ + geom_point(aes(color = BASIN), size = 1, + position=position_jitterdodge(dodge.width = 1, jitter.width = 1))+ + # geom_errorbar(aes(ymin = lc, ymax = uc), linewidth = 2, width = 0)+ + geom_boxplot(alpha = 0.6, outliers = F)+ + labs(x = 'Basin', y = 'Centroid distance')+ + scale_fill_viridis_d(option = 'turbo')+ + scale_color_viridis_d(option = 'turbo')+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text = element_text(size = 14, colour = "gray0"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +# ggsave('boxCentDist.png', +# units="in", width=8, height=5, dpi=600) + +# trends in stability +library(MuMIn) +df_cd = df_cd |> + group_by(BASIN) |> + nest() |> + # fit intercept, linear, and quadratic model + mutate(m_int = map(data, \(df)lm(dist_cent~1, data = df)), + m_lin = map(data, \(df)lm(dist_cent~ychange, data = df)), + m_quad = map(data, \(df)lm(dist_cent~ychange + I(ychange^2), data = df)), + AICc_int = map_dbl(m_int, \(x) AICc(x)), + AICc_lin = map_dbl(m_lin, \(x) AICc(x)), + AICc_quad = map_dbl(m_quad, \(x) AICc(x)), + model = case_when( + AICc_int - min(c(AICc_int,AICc_lin,AICc_quad)) <= 4 ~ 'Intercept', + AICc_lin < AICc_quad ~ 'Linear', + AICc_quad < AICc_lin ~ 'Quadratic')) + +# unnest data +d = df_cd |> + select(BASIN, data, model) |> + unnest(cols = c(data)) |> + mutate(BASIN = factor(BASIN, levels = + c('JON', 'RKB', 'RAN', 'EAG'))) + +ggplot(d, aes(ychange, dist_cent, color = BASIN))+ + geom_hline(aes(yintercept = 1), linetype = 'dashed')+ + geom_point(size = 2.5)+ + geom_smooth(data = d |> filter(model == 'Intercept'), + method = 'lm', formula = y~1, + linewidth = 1, color = 'black')+ + geom_smooth(data = d |> filter(model == 'Linear'), + method = 'lm', formula = y~x, + linewidth = 1, color = 'black')+ + geom_smooth(data = d |> filter(model == 'Quadratic'), + method = 'lm', formula = y~x+I(x^2), + linewidth = 1, color = 'black')+ + facet_wrap(~BASIN, nrow = 1)+ + labs(x = 'Years between comparison', y = 'Centroid distance')+ + scale_color_viridis_d(option = 'turbo')+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text.y = element_text(size = 14, colour = "black"), + axis.text.x = element_text(size = 12, colour = "black"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + 
legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +# ggsave('stabCentDist.png', +# units="in", width=8, height=5, dpi=600) + +# variable importance +# pivot data longer +df_c = read_csv('data/SAV_centDist.csv') |> + mutate(across(TT:TDR, \(x) x^2)) |> + pivot_longer(TT:TDR, names_to = 'axis', values_to = 'dist') + +# create vector of unique axes +ax = unique(df_c$axis) + +# for loop to calculate variable importance of each axis +for(i in 1:length(ax)){ + d = df_c |> + # remove axis + filter(axis != ax[i]) |> + group_by(BASIN,y1,y2,ychange,dist_cent) |> + #calculate euclidean distance without axis + summarise(cd = sqrt(sum(dist))) |> + # calculate importance of axis + mutate(imp = (dist_cent/cd) - 1, + axis = ax[i]) + + # bind data into new data frame to store + if(i == 1){ + df_imp = d + }else{ + df_imp = bind_rows(df_imp, d) + } +} + +# calculate relative importance across all years for each basin +df_cdi = df_imp |> + #filter(ychange == 1) |> + group_by(BASIN,axis) |> + summarize(imp = mean(imp)) |> + group_by(BASIN) |> + mutate(s_imp = imp/max(imp))|> + mutate(BASIN = factor(BASIN, levels = + c('JON', 'RKB', 'RAN', 'EAG')), + axis = factor(axis, levels = c('TDR', 'TMA', 'sg_rich', + 'SF', 'HW', 'TT'))) +# labeler function for plotting +y_label_formatter = function(x) { + ifelse(x %% 1 == 0, formatC(x, format = "f", digits = 0), formatC(x, format = "f", digits = 2)) +} + +ggplot(df_cdi, aes(axis, s_imp, fill = BASIN))+ + geom_col()+ + labs(x = 'Variable', y = 'Centroid distance variable importance')+ + coord_flip()+ + theme_bw()+ + facet_wrap(~BASIN, nrow = 2)+ + scale_y_continuous( + breaks = c(0.0, 0.25, 0.5, 0.75, 1.0), + limits = c(0, 1), + labels = y_label_formatter) + + scale_x_discrete(labels = c('Total drift algae', 'Total Macroalgae', 'Seagrass richness', + expression(italic('Syrngodium filliforme')), + expression(italic('Halodule wrightii')), + expression(italic('Thalassia testudium'))))+ + scale_fill_viridis_d(option = 'turbo')+ + theme(axis.title = element_text(size = 14), + axis.text = element_text(size = 14, colour = "gray0"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +ggsave('s_impCentDist.png', + units="in", width=10, height=6, dpi=600) diff --git a/HV_ex2_traits.R b/HV_ex2_traits.R new file mode 100644 index 0000000..961d770 --- /dev/null +++ b/HV_ex2_traits.R @@ -0,0 +1,98 @@ +#' """ Puerto Rico coral benthic community trait diversity +#' @author: W. 
Ryan James +#' date: 6/21/24""" + +# load libraries +library(tidyverse) +library(hypervolume) +library(truncnorm) + +# load trait data +df_tr = read_csv('data/coral_traits.csv') +df_tr + +# load benthic community percent cover data across regions +# pc = percent cover 0-100 +# pc_sd = standard deviation in percent cover +df_ben = read_csv('data/coral_PC.csv') +df_ben + +# randomly generate percent cover data for each species and region +# based on mean and sd of percent cover for the designated number of reps +reps = 100 + +set.seed(14) +df = df_ben |> + # join trait data to benthic data + left_join(df_tr, by = 'species') |> + # duplicate the data to create the number of reps needed + slice(rep(1:n(), each=reps))|> + # randomly generate percent cover + mutate(i = rep(1:reps, times=nrow(df_ben)), + percentcover = truncnorm::rtruncnorm(1, a = 0.001, b = 100, + mean = pc, sd = pc_sd)) |> + select(region, `corallite diameter`:`percentcover`) + +df + +# z-score across all regions for each rep and generate hypervolumes +df = df |> + # z-score across regions for each rep + group_by(i) |> + mutate(across(`corallite diameter`:`colony maximum diameter`, scale)) |> + # nest data for each region and rep to make hypervolume + group_by(region, i) |> + # create a column for the percent cover to weight hypervolume as well as input data + nest(weight = percentcover, data = `corallite diameter`:`colony maximum diameter`) |> + # create community weighted hypervolumes + mutate(hv = map2(data,weight, \(data,weight) hypervolume_gaussian(data, + name = paste(region,i,sep = '_'), + weight = weight$percentcover, + samples.per.point = 1000, + kde.bandwidth = estimate_bandwidth(data), + sd.count = 3, + quantile.requested = 0.95, + quantile.requested.type = "probability", + chunk.size = 1000, + verbose = F)), + # extrace size for each hypervolume + hv_size = map_dbl(hv, \(hv) get_volume(hv))) + +# do not try to open df it will freeze your r since it is too big +head(df) +# save output +saveRDS(df, 'data/coral_region_hvs.rds') + +# plot hypervolume size +d = df |> + group_by(region) |> + summarize(mean = mean(hv_size), + upper = quantile(hv_size, 0.975), + lower = quantile(hv_size, 0.025)) |> + mutate(region = factor(region, + levels = c('North/Northeast', 'Vieques/Culebra', + 'Southeast', 'South', 'Southwest', + 'West', 'Mona/Desecheo'), + labels = c('North/\nNortheast', 'Vieques/\nCulebra', + 'Southeast', 'South', 'Southwest', + 'West', 'Mona/\nDesecheo'))) + + +ggplot(d, aes(region, mean, color = region))+ + geom_pointrange(aes(ymin = lower, ymax = upper), size = 1.5, linewidth = 2)+ + labs(x = 'Region', y = 'Trait diversity', color = 'Region')+ + scale_y_log10()+ + scale_color_viridis_d(option = 'turbo')+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text = element_text(size = 14, colour = "gray0"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +ggsave('coral_size.png', + units="in", width=10, height=6, dpi=600) diff --git a/HV_ex3_trophicNiche.R b/HV_ex3_trophicNiche.R new file mode 100644 index 0000000..97ee6f3 --- /dev/null +++ b/HV_ex3_trophicNiche.R @@ -0,0 +1,345 @@ +#' """ FL Bay seagrass consumer trophic niche +#' @author: W. 
Ryan James +#' date: 6/21/24""" + +library(tidyverse) +library(hypervolume) + +#Function to make random points of n length---- +# data from random sample with mean and sd but +# can be generated between a high and low value if end_points = T +# *** Note chose either column names or column numbers for ID_rows and names +# either work but must be the same +# df = dataframe or tibble with each row containing +# unique entry for making random points +# ID_rows = vector of column names or numbers with id information +# names = column name or number of name of measure variables +# mean = column name or column number of df with mean +# sd = column name or column number of df with sd +# n = number of points to randomly generate +# z_score = T or F, if T z-scores values +# end_points = T or F for if random points need to be generated between +# a high and low end point (e.g. 5% and 95% interval) +# low and high required if end_points = T +# low = column name or column number of df with lower bound to sample in +# high = column name or column number of df with upper bound to sample in +HVvalues = function(df, ID_rows, names, mean, sd, n, z_score = F, + end_points = F, low = NULL, high = NULL){ + require(tidyverse) + require(truncnorm) + + # check to see if information is needed to restrict where points are + if (end_points){ + if (is_empty(df[,low]) | is_empty(df[,high])){ + return(cat('Warning: low and/or high columns not specified \n + Specific and run again or end_points = F \n')) + } + } + + # check to see if there are more + if(T %in% duplicated(df[,c(ID_rows,names)])){ + return(cat('Warning: some of the rows contain duplicated information \n + make sure data is correct \n')) + } + + # rename variables to make code work + if (is.numeric(mean)){ + names(df)[mean] = 'Mean' + }else { + df = df |> rename(Mean = mean) + } + + if (is.numeric(sd)){ + names(df)[sd] = 'SD' + }else { + df = df |> rename(SD = sd) + } + + if (end_points){ + if (is.numeric(low)){ + names(df)[low] = 'lower' + }else { + df = df |> rename(lower = low) + } + + if (is.numeric(high)){ + names(df)[high] = 'upper' + }else { + df = df |> rename(upper = high) + } + } + + # make sure the names is not numeric + if (is.numeric(names)){ + names = names(df)[names] + } + + labs = unique(as.vector(df[,names])[[1]]) + # generate random points within bounds + if(end_points){ + + df_tot = df |> slice(rep(1:n(), each=n))|> + mutate(point = + truncnorm::rtruncnorm(1, a = lower, b = upper, + mean = Mean, sd = SD)) |> + ungroup() |> + mutate(num = rep(1:n, times=nrow(df))) |> + dplyr::select(-Mean, -SD, -lower, -upper)|> + pivot_wider(names_from = names, values_from = point)|> + dplyr::select(-num) + }else { + # generate random points outside of bounds + df_tot = df |> slice(rep(1:n(), each=n))|> + mutate(point = + truncnorm::rtruncnorm(1, mean = Mean, sd = SD)) |> + ungroup() |> + mutate(num = rep(1:n, times=nrow(df))) |> + dplyr::select(-Mean, -SD)|> + pivot_wider(names_from = names, values_from = point)|> + dplyr::select(-num) + } + if (z_score){ + df_tot = df_tot |> + mutate(across(all_of(labs), scale)) + } + + return(df_tot) + +} + +# generate hvs ---- +# load all data +d = read_csv('data/CESImixResults.csv') + +# number or iterations +reps = 50 + +# generate points and z-score across iterations +set.seed(14) +df = d |> + # duplicate for number of reps + slice(rep(1:n(), each=reps))|> + mutate(i = rep(1:reps, times=nrow(d))) |> + group_by(i) |> + nest() |> + # apply function to generate random points + mutate(points = map(data, \(data) 
HVvalues(df = data, ID_rows = c('species', 'season'), + names = c('source'), + mean = 'mean', sd = 'sd', n = 30, + end_points = T, low = 'lowend', high = 'highend', + z_score = T))) |> + select(i, points) |> + unnest(points) + +df + +# generate hypervolumes +df = df |> + group_by(species, season, i) |> + nest() |> + mutate(hv = map(data, \(data) hypervolume_gaussian(data, name = paste(species, season, i,sep = '_'), + samples.per.point = 500, + kde.bandwidth = estimate_bandwidth(data), + sd.count = 3, + quantile.requested = 0.95, + quantile.requested.type = "probability", + chunk.size = 1000, + verbose = F)), + hv_size = map_dbl(hv, \(hv) get_volume(hv)), + centroid = map(hv, \(hv) get_centroid(hv))) + +write_rds(df, 'data/CESIhvAll.rds', compress = 'gz') + + +# overlap within species by season ---- +ov_sn = df |> + select(species, season, hv, hv_size) |> + pivot_wider(names_from = season, values_from = c(hv,hv_size)) |> + mutate(size_rat = hv_size_Dry/hv_size_Wet, + set = map2(hv_Wet,hv_Dry, \(hv1, hv2) hypervolume_set(hv1, hv2, check.memory = F, verbose = F)), + ov = map(set, \(set) hypervolume_overlap_statistics(set)), + dist_cent = map2_dbl(hv_Wet,hv_Dry, \(hv1,hv2) hypervolume_distance(hv1, hv2, type = 'centroid', check.memory=F))) |> + unnest_wider(ov) |> + select(species, i, hv_size_Wet, hv_size_Dry, + size_rat, jaccard, sorensen, + uniq_Wet = frac_unique_1, uniq_Dry = frac_unique_2, + dist_cent) +ov_sn +write_csv(ov_sn, 'data/hvOv_season.csv') + + +# plot size +df = read_csv('data/hvOv_season.csv') |> + pivot_longer(hv_size_Wet:hv_size_Dry,names_to = 'season', + values_to = 'vol') |> + group_by(species, season) |> + summarize(mean = mean(vol), + median = median(vol), + low = quantile(vol, 0.025), + up = quantile(vol, 0.975)) |> + mutate(season = factor(season, levels = c('hv_size_Wet', + 'hv_size_Dry'))) + +ggplot(df, aes(x = species, y = mean, color = season))+ + geom_pointrange(aes(ymin = low, ymax = up), + size = 1.5, linewidth = 1.5, fatten = 2, + position=position_dodge(width = 0.5))+ + labs(x = 'Species', y = 'Trophic niche width', + color = 'Season') + + scale_fill_manual(values = c('hv_size_Wet' = 'skyblue3', + 'hv_size_Dry' = 'indianred3'), + labels = c('hv_size_Wet' = 'Wet', + 'hv_size_Dry' = 'Dry')) + + scale_color_manual(values = c('hv_size_Wet' = 'skyblue3', + 'hv_size_Dry' = 'indianred3'), + labels = c('hv_size_Wet' = 'Wet', + 'hv_size_Dry' = 'Dry')) + + scale_x_discrete(labels = c("Bay \nanchovy", + "Mojarra", + "Pigfish", + "Pinfish", + "Pink \nshrimp", + "Rainwater \nkillifish", + "Silver \nperch" ))+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text.y = element_text(size = 14, colour = "black"), + axis.text.x = element_text(size = 12, colour = "black"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'right', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +ggsave("sizePres.png", units="in", width=10, height=7, dpi=600) + + +# centroid distance +cols = c("Pinfish" = 'yellow2', + "Mojarra" = 'slategray4', + "Silver perch" = 'snow3', + "Bay anchovy" = 'deepskyblue1', + "Pigfish" = 'orange', + "Pink shrimp" = 'pink', + "Rainwater killifish" = 'firebrick', + 'All' = 'black') +df = ov_sn|> + group_by(species) |> + summarize(mean = mean(dist_cent), + median = median(dist_cent), + low = quantile(dist_cent, 0.025), + up = quantile(dist_cent, 0.975)) + +ggplot(df, aes(x = 
species, y = mean, color = species))+ + geom_hline(aes(yintercept = 1), linewidth = 1, linetype = 'dashed')+ + geom_pointrange(aes(ymin = low, ymax = up), + size = 1.5, linewidth = 1.5, fatten = 2, + position=position_dodge(width = 0.5))+ + labs(x = NULL, y = 'Centroid distance') + + scale_x_discrete(labels = c("Bay \nanchovy", + "Mojarra", + "Pigfish", + "Pinfish", + "Pink \nshrimp", + "Rainwater \nkillifish", + "Silver \nperch" ))+ + scale_color_manual(values = cols)+ + scale_y_continuous(limits = c(0,4.1))+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text.y = element_text(size = 14, colour = "black"), + axis.text.x = element_text(size = 12, colour = "black"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +ggsave("ovPres.png", units="in", width=6, height=6, dpi=600) + +# overlap +df = ov_sn|> + group_by(species) |> + summarize(mean = mean(sorensen), + median = median(sorensen), + low = quantile(sorensen, 0.025), + up = quantile(sorensen, 0.975)) + +ggplot(df, aes(x = species, y = mean, color = species))+ + geom_pointrange(aes(ymin = low, ymax = up), + size = 1.5, linewidth = 1.5, fatten = 2, + position=position_dodge(width = 0.5))+ + labs(x = NULL, y = 'Niche overlap') + + scale_x_discrete(labels = c("All", + "Bay \nanchovy", + "Mojarra", + "Pigfish", + "Pinfish", + "Pink \nshrimp", + "Rainwater \nkillifish", + "Silver \nperch" ))+ + scale_color_manual(values = cols)+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text.y = element_text(size = 14, colour = "black"), + axis.text.x = element_text(size = 12, colour = "black"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +ggsave("ovPres.png", units="in", width=6, height=6, dpi=600) + +# percent unique +df = ov_sn|> + pivot_longer(uniq_Wet:uniq_Dry,names_to = 'season', + values_to = 'vol') |> + group_by(species, season) |> + summarize(mean = mean(vol), + median = median(vol), + low = quantile(vol, 0.025), + up = quantile(vol, 0.975)) |> + mutate(season = factor(season, levels = c('uniq_Wet', + 'uniq_Dry'))) + +ggplot(df, aes(x = species, y = mean, color = season))+ + geom_pointrange(aes(ymin = low, ymax = up), + size = 1.5, linewidth = 1.5, fatten = 2, + position=position_dodge(width = 0.5))+ + labs(x = 'Species', y = 'Niche volume unique', + color = 'Season') + + scale_fill_manual(values = c('uniq_Wet' = 'skyblue3', + 'uniq_Dry' = 'indianred3'), + labels = c('uniq_Wet' = 'Wet', + 'uniq_Dry' = 'Dry')) + + scale_color_manual(values = c('uniq_Wet' = 'skyblue3', + 'uniq_Dry' = 'indianred3'), + labels = c('uniq_Wet' = 'Wet', + 'uniq_Dry' = 'Dry')) + + scale_x_discrete(labels = c("Bay \nanchovy", + "Mojarra", + "Pigfish", + "Pinfish", + "Pink \nshrimp", + "Rainwater \nkillifish", + "Silver \nperch" ))+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text.y = element_text(size = 14, colour = "black"), + axis.text.x = element_text(size = 12, colour = "black"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'right', + 
legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +ggsave("uniquePres.png", units="in", width=10, height=7, dpi=600) diff --git a/_quarto.yml b/_quarto.yml index 38d066a..f520012 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -7,14 +7,14 @@ website: navbar: left: - - text: "Computer setup" + - text: "R & Rstudio" href: comp.html - # - text: 'Ex 1' - # href: ws1.qmd - # - text: 'Ex 2' - # href: ws2.qmd - # - text: 'Ex 3' - # href: ws3.qmd + - text: 'Ex 1' + href: ex1.qmd + - text: 'Ex 2' + href: ex2.qmd + - text: 'Ex 3' + href: ex3.qmd format: html: diff --git a/boxCentDist.png b/boxCentDist.png new file mode 100644 index 0000000..1a48e47 Binary files /dev/null and b/boxCentDist.png differ diff --git a/coral_size.png b/coral_size.png new file mode 100644 index 0000000..3866df0 Binary files /dev/null and b/coral_size.png differ diff --git a/docs/HV_ex1_sgState.R b/docs/HV_ex1_sgState.R new file mode 100644 index 0000000..e9579b5 --- /dev/null +++ b/docs/HV_ex1_sgState.R @@ -0,0 +1,271 @@ +#' """ FL Bay SAV state and stability using centroid distance +#' @author: W. Ryan James +#' date: 6/21/24""" + +# load libraries +library(tidyverse) +library(hypervolume) + +# load sav monitoring data +# BASIN = Basin sampled +# YEAR = year of monitoring +# STATION = monitoring station +# TT = Thalassia testudium percent cover +# HW = Halodule wrightii percent cover +# SF = Syringodium filliforme percent cover +# TMA = total macroalgae percent cover +# TDR = total drift algae percent cover +# sg_rich = seagrass species richness + +df = read_csv('data/FLbay_SAV.csv') +head(df) + +# z-score and nest data to make hypervolume +set.seed(14) +df = df |> + # z score data across all sites and years + mutate(across(c(TT:sg_rich), scale), + # add tiny amount so when all values the same can make hv + across(c(TT:sg_rich), + ~map_dbl(., ~. 
+ rnorm(1, mean = 0, sd = 0.0001)))) |> + # remove station from dataset + select(-STATION) |> + # nest data by basin and year + group_by(BASIN, YEAR) |> + nest() +head(df) + +# generate hypervolumes +df = df |> + mutate(hv = map(data, \(data) hypervolume_gaussian(data, name = paste(BASIN,YEAR,sep = '_'), + samples.per.point = 1000, + kde.bandwidth = estimate_bandwidth(data), + sd.count = 3, + quantile.requested = 0.95, + quantile.requested.type = "probability", + chunk.size = 1000, + verbose = F)), + centroid = map(hv, \(hv) get_centroid(hv))) +# do not try to open dataframe with hv column it will hang because it is too big +head(df) + +# save output as .rds +#saveRDS(df, 'data/SAV_hvs.rds') +# df = readRDS('data/hvAll.rds') + +# plot hypervolumes +hvj = hypervolume_join(df$hv[[1]], df$hv[[2]]) + +plot(hvj, pairplot = T, colors=c('goldenrod','blue'), + show.3d=FALSE,plot.3d.axes.id=NULL, + show.axes=TRUE, show.frame=TRUE, + show.random=T, show.density=TRUE,show.data=F, + show.legend=T, limits=c(-5,5), + show.contour=F, contour.lwd= 2, + contour.type='alphahull', + contour.alphahull.alpha=0.25, + contour.ball.radius.factor=1, + contour.kde.level=0.01, + contour.raster.resolution=100, + show.centroid=TRUE, cex.centroid=2, + point.alpha.min=0.2, point.dark.factor=0.5, + cex.random=0.5,cex.data=1,cex.axis=1.5,cex.names=2,cex.legend=2, + num.points.max.data = 100000, num.points.max.random = 200000, reshuffle=TRUE, + plot.function.additional=NULL, + verbose=FALSE) + +# comparison of across each year +df_y= tibble(y1 = unique(df$YEAR), + y2 = unique(df$YEAR)) |> + expand(y1,y2) + +# make all unique year comparisons +df_y = df_y[!duplicated(t(apply(df_y,1,sort))),] %>% + filter(!(y1 == y2)) + +# make two df to join all unique comparisons +df1 = df |> + select(BASIN, y1 = YEAR, hv1 = hv, cent1 = centroid) + +df2 = df |> + select(BASIN, y2 = YEAR, hv2 = hv, cent2 = centroid) + + +# create data frame of all data and make yearly comparisons +df_cd = tibble(BASIN = rep(unique(df$BASIN), + each = nrow(df_y)), + y1 = rep(df_y$y1, times = length(unique(df$BASIN))), + y2 = rep(df_y$y2, times = length(unique(df$BASIN)))) |> + inner_join(df1, by = c('BASIN', 'y1')) |> + inner_join(df2, by = c('BASIN', 'y2')) |> + mutate(ychange = y2-y1, + # join hypervolumees in a set for centroid distance + set = map2(hv1,hv2, \(hv1, hv2) hypervolume_set(hv1, hv2, check.memory = F, verbose = F)), + # calculate centroid distance + dist_cent = map2_dbl(hv1, hv2, \(hv1,hv2) hypervolume_distance(hv1, hv2, type = 'centroid', check.memory=F)), + # calculate the difference of centroid of each axis + dif = map2(cent1, cent2, \(cent1,cent2) cent2 - cent1)) |> + #unnest centroid differences + unnest_wider(dif) |> + # select only metrics of interest + select(BASIN, y1, y2, ychange, + dist_cent, TT, HW, SF, sg_rich, TMA, TDR) +df_cd + +# save output +write_csv(df_cd, 'data/SAV_centDist.csv') + +# plot centroid distance +df_cd = read_csv('data/SAV_centDist.csv') |> + mutate(BASIN = factor(BASIN, levels = + c('JON', 'RKB', 'RAN', 'EAG'))) + +ggplot(df_cd, aes(BASIN, dist_cent, fill = BASIN))+ + geom_hline(aes(yintercept = 1), linetype = 'dashed', linewidth = 1)+ + geom_point(aes(color = BASIN), size = 1, + position=position_jitterdodge(dodge.width = 1, jitter.width = 1))+ + # geom_errorbar(aes(ymin = lc, ymax = uc), linewidth = 2, width = 0)+ + geom_boxplot(alpha = 0.6, outliers = F)+ + labs(x = 'Basin', y = 'Centroid distance')+ + scale_fill_viridis_d(option = 'turbo')+ + scale_color_viridis_d(option = 'turbo')+ + theme_bw()+ + 
theme(axis.title = element_text(size = 14), + axis.text = element_text(size = 14, colour = "gray0"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +# ggsave('boxCentDist.png', +# units="in", width=8, height=5, dpi=600) + +# trends in stability +library(MuMIn) +df_cd = df_cd |> + group_by(BASIN) |> + nest() |> + # fit intercept, linear, and quadratic model + mutate(m_int = map(data, \(df)lm(dist_cent~1, data = df)), + m_lin = map(data, \(df)lm(dist_cent~ychange, data = df)), + m_quad = map(data, \(df)lm(dist_cent~ychange + I(ychange^2), data = df)), + AICc_int = map_dbl(m_int, \(x) AICc(x)), + AICc_lin = map_dbl(m_lin, \(x) AICc(x)), + AICc_quad = map_dbl(m_quad, \(x) AICc(x)), + model = case_when( + AICc_int - min(c(AICc_int,AICc_lin,AICc_quad)) <= 4 ~ 'Intercept', + AICc_lin < AICc_quad ~ 'Linear', + AICc_quad < AICc_lin ~ 'Quadratic')) + +# unnest data +d = df_cd |> + select(BASIN, data, model) |> + unnest(cols = c(data)) |> + mutate(BASIN = factor(BASIN, levels = + c('JON', 'RKB', 'RAN', 'EAG'))) + +ggplot(d, aes(ychange, dist_cent, color = BASIN))+ + geom_hline(aes(yintercept = 1), linetype = 'dashed')+ + geom_point(size = 2.5)+ + geom_smooth(data = d |> filter(model == 'Intercept'), + method = 'lm', formula = y~1, + linewidth = 1, color = 'black')+ + geom_smooth(data = d |> filter(model == 'Linear'), + method = 'lm', formula = y~x, + linewidth = 1, color = 'black')+ + geom_smooth(data = d |> filter(model == 'Quadratic'), + method = 'lm', formula = y~x+I(x^2), + linewidth = 1, color = 'black')+ + facet_wrap(~BASIN, nrow = 1)+ + labs(x = 'Years between comparison', y = 'Centroid distance')+ + scale_color_viridis_d(option = 'turbo')+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text.y = element_text(size = 14, colour = "black"), + axis.text.x = element_text(size = 12, colour = "black"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +# ggsave('stabCentDist.png', +# units="in", width=8, height=5, dpi=600) + +# variable importance +# pivot data longer +df_c = read_csv('data/SAV_centDist.csv') |> + mutate(across(TT:TDR, \(x) x^2)) |> + pivot_longer(TT:TDR, names_to = 'axis', values_to = 'dist') + +# create vector of unique axes +ax = unique(df_c$axis) + +# for loop to calculate variable importance of each axis +for(i in 1:length(ax)){ + d = df_c |> + # remove axis + filter(axis != ax[i]) |> + group_by(BASIN,y1,y2,ychange,dist_cent) |> + #calculate euclidean distance without axis + summarise(cd = sqrt(sum(dist))) |> + # calculate importance of axis + mutate(imp = (dist_cent/cd) - 1, + axis = ax[i]) + + # bind data into new data frame to store + if(i == 1){ + df_imp = d + }else{ + df_imp = bind_rows(df_imp, d) + } +} + +# calculate relative importance across all years for each basin +df_cdi = df_imp |> + #filter(ychange == 1) |> + group_by(BASIN,axis) |> + summarize(imp = mean(imp)) |> + group_by(BASIN) |> + mutate(s_imp = imp/max(imp))|> + mutate(BASIN = factor(BASIN, levels = + c('JON', 'RKB', 'RAN', 'EAG')), + axis = factor(axis, levels = c('TDR', 'TMA', 'sg_rich', + 'SF', 'HW', 'TT'))) +# labeler function 
for plotting +y_label_formatter = function(x) { + ifelse(x %% 1 == 0, formatC(x, format = "f", digits = 0), formatC(x, format = "f", digits = 2)) +} + +ggplot(df_cdi, aes(axis, s_imp, fill = BASIN))+ + geom_col()+ + labs(x = 'Variable', y = 'Centroid distance variable importance')+ + coord_flip()+ + theme_bw()+ + facet_wrap(~BASIN, nrow = 2)+ + scale_y_continuous( + breaks = c(0.0, 0.25, 0.5, 0.75, 1.0), + limits = c(0, 1), + labels = y_label_formatter) + + scale_x_discrete(labels = c('Total drift algae', 'Total Macroalgae', 'Seagrass richness', + expression(italic('Syrngodium filliforme')), + expression(italic('Halodule wrightii')), + expression(italic('Thalassia testudium'))))+ + scale_fill_viridis_d(option = 'turbo')+ + theme(axis.title = element_text(size = 14), + axis.text = element_text(size = 14, colour = "gray0"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +ggsave('s_impCentDist.png', + units="in", width=10, height=6, dpi=600) diff --git a/docs/HV_ex2_traits.R b/docs/HV_ex2_traits.R new file mode 100644 index 0000000..961d770 --- /dev/null +++ b/docs/HV_ex2_traits.R @@ -0,0 +1,98 @@ +#' """ Puerto Rico coral benthic community trait diversity +#' @author: W. Ryan James +#' date: 6/21/24""" + +# load libraries +library(tidyverse) +library(hypervolume) +library(truncnorm) + +# load trait data +df_tr = read_csv('data/coral_traits.csv') +df_tr + +# load benthic community percent cover data across regions +# pc = percent cover 0-100 +# pc_sd = standard deviation in percent cover +df_ben = read_csv('data/coral_PC.csv') +df_ben + +# randomly generate percent cover data for each species and region +# based on mean and sd of percent cover for the designated number of reps +reps = 100 + +set.seed(14) +df = df_ben |> + # join trait data to benthic data + left_join(df_tr, by = 'species') |> + # duplicate the data to create the number of reps needed + slice(rep(1:n(), each=reps))|> + # randomly generate percent cover + mutate(i = rep(1:reps, times=nrow(df_ben)), + percentcover = truncnorm::rtruncnorm(1, a = 0.001, b = 100, + mean = pc, sd = pc_sd)) |> + select(region, `corallite diameter`:`percentcover`) + +df + +# z-score across all regions for each rep and generate hypervolumes +df = df |> + # z-score across regions for each rep + group_by(i) |> + mutate(across(`corallite diameter`:`colony maximum diameter`, scale)) |> + # nest data for each region and rep to make hypervolume + group_by(region, i) |> + # create a column for the percent cover to weight hypervolume as well as input data + nest(weight = percentcover, data = `corallite diameter`:`colony maximum diameter`) |> + # create community weighted hypervolumes + mutate(hv = map2(data,weight, \(data,weight) hypervolume_gaussian(data, + name = paste(region,i,sep = '_'), + weight = weight$percentcover, + samples.per.point = 1000, + kde.bandwidth = estimate_bandwidth(data), + sd.count = 3, + quantile.requested = 0.95, + quantile.requested.type = "probability", + chunk.size = 1000, + verbose = F)), + # extrace size for each hypervolume + hv_size = map_dbl(hv, \(hv) get_volume(hv))) + +# do not try to open df it will freeze your r since it is too big +head(df) +# save output +saveRDS(df, 'data/coral_region_hvs.rds') + +# plot hypervolume size +d = df |> + group_by(region) |> + summarize(mean = mean(hv_size), + 
upper = quantile(hv_size, 0.975), + lower = quantile(hv_size, 0.025)) |> + mutate(region = factor(region, + levels = c('North/Northeast', 'Vieques/Culebra', + 'Southeast', 'South', 'Southwest', + 'West', 'Mona/Desecheo'), + labels = c('North/\nNortheast', 'Vieques/\nCulebra', + 'Southeast', 'South', 'Southwest', + 'West', 'Mona/\nDesecheo'))) + + +ggplot(d, aes(region, mean, color = region))+ + geom_pointrange(aes(ymin = lower, ymax = upper), size = 1.5, linewidth = 2)+ + labs(x = 'Region', y = 'Trait diversity', color = 'Region')+ + scale_y_log10()+ + scale_color_viridis_d(option = 'turbo')+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text = element_text(size = 14, colour = "gray0"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +ggsave('coral_size.png', + units="in", width=10, height=6, dpi=600) diff --git a/docs/about.html b/docs/about.html index cd5446e..fbebb13 100644 --- a/docs/about.html +++ b/docs/about.html @@ -116,7 +116,19 @@
diff --git a/docs/comp.html b/docs/comp.html index 267c5d9..0121302 100644 --- a/docs/comp.html +++ b/docs/comp.html @@ -82,7 +82,19 @@
diff --git a/docs/ex1.html b/docs/ex1.html new file mode 100644 index 0000000..aa19703 --- /dev/null +++ b/docs/ex1.html @@ -0,0 +1,1108 @@ + + + + + + + + + + + +ISWB 15 Hypervolume Workshop - Example 1: Stability of seagrass ecosystems in Florida Bay + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Example 1: Stability of seagrass ecosystems in Florida Bay
Author: W. Ryan James
Published: June 21, 2024
Stability of seagrass ecosystems in Florida Bay

+

This vignette uses hypervolumes to understand the temporal stability of seagrass ecosystems. Hypervolumes are generated from common seagrass monitoring metrics collected yearly from 2007-2023 across four basins in Florida Bay, USA. Centroid distance is used to compare mean conditions across all years to assess temporal stability, and variable importance is calculated to identify the metrics driving stability within each basin.

+

R script

+
+
+

data

+

The data used for this example come from the South Florida Fisheries Habitat Assessment Program, which monitors seagrass habitats annually using quadrat samples. The benthic cover data span four basins and include six metrics of the SAV community. Data are averaged across stations for each metric.

  • BASIN = Basin sampled
  • YEAR = year of monitoring
  • STATION = monitoring station
  • TT = Thalassia testudinum percent cover
  • HW = Halodule wrightii percent cover
  • SF = Syringodium filiforme percent cover
  • TMA = total macroalgae percent cover
  • TDR = total drift algae percent cover
  • sg_rich = seagrass species richness
+
+
+

+
Map of sampling basins
+
+
+
+
# load libraries
+library(tidyverse)
+library(hypervolume)
+## Loading required package: Rcpp
+
+# load sav monitoring data 
+df = read_csv('data/FLbay_SAV.csv') 
+## Rows: 1919 Columns: 9
+## ── Column specification ────────────────────────────────────────────────────────
+## Delimiter: ","
+## chr (1): BASIN
+## dbl (8): YEAR, STATION, TT, HW, SF, TMA, TDR, sg_rich
+## 
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+head(df)
+## # A tibble: 6 × 9
+##   BASIN  YEAR STATION     TT    HW    SF    TMA    TDR sg_rich
+##   <chr> <dbl>   <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>   <dbl>
+## 1 EAG    2007       1 0.135  0         0 0.0262 0         1   
+## 2 EAG    2007       2 0.0986 0         0 0.0471 0         1   
+## 3 EAG    2007       3 0.15   0         0 0.03   0         1   
+## 4 EAG    2007       4 0.3    0         0 0.07   0         1   
+## 5 EAG    2007       5 0.03   0.005     0 0.03   0         1.17
+## 6 EAG    2007       6 0.206  0         0 0.0262 0.0188    1
+
+
+
+

Prepare data

+

Because hypervolumes can be generated with any continuous data as axes, the units of the axes are often not compatible. Blonder et al. 2014 & 2018 recommend converting all of the axes into the same units. This can be done by taking the z-score of the values to convert units into standard deviations. Z-scoring data can be done with the formula: \[ z = \frac{x_{i}-\overline{x}}{sd} \] where \(x_{i}\) is a value, \(\overline{x}\) is the mean, and \(sd\) is the standard deviation. By z-scoring each axis, 0 is the mean of that axis, a value of 1 is 1 standard deviation above the global mean of that axis, and a value of -1 is 1 standard deviation below the global mean of the axis. In R this can be done manually or with the scale() function.

+

Hypervolumes cannot be made when all values for a single axis are the same (e.g. all values 0 for a species cover in a basin for a year), so we can add a tiny bit of variation in order to make the hypervolume.
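A minimal sketch of why this jitter is needed, using a made-up vector rather than the monitoring data:

x = rep(0, 10)                                # e.g. a species absent from every station in a year
sd(x)                                         # 0: the axis has no variance, so a hypervolume cannot be built
x_jit = x + rnorm(10, mean = 0, sd = 0.0001)  # add a tiny random value to each observation
sd(x_jit)                                     # now > 0, but far too small to meaningfully move the axis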

+

We can then nest() the data to take advantage of the purrr package and map().

+
+
# z-score and nest data to make hypervolume
+set.seed(14)
+df = df |> 
+  # z score data across all sites and years
+  mutate(across(c(TT:sg_rich), scale), 
+  # add tiny amount so when all values the same can make hv       
+         across(c(TT:sg_rich), 
+                ~map_dbl(., ~. + rnorm(1, mean = 0, sd = 0.0001)))) |> 
+  # remove station from dataset
+  select(-STATION) |> 
+  # nest data by basin and year
+  group_by(BASIN, YEAR) |> 
+  nest() 
+head(df)
+## # A tibble: 6 × 3
+## # Groups:   BASIN, YEAR [6]
+##   BASIN  YEAR data             
+##   <chr> <dbl> <list>           
+## 1 EAG    2007 <tibble [31 × 6]>
+## 2 EAG    2008 <tibble [31 × 6]>
+## 3 EAG    2009 <tibble [31 × 6]>
+## 4 EAG    2010 <tibble [31 × 6]>
+## 5 EAG    2011 <tibble [31 × 6]>
+## 6 EAG    2012 <tibble [31 × 6]>
+
+
+
+

Generate hypervolumes

+

Hypervolumes are a multidimensional tool based on Hutchinson’s n-dimensional niche concept, and we can build them with the hypervolume package.
+

+

With a nested dataset of the columns we want to build hypervolumes from, we can use mutate() and map() to generate a hypervolume for each group.

+

We can also use map() and get_centroid() to extract centroid values of each hypervolume.

+
+
# generate hypervolumes
+df = df |> 
+  mutate(hv = map(data, \(data) hypervolume_gaussian(data, name = paste(BASIN,YEAR,sep = '_'),
+                                                     samples.per.point = 1000,
+                                                     kde.bandwidth = estimate_bandwidth(data), 
+                                                     sd.count = 3, 
+                                                     quantile.requested = 0.95, 
+                                                     quantile.requested.type = "probability", 
+                                                     chunk.size = 1000, 
+                                                     verbose = F)),
+         centroid = map(hv, \(hv) get_centroid(hv)))
+
+

** Do not try to open the data frame with the hv column in the data viewer; it will hang because the object is too big.
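If you need to check the object without printing the hypervolumes, a couple of safer options (assuming the tidyverse is loaded) are:

glimpse(df)   # column names and types only
df$hv[[1]]    # summary of a single hypervolume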

+
+
head(df)
+## # A tibble: 6 × 5
+## # Groups:   BASIN, YEAR [6]
+##   BASIN  YEAR data              hv         centroid 
+##   <chr> <dbl> <list>            <list>     <list>   
+## 1 EAG    2007 <tibble [31 × 6]> <Hypervlm> <dbl [6]>
+## 2 EAG    2008 <tibble [31 × 6]> <Hypervlm> <dbl [6]>
+## 3 EAG    2009 <tibble [31 × 6]> <Hypervlm> <dbl [6]>
+## 4 EAG    2010 <tibble [31 × 6]> <Hypervlm> <dbl [6]>
+## 5 EAG    2011 <tibble [31 × 6]> <Hypervlm> <dbl [6]>
+## 6 EAG    2012 <tibble [31 × 6]> <Hypervlm> <dbl [6]>
+
+

If you want to save the output, you can save it as an .rds file.

+
+
saveRDS(df, 'data/SAV_hvs.rds') 
+
+
+
+

plotting hypervolumes

+

We can plot multiple hypervolumes by joining them together with hypervolume_join().

+
+
hvj = hypervolume_join(df$hv[[1]], df$hv[[2]])
+
+plot(hvj, pairplot = T, colors=c('goldenrod','blue'),
+     show.3d=FALSE,plot.3d.axes.id=NULL,
+     show.axes=TRUE, show.frame=TRUE,
+     show.random=T, show.density=TRUE,show.data=F,
+     show.legend=T, limits=c(-5,5), 
+     show.contour=F, contour.lwd= 2, 
+     contour.type='alphahull', 
+     contour.alphahull.alpha=0.25,
+     contour.ball.radius.factor=1, 
+     contour.kde.level=0.01,
+     contour.raster.resolution=100,
+     show.centroid=TRUE, cex.centroid=2,
+     point.alpha.min=0.2, point.dark.factor=0.5,
+     cex.random=0.5,cex.data=1,cex.axis=1.5,cex.names=2,cex.legend=2,
+     num.points.max.data = 100000, num.points.max.random = 200000, reshuffle=TRUE,
+     plot.function.additional=NULL,
+     verbose=FALSE
+)
+
+

+
+
+
+
+

Centroid distance

+

We can use centroid distance to compare mean conditions between years and understand stability. Centroid distance is calculated between a pair of hypervolumes, so we first create a data frame of all possible year combinations and join the hypervolumes and centroids for each comparison. We can then subtract the centroids in each comparison to get the centroid difference along each axis.
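Centroid distance here is the Euclidean distance between the two centroid vectors, where \(c_{1}\) and \(c_{2}\) are the centroids and \(n\) is the number of axes: \[ dist_{cent} = \sqrt{\sum_{i=1}^{n} (c_{2,i} - c_{1,i})^{2}} \] This is also why the per-axis centroid differences are squared later when calculating variable importance.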

+
+
# comparison of across each year
+df_y= tibble(y1 = unique(df$YEAR),
+             y2 = unique(df$YEAR)) |> 
+  expand(y1,y2)
+
+# make all unique year comparisons 
+df_y = df_y[!duplicated(t(apply(df_y,1,sort))),] %>% 
+  filter(!(y1 == y2))
+
+# make two df to join all unique comparisons  
+df1 = df |> 
+  select(BASIN, y1 = YEAR, hv1 = hv, cent1 = centroid)
+
+df2 = df |> 
+  select(BASIN, y2 = YEAR, hv2 = hv, cent2 = centroid)
+
+
+# create data frame of all data and make yearly comparisons
+df_cd = tibble(BASIN = rep(unique(df$BASIN),
+                           each = nrow(df_y)),
+               y1 = rep(df_y$y1, times = length(unique(df$BASIN))),
+               y2 = rep(df_y$y2, times = length(unique(df$BASIN)))) |> 
+  inner_join(df1, by = c('BASIN', 'y1')) |> 
+  inner_join(df2, by = c('BASIN', 'y2')) |> 
+  mutate(ychange = y2-y1,
+  # join hypervolumes in a set for centroid distance
+         set = map2(hv1,hv2, \(hv1, hv2) hypervolume_set(hv1, hv2, check.memory = F, verbose = F)),
+  # calculate centroid distance 
+         dist_cent = map2_dbl(hv1, hv2, \(hv1,hv2) hypervolume_distance(hv1, hv2, type = 'centroid', check.memory=F)),
+  # calculate the difference of centroid of each axis
+         dif = map2(cent1, cent2, \(cent1,cent2) cent2 - cent1)) |> 
+  #unnest centroid differences
+  unnest_wider(dif) |> 
+  # select only metrics of interest
+  select(BASIN, y1, y2, ychange,  
+         dist_cent, TT, HW, SF, sg_rich, TMA, TDR)
+
+# save output
+write_csv(df_cd, 'data/SAV_centDist.csv')
+
+
+
## Rows: 465 Columns: 11
+## ── Column specification ────────────────────────────────────────────────────────
+## Delimiter: ","
+## chr  (1): BASIN
+## dbl (10): y1, y2, ychange, dist_cent, TT, HW, SF, sg_rich, TMA, TDR
+## 
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+
+
+
df_cd
+## # A tibble: 465 × 11
+##    BASIN    y1    y2 ychange dist_cent     TT      HW        SF  sg_rich     TMA
+##    <chr> <dbl> <dbl>   <dbl>     <dbl>  <dbl>   <dbl>     <dbl>    <dbl>   <dbl>
+##  1 EAG    2007  2008       1     0.502 -0.109 -0.0440  -1.15e-5  0.0104   0.0467
+##  2 EAG    2007  2009       2     0.493 -0.262 -0.0265  -5.03e-5 -0.0846   0.0631
+##  3 EAG    2007  2010       3     0.576 -0.174 -0.0810  -3.60e-5 -0.143    0.303 
+##  4 EAG    2007  2011       4     0.616 -0.210 -0.0625  -2.13e-5 -0.0286   0.572 
+##  5 EAG    2007  2012       5     0.373 -0.150 -0.0462  -3.45e-5  0.0162   0.329 
+##  6 EAG    2007  2013       6     0.227 -0.140 -0.0291   9.11e-6  0.0763   0.0580
+##  7 EAG    2007  2014       7     0.454 -0.409 -0.0134  -2.21e-5 -0.0539   0.0827
+##  8 EAG    2007  2015       8     0.564 -0.445 -0.0243   1.43e-5  0.00913  0.315 
+##  9 EAG    2007  2016       9     0.490 -0.320 -0.0981  -1.55e-5 -0.170    0.169 
+## 10 EAG    2007  2017      10     0.339 -0.210 -0.0607  -2.36e-5 -0.0816  -0.0509
+## # ℹ 455 more rows
+## # ℹ 1 more variable: TDR <dbl>
+
+

Plot centroid distance for each basin.

+
+
df_cd = read_csv('data/SAV_centDist.csv') |> 
+  mutate(BASIN = factor(BASIN, levels = 
+                          c('JON', 'RKB', 'RAN', 'EAG')))
+## Rows: 465 Columns: 11
+## ── Column specification ────────────────────────────────────────────────────────
+## Delimiter: ","
+## chr  (1): BASIN
+## dbl (10): y1, y2, ychange, dist_cent, TT, HW, SF, sg_rich, TMA, TDR
+## 
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+
+ggplot(df_cd, aes(BASIN, dist_cent, fill = BASIN))+
+  geom_hline(aes(yintercept = 1), linetype = 'dashed', linewidth = 1)+
+  geom_point(aes(color = BASIN), size = 1, 
+             position=position_jitterdodge(dodge.width = 1, jitter.width = 1))+
+  # geom_errorbar(aes(ymin = lc, ymax = uc), linewidth = 2, width = 0)+
+  geom_boxplot(alpha = 0.6, outliers = F)+
+  labs(x = 'Basin', y = 'Centroid distance')+
+  scale_fill_viridis_d(option = 'turbo')+
+  scale_color_viridis_d(option = 'turbo')+
+  theme_bw()+
+  theme(axis.title = element_text(size = 14), 
+        axis.text = element_text(size = 14, colour = "gray0"), 
+        plot.title = element_text(size = 14, hjust=0.5),
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        legend.position = 'none',
+        legend.title = element_text(size = 14),
+        strip.text.x = element_text(size = 14),
+        legend.text = element_text(size = 12))
+
+

+
+
+
+
+

Trend in stability

+

We can look across the number of years between comparisons to understand the trend in stability. By fitting three candidate models we can determine the trend in centroid distance over time. When the intercept model is best, the trend is static and does not change with the number of years between comparisons. A linear trend can indicate a shift in state over time, and a quadratic trend with a maximum at intermediate values can indicate a disturbance followed by a recovery in state. In the code below, the intercept model is kept whenever it is within 4 AICc units of the best model; otherwise the linear or quadratic model with the lower AICc is chosen.

+
+
library(MuMIn)
+df_cd = df_cd |> 
+  group_by(BASIN) |>
+  nest() |> 
+  # fit intercept, linear, and quadratic model
+  mutate(m_int = map(data, \(df)lm(dist_cent~1, data = df)),
+         m_lin = map(data, \(df)lm(dist_cent~ychange, data = df)),
+         m_quad = map(data, \(df)lm(dist_cent~ychange + I(ychange^2), data = df)),
+         AICc_int = map_dbl(m_int, \(x) AICc(x)),
+         AICc_lin = map_dbl(m_lin, \(x) AICc(x)),
+         AICc_quad = map_dbl(m_quad, \(x) AICc(x)),
+         model = case_when(
+           AICc_int - min(c(AICc_int,AICc_lin,AICc_quad)) <= 4 ~ 'Intercept',
+           AICc_lin < AICc_quad ~ 'Linear',
+           AICc_quad < AICc_lin ~ 'Quadratic'))
+
+# unnest data 
+d = df_cd |> 
+  select(BASIN, data, model) |> 
+  unnest(cols = c(data)) |>  
+  mutate(BASIN = factor(BASIN, levels = 
+                          c('JON', 'RKB', 'RAN', 'EAG')))
+
+ggplot(d, aes(ychange, dist_cent, color = BASIN))+
+  geom_hline(aes(yintercept = 1), linetype = 'dashed')+
+  geom_point(size = 2.5)+
+  geom_smooth(data = d |> filter(model == 'Intercept'),
+              method = 'lm', formula = y~1, 
+              linewidth = 1, color = 'black')+
+  geom_smooth(data = d |> filter(model == 'Linear'),
+              method = 'lm', formula = y~x, 
+              linewidth = 1, color = 'black')+
+  geom_smooth(data = d |> filter(model == 'Quadratic'),
+              method = 'lm', formula = y~x+I(x^2), 
+              linewidth = 1, color = 'black')+
+  facet_wrap(~BASIN,  nrow = 1)+
+  labs(x = 'Years between comparison', y = 'Centroid distance')+
+  scale_color_viridis_d(option = 'turbo')+
+  theme_bw()+
+  theme(axis.title = element_text(size = 14), 
+        axis.text.y = element_text(size = 14, colour = "black"),
+        axis.text.x = element_text(size = 12, colour = "black"), 
+        plot.title = element_text(size = 14, hjust=0.5),
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        legend.position = 'none',
+        legend.title = element_text(size = 14),
+        strip.text.x = element_text(size = 14),
+        legend.text = element_text(size = 12))
+
+

+
+
+
+
+

Variable importance

+

Because centroid distance is the multivariate difference in mean conditions between hypervolumes, we can determine the influence of each axis on the overall change. This can be done by removing an axis and calculating the Euclidean distance without that axis, using the formula \[ imp_x = \frac{cd}{cd_x} - 1 \] where \(imp_x\) is the importance of axis \(x\), \(cd\) is the centroid distance with all axes, and \(cd_x\) is the centroid distance excluding axis \(x\).
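A toy illustration of this calculation (hypothetical centroid differences, not values from the SAV data):

d_ax = c(TT = 0.40, HW = 0.10, TMA = 0.05)  # made-up centroid differences for three axes
cd = sqrt(sum(d_ax^2))                      # centroid distance using all axes (~0.42)
cd_TT = sqrt(sum(d_ax[c('HW','TMA')]^2))    # distance recalculated without the TT axis (~0.11)
cd/cd_TT - 1                                # importance of TT (~2.7); the other axes give values near 0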

+
+
# pivot data longer 
+df_c = read_csv('data/SAV_centDist.csv') |> 
+  mutate(across(TT:TDR, \(x) x^2)) |> 
+  pivot_longer(TT:TDR, names_to = 'axis', values_to = 'dist')
+## Rows: 465 Columns: 11
+## ── Column specification ────────────────────────────────────────────────────────
+## Delimiter: ","
+## chr  (1): BASIN
+## dbl (10): y1, y2, ychange, dist_cent, TT, HW, SF, sg_rich, TMA, TDR
+## 
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+
+# create vector of unique axes
+ax = unique(df_c$axis)
+
+# for loop to calculate variable importance of each axis
+for(i in 1:length(ax)){
+  d = df_c |> 
+    # remove axis 
+    filter(axis != ax[i]) |> 
+    group_by(BASIN,y1,y2,ychange,dist_cent) |> 
+    #calculate euclidean distance without axis 
+    summarise(cd = sqrt(sum(dist))) |> 
+    # calculate importance of axis
+    mutate(imp = (dist_cent/cd) - 1,
+           axis = ax[i])
+  
+  # bind data into new data frame to store 
+  if(i == 1){
+    df_imp = d
+  }else{
+    df_imp = bind_rows(df_imp, d)
+  }
+}
+## `summarise()` has grouped output by 'BASIN', 'y1', 'y2', 'ychange'. You can
+## override using the `.groups` argument.
+## `summarise()` has grouped output by 'BASIN', 'y1', 'y2', 'ychange'. You can
+## override using the `.groups` argument.
+## `summarise()` has grouped output by 'BASIN', 'y1', 'y2', 'ychange'. You can
+## override using the `.groups` argument.
+## `summarise()` has grouped output by 'BASIN', 'y1', 'y2', 'ychange'. You can
+## override using the `.groups` argument.
+## `summarise()` has grouped output by 'BASIN', 'y1', 'y2', 'ychange'. You can
+## override using the `.groups` argument.
+## `summarise()` has grouped output by 'BASIN', 'y1', 'y2', 'ychange'. You can
+## override using the `.groups` argument.
+
+# calculate relative importance across all years for each basin
+df_cdi = df_imp |> 
+  #filter(ychange == 1) |>
+  group_by(BASIN,axis) |>
+  summarize(imp = mean(imp)) |>
+  group_by(BASIN) |> 
+  mutate(s_imp = imp/max(imp))|> 
+  mutate(BASIN = factor(BASIN, levels = 
+                          c('JON', 'RKB', 'RAN', 'EAG')),
+         axis = factor(axis, levels = c('TDR', 'TMA', 'sg_rich',
+                                        'SF', 'HW', 'TT')))
+## `summarise()` has grouped output by 'BASIN'. You can override using the
+## `.groups` argument.
+# labeler function for plotting
+y_label_formatter = function(x) {
+  ifelse(x %% 1 == 0, formatC(x, format = "f", digits = 0), formatC(x, format = "f", digits = 2))
+}
+
+ggplot(df_cdi, aes(axis, s_imp, fill = BASIN))+
+  geom_col()+
+  labs(x = 'Variable', y = 'Centroid distance variable importance')+
+  coord_flip()+
+  theme_bw()+
+  facet_wrap(~BASIN,  nrow = 2)+
+  scale_y_continuous(
+    breaks = c(0.0, 0.25, 0.5, 0.75, 1.0),
+    limits = c(0, 1),
+    labels = y_label_formatter) +
+  scale_x_discrete(labels = c('Total drift algae', 'Total macroalgae', 'Seagrass richness',
+                              expression(italic('Syringodium filiforme')), 
+                              expression(italic('Halodule wrightii')),
+                              expression(italic('Thalassia testudinum'))))+
+  scale_fill_viridis_d(option = 'turbo')+
+  theme(axis.title = element_text(size = 14), 
+        axis.text = element_text(size = 14, colour = "gray0"), 
+        plot.title = element_text(size = 14, hjust=0.5),
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        legend.position = 'none',
+        legend.title = element_text(size = 14),
+        strip.text.x = element_text(size = 14),
+        legend.text = element_text(size = 12))
+
+

+
+
+
+
+

Background info

+
+

Merge/Join

+

If two data frames contain different columns of data, then they can be merged together with the family of join functions.

+

+left_join() = uses left df as template and joins all matching columns from right df
+right_join() = uses right df as template and joins all matching columns from left df
+inner_join() = only matches columns contained in both dfs
+full_join() = combines all rows in both dfs

+
+
library(tidyverse)
+## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+## ✔ dplyr     1.1.4     ✔ readr     2.1.5
+## ✔ forcats   1.0.0     ✔ stringr   1.5.1
+## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
+## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
+## ✔ purrr     1.0.2     
+## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+## ✖ dplyr::filter() masks stats::filter()
+## ✖ dplyr::lag()    masks stats::lag()
+## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+
+left = tibble(name = c('a', 'b', 'c'),
+              n = c(1, 6, 7), 
+              bio = c(100, 43, 57))
+
+right = tibble(name = c('a', 'b', 'd', 'e'),
+               cals = c(500, 450, 570, 600))
+
+left_join(left, right, by = 'name')
+## # A tibble: 3 × 4
+##   name      n   bio  cals
+##   <chr> <dbl> <dbl> <dbl>
+## 1 a         1   100   500
+## 2 b         6    43   450
+## 3 c         7    57    NA
+
+right_join(left, right, by = 'name')
+## # A tibble: 4 × 4
+##   name      n   bio  cals
+##   <chr> <dbl> <dbl> <dbl>
+## 1 a         1   100   500
+## 2 b         6    43   450
+## 3 d        NA    NA   570
+## 4 e        NA    NA   600
+
+inner_join(left, right, by = 'name')
+## # A tibble: 2 × 4
+##   name      n   bio  cals
+##   <chr> <dbl> <dbl> <dbl>
+## 1 a         1   100   500
+## 2 b         6    43   450
+
+full_join(left, right, by = 'name')
+## # A tibble: 5 × 4
+##   name      n   bio  cals
+##   <chr> <dbl> <dbl> <dbl>
+## 1 a         1   100   500
+## 2 b         6    43   450
+## 3 c         7    57    NA
+## 4 d        NA    NA   570
+## 5 e        NA    NA   600
+
+# multiple matches
+fish = tibble(species = rep(c('Salmon', 'Cod'),times = 3),
+              year = rep(c(1999,2005,2020), each = 2),
+              catch = c(50, 60, 40, 50, 60, 100))
+
+col = tibble(species = c('Salmon', 'Cod'),
+             coast = c('West', 'East'))
+
+left_join(fish, col, by = 'species')
+## # A tibble: 6 × 4
+##   species  year catch coast
+##   <chr>   <dbl> <dbl> <chr>
+## 1 Salmon   1999    50 West 
+## 2 Cod      1999    60 East 
+## 3 Salmon   2005    40 West 
+## 4 Cod      2005    50 East 
+## 5 Salmon   2020    60 West 
+## 6 Cod      2020   100 East
+
+
+
+

scaling data

+

Because hypervolumes can be generated with any continuous data as axes, the units of the axes are often not compatible. Blonder et al. 2014 & 2018 recommend converting all of the axes into the same units. This can be done by taking the z-score of the values to convert units into standard deviations. Z-scoring data can be done with the formula: \[ z = \frac{x_{i}-\overline{x}}{sd} \] Where \(x_{i}\) is a value, \(\overline{x}\) is the mean, and \(sd\) is the standard deviation. By z-scoring each axis, 0 is the mean of that axis, a value of 1 means that the value is 1 standard deviation above the global mean of that axis, and a value of -1 is 1 standard deviation below the global mean of that axis. In R this can be done manually or with the scale() function.

+
+
fish = tibble(species = rep(c('Salmon', 'Cod'),times = 3),
+              year = rep(c(1999,2005,2020), each = 2),
+              catch = c(50, 60, 40, 50, 60, 100))
+
+#
+fish = fish |> 
+  mutate(zcatch1 = (catch - mean(catch))/sd(catch), # manual
+         zcatch2 = scale(catch)) # with scale
+
+fish 
+## # A tibble: 6 × 5
+##   species  year catch zcatch1 zcatch2[,1]
+##   <chr>   <dbl> <dbl>   <dbl>       <dbl>
+## 1 Salmon   1999    50  -0.477      -0.477
+## 2 Cod      1999    60   0           0    
+## 3 Salmon   2005    40  -0.953      -0.953
+## 4 Cod      2005    50  -0.477      -0.477
+## 5 Salmon   2020    60   0           0    
+## 6 Cod      2020   100   1.91        1.91
+
+# center = mean, scale = sd
+fish$zcatch2
+##            [,1]
+## [1,] -0.4767313
+## [2,]  0.0000000
+## [3,] -0.9534626
+## [4,] -0.4767313
+## [5,]  0.0000000
+## [6,]  1.9069252
+## attr(,"scaled:center")
+## [1] 60
+## attr(,"scaled:scale")
+## [1] 20.97618
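Note that scale() returns a one-column matrix with centering and scaling attributes, which is why the column prints as zcatch2[,1] above. If a plain numeric column is preferred, one option (a small sketch) is to coerce it:

```r
# drop the matrix structure so the z-scored column is a plain numeric vector
fish = fish |> 
  mutate(zcatch2 = as.numeric(scale(catch)))
```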
+
+
+
+

nesting data

+

One benefit of tibbles is that they can contain list columns. This means that we can make columns of tibbles that are nested within a dataset. Nesting creates a list-column of data frames; unnesting flattens it back out into regular columns. Nesting is an implicitly summarising operation: you get one row for each group defined by the non-nested columns. This is useful in conjunction with other summaries that work with whole datasets, most notably models. Nesting can be done with nest() and the result flattened with unnest()

+
+
fish = tibble(species = rep(c('Salmon', 'Cod'),times = 3),
+              year = rep(c(1999,2005,2020), each = 2),
+              catch = c(50, 60, 40, 50, 60, 100))
+
+# using group_by
+fish_nest = fish |> 
+  group_by(species) |> 
+  nest()
+
+fish_nest
+## # A tibble: 2 × 2
+## # Groups:   species [2]
+##   species data            
+##   <chr>   <list>          
+## 1 Salmon  <tibble [3 × 2]>
+## 2 Cod     <tibble [3 × 2]>
+fish_nest$data
+## [[1]]
+## # A tibble: 3 × 2
+##    year catch
+##   <dbl> <dbl>
+## 1  1999    50
+## 2  2005    40
+## 3  2020    60
+## 
+## [[2]]
+## # A tibble: 3 × 2
+##    year catch
+##   <dbl> <dbl>
+## 1  1999    60
+## 2  2005    50
+## 3  2020   100
+
+# using .by in nest
+# column name becomes data unless you change .key
+fish_nest2 = fish |> 
+  nest(.by = year, .key = 'df')
+
+fish_nest2
+## # A tibble: 3 × 2
+##    year df              
+##   <dbl> <list>          
+## 1  1999 <tibble [2 × 2]>
+## 2  2005 <tibble [2 × 2]>
+## 3  2020 <tibble [2 × 2]>
+fish_nest2$df
+## [[1]]
+## # A tibble: 2 × 2
+##   species catch
+##   <chr>   <dbl>
+## 1 Salmon     50
+## 2 Cod        60
+## 
+## [[2]]
+## # A tibble: 2 × 2
+##   species catch
+##   <chr>   <dbl>
+## 1 Salmon     40
+## 2 Cod        50
+## 
+## [[3]]
+## # A tibble: 2 × 2
+##   species catch
+##   <chr>   <dbl>
+## 1 Salmon     60
+## 2 Cod       100
+
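The nested data can be flattened back out with unnest(); as a quick sketch using the fish_nest object created above:

```r
# flatten the nested list-column back into regular columns
fish_nest |> 
  unnest(cols = data)
```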
+
+
+

map

+
+

purrr

+

The newest, and now standard, package in the tidyverse for iteration is purrr, with its set of map() functions. These are similar to the plyr (and base) and dplyr functions, but with more consistent names and arguments. Notice that the map function used specifies the type of output:
map() makes a list.
map_lgl() makes a logical vector.
map_int() makes an integer vector.
map_dbl() makes a double vector.
map_chr() makes a character vector.

+
+
df = iris  |> 
+  select(-Species)
+#summary statistics
+map_dbl(df, mean)
+## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
+##     5.843333     3.057333     3.758000     1.199333
+
+# using map with mutate and nest
+d = tibble(species = rep(c('Salmon', 'Cod'),times = 3),
+           year = rep(c(1999,2005,2020), each = 2),
+           catch = c(50, 60, 40, 50, 60, 100)) |> 
+  nest(.by = species) |> 
+  mutate(correlation = map(data, \(data) cor.test(data$year, data$catch)))
+
+d
+## # A tibble: 2 × 3
+##   species data             correlation
+##   <chr>   <list>           <list>     
+## 1 Salmon  <tibble [3 × 2]> <htest>    
+## 2 Cod     <tibble [3 × 2]> <htest>
+d$correlation
+## [[1]]
+## 
+##  Pearson's product-moment correlation
+## 
+## data:  data$year and data$catch
+## t = 0.96225, df = 1, p-value = 0.5122
+## alternative hypothesis: true correlation is not equal to 0
+## sample estimates:
+##       cor 
+## 0.6933752 
+## 
+## 
+## [[2]]
+## 
+##  Pearson's product-moment correlation
+## 
+## data:  data$year and data$catch
+## t = 1.963, df = 1, p-value = 0.3
+## alternative hypothesis: true correlation is not equal to 0
+## sample estimates:
+##       cor 
+## 0.8910421
+
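Because correlation is a list-column of htest objects, the typed map variants can pull single values back out of each element; a short sketch using the d object built above:

```r
# extract the correlation estimate and p-value from each htest object
d |> 
  mutate(r = map_dbl(correlation, \(x) x$estimate),
         p = map_dbl(correlation, \(x) x$p.value)) |> 
  select(species, r, p)
```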
+ + +
+
+
+ +
+ +
+ + + + \ No newline at end of file diff --git a/docs/ex1_files/figure-html/unnamed-chunk-11-1.png b/docs/ex1_files/figure-html/unnamed-chunk-11-1.png new file mode 100644 index 0000000..359cb03 Binary files /dev/null and b/docs/ex1_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/docs/ex1_files/figure-html/unnamed-chunk-12-1.png b/docs/ex1_files/figure-html/unnamed-chunk-12-1.png new file mode 100644 index 0000000..fef88ac Binary files /dev/null and b/docs/ex1_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/docs/ex1_files/figure-html/unnamed-chunk-13-1.png b/docs/ex1_files/figure-html/unnamed-chunk-13-1.png new file mode 100644 index 0000000..ed21cdc Binary files /dev/null and b/docs/ex1_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/ex1_files/figure-html/unnamed-chunk-7-1.png b/docs/ex1_files/figure-html/unnamed-chunk-7-1.png new file mode 100644 index 0000000..52adbbc Binary files /dev/null and b/docs/ex1_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/ex2.html b/docs/ex2.html new file mode 100644 index 0000000..ad149a6 --- /dev/null +++ b/docs/ex2.html @@ -0,0 +1,640 @@ + + + + + + + + + + + +ISWB 15 Hypervolume Workshop - Example 2: Trait diversity of coral communities + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + + + +
+ +
+
+

Example 2: Trait diversity of coral communities

+
+ + + +
+ +
+
Author
+
+

W. Ryan James

+
+
+ +
+
Published
+
+

June 21, 2024

+
+
+ + +
+ + +
+ +
+

Trait diversity of coral communities in Puerto Rico

+

This vignette uses hypervolumes to understand the spatial variation in trait diversity of coral reef communities. Hypervolumes are generated using species trait data and weighted by percent cover values drawn randomly from the mean and sd of each species across all sites within a region. This process is repeated 100 times. Hypervolume size is used to quantify the trait diversity of each community.

+

R script

+
+
+

data

+

The data used for this example comes from NOAA, which monitors coral benthic communities throughout Puerto Rico. The benthic cover data consists of percent cover (pc = mean percent cover, pc_sd = standard deviation of percent cover) of coral species collected across 8 regions. Data is averaged across sites within each region. The coral trait data is based on average values of each trait for each species from published literature values.

+
+
+

+
Map of sampling basins
+
+
+
+
# load libraries
+library(tidyverse)
+## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+## ✔ dplyr     1.1.4     ✔ readr     2.1.5
+## ✔ forcats   1.0.0     ✔ stringr   1.5.1
+## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
+## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
+## ✔ purrr     1.0.2     
+## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+## ✖ dplyr::filter() masks stats::filter()
+## ✖ dplyr::lag()    masks stats::lag()
+## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+library(hypervolume)
+## Loading required package: Rcpp
+library(truncnorm)
+
+# load trait data
+df_tr = read_csv('data/coral_traits.csv') 
+## Rows: 13 Columns: 6
+## ── Column specification ────────────────────────────────────────────────────────
+## Delimiter: ","
+## chr (1): species
+## dbl (5): corallite diameter, growth rate, skeletal density, symbiodinium den...
+## 
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+df_tr
+## # A tibble: 13 × 6
+##    species                 `corallite diameter` `growth rate` `skeletal density`
+##    <chr>                                  <dbl>         <dbl>              <dbl>
+##  1 Acropora cervicornis                    1.2         111.                1.51 
+##  2 Acropora palmata                        0.65         71.0               1.84 
+##  3 Agaricia spp.                           2.54          1.42              2.16 
+##  4 Colpophyllia natans                    22.5           5.56              0.764
+##  5 Diploria labyrinthifor…                 7             3.99              1.48 
+##  6 Montastraea cavernosa                   6.06          4.51              1.70 
+##  7 Orbicella annularis                     2.45          7.31              1.73 
+##  8 Orbicella faveolata                     2.38          8.59              1.18 
+##  9 Orbicella franksi                       3.08          5.22              2.12 
+## 10 Porites astreoides                      1.39          3.64              1.54 
+## 11 Porites furcata                         1.7          29.6               1.05 
+## 12 Pseudodiploria spp.                     5.87          4.36              1.45 
+## 13 Siderastrea spp.                        3.56          5.32              1.64 
+## # ℹ 2 more variables: `symbiodinium density` <dbl>,
+## #   `colony maximum diameter` <dbl>
+
+# load benthic community percent cover data across regions
+# pc = percent cover 0-100
+# pc_sd = standard deviation in percent cover
+df_ben = read_csv('data/coral_PC.csv')
+## Rows: 52 Columns: 4
+## ── Column specification ────────────────────────────────────────────────────────
+## Delimiter: ","
+## chr (2): species, region
+## dbl (2): pc, pc_sd
+## 
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+df_ben
+## # A tibble: 52 × 4
+##    species              region              pc  pc_sd
+##    <chr>                <chr>            <dbl>  <dbl>
+##  1 Acropora cervicornis North/Northeast 0.0617  0.237
+##  2 Acropora cervicornis South           0.0536  0.268
+##  3 Acropora cervicornis Southwest       0.16    0.505
+##  4 Acropora cervicornis West            0.0647  0.541
+##  5 Acropora palmata     Southwest       3.94   10.1  
+##  6 Acropora palmata     West            5.04   13.9  
+##  7 Colpophyllia natans  Mona/Desecheo   0.653   1.55 
+##  8 Colpophyllia natans  South           0.467   1.39 
+##  9 Colpophyllia natans  Southwest       0.317   1.05 
+## 10 Colpophyllia natans  Vieques/Culebra 0.0757  0.296
+## # ℹ 42 more rows
+
+
+
+

Prep data

+

To generate hypervolumes, a random percent cover for each species is drawn based on the mean and sd of percent cover of that species across all sites in the region. This process is repeated 100 times.
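As a quick illustration of the truncated draws, here is a small sketch using rtruncnorm() directly, with example mean and sd values taken from the Acropora palmata row for the Southwest region printed above:

```r
library(truncnorm)

# five draws of percent cover bounded between 0.001 and 100
rtruncnorm(5, a = 0.001, b = 100, mean = 3.94, sd = 10.1)
```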

+
+
reps = 100
+
+set.seed(14)
+df = df_ben |> 
+  # join trait data to benthic data
+  left_join(df_tr, by = 'species') |> 
+  # duplicate the data to create the number of reps needed
+  slice(rep(1:n(), each=reps))|> 
+  # randomly generate percent cover
+  mutate(i = rep(1:reps, times=nrow(df_ben)),
+         percentcover = truncnorm::rtruncnorm(1, a = 0.001, b = 100,
+                                              mean = pc, sd = pc_sd)) |> 
+  select(region, `corallite diameter`:`percentcover`)
+
+df 
+## # A tibble: 5,200 × 8
+##    region          `corallite diameter` `growth rate` `skeletal density`
+##    <chr>                          <dbl>         <dbl>              <dbl>
+##  1 North/Northeast                  1.2          111.               1.51
+##  2 North/Northeast                  1.2          111.               1.51
+##  3 North/Northeast                  1.2          111.               1.51
+##  4 North/Northeast                  1.2          111.               1.51
+##  5 North/Northeast                  1.2          111.               1.51
+##  6 North/Northeast                  1.2          111.               1.51
+##  7 North/Northeast                  1.2          111.               1.51
+##  8 North/Northeast                  1.2          111.               1.51
+##  9 North/Northeast                  1.2          111.               1.51
+## 10 North/Northeast                  1.2          111.               1.51
+## # ℹ 5,190 more rows
+## # ℹ 4 more variables: `symbiodinium density` <dbl>,
+## #   `colony maximum diameter` <dbl>, i <int>, percentcover <dbl>
+
+
+
+

Hypervolumes

+

Values are z-scored across all sites and species for each replication separately. Here we nest two columns: one with all of the input data for the hypervolume (data) and one with a vector of the randomly generated percent cover that will be used to weight the hypervolume (weight). Hypervolume size is extracted from each hypervolume using map_dbl() and get_volume().

+
+
# z-score across all regions for each rep and generate hypervolumes
+df = df |> 
+  # z-score across regions for each rep
+  group_by(i) |> 
+  mutate(across(`corallite diameter`:`colony maximum diameter`, scale)) |> 
+  # nest data for each region and rep to make hypervolume
+  group_by(region, i) |> 
+  # create a column for the percent cover to weight hypervolume as well as input data
+  nest(weight = percentcover, data = `corallite diameter`:`colony maximum diameter`) |> 
+  # create community weighted hypervolumes 
+  mutate(hv = map2(data,weight, \(data,weight) hypervolume_gaussian(data, 
+                                                                    name = paste(region,i,sep = '_'),
+                                                                    weight = weight$percentcover,
+                                                                    samples.per.point = 1000,
+                                                                    kde.bandwidth = estimate_bandwidth(data), 
+                                                                    sd.count = 3, 
+                                                                    quantile.requested = 0.95, 
+                                                                    quantile.requested.type = "probability", 
+                                                                    chunk.size = 1000, 
+                                                                    verbose = F)),
+  # extract size for each hypervolume 
+          hv_size = map_dbl(hv, \(hv) get_volume(hv)))
+
+

** Do not try to view df in RStudio; it will freeze your R session because the hypervolume column is too big

+
+
head(df)
+## # A tibble: 6 × 6
+## # Groups:   region, i [6]
+##   region              i weight           data             hv         hv_size
+##   <chr>           <int> <list>           <list>           <list>       <dbl>
+## 1 North/Northeast     1 <tibble [8 × 1]> <tibble [8 × 5]> <Hypervlm>    164.
+## 2 North/Northeast     2 <tibble [8 × 1]> <tibble [8 × 5]> <Hypervlm>    258.
+## 3 North/Northeast     3 <tibble [8 × 1]> <tibble [8 × 5]> <Hypervlm>    307.
+## 4 North/Northeast     4 <tibble [8 × 1]> <tibble [8 × 5]> <Hypervlm>    268.
+## 5 North/Northeast     5 <tibble [8 × 1]> <tibble [8 × 5]> <Hypervlm>    218.
+## 6 North/Northeast     6 <tibble [8 × 1]> <tibble [8 × 5]> <Hypervlm>    218.
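If you want to check the results without printing the list columns, one option (a sketch) is to keep only the scalar columns:

```r
# inspect only the non-list columns
df |> 
  ungroup() |> 
  select(region, i, hv_size) |> 
  head()
```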
+
+
+
+

Hypervolume size

+

We can calculate the mean and 95% CI of hypervolume size across the 100 replicates to determine the trait diversity of coral communities in each region.

+
+
# plot hypervolume size
+d = df |> 
+  group_by(region) |> 
+  summarize(mean = mean(hv_size),
+            upper = quantile(hv_size, 0.975),
+            lower = quantile(hv_size, 0.025)) |> 
+  mutate(region = factor(region, 
+                         levels = c('North/Northeast', 'Vieques/Culebra',
+                                    'Southeast', 'South', 'Southwest',
+                                     'West', 'Mona/Desecheo'),
+                         labels = c('North/\nNortheast', 'Vieques/\nCulebra',
+                                    'Southeast', 'South', 'Southwest',
+                                    'West', 'Mona/\nDesecheo')))
+
+
+ggplot(d, aes(region, mean, color = region))+
+  geom_pointrange(aes(ymin = lower, ymax = upper), size = 1.5, linewidth = 2)+
+  labs(x = 'Region', y = 'Trait diversity', color = 'Region')+
+  scale_y_log10()+
+  scale_color_viridis_d(option = 'turbo')+
+  theme_bw()+
+  theme(axis.title = element_text(size = 14), 
+        axis.text = element_text(size = 14, colour = "gray0"), 
+        plot.title = element_text(size = 14, hjust=0.5),
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        legend.position = 'none',
+        legend.title = element_text(size = 14),
+        strip.text.x = element_text(size = 14),
+        legend.text = element_text(size = 12))
+
+

+
+
+ + +
+ +
+ +
+ + + + \ No newline at end of file diff --git a/docs/ex2_files/figure-html/unnamed-chunk-6-1.png b/docs/ex2_files/figure-html/unnamed-chunk-6-1.png new file mode 100644 index 0000000..4cc230d Binary files /dev/null and b/docs/ex2_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index 342dd46..0000000 --- a/docs/index.html +++ /dev/null @@ -1,388 +0,0 @@ - - - - - - - - - -ISWB 15 Hypervolume Workshop - ISBW 15 Workshop: Hypervolume modeling - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
- - - - -
- -
-
-

ISBW 15 Workshop: Hypervolume modeling

-
- - - -
- - - - -
- - -
- -
-

BSC 6926 B52: R workshop on population and community ecological modeling

-

This is the course website for the R workshop that coincides with PCB 5423 (Advanced Ecology). This website will have the Rmarkdown lessons for each workshop. Find the course schedule and Syllabus here. This course will be based in R and information about downloading R and Rstudio can be found here.

-
-
-

Class Resources

- -
-
-

R Resources

-
  • R for Data Science by Hadley Wickham and Garret Grolemund – An introduction to programming with R: https://r4ds.hadley.nz/
  • Quick-R by datacamp: Quick overview on R programming and statistical approaches. There are more tutorials, but you will be required to register
  • RStudio Cloud Training Exercises: https://rstudio.cloud/learn/primers
  • Virtual Ecology Portal/EcoVirtual R Package: Website that provides various examples of population and community models that will be discussed in class and the workshop. There is also an R package (EcoVirtual) you can use to run various models included on this website: http://ecovirtual.ib.usp.br/doku.php?id=start
  • ModernDive: Introductory book on R and statistical inference: https://moderndive.com/index.html
  • Rstudio: learn R https://education.rstudio.com/learn/beginner/
- - -
- -
- -
- - - - \ No newline at end of file diff --git a/docs/maps/map_stab.png b/docs/maps/map_stab.png new file mode 100644 index 0000000..d9c6e4b Binary files /dev/null and b/docs/maps/map_stab.png differ diff --git a/docs/search.json b/docs/search.json deleted file mode 100644 index 34d1f61..0000000 --- a/docs/search.json +++ /dev/null @@ -1,23 +0,0 @@ -[ - { - "objectID": "about.html", - "href": "about.html", - "title": "About", - "section": "", - "text": "About this site\n\n1 + 1\n\n[1] 2" - }, - { - "objectID": "comp.html", - "href": "comp.html", - "title": "ISWB 15 Hypervolume Workshop", - "section": "", - "text": "R and RStudio\nR and RStudio are separate downloads and installations. R is the underlying statistical computing environment, but using R alone is no fun. RStudio is a graphical integrated development environment (IDE) that makes using R much easier and more interactive. You need to install R before you install RStudio. In the sections below are the instructions for installing R and R Studio on your operating system.\n\nWindows\n\nIf you already have R and RStudio installed\n\nOpen RStudio, and click on “Help” > “Check for updates”. If a new version is available, quit RStudio, and download the latest version for RStudio.\nTo check which version of R you are using, start RStudio and the first thing that appears in the console indicates the version of R you are running. Alternatively, you can type sessionInfo(), which will also display which version of R you are running. Go on the CRAN website and check whether a more recent version is available. If so, please download and install it. You can check here for more information on how to remove old versions from your system if you wish to do so.\n\n\n\nIf you don’t have R and RStudio installed\n\nDownload R from the CRAN website.\nRun the .exe file that was just downloaded\nGo to the RStudio download page\nUnder Installers select RStudio x.yy.zzz - Windows 10/11 (where x, y, and z represent version numbers)\nDouble click the file to install it\nOnce it’s installed, open RStudio to make sure it works and you don’t get any error messages.\n\n\n\n\nmacOS\n\nIf you already have R and RStudio installed\n\nOpen RStudio, and click on “Help” > “Check for updates”. If a new version is available, quit RStudio, and download the latest version for RStudio.\nTo check the version of R you are using, start RStudio and the first thing that appears on the terminal indicates the version of R you are running. Alternatively, you can type sessionInfo(), which will also display which version of R you are running. Go on the CRAN website and check whether a more recent version is available. If so, please download and install it.\n\n\n\nIf you don’t have R and RStudio installed\n\nDownload R from the CRAN website.\nSelect the .pkg file for the latest R version\nDouble click on the downloaded file to install R\nIt is also a good idea to install XQuartz (needed by some packages)\nGo to the RStudio download page\nUnder Installers select RStudio x.yy.zzz - Mac OS X 10.15+ (64-bit) (where x, y, and z represent version numbers)\nDouble click the file to install RStudio\nOnce it’s installed, open RStudio to make sure it works and you don’t get any error messages.\n\n\n\n\nLinux\n\nFollow the instructions for your distribution from CRAN, they provide information to get the most recent version of R for common distributions. 
For most distributions, you could use your package manager (e.g., for Debian/Ubuntu run sudo apt-get install r-base, and for Fedora sudo yum install R), but we don’t recommend this approach as the versions provided by this are usually out of date. In any case, make sure you have at least R 3.3.1.\nGo to the RStudio download page\nUnder Installers select the version that matches your distribution, and install it with your preferred method (e.g., with Debian/Ubuntu sudo dpkg -i rstudio-x.yy.zzz-amd64.deb at the terminal).\nOnce it’s installed, open RStudio to make sure it works and you don’t get any error messages." - }, - { - "objectID": "index.html", - "href": "index.html", - "title": "ISBW 15 Workshop: Hypervolume modeling", - "section": "", - "text": "BSC 6926 B52: R workshop on population and community ecological modeling\nThis is the course website for the R workshop that coincides with PCB 5423 (Advanced Ecology). This website will have the Rmarkdown lessons for each workshop. Find the course schedule and Syllabus here. This course will be based in R and information about downloading R and Rstudio can be found here.\n\n\nClass Resources\n\nZoom link\nGithub repository for workshop R scripts\nWebsite Github repository\nTextbook for R exercises (S) Stevens, M.H.H. 2010. A primer of ecology with R. ISBN 978-0-387-89881-0 (Electronically available at FIU Library) E-book version\n\n\n\nR Resources\n\nR for Data Science by Hadley Wickham and Garret Grolemund – An introduction to programming with R: https://r4ds.hadley.nz/\n\nQuick-R by datacamp: Quick overview on R programming and statistical approaches. There are more tutorials, but you will be required to register\n\nRStudio Cloud Training Exercises: https://rstudio.cloud/learn/primers\n\nVirtual Ecology Portal/EcoVirtual R Package: Website that provides various examples of population and community models that will be discussed in class and the workshop. There is also an R package (EcoVirtual) you can use to run various models included on this website: http://ecovirtual.ib.usp.br/doku.php?id=start\n\nModernDive: Introductory book on R and statistical inference: https://moderndive.com/index.html\nRstudio: learn R https://education.rstudio.com/learn/beginner/" - } -] \ No newline at end of file diff --git a/ex1.qmd b/ex1.qmd new file mode 100644 index 0000000..d7db36e --- /dev/null +++ b/ex1.qmd @@ -0,0 +1,457 @@ +--- +title: "Example 1: Stability of seagrass ecosystems in Florida Bay" +author: "W. Ryan James" +date: "6/21/24" +format: + html: + toc: true + theme: yeti +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(collapse = T, cache = T) +``` + +## Stability of seagrass ecosystems in Florida Bay +This vignette uses hypervolumes to understand the temporal stability of seagrass ecosystems. Hypervolumes are generated using common seagrass monitoring metrics yearly from 2007-2023 across four basins in Florida Bay, USA. Centroid distance is used to compare mean conditions across all years to determine temporal stability and variable importance is calculated to determine the metric driving stability within each basin. + +[R script](HV_ex1_sgState.R) + +## data +The data used for this example comes from the [South Florida Fisheries Habitat Assessment Program](https://myfwc.com/research/habitat/seagrasses/fhap/) which monitors seagrass habitats annually using quadrat samples. 
The [benthic cover data](https://github.com/CoastalFishScience/ISBW_HVworkshop/blob/main/data/FLbay_SAV.csv) consists of data from 4 basins and measures 6 metrics of the SAV community. Data is averaged across stations for each metric. + + - BASIN = Basin sampled + - YEAR = year of monitoring + - STATION = monitoring station + - TT = *Thalassia testudium* percent cover + - HW = *Halodule wrightii* percent cover + - SF = *Syringodium filliforme* percent cover + - TMA = total macroalgae percent cover + - TDR = total drift algae percent cover + - sg_rich = seagrass species richness\ + +![Map of sampling basins](maps/map_stab.png) + +```{r} +# load libraries +library(tidyverse) +library(hypervolume) + +# load sav monitoring data +df = read_csv('data/FLbay_SAV.csv') +head(df) +``` + +## Prepare data +Because hypervolumes can be generated with any continuous data as an axes, many of the times the units are not combatible. Blonder et al. [2014](https://doi-org.ezproxy.fiu.edu/10.1111/geb.12146) & [2018](https://doi-org.ezproxy.fiu.edu/10.1111/2041-210X.12865) to convert all of the axes into the same units. This can be done by taking the z-score of the values to convert units into standard deviations. Z-scoring data can be done with the formula: +$$ z = \frac{x_{i}-\overline{x}}{sd} $$ Where $x_{i}$ is a value, $\overline{x}$ is the mean, and $sd$ is the standard deviation. By z-scoring each axis, 0 is the mean of that axis, a value of 1 means that the value is 1 standard deviation above the global mean of that axis, and a value of -1 is 1 standard deviation below the global mean of the axis. In R this can be done manually or with the `scale()` function. + +Hypervolumes cannot be made when all values for a single axis are the same (e.g. all values 0 for a species cover in a basin for a year), so we can add a tiny bit of variation in order to make the hypervolume. + +We then can `nest()` the data to take advantage of the `purr` package and `map()`. + +```{r} +# z-score and nest data to make hypervolume +set.seed(14) +df = df |> + # z score data across all sites and years + mutate(across(c(TT:sg_rich), scale), + # add tiny amount so when all values the same can make hv + across(c(TT:sg_rich), + ~map_dbl(., ~. + rnorm(1, mean = 0, sd = 0.0001)))) |> + # remove station from dataset + select(-STATION) |> + # nest data by basin and year + group_by(BASIN, YEAR) |> + nest() +head(df) +``` + +## Generate hypervolumes +Hypervolumes are a multidimensional tool that is based on Hutchinson's *n*-dimensional niche concept and we can build them with the `hypervolume` package. \ + +With a nested dataset of our columns that we want to build hypervolumes for we can use `mutate()` and `map()` to generate the hypervolume. + +We can also use `map()` and `get_centroid()` to extract centroid values of each hypervolume. 
+```{r, eval=F} +# generate hypervolumes +df = df |> + mutate(hv = map(data, \(data) hypervolume_gaussian(data, name = paste(BASIN,YEAR,sep = '_'), + samples.per.point = 1000, + kde.bandwidth = estimate_bandwidth(data), + sd.count = 3, + quantile.requested = 0.95, + quantile.requested.type = "probability", + chunk.size = 1000, + verbose = F)), + centroid = map(hv, \(hv) get_centroid(hv))) +``` + +```{r, echo=FALSE} +df = readRDS('data/SAV_hvs.rds') +``` + +** *Do not try to open dataframe with hv column it will hang because it is too big* +```{r} +head(df) + +``` + +If wanting to save you can save output as `.rds` +```{r eval=F} +saveRDS(df, 'data/SAV_hvs.rds') +``` + +## plotting hypervolumes +We can plot multiple hypervolumes by joining them together +```{r} +hvj = hypervolume_join(df$hv[[1]], df$hv[[2]]) + +plot(hvj, pairplot = T, colors=c('goldenrod','blue'), + show.3d=FALSE,plot.3d.axes.id=NULL, + show.axes=TRUE, show.frame=TRUE, + show.random=T, show.density=TRUE,show.data=F, + show.legend=T, limits=c(-5,5), + show.contour=F, contour.lwd= 2, + contour.type='alphahull', + contour.alphahull.alpha=0.25, + contour.ball.radius.factor=1, + contour.kde.level=0.01, + contour.raster.resolution=100, + show.centroid=TRUE, cex.centroid=2, + point.alpha.min=0.2, point.dark.factor=0.5, + cex.random=0.5,cex.data=1,cex.axis=1.5,cex.names=2,cex.legend=2, + num.points.max.data = 100000, num.points.max.random = 200000, reshuffle=TRUE, + plot.function.additional=NULL, + verbose=FALSE +) +``` + + +## Centroid distance +We can use the centroid distance to compare mean conditions between years to understand the stability. Centroid distance can be calculated by a set of hypervolumes. This can be done by creating a data frame with all of the possible year combinations, and merging dataframes together to easily join. We can subtract the centroids from each comparison to generate the centroid difference of each axis. + +```{r, eval=F} +# comparison of across each year +df_y= tibble(y1 = unique(df$YEAR), + y2 = unique(df$YEAR)) |> + expand(y1,y2) + +# make all unique year comparisons +df_y = df_y[!duplicated(t(apply(df_y,1,sort))),] %>% + filter(!(y1 == y2)) + +# make two df to join all unique comparisons +df1 = df |> + select(BASIN, y1 = YEAR, hv1 = hv, cent1 = centroid) + +df2 = df |> + select(BASIN, y2 = YEAR, hv2 = hv, cent2 = centroid) + + +# create data frame of all data and make yearly comparisons +df_cd = tibble(BASIN = rep(unique(df$BASIN), + each = nrow(df_y)), + y1 = rep(df_y$y1, times = length(unique(df$BASIN))), + y2 = rep(df_y$y2, times = length(unique(df$BASIN)))) |> + inner_join(df1, by = c('BASIN', 'y1')) |> + inner_join(df2, by = c('BASIN', 'y2')) |> + mutate(ychange = y2-y1, + # join hypervolumees in a set for centroid distance + set = map2(hv1,hv2, \(hv1, hv2) hypervolume_set(hv1, hv2, check.memory = F, verbose = F)), + # calculate centroid distance + dist_cent = map2_dbl(hv1, hv2, \(hv1,hv2) hypervolume_distance(hv1, hv2, type = 'centroid', check.memory=F)), + # calculate the difference of centroid of each axis + dif = map2(cent1, cent2, \(cent1,cent2) cent2 - cent1)) |> + #unnest centroid differences + unnest_wider(dif) |> + # select only metrics of interest + select(BASIN, y1, y2, ychange, + dist_cent, TT, HW, SF, sg_rich, TMA, TDR) + +# save output +write_csv(df_cd, 'data/SAV_centDist.csv') + +``` + +```{r, echo=F} +df_cd = read_csv('data/SAV_centDist.csv') +``` + +```{r} +df_cd +``` + +Plot centroid distance for each basin. 
+```{r} +df_cd = read_csv('data/SAV_centDist.csv') |> + mutate(BASIN = factor(BASIN, levels = + c('JON', 'RKB', 'RAN', 'EAG'))) + +ggplot(df_cd, aes(BASIN, dist_cent, fill = BASIN))+ + geom_hline(aes(yintercept = 1), linetype = 'dashed', linewidth = 1)+ + geom_point(aes(color = BASIN), size = 1, + position=position_jitterdodge(dodge.width = 1, jitter.width = 1))+ + # geom_errorbar(aes(ymin = lc, ymax = uc), linewidth = 2, width = 0)+ + geom_boxplot(alpha = 0.6, outliers = F)+ + labs(x = 'Basin', y = 'Centroid distance')+ + scale_fill_viridis_d(option = 'turbo')+ + scale_color_viridis_d(option = 'turbo')+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text = element_text(size = 14, colour = "gray0"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) +``` + +## Trend in stability +We can look across the number of years to understand the trend in stability. By fitting three possible models we can determine the trend of time of the centroid distance. When intercept model is the best, we can determine that the trend is static and not changing with the number of years between comparison. If linear, the centroid distance can indicate a shift in state overtime, and a quadratic with a maximum at middle values can indicate a disturbance with recovery in state. + +```{r} +library(MuMIn) +df_cd = df_cd |> + group_by(BASIN) |> + nest() |> + # fit intercept, linear, and quadratic model + mutate(m_int = map(data, \(df)lm(dist_cent~1, data = df)), + m_lin = map(data, \(df)lm(dist_cent~ychange, data = df)), + m_quad = map(data, \(df)lm(dist_cent~ychange + I(ychange^2), data = df)), + AICc_int = map_dbl(m_int, \(x) AICc(x)), + AICc_lin = map_dbl(m_lin, \(x) AICc(x)), + AICc_quad = map_dbl(m_quad, \(x) AICc(x)), + model = case_when( + AICc_int - min(c(AICc_int,AICc_lin,AICc_quad)) <= 4 ~ 'Intercept', + AICc_lin < AICc_quad ~ 'Linear', + AICc_quad < AICc_lin ~ 'Quadratic')) + +# unnest data +d = df_cd |> + select(BASIN, data, model) |> + unnest(cols = c(data)) |> + mutate(BASIN = factor(BASIN, levels = + c('JON', 'RKB', 'RAN', 'EAG'))) + +ggplot(d, aes(ychange, dist_cent, color = BASIN))+ + geom_hline(aes(yintercept = 1), linetype = 'dashed')+ + geom_point(size = 2.5)+ + geom_smooth(data = d |> filter(model == 'Intercept'), + method = 'lm', formula = y~1, + linewidth = 1, color = 'black')+ + geom_smooth(data = d |> filter(model == 'Linear'), + method = 'lm', formula = y~x, + linewidth = 1, color = 'black')+ + geom_smooth(data = d |> filter(model == 'Quadratic'), + method = 'lm', formula = y~x+I(x^2), + linewidth = 1, color = 'black')+ + facet_wrap(~BASIN, nrow = 1)+ + labs(x = 'Years between comparison', y = 'Centroid distance')+ + scale_color_viridis_d(option = 'turbo')+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text.y = element_text(size = 14, colour = "black"), + axis.text.x = element_text(size = 12, colour = "black"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +``` + +## Variable importance +Becuase centroid distance is the multivariate difference in mean conditions between hypervolumes, we can 
determine the influence of each axis on the overall change. This can be done by removing an axis and calculating the euclidean distance without that axis. Using the formula +$$ imp_x = cd/cd_x - 1$$ +where $imp_x$ is the importance of axis $x$, $cd$ is centroid distance with all axes, and $cd_x$ is the centroid distance excluding axis $x$. + +```{r} +# pivot data longer +df_c = read_csv('data/SAV_centDist.csv') |> + mutate(across(TT:TDR, \(x) x^2)) |> + pivot_longer(TT:TDR, names_to = 'axis', values_to = 'dist') + +# create vector of unique axes +ax = unique(df_c$axis) + +# for loop to calculate variable importance of each axis +for(i in 1:length(ax)){ + d = df_c |> + # remove axis + filter(axis != ax[i]) |> + group_by(BASIN,y1,y2,ychange,dist_cent) |> + #calculate euclidean distance without axis + summarise(cd = sqrt(sum(dist))) |> + # calculate importance of axis + mutate(imp = (dist_cent/cd) - 1, + axis = ax[i]) + + # bind data into new data frame to store + if(i == 1){ + df_imp = d + }else{ + df_imp = bind_rows(df_imp, d) + } +} + +# calculate relative importance across all years for each basin +df_cdi = df_imp |> + #filter(ychange == 1) |> + group_by(BASIN,axis) |> + summarize(imp = mean(imp)) |> + group_by(BASIN) |> + mutate(s_imp = imp/max(imp))|> + mutate(BASIN = factor(BASIN, levels = + c('JON', 'RKB', 'RAN', 'EAG')), + axis = factor(axis, levels = c('TDR', 'TMA', 'sg_rich', + 'SF', 'HW', 'TT'))) +# labeler function for plotting +y_label_formatter = function(x) { + ifelse(x %% 1 == 0, formatC(x, format = "f", digits = 0), formatC(x, format = "f", digits = 2)) +} + +ggplot(df_cdi, aes(axis, s_imp, fill = BASIN))+ + geom_col()+ + labs(x = 'Variable', y = 'Centroid distance variable importance')+ + coord_flip()+ + theme_bw()+ + facet_wrap(~BASIN, nrow = 2)+ + scale_y_continuous( + breaks = c(0.0, 0.25, 0.5, 0.75, 1.0), + limits = c(0, 1), + labels = y_label_formatter) + + scale_x_discrete(labels = c('Total drift algae', 'Total Macroalgae', 'Seagrass richness', + expression(italic('Syrngodium filliforme')), + expression(italic('Halodule wrightii')), + expression(italic('Thalassia testudium'))))+ + scale_fill_viridis_d(option = 'turbo')+ + theme(axis.title = element_text(size = 14), + axis.text = element_text(size = 14, colour = "gray0"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +``` + + +## Background info +### Merge/Join +If two data frames contain different columns of data, then they can be merged together with the family of join functions. 
+ + +`left_join()` = uses left df as template and joins all matching columns from right df + +`right_join()` = uses right df as template and joins all matching columns from left df + +`inner_join()` = only matches columns contained in both dfs + +`full_join()` = combines all rows in both dfs + +```{r join} +library(tidyverse) + +left = tibble(name = c('a', 'b', 'c'), + n = c(1, 6, 7), + bio = c(100, 43, 57)) + +right = tibble(name = c('a', 'b', 'd', 'e'), + cals = c(500, 450, 570, 600)) + +left_join(left, right, by = 'name') + +right_join(left, right, by = 'name') + +inner_join(left, right, by = 'name') + +full_join(left, right, by = 'name') + +# multiple matches +fish = tibble(species = rep(c('Salmon', 'Cod'),times = 3), + year = rep(c(1999,2005,2020), each = 2), + catch = c(50, 60, 40, 50, 60, 100)) + +col = tibble(species = c('Salmon', 'Cod'), + coast = c('West', 'East')) + +left_join(fish, col, by = 'species') + + +``` + +### scaling data +Because hypervolumes can be generated with any continuous data as an axes, many of the times the units are not combatible. Blonder et al. [2014](https://doi-org.ezproxy.fiu.edu/10.1111/geb.12146) & [2018](https://doi-org.ezproxy.fiu.edu/10.1111/2041-210X.12865) to convert all of the axes into the same units. This can be done by taking the z-score of the values to convert units into standard deviations. Z-scoring data can be done with the formula: +$$ z = \frac{x_{i}-\overline{x}}{sd} $$ Where $x_{i}$ is a value, $\overline{x}$ is the mean, and $sd$ is the standard deviation. By z-scoring each axis, 0 is the mean of that axis, a value of 1 means that the value is 1 standard deviation above the global mean of that axis, and a value of -1 is 1 standard deviation below the global mean of the axis. In R this can be done manually or with the `scale()` function. + +```{r} +fish = tibble(species = rep(c('Salmon', 'Cod'),times = 3), + year = rep(c(1999,2005,2020), each = 2), + catch = c(50, 60, 40, 50, 60, 100)) + +# +fish = fish |> + mutate(zcatch1 = (catch - mean(catch))/sd(catch), # manual + zcatch2 = scale(catch)) # with scale + +fish + +# center = mean, scale = sd +fish$zcatch2 +``` + +### nesting data +One benefit of `tibbles` is that they can contain list columns. This means that we can make columns of `tibbles` that are nested within a dataset. Nesting creates a list-column of data frames; unnesting flattens it back out into regular columns. Nesting is a implicitly summarising operation: you get one row for each group defined by the non-nested columns. This is useful in conjunction with other summaries that work with whole datasets, most notably models. This can be done with the `nest()` and then flattened with `unnest()` + +```{r} +fish = tibble(species = rep(c('Salmon', 'Cod'),times = 3), + year = rep(c(1999,2005,2020), each = 2), + catch = c(50, 60, 40, 50, 60, 100)) + +# using group_by +fish_nest = fish |> + group_by(species) |> + nest() + +fish_nest +fish_nest$data + +# using .by in nest +# column name becomes data unless you change .key +fish_nest2 = fish |> + nest(.by = year, .key = 'df') + +fish_nest2 +fish_nest2$df + +``` + +### map +#### `purr` +The newest and new standard package with `tidyverse` is `purr` with its set of `map()` functions. Some similarity to `plyr` (and base) and `dplyr` functions but with more consistent names and arguments. Notice that map function can have some specification for the type of output. + + `map()` makes a list. + + `map_lgl()` makes a logical vector. + + `map_int()` makes an integer vector. 
+ + `map_dbl()` makes a double vector. + + `map_chr()` makes a character vector. + +```{r, error = T} +df = iris |> + select(-Species) +#summary statistics +map_dbl(df, mean) + +# using map with mutate and nest +d = tibble(species = rep(c('Salmon', 'Cod'),times = 3), + year = rep(c(1999,2005,2020), each = 2), + catch = c(50, 60, 40, 50, 60, 100)) |> + nest(.by = species) |> + mutate(correlation = map(data, \(data) cor.test(data$year, data$catch))) + +d +d$correlation + +``` diff --git a/ex1_files/figure-html/unnamed-chunk-11-1.png b/ex1_files/figure-html/unnamed-chunk-11-1.png new file mode 100644 index 0000000..359cb03 Binary files /dev/null and b/ex1_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/ex1_files/figure-html/unnamed-chunk-12-1.png b/ex1_files/figure-html/unnamed-chunk-12-1.png new file mode 100644 index 0000000..fef88ac Binary files /dev/null and b/ex1_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/ex1_files/figure-html/unnamed-chunk-13-1.png b/ex1_files/figure-html/unnamed-chunk-13-1.png new file mode 100644 index 0000000..ed21cdc Binary files /dev/null and b/ex1_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/ex1_files/figure-html/unnamed-chunk-7-1.png b/ex1_files/figure-html/unnamed-chunk-7-1.png new file mode 100644 index 0000000..52adbbc Binary files /dev/null and b/ex1_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/ex2.qmd b/ex2.qmd new file mode 100644 index 0000000..9e56a67 --- /dev/null +++ b/ex2.qmd @@ -0,0 +1,136 @@ +--- +title: "Example 2: Trait diversity of coral communities" +author: "W. Ryan James" +date: "6/21/24" +format: + html: + toc: true + theme: yeti +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(collapse = T, cache = T) +``` + +## Trait diversity of coral communities in Puerto Rico +This vignette uses hypervolumes to understand the spatial variation in trait diversity of coral reef communities. Hypervolumes are generated using species trait data and weighted based on random percent cover of species from mean and sd across all sites within a region. This process is repeated 100 times. Hypervolume size is used to quantify the trait diversity of each community. + +[R script](HV_ex2_traits.R) + +## data +The data used for this example comes from the [NOAA]() which monitors coral benthic communities throughout Puerto Rico. The [benthic cover data](https://github.com/CoastalFishScience/ISBW_HVworkshop/blob/main/data/coral_PC.csv) consists of percent cover (pc = mean percent cover, sd = standard deviation percent cover) from coral species collected across 8 regions. Data is averaged across sites within each region. The [coral trait data](https://github.com/CoastalFishScience/ISBW_HVworkshop/blob/main/data/coral_traits.csv) is based on average values of each trait for each species from published literature values. + +![Map of sampling basins](maps/map_stab.png) + +```{r} +# load libraries +library(tidyverse) +library(hypervolume) +library(truncnorm) + +# load trait data +df_tr = read_csv('data/coral_traits.csv') +df_tr + +# load benthic community percent cover data across regions +# pc = percent cover 0-100 +# pc_sd = standard deviation in percent cover +df_ben = read_csv('data/coral_PC.csv') +df_ben +``` + +## Prep data +To generate hypervolumes a random percent cover for each species is generated based on the mean and sd of percent cover of that species across all sites in the region. This process is repeated 100 times. 
+ +```{r} +reps = 100 + +set.seed(14) +df = df_ben |> + # join trait data to benthic data + left_join(df_tr, by = 'species') |> + # duplicate the data to create the number of reps needed + slice(rep(1:n(), each=reps))|> + # randomly generate percent cover + mutate(i = rep(1:reps, times=nrow(df_ben)), + percentcover = truncnorm::rtruncnorm(1, a = 0.001, b = 100, + mean = pc, sd = pc_sd)) |> + select(region, `corallite diameter`:`percentcover`) + +df + +``` + +## Hypervolumes +Values are z-scored across all sites and species for each replication separately. Here we nest two columns one with all of the input data for the hypervolume (data) and one with a vector of the randomly generated percent cover that will be used to weight the hypervolume (weight). Hypervolume size is extracted from each hypervolume using `map_dbl()` and `get_size()`. + +```{r, eval = F} +# z-score across all regions for each rep and generate hypervolumes +df = df |> + # z-score across regions for each rep + group_by(i) |> + mutate(across(`corallite diameter`:`colony maximum diameter`, scale)) |> + # nest data for each region and rep to make hypervolume + group_by(region, i) |> + # create a column for the percent cover to weight hypervolume as well as input data + nest(weight = percentcover, data = `corallite diameter`:`colony maximum diameter`) |> + # create community weighted hypervolumes + mutate(hv = map2(data,weight, \(data,weight) hypervolume_gaussian(data, + name = paste(region,i,sep = '_'), + weight = weight$percentcover, + samples.per.point = 1000, + kde.bandwidth = estimate_bandwidth(data), + sd.count = 3, + quantile.requested = 0.95, + quantile.requested.type = "probability", + chunk.size = 1000, + verbose = F)), + # extrace size for each hypervolume + hv_size = map_dbl(hv, \(hv) get_volume(hv))) + +``` + +```{r, echo = F} +df = readRDS('data/coral_region_hvs.rds') +``` +** *Do not try to view df in rstudio it will freeze your r since it is too big* +```{r} +head(df) +``` + +## Hypervolume size +We can calculate the mean and 95% CI of each run to determine the trait diversity of coral communities across each region. 
+```{r} +# plot hypervolume size +d = df |> + group_by(region) |> + summarize(mean = mean(hv_size), + upper = quantile(hv_size, 0.975), + lower = quantile(hv_size, 0.025)) |> + mutate(region = factor(region, + levels = c('North/Northeast', 'Vieques/Culebra', + 'Southeast', 'South', 'Southwest', + 'West', 'Mona/Desecheo'), + labels = c('North/\nNortheast', 'Vieques/\nCulebra', + 'Southeast', 'South', 'Southwest', + 'West', 'Mona/\nDesecheo'))) + + +ggplot(d, aes(region, mean, color = region))+ + geom_pointrange(aes(ymin = lower, ymax = upper), size = 1.5, linewidth = 2)+ + labs(x = 'Region', y = 'Trait diversity', color = 'Region')+ + scale_y_log10()+ + scale_color_viridis_d(option = 'turbo')+ + theme_bw()+ + theme(axis.title = element_text(size = 14), + axis.text = element_text(size = 14, colour = "gray0"), + plot.title = element_text(size = 14, hjust=0.5), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = 'none', + legend.title = element_text(size = 14), + strip.text.x = element_text(size = 14), + legend.text = element_text(size = 12)) + +``` + diff --git a/ex2_files/figure-html/unnamed-chunk-6-1.png b/ex2_files/figure-html/unnamed-chunk-6-1.png new file mode 100644 index 0000000..4cc230d Binary files /dev/null and b/ex2_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/ex3.qmd b/ex3.qmd new file mode 100644 index 0000000..0db0589 --- /dev/null +++ b/ex3.qmd @@ -0,0 +1,389 @@ +--- +title: "Example 3: trophic niche of seagrass consumers" +author: "W. Ryan James" +date: "6/21/24" +format: + html: + toc: true + theme: yeti +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(collapse = T, cache = T) +``` + +## Seasonal comparison of trophic niche of seagrass consumers +This vignette uses hypervolumes to quantify the trophic niche of seagrass consumers. Hypervolumes are generated using mean and standard deviation resource use data from stable isotope mixing models. This process is repeated 50 times. Hypervolume overlap metrics, as well as size and centroid distance, are used to understand how consumers niches change between seasons. + +[R script](HV_ex3_trophicNiche.R) + +## data +The data used for this vignette comes from [James et al. 2022](https://doi.org/10.1093/icesjms/fsac112). The [resource use data](https://github.com/CoastalFishScience/ISBW_HVworkshop/blob/main/data/coral_PC.csv) consists of mean and standard deviation of four resources for each species in each season based on mixing model outputs. Data comes from species collected across Florida Bay, USA. + +![Map of consumer sampling locations](maps/map_tn.png) + +```{r} +# load all data +d = read_csv('data/CESImixResults.csv') +d +``` + + +## Prepare data +Here a custom function `HVvalues()` is used to generate random points from mean and standard deviation data between end points (lower and upper bounds) using the `truncnorm` package. Values can then be z-scored. 
+ +*** *Note chose either column names or column numbers for ID_rows and names either work but must be the same* + - df = dataframe or tibble with each row containing + - unique entry for making random points + - ID_rows = vector of column names or numbers with id information + - names = column name or number of name of measure variables + - mean = column name or column number of df with mean + - sd = column name or column number of df with sd + - n = number of points to randomly generate + - z_score = T or F, if T z-scores values + - end_points = T or F for if random points need to be generated between + - a high and low end point (e.g. 5% and 95% interval) + - low and high required if end_points = T + - low = column name or column number of df with lower bound to sample in + - high = column name or column number of df with upper bound to sample in + +```{r} +HVvalues = function(df, ID_rows, names, mean, sd, n, z_score = F, + end_points = F, low = NULL, high = NULL){ + require(tidyverse) + require(truncnorm) + + # check to see if information is needed to restrict where points are + if (end_points){ + if (is_empty(df[,low]) | is_empty(df[,high])){ + return(cat('Warning: low and/or high columns not specified \n + Specific and run again or end_points = F \n')) + } + } + + # check to see if there are more + if(T %in% duplicated(df[,c(ID_rows,names)])){ + return(cat('Warning: some of the rows contain duplicated information \n + make sure data is correct \n')) + } + + # rename variables to make code work + if (is.numeric(mean)){ + names(df)[mean] = 'Mean' + }else { + df = df |> rename(Mean = mean) + } + + if (is.numeric(sd)){ + names(df)[sd] = 'SD' + }else { + df = df |> rename(SD = sd) + } + + if (end_points){ + if (is.numeric(low)){ + names(df)[low] = 'lower' + }else { + df = df |> rename(lower = low) + } + + if (is.numeric(high)){ + names(df)[high] = 'upper' + }else { + df = df |> rename(upper = high) + } + } + + # make sure the names is not numeric + if (is.numeric(names)){ + names = names(df)[names] + } + + labs = unique(as.vector(df[,names])[[1]]) + # generate random points within bounds + if(end_points){ + + df_tot = df |> slice(rep(1:n(), each=n))|> + mutate(point = + truncnorm::rtruncnorm(1, a = lower, b = upper, + mean = Mean, sd = SD)) |> + ungroup() |> + mutate(num = rep(1:n, times=nrow(df))) |> + dplyr::select(-Mean, -SD, -lower, -upper)|> + pivot_wider(names_from = names, values_from = point)|> + dplyr::select(-num) + }else { + # generate random points outside of bounds + df_tot = df |> slice(rep(1:n(), each=n))|> + mutate(point = + truncnorm::rtruncnorm(1, mean = Mean, sd = SD)) |> + ungroup() |> + mutate(num = rep(1:n, times=nrow(df))) |> + dplyr::select(-Mean, -SD)|> + pivot_wider(names_from = names, values_from = point)|> + dplyr::select(-num) + } + if (z_score){ + df_tot = df_tot |> + mutate(across(all_of(labs), scale)) + } + + return(df_tot) + +} + +``` + +30 points are generated for each species in each season using `map()`, and this process is repeated 50 times. 
+
+```{r}
+# number of iterations
+reps = 50
+
+# generate points and z-score across iterations
+set.seed(14)
+df = d |> 
+  # duplicate for number of reps
+  slice(rep(1:n(), each=reps))|> 
+  mutate(i = rep(1:reps, times=nrow(d))) |> 
+  group_by(i) |> 
+  nest() |> 
+  # apply function to generate random points
+  mutate(points = map(data, \(data) HVvalues(df = data, ID_rows = c('species', 'season'),
+                                             names = c('source'),
+                                             mean = 'mean', sd = 'sd', n = 30,
+                                             end_points = T, low = 'lowend', high = 'highend',
+                                             z_score = T))) |> 
+  select(i, points) |> 
+  unnest(points)
+
+df
+
+
+```
+
+## Hypervolumes
+Hypervolumes are generated for each species in each season for every iteration, and the size (volume) of each hypervolume is calculated.
+```{r, eval=F}
+df = df |> 
+  group_by(species, season, i) |> 
+  nest() |> 
+  mutate(hv = map(data, \(data) hypervolume_gaussian(data, name = paste(species, season, i, sep = '_'),
+                                                     samples.per.point = 500,
+                                                     kde.bandwidth = estimate_bandwidth(data), 
+                                                     sd.count = 3, 
+                                                     quantile.requested = 0.95, 
+                                                     quantile.requested.type = "probability", 
+                                                     chunk.size = 1000, 
+                                                     verbose = F)),
+         hv_size = map_dbl(hv, \(hv) get_volume(hv)))
+```
+
+```{r, echo = F}
+df = readRDS('data/CESIhvAll.rds')
+```
+**Note:** *do not try to view `df` in the RStudio viewer; the hypervolume column makes the object large enough to freeze your R session*
+```{r}
+head(df)
+```
+
+## Hypervolume metrics
+Combine the wet and dry season hypervolumes of each species to calculate the size ratio, overlap statistics, and centroid distance for each iteration.
+```{r, eval=F}
+ov_sn = df |> 
+  select(species, season, hv, hv_size) |> 
+  pivot_wider(names_from = season, values_from = c(hv,hv_size)) |> 
+  mutate(size_rat = hv_size_Dry/hv_size_Wet,
+         set = map2(hv_Wet,hv_Dry, \(hv1, hv2) hypervolume_set(hv1, hv2, check.memory = F, verbose = F)),
+         ov = map(set, \(set) hypervolume_overlap_statistics(set)),
+         dist_cent = map2_dbl(hv_Wet,hv_Dry, \(hv1,hv2) hypervolume_distance(hv1, hv2, type = 'centroid', check.memory=F))) |> 
+  unnest_wider(ov) |> 
+  select(species, i, hv_size_Wet, hv_size_Dry,
+         size_rat, jaccard, sorensen,
+         uniq_Wet = frac_unique_1, uniq_Dry = frac_unique_2,
+         dist_cent)
+```
+
+```{r, echo = F}
+ov_sn = read_csv('data/hvOv_season.csv')
+```
+
+### Hypervolume size
+```{r}
+df = ov_sn |> 
+  pivot_longer(hv_size_Wet:hv_size_Dry, names_to = 'season',
+               values_to = 'vol') |> 
+  group_by(species, season) |> 
+  summarize(mean = mean(vol),
+            median = median(vol),
+            low = quantile(vol, 0.025),
+            up = quantile(vol, 0.975)) |> 
+  mutate(season = factor(season, levels = c('hv_size_Wet',
+                                            'hv_size_Dry')))
+
+ggplot(df, aes(x = species, y = mean, color = season))+
+  geom_pointrange(aes(ymin = low, ymax = up), 
+                  size = 1.5, linewidth = 1.5, fatten = 2,
+                  position=position_dodge(width = 0.5))+
+  labs(x = 'Species', y = 'Trophic niche width', 
+       color = 'Season') +
+  scale_fill_manual(values = c('hv_size_Wet' = 'skyblue3',
+                               'hv_size_Dry' = 'indianred3'),
+                    labels = c('hv_size_Wet' = 'Wet',
+                               'hv_size_Dry' = 'Dry')) +
+  scale_color_manual(values = c('hv_size_Wet' = 'skyblue3',
+                                'hv_size_Dry' = 'indianred3'),
+                     labels = c('hv_size_Wet' = 'Wet',
+                                'hv_size_Dry' = 'Dry')) +
+  scale_x_discrete(labels = c("Bay \nanchovy",
+                              "Mojarra",
+                              "Pigfish",
+                              "Pinfish",
+                              "Pink \nshrimp",
+                              "Rainwater \nkillifish",
+                              "Silver \nperch" ))+
+  theme_bw()+
+  theme(axis.title = element_text(size = 14), 
+        axis.text.y = element_text(size = 14, colour = "black"), 
+        axis.text.x = element_text(size = 12, colour = "black"), 
+        plot.title = element_text(size = 14, hjust=0.5), 
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        legend.position = 'right',
+        legend.title = element_text(size = 14),
+        strip.text.x = element_text(size = 14),
+        legend.text = element_text(size = 12))
+
+```
+
+
+### Centroid distance
+```{r}
+cols = c("Pinfish" = 'yellow2',
+         "Mojarra" = 'slategray4',
+         "Silver perch" = 'snow3',
+         "Bay anchovy" = 'deepskyblue1',
+         "Pigfish" = 'orange',
+         "Pink shrimp" = 'pink',
+         "Rainwater killifish" = 'firebrick',
+         'All' = 'black')
+df = ov_sn|> 
+  group_by(species) |> 
+  summarize(mean = mean(dist_cent),
+            median = median(dist_cent),
+            low = quantile(dist_cent, 0.025),
+            up = quantile(dist_cent, 0.975))
+
+ggplot(df, aes(x = species, y = mean, color = species))+
+  geom_hline(aes(yintercept = 1), linewidth = 1, linetype = 'dashed')+
+  geom_pointrange(aes(ymin = low, ymax = up), 
+                  size = 1.5, linewidth = 1.5, fatten = 2,
+                  position=position_dodge(width = 0.5))+
+  labs(x = NULL, y = 'Centroid distance') +
+  scale_x_discrete(labels = c("Bay \nanchovy",
+                              "Mojarra",
+                              "Pigfish",
+                              "Pinfish",
+                              "Pink \nshrimp",
+                              "Rainwater \nkillifish",
+                              "Silver \nperch" ))+
+  scale_color_manual(values = cols)+
+  scale_y_continuous(limits = c(0,4.1))+
+  theme_bw()+
+  theme(axis.title = element_text(size = 14), 
+        axis.text.y = element_text(size = 14, colour = "black"), 
+        axis.text.x = element_text(size = 12, colour = "black"), 
+        plot.title = element_text(size = 14, hjust=0.5), 
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        legend.position = 'none',
+        legend.title = element_text(size = 14),
+        strip.text.x = element_text(size = 14),
+        legend.text = element_text(size = 12))
+
+```
+
+### Overlap
+```{r}
+df = ov_sn|> 
+  group_by(species) |> 
+  summarize(mean = mean(sorensen),
+            median = median(sorensen),
+            low = quantile(sorensen, 0.025),
+            up = quantile(sorensen, 0.975))
+
+ggplot(df, aes(x = species, y = mean, color = species))+
+  geom_pointrange(aes(ymin = low, ymax = up), 
+                  size = 1.5, linewidth = 1.5, fatten = 2,
+                  position=position_dodge(width = 0.5))+
+  labs(x = NULL, y = 'Niche overlap') +
+  scale_x_discrete(labels = c("All",
+                              "Bay \nanchovy",
+                              "Mojarra",
+                              "Pigfish",
+                              "Pinfish",
+                              "Pink \nshrimp",
+                              "Rainwater \nkillifish",
+                              "Silver \nperch" ))+
+  scale_color_manual(values = cols)+
+  theme_bw()+
+  theme(axis.title = element_text(size = 14), 
+        axis.text.y = element_text(size = 14, colour = "black"), 
+        axis.text.x = element_text(size = 12, colour = "black"), 
+        plot.title = element_text(size = 14, hjust=0.5), 
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        legend.position = 'none',
+        legend.title = element_text(size = 14),
+        strip.text.x = element_text(size = 14),
+        legend.text = element_text(size = 12))
+
+```
+
+### Percent unique
+```{r}
+df = ov_sn|> 
+  pivot_longer(uniq_Wet:uniq_Dry, names_to = 'season',
+               values_to = 'vol') |> 
+  group_by(species, season) |> 
+  summarize(mean = mean(vol),
+            median = median(vol),
+            low = quantile(vol, 0.025),
+            up = quantile(vol, 0.975)) |> 
+  mutate(season = factor(season, levels = c('uniq_Wet',
+                                            'uniq_Dry')))
+
+ggplot(df, aes(x = species, y = mean, color = season))+
+  geom_pointrange(aes(ymin = low, ymax = up), 
+                  size = 1.5, linewidth = 1.5, fatten = 2,
+                  position=position_dodge(width = 0.5))+
+  labs(x = 'Species', y = 'Niche volume unique', 
+       color = 'Season') +
+  scale_fill_manual(values = c('uniq_Wet' = 'skyblue3',
+                               'uniq_Dry' = 'indianred3'),
+                    labels = c('uniq_Wet' = 'Wet',
+                               'uniq_Dry' = 'Dry')) +
+  scale_color_manual(values = c('uniq_Wet' = 'skyblue3',
+                                'uniq_Dry' = 'indianred3'),
+                     labels = c('uniq_Wet' = 'Wet',
+                                'uniq_Dry' = 'Dry')) +
+  scale_x_discrete(labels = c("Bay \nanchovy",
+                              "Mojarra",
+                              "Pigfish",
+                              "Pinfish",
+                              "Pink \nshrimp",
+                              "Rainwater \nkillifish",
+                              "Silver \nperch" ))+
+  theme_bw()+
+  theme(axis.title = element_text(size = 14), 
+        axis.text.y = element_text(size = 14, colour = "black"), 
+        axis.text.x = element_text(size = 12, colour = "black"), 
+        plot.title = element_text(size = 14, hjust=0.5), 
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        legend.position = 'right',
+        legend.title = element_text(size = 14),
+        strip.text.x = element_text(size = 14),
+        legend.text = element_text(size = 12))
+
+```
+
diff --git a/maps/map_stab.png b/maps/map_stab.png
new file mode 100644
index 0000000..d9c6e4b
Binary files /dev/null and b/maps/map_stab.png differ
diff --git a/maps/map_tn.tiff b/maps/map_tn.tiff
new file mode 100644
index 0000000..ff0b37c
Binary files /dev/null and b/maps/map_tn.tiff differ
diff --git a/ovPres.png b/ovPres.png
new file mode 100644
index 0000000..980c625
Binary files /dev/null and b/ovPres.png differ
diff --git a/ovPres.tiff b/ovPres.tiff
new file mode 100644
index 0000000..88c7a73
Binary files /dev/null and b/ovPres.tiff differ
diff --git a/s_impCentDist.png b/s_impCentDist.png
new file mode 100644
index 0000000..a92f181
Binary files /dev/null and b/s_impCentDist.png differ
diff --git a/sizePres.png b/sizePres.png
new file mode 100644
index 0000000..d46aee2
Binary files /dev/null and b/sizePres.png differ
diff --git a/sizePres.tiff b/sizePres.tiff
new file mode 100644
index 0000000..2260b92
Binary files /dev/null and b/sizePres.tiff differ
diff --git a/stabCentDist.png b/stabCentDist.png
new file mode 100644
index 0000000..0da352a
Binary files /dev/null and b/stabCentDist.png differ
diff --git a/uniquePres.png b/uniquePres.png
new file mode 100644
index 0000000..61a41a5
Binary files /dev/null and b/uniquePres.png differ