-
Notifications
You must be signed in to change notification settings - Fork 0
/
bargraphs_2.R
130 lines (98 loc) · 5.06 KB
/
bargraphs_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
##### BAR GRAPHS Chapter2 #####
library(dplyr)
library(purrr)
library(readr)
library(stringr)
library(ggplot2)
# Examine how abundance and richness of the selected taxa (Porifera, Cnidaria,
# Bryozoa and Stalked crinoids; see 'taxa' below) change across seamounts,
# summarised per transect
# Root directory under which every seamount folder lives
base_folder_path <- "/Users/user/Desktop/"

# Seamount transect folders, given relative to 'base_folder_path'
# (append further folders here as needed)
seamount_folders <- c(
  "coral_biigle_feb/8.coral_biigle_50m_transects",
  "sapmer_biigle_feb/8.sapmer_50m_transects",
  "atlantis_biigle_feb/8.atlantis_50m_transects",
  "melville_biigle_feb/8.melville_biigle_50m_transects"
)

# Taxonomic groups whose records are kept for the analysis
taxa <- c(
  "Porifera",
  "Cnidaria",
  "Bryozoa",
  "Stalked crinoids"
)
#####
# Summarise morphospecies richness and abundance for one seamount.
#
# Every CSV file in the seamount folder is treated as one transect. Rows are
# kept when 'depth' lies in [0, 2000] and 'label_hierarchy' matches at least
# one entry of 'taxa'. For each transect, richness is the number of distinct
# 'label_name' values and abundance is the number of retained rows. The
# transect values are then averaged for the seamount.
#
# Arguments:
#   folder_name       Seamount folder, relative to 'base_folder_path'.
#   base_folder_path  Directory that contains the seamount folders.
#   taxa              Character vector of taxon names; the entries are joined
#                     with "|" and used as a regular expression.
#
# Returns:
#   A tibble with one row for the seamount and the columns seamount,
#   average_richness, sd_richness, sem_richness, average_abundance,
#   sd_abundance and sem_abundance. If the folder holds no CSV files, or no
#   row passes the filters, a zero-row tibble with the same columns is
#   returned and a warning is raised.
process_seamount <- function(folder_name, base_folder_path, taxa) {
  folder_path <- file.path(base_folder_path, folder_name)
  files <- list.files(folder_path, pattern = "\\.csv$", full.names = TRUE)

  # Both values are identical for every file, so build them once up front
  taxa_pattern <- paste(taxa, collapse = "|")
  seamount_name <- str_extract(folder_name, "[^/]+$") # last path component

  transect_data <- map_dfr(files, function(file) {
    data <- read_csv(file) %>%
      filter(
        depth >= 0 & depth <= 2000,
        str_detect(label_hierarchy, taxa_pattern)
      )

    # A transect with no qualifying records contributes no row at all
    if (nrow(data) == 0) {
      return(tibble())
    }

    # One summary row per file; the file name (without extension) is the
    # transect identifier
    tibble(
      transect = tools::file_path_sans_ext(basename(file)),
      richness = n_distinct(data$label_name), # unique morphospecies labels
      abundance = nrow(data), # number of retained records
      seamount = seamount_name
    )
  })

  # Without this guard the grouping below fails on a column-less tibble with
  # "column `seamount` is not found", which aborts the whole script
  if (nrow(transect_data) == 0) {
    warning(
      "No qualifying records found in seamount folder: ", folder_path,
      call. = FALSE
    )
    return(tibble(
      seamount = character(),
      average_richness = double(),
      sd_richness = double(),
      sem_richness = double(),
      average_abundance = double(),
      sd_abundance = double(),
      sem_abundance = double()
    ))
  }

  # Mean, standard deviation and standard error of the mean across transects;
  # taxa are pooled, so there is no grouping by taxonomic group
  transect_data %>%
    group_by(seamount) %>%
    summarise(
      average_richness = mean(richness),
      sd_richness = sd(richness),
      sem_richness = sd_richness / sqrt(n()),
      average_abundance = mean(abundance),
      sd_abundance = sd(abundance),
      sem_abundance = sd_abundance / sqrt(n()),
      .groups = "drop"
    )
}
# Single-argument adapter around 'process_seamount': the base path and the
# taxa list are taken from the script-level 'base_folder_path' and 'taxa'
process_seamount_wrapper <- function(folder_name) {
  process_seamount(
    folder_name = folder_name,
    base_folder_path = base_folder_path,
    taxa = taxa
  )
}
# Build the summary for every folder listed in 'seamount_folders'
all_data <- map(seamount_folders, process_seamount_wrapper)

# Stack the data from all seamounts into one data frame, then swap the
# folder-derived seamount names for display names
combined_data <- bind_rows(all_data) %>%
  mutate(
    seamount = case_when(
      str_detect(seamount, "atlantis") ~ "Atlantis",
      str_detect(seamount, "sapmer") ~ "Sapmer",
      str_detect(seamount, "melville") ~ "Melville Bank",
      str_detect(seamount, "coral") ~ "Coral",
      TRUE ~ seamount # unrecognised names are kept unchanged
    )
  )
# Bar chart of average morphospecies richness per transect for each seamount,
# with error bars spanning the mean +/- one standard error
ggplot(
  data = combined_data,
  mapping = aes(x = seamount, y = average_richness, fill = seamount)
) +
  # Bar heights are the pre-computed means, hence stat = "identity"
  geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
  geom_errorbar(
    mapping = aes(
      ymin = average_richness - sem_richness,
      ymax = average_richness + sem_richness
    ),
    width = 0.25,
    position = position_dodge(0.7)
  ) +
  # Fixed left-to-right order of the seamounts on the x axis
  scale_x_discrete(limits = c("Atlantis", "Sapmer", "Melville Bank", "Coral")) +
  # One fixed fill colour per seamount
  scale_fill_manual(
    values = c(
      "Sapmer" = "purple4",
      "Coral" = "royalblue",
      "Atlantis" = "orange",
      "Melville Bank" = "palevioletred1"
    )
  ) +
  labs(
    title = "",
    x = "Seamount",
    y = "Average Morphospecies Richness Per Transect"
  ) +
  theme_minimal() +
  # Slant the x-axis labels so the seamount names stay readable
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Bar chart of average morphospecies abundance per transect for each seamount,
# with error bars spanning the mean +/- one standard error
ggplot(
  data = combined_data,
  mapping = aes(x = seamount, y = average_abundance, fill = seamount)
) +
  # Bar heights are the pre-computed mean abundances, hence stat = "identity"
  geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
  geom_errorbar(
    mapping = aes(
      ymin = average_abundance - sem_abundance,
      ymax = average_abundance + sem_abundance
    ),
    width = 0.25,
    position = position_dodge(0.7)
  ) +
  # Fixed left-to-right order of the seamounts on the x axis
  scale_x_discrete(limits = c("Atlantis", "Sapmer", "Melville Bank", "Coral")) +
  # One fixed fill colour per seamount
  scale_fill_manual(
    values = c(
      "Sapmer" = "purple4",
      "Coral" = "royalblue",
      "Atlantis" = "orange",
      "Melville Bank" = "palevioletred1"
    )
  ) +
  labs(
    title = "",
    x = "Seamount",
    y = "Average Morphospecies Abundance Per Transect"
  ) +
  theme_minimal() +
  # Slant the x-axis labels so the seamount names stay readable
  theme(axis.text.x = element_text(angle = 45, hjust = 1))