-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFish692-PrudhoeFish_dataimport.R
238 lines (190 loc) · 12.6 KB
/
Fish692-PrudhoeFish_dataimport.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#Data Wrangling and Analysis
#Script to import data and clean it up
library(dplyr)
library(tidyr)
library(tibble)
library(lubridate)
library(ggplot2)
library(CircStats)
#read in the data
allcatch <- read.csv("allcatch2001-2018.csv", header = TRUE) %>%
mutate(Station = factor(Station, levels = c("220", "218", "214", "230", "231")))
temp_salin <- read.csv("tempsalin2001-2018.csv", header = TRUE) %>%
mutate(Station = factor(Station, levels = c("220", "218", "214", "230", "231")))
#fix dates
allcatch$EndDate <- ymd(as.POSIXct(allcatch$EndDate, format = "%m/%d/%Y"))
temp_salin$Date <- ymd(as.POSIXct(temp_salin$Date, format = "%m/%d/%Y"))
allcatch <- allcatch %>% add_column(Month = month(allcatch$EndDate), .after = 2)
temp_salin <- temp_salin %>% add_column(Month = month(temp_salin$Date), .after = 2)
# Separate out the temp and salinity. Drop the columns that are irrelevant
# Put data into 'long' format. NOTE: They used a slightly different definition
# of the bottom in early years ('bottom 1.5'). This is analogous to 'bottom'
watertemps <- temp_salin %>% dplyr::select(-c(Salin_Top, Salin_Mid, Salin_Bot, Salin_Bot_1.5))
watersalin <- temp_salin %>% dplyr::select(-c(Temp_Top, Temp_Mid, Temp_Bot, Temp_Bot_1.5))
watersalin$Station[watersalin$Station == 231] <- 214 # Treat 231 as the precursor to 214
watertemps$Station[watertemps$Station == 231] <- 214 # Treat 231 as the precursor to 214
watersalin$Station <- factor(watersalin$Station)
watertemps$Station <- factor(watertemps$Station)
# if needed for later: watersalin %>% gather(depth, salin_ppt, -c(Year, Date, Month, Station))
rm(temp_salin) #optional but cleans up environment
##### WIND #####
# wind and air temp data from NOAA NCEI
# https://www.ncdc.noaa.gov/cdo-web/datasets/LCD/stations/WBAN:27406/detail
# https://www1.ncdc.noaa.gov/pub/data/cdo/documentation/LCD_documentation.pdf
# Station WBAN:27406
# These data were downloaded in 10 year increments (LCD CSV).
# In appendix (at end of this doc, I summarized hourly data into a daily summary)
deadhorsewind <- read.csv("deadhorsewind_2001-2018_daily.csv", header = TRUE,
stringsAsFactors = FALSE) %>%
mutate(Date = ymd(as.POSIXct(Date, format = "%m/%d/%Y")),
Year = year(Date),
month = month(Date),
dailymeanspeed = dailymeanspeed * 1.60934) %>%
rename(dailymeanspeed_kph = dailymeanspeed) %>%
filter(month == 7 | month == 8) %>%
dplyr::select(Date, Year, month, everything()) # reorder month column
##### DISCHARGE #####
# Data from USGS https://waterdata.usgs.gov/nwis/uv/?site_no=15908000
# https://nwis.waterdata.usgs.gov/usa/nwis/uv/?cb_00060=on&format=rdb&site_no=15908000&period=&begin_date=2001-01-01&end_date=2018-09-01
sagdisch <- read.csv("SagDischargeDaily_2001-2018.csv", header = TRUE) %>%
mutate(datetime = as.POSIXct(paste0(date, " ", time), format = "%m/%d/%Y %H:%M"),
Date = as_date(datetime),
hour = hour(datetime) ) %>%
dplyr::select(Date, hour, disch_cfs) %>%
group_by(Date) %>% summarize(meandisch_cfs = mean(disch_cfs, na.rm = TRUE))
##### Catch #####
# Join the catch and environ data. 7.17.2018 creates 4 lines instead of 1
catchenviron <- left_join(allcatch, watersalin %>% dplyr::select(-c(Month)),
by = c("Year" = "Year", "EndDate" = "Date", "Station" = "Station")) %>%
left_join(watertemps %>% dplyr::select(-c( Month)),
by = c("Year" = "Year", "EndDate" = "Date", "Station" = "Station")) %>%
left_join(deadhorsewind %>% dplyr::select(-month), by = c("Year" = "Year", "EndDate" = "Date")) %>%
left_join(sagdisch, by = c("EndDate" = "Date"))
catchenviron <- catchenviron %>% mutate(biweekly = ifelse(day.of.year <= 196, 1,
ifelse(day.of.year > 196 & day.of.year <= 213, 2, #btwn July 15 and Aug 1
ifelse(day.of.year > 213 & day.of.year <= 227, 3, #Aug 1 - 15
ifelse(day.of.year > 227, 4, NA))))) # after Aug 15
###################################
# Join All Environmental Data
# This creates a dataframe summarizing annual environmental conditions at each site for each year
# and keep in same order as the corresponding catch dataframe.
# The only tricky thing done is that I converted wind from degrees (0-360) to East-West using trig,
# before this I took the mean using circular averaging. Salin & temp are from midwater sampling
pru.env.ann <- catchenviron %>% dplyr::distinct(Year, Station) %>%
mutate(Station=replace(Station, Station==231, 214)) %>% arrange(Year, Station) %>%
#the above section creates a dataframe of year/station combos, and replaces 231 with 214
left_join(deadhorsewind %>% mutate(Year = year(Date)) %>% group_by(Year) %>%
summarise(annwindspeed_kph = mean(dailymeanspeed_kph, na.rm = TRUE),
annwinddir = ((circ.mean(2*pi*na.omit(dailymeandir)/360))*(360 / (2*pi))) %%360 ),
by = c("Year" = "Year")) %>%
left_join(sagdisch %>% mutate(Year = year(Date), month = month(Date)) %>%
group_by(Year) %>% filter(month == 7 | month == 8) %>%
summarise(anndisch_cfs = mean(meandisch_cfs, na.rm = TRUE)),by = c("Year" = "Year") ) %>%
left_join(watersalin %>% group_by(Year, Station) %>% summarise(annsal_ppt = mean(Salin_Mid, na.rm = TRUE)),
by = c("Year" = "Year", "Station" = "Station")) %>%
left_join(watertemps %>% group_by(Year, Station) %>% summarise(anntemp_c = mean(Temp_Mid, na.rm = TRUE)),
by = c("Year" = "Year", "Station" = "Station")) %>%
mutate(Year = factor(Year, ordered = TRUE), # PERMANOVA will want it ordered
annwinddir_ew = sin(annwinddir * pi / 180)) # this changes from polar coords to cartesian east-west (-1=W, 1=E)
#create similar setup on a daily scale
pru.env.day <- catchenviron %>% dplyr::distinct(EndDate, Station) %>%
mutate(Station=replace(Station, Station==231, 214)) %>% arrange(EndDate, Station) %>%
left_join(deadhorsewind %>% dplyr::select(-month), by = c("EndDate" = "Date")) %>%
left_join(sagdisch, by = c("EndDate" = "Date")) %>%
left_join(watersalin %>% dplyr::select(Date, Station, Salin_Top, Salin_Mid),
by = c("EndDate" = "Date", "Station" = "Station")) %>%
left_join(watertemps %>% dplyr::select(Date, Station, Temp_Top, Temp_Mid),
by = c("EndDate" = "Date", "Station" = "Station")) %>%
mutate(#EndDate = factor(EndDate, ordered = TRUE), # PERMANOVA will want it ordered
Year = year(EndDate),
Station = factor(Station),
winddir_ew = sin(dailymeandir * pi / 180)) %>% # changes from degrees to cartesian east-west (-1=W, 1=E)
arrange(EndDate, Station)
# finally, do on a biweekly scale
pru.env.biwk <- pru.env.day %>%
mutate(Year = year(EndDate),
day.of.year = yday(EndDate),
biweekly = factor(ifelse(day.of.year <= 196, 1,
ifelse(day.of.year > 196 & day.of.year <= 213, 2, #btwn July 15 and Aug 1
ifelse(day.of.year > 213 & day.of.year <= 227, 3, #Aug 1 - 15
ifelse(day.of.year > 227, 4, NA))))) ) %>% # after Aug 15
group_by(Year, biweekly, Station) %>%
summarise(biwkmeanspeed_kph = mean(dailymeanspeed_kph, na.rm = TRUE),
biwkmeandir = ((circ.mean(2*pi*na.omit(dailymeandir)/360))*(360 / (2*pi))) %%360,
meandisch_cfs = mean(meandisch_cfs , na.rm=TRUE),
Salin_Top = mean(Salin_Top, na.rm=TRUE),
Salin_Mid = mean(Salin_Mid, na.rm=TRUE),
Temp_Top = mean( Temp_Top, na.rm=TRUE),
Temp_Mid = mean(Temp_Mid, na.rm=TRUE),
winddir_ew = mean(winddir_ew, na.rm=TRUE))
#############################
### Catch Data
# now turn catch data into a 'wide' catch matrix (Species by Year/Station)
# drop enivron data too, just focusing on catch here
catchmatrix.all <- catchenviron %>% group_by(Year, Station, Species) %>% summarise(anncount = sum(totcount)) %>%
spread(Species, value = anncount) %>% replace(., is.na(.), 0) %>% ungroup()
catchmatrix.all$Station[catchmatrix.all$Station == 231] <- 214 # Treat the 231 Station as the precursor to 214
catchmatrix.all <- catchmatrix.all %>% arrange(Year, Station) #was out of order in 2001 after station name change
catchmatrix.day <- catchenviron %>% group_by(EndDate, Station, Species) %>% summarise(count = sum(totcount)) %>%
spread(Species, value = count) %>% replace(., is.na(.), 0) %>% ungroup()
catchmatrix.day$Station[catchmatrix.day$Station == 231] <- 214 # Treat the 231 Station as the precursor to 214
catchmatrix.day <- catchmatrix.day %>% arrange(EndDate, Station) #was out of order in 2001 after station name change
catchmatrix.biwk <- catchenviron %>% group_by(Year, biweekly, Station, Species) %>% summarise(count = sum(totcount)) %>%
spread(Species, value = count) %>% replace(., is.na(.), 0) %>% ungroup()
catchmatrix.biwk$Station[catchmatrix.biwk$Station == 231] <- 214 # Treat the 231 Station as the precursor to 214
catchmatrix.biwk <- catchmatrix.biwk %>% arrange(Year, biweekly, Station) #was out of order in 2001 after station name change
# Exclude rare species:
# catchmatrix$Station <- as.numeric(catchmatrix$Station) #cheat to include it in shortcut
# catchmatrix <- catchmatrix[, which(colSums(catchmatrix) > 100)]
# Right now analysis is for only species >100 fish, all years combined. Change 100 to 0 to inc all spp
# the following code makes a list of the species to keep which have a threshold of 100 currently,
# then filters based on this list, then turns back into wide format
keepspp <- (catchmatrix.all %>% gather(Species, counts, -Year, -Station) %>%
group_by(Species) %>% summarize(counts = sum(counts)) %>% filter(counts > 100))$Species
catchmatrix <- catchmatrix.all %>% gather(Species, counts, -Year, -Station) %>% filter(Species %in% keepspp) %>%
spread(Species, value = counts)
catchmatrix.day <- catchmatrix.day %>% gather(Species, counts, -EndDate, -Station) %>% filter(Species %in% keepspp) %>%
spread(Species, value = counts)
catchmatrix.biwk <- catchmatrix.biwk %>% gather(Species, counts, -Year, -biweekly, -Station) %>% filter(Species %in% keepspp) %>%
spread(Species, value = counts)
rownames(catchmatrix) <- paste0(catchmatrix$Year, catchmatrix$Station)
rownames(catchmatrix.day) <- paste0(year(catchmatrix.day$EndDate), yday(catchmatrix.day$EndDate), catchmatrix.day$Station)
rownames(catchmatrix.biwk) <- paste0(catchmatrix.biwk$Year, catchmatrix.biwk$biweekly, catchmatrix.biwk$Station)
#pru.env.ann <- catchmatrix %>% dplyr::select(Year, Station)
catchmatrix <- catchmatrix %>% dplyr::select(-Year, -Station)
catchmatrix.all <- catchmatrix.all %>% dplyr::select(-Year, -Station)
catchmatrix.day <- catchmatrix.day %>% dplyr::select(-EndDate, -Station)
catchmatrix.biwk <- catchmatrix.biwk %>% dplyr::select(-Year, -biweekly, -Station)
# standardize catches 0 to 1 (1 is max catch in a given year/station)
#note that the order corresponds to the now deleted Year/station combo. DON'T CHANGE ORDER
catchmatrix.std <- catchmatrix
for (i in 1:ncol(catchmatrix.std)){ #make sure year/stn cols already dropped
catchmatrix.std[i] <- catchmatrix[i]/max(catchmatrix[i])}
catchmatrix.day.std <- catchmatrix.day
for (i in 1:ncol(catchmatrix.day.std)){ #starts at 3 to exclude Year and station cols
catchmatrix.day.std[i] <- catchmatrix.day[i]/max(catchmatrix.day[i])}
catchmatrix.biwk.stdtrans <- catchmatrix.biwk
for (i in 1:ncol(catchmatrix.biwk.stdtrans)){ #starts at 3 to exclude Year and station cols
catchmatrix.biwk.stdtrans[i] <- (catchmatrix.biwk[i]^0.25)/max((catchmatrix.biwk[i]^0.25))}
#using 4th root tranform
# make sure that 'catchmatrix' and catchmatrix.std are both set up in same order as pru.env.ann
#hist(catchmatrix.biwk.stdtrans$ARCS)
##########################################
#APPENDIX
# The daily wind data was created using the following code:
# windhourly <- read.csv("../../Slope Project/Data/deadhorsewind_2001-2018_hourly.csv", header = TRUE,
# stringsAsFactors = FALSE) %>%
# select(DATE, HOURLYWindSpeed, HOURLYWindDirection) %>% #remove extraneous data
# rename(Date = DATE,
# windhrly_mph = HOURLYWindSpeed,
# windhrly_dir = HOURLYWindDirection) %>%
# mutate(Date = ymd(as.POSIXct(Date, format = "%m/%d/%Y")),
# month = month(Date)) %>%
# mutate_if(is.character, as.numeric) %>% select(Date, month, everything())
#
# library(CircStats)
# winddaily <- windhourly %>% mutate(Year = year(Date)) %>% group_by(Date) %>%
# summarise(dailymeanspeed = mean(windhrly_mph, na.rm = TRUE),
# dailymeandir = ((circ.mean(2*pi*na.omit(windhrly_dir)/360))*(360 / (2*pi))) %%360 )
#
# write.csv(winddaily, file="deadhorsewind_2001-2018.csv")