-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy path01_NOAA_dwn.R
99 lines (87 loc) · 3.82 KB
/
01_NOAA_dwn.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
library(dplyr)
library(lubridate)
# Fetch the NOAA stage 3 records for one site at one exact timestamp.
#
# Args:
#   selectdate: value handed to as_datetime(); only rows whose `datetime`
#               equals the result are kept.
#   siteid:     value matched against the `site_id` column.
#
# Returns the matching rows after collect().
weather_download <- function(selectdate, siteid) {
  stage3 <- neon4cast::noaa_stage3()
  matched <- filter(
    stage3,
    datetime == as_datetime(selectdate),
    site_id == siteid
  )
  collect(matched)
}
# temp = weather_download("2023-08-08", "HARV")
# Download historical NOAA stage 3 data for one site and one variable and
# summarise `prediction` per datetime.
#
# Args:
#   site:              value matched against the `site_id` column.
#   var:               value matched against the `variable` column.
#   reference_date:    start of the window (inclusive); parsed with
#                      as_datetime().
#   end_date:          end of the window (exclusive); parsed with
#                      as_datetime().
#   kelvin_to_celsius: if TRUE, 273.15 is subtracted from `prediction` before
#                      summarising. Defaults to TRUE only when `var` is
#                      "air_temperature", so results for that variable are
#                      unchanged from earlier versions of this function.
#
# Returns a collected table, ordered by datetime, with columns:
#   datetime, mean_prediction, sd_prediction, and ensemble (the largest
#   `parameter` value per datetime -- presumably the highest ensemble member
#   index; confirm against the dataset).
noaa_historical_download <- function(site, var, reference_date, end_date,
                                     kelvin_to_celsius =
                                       identical(var, "air_temperature")) {
  historical_start_date <- as_datetime(reference_date)
  historical_end_date <- as_datetime(end_date)

  # The Kelvin -> Celsius shift used to be applied to every variable, which
  # silently offset the mean of non-temperature variables by -273.15.
  kelvin_offset <- if (isTRUE(kelvin_to_celsius)) 273.15 else 0

  neon4cast::noaa_stage3() %>%
    dplyr::filter(
      site_id == site,
      datetime >= historical_start_date,
      datetime < historical_end_date,
      variable == var
    ) %>%
    dplyr::select(datetime, prediction, parameter) %>%
    dplyr::group_by(datetime) %>%
    dplyr::summarize(
      mean_prediction = mean(prediction - kelvin_offset, na.rm = TRUE),
      sd_prediction = sd(prediction - kelvin_offset, na.rm = TRUE),
      ensemble = max(parameter)
    ) %>%
    dplyr::select(datetime, mean_prediction, sd_prediction, ensemble) %>%
    dplyr::collect() %>%
    # Nothing upstream orders the summarised rows, so sort once they are local
    # to hand back a proper time series.
    dplyr::arrange(datetime)
}
# noaa_forecast_download <- function(site, var, reference_date) {
# endpoint <- "data.ecoforecast.org"
# bucket <- glue::glue("neon4cast-drivers/noaa/gefs-v12/stage2/parquet/0/{reference_date}")
# s3 <- arrow::s3_bucket(bucket, endpoint_override = endpoint, anonymous = TRUE)
#
# ds <- arrow::open_dataset(s3)
#
# forecast_date <- as_datetime(reference_date)
#
# # Filter, select, and process data
# ds %>%
# dplyr::filter(site_id == site,
# datetime >= forecast_date,
# variable == var) %>%
# dplyr::select(datetime, prediction, parameter) %>%
# dplyr::group_by(datetime) %>%
# dplyr::summarize(mean_prediction = mean(prediction-273.15, na.rm = TRUE),
# sd_prediction = sd(prediction-273.15, na.rm = TRUE),
# ensemble=max(parameter)) %>%
# dplyr::select(datetime, mean_prediction, sd_prediction, ensemble) %>%
# dplyr::collect()
# }
# variables included in the NOAA weather dataset
# variables <- c("surface_downwelling_longwave_flux_in_air",
# "surface_downwelling_shortwave_flux_in_air",
# "precipitation_flux",
# "air_pressure",
# "relative_humidity",
# "air_temperature",
# "northward_wind",
# "eastward_wind")
# temp <- noaa_historical_download("HARV", "air_temperature", "2024-03-01", "2024-03-08")
# Download NOAA stage 2 forecast rows for one site and one variable.
#
# Args:
#   site:           value matched against the `site_id` column.
#   var:            value matched against the `variable` column.
#   reference_date: parsed with as_datetime(); only rows with `datetime` at or
#                   after it are returned.
#
# Returns the collected `datetime`, `prediction` and `parameter` columns. No
# per-datetime summary is computed here; rows come back as stored.
noaa_forecast_download <- function(site, var, reference_date) {
  forecast_start <- as_datetime(reference_date)

  ## connect to data
  # Need to use yesterday's NOAA forecast because today's is not available yet
  stage2 <- neon4cast::noaa_stage2(
    cycle = 0,
    version = "v12",
    endpoint = NA,
    verbose = TRUE,
    start_date = forecast_start - lubridate::days(1)
  )

  ## filter available forecasts by date and variable
  wanted <- dplyr::filter(
    stage2,
    site_id == site,
    datetime >= forecast_start,
    variable == var
  )
  dplyr::collect(dplyr::select(wanted, datetime, prediction, parameter))
}