From feee7b17af1000f07de2ba824fe9b5d62f65c990 Mon Sep 17 00:00:00 2001 From: James Mineau Date: Tue, 17 Sep 2024 16:54:51 -0600 Subject: [PATCH 1/6] num hours per met file --- docs/configuration.md | 1 + r/run_stilt.r | 2 ++ r/src/find_met_files.r | 46 ++++++++++++++++++++++++----------------- r/src/simulation_step.r | 7 +++++-- 4 files changed, 35 insertions(+), 21 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 3675fbc..7d432f8 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -63,6 +63,7 @@ str(receptors) | -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `met_path` | Absolute path to ARL compatible meteorological data files | | `met_file_format` | String detailing file naming convention for meteorological data files using a mixture of datetime and regex syntax. The formatting string accepts `grep` compatible regular expressions (`.\*.arl`), `strftime` compatible datetime strings (`%Y%m%d%H`) or any combination of the two. 
Datetime syntax is expanded to all unique combinations required for the receptor and simulation duration and the intersection between the requested files and files available in `met_path` is determined with `grep`, allowing partial matching and compatible regular expressions to be used to identify the relevant data. Matching does not require the full format to be specified - e.g. `\*.arl`, `%Y`, `%Y%m%d`, `%Y%m%d_d0.*.arl` would all match with a file named `20180130_d01.arl`. | +| `n_hours_per_met_file` | Number of hours per meteorological data file. Defaults to 6 | | `met_subgrid_buffer` | Percent to extend footprint area for meteorological subdomain when using `met_subgrid_enable`. Defaults to 0.1 (10%) | | `met_subgrid_enable` | Enables extraction of spatial subdomains from files in `met_path` using HYSPLIT's `xtrct_grid` binary prior to executing simulations. If enabled, will create files in `<output_wd>/met/`. This can substantially accelerate simulation speed at the cost of increased disk usage. Defaults to disabled | | `met_subgrid_levels` | If set, extracts the defined number of vertical levels from the meteorological data files to further accelerate simulations. 
Defaults to `NA`, which includes all vertical levels available | diff --git a/r/run_stilt.r b/r/run_stilt.r index 5b77282..49cfcd5 100755 --- a/r/run_stilt.r +++ b/r/run_stilt.r @@ -51,6 +51,7 @@ yres <- xres # Meteorological data input met_path <- '' met_file_format <- '%Y%m%d.%Hz.hrrra' +n_hours_per_met_file <- 6 met_subgrid_buffer <- 0.2 met_subgrid_enable <- F met_subgrid_levels <- NA @@ -239,6 +240,7 @@ stilt_apply(FUN = simulation_step, met_subgrid_levels = met_subgrid_levels, mgmin = mgmin, n_hours = n_hours, + n_hours_per_met_file = n_hours_per_met_file, n_met_min = n_met_min, ncycl = ncycl, ndump = ndump, diff --git a/r/src/find_met_files.r b/r/src/find_met_files.r index 18aa5d7..6971ae7 100644 --- a/r/src/find_met_files.r +++ b/r/src/find_met_files.r @@ -1,42 +1,50 @@ #' find_met_files searches for meteorological data files -#' @author Ben Fasoli +#' @author Ben Fasoli & James Mineau #' #' Searches for available meteorological files matching the given strftime #' compatible file naming convention #' #' @param t_start time of simulation start +#' @param n_hours number of hours to run each simulation; negative indicates +#' backward in time +#' @param n_hours_per_met_file number of hours of meteorological data in each +#' met file #' @param met_file_format grep compatible file naming convention to identify #' meteorological data files necessary for the timing of the simulation #' indicated by \code{t_start} and \code{n_hours} -#' @param n_hours number of hours to run each simulation; negative indicates -#' backward in time #' @param met_path directory to find meteorological data #' #' @import dplyr #' @export -find_met_files <- function(t_start, met_file_format, n_hours, met_path) { +find_met_files <- function(t_start, n_hours, n_hours_per_met_file, + met_file_format, met_path) { require(dplyr) - - is_backward <- n_hours < 0 - - # TODO: implement n_hours_per_met_file to better determine file names at - # varying time resolutions - request <- 
as.POSIXct(t_start, tz='UTC') %>% - c(. + c(1, -1, n_hours, is_backward * (n_hours - 5)) * 3600) %>% - range() %>% - (function(x) seq(x[1], x[2], by = 'hour')) %>% - strftime(tz = 'UTC', format = met_file_format) - + + ts <- as.POSIXct(t_start, tz = 'UTC') + is_backward <- as.numeric(n_hours < 0) + ib <- ifelse(is_backward, 1, -1) + + # Generate the list of files to search for + request <- seq( + ts - (ib * as.difftime(abs(n_hours) + ib, units = 'hours')), + ts + (ib * as.difftime(n_hours_per_met_file - is_backward, units = 'hours')), + by = ib * 3600 + ) %>% + strftime(tz = 'UTC', format = met_file_format) %>% + unique() + + # Find the available files available <- dir(met_path, full.names = T, recursive = T) available <- available[!grepl('.lock', available)] - + + # Find the files that match the request idx <- do.call(c, lapply(request, function(pattern) { grep(pattern = pattern, x = available) })) - + if (any(idx < 1)) return() - - unique(available[idx]) + + available[idx] # Available files that match the request } diff --git a/r/src/simulation_step.r b/r/src/simulation_step.r index ab980a1..a209f14 100644 --- a/r/src/simulation_step.r +++ b/r/src/simulation_step.r @@ -61,6 +61,7 @@ simulation_step <- function(before_footprint = list(function() {output}), mgmin = 10, mhrs = 9999, n_hours = -24, + n_hours_per_met_file = 6, n_met_min = 1, ncycl = 0, ndump = 0, @@ -257,7 +258,8 @@ simulation_step <- function(before_footprint = list(function() {output}), link_files(exe, rundir) # Find necessary met files - met_files <- find_met_files(r_run_time, met_file_format, n_hours, met_path) + met_files <- find_met_files(r_run_time, n_hours, n_hours_per_met_file, + met_file_format, met_path) if (length(met_files) < n_met_min) { msg <- paste('Insufficient number of meteorological files found. 
Check', 'specifications in run_stilt.r') @@ -275,7 +277,8 @@ simulation_step <- function(before_footprint = list(function() {output}), } # Find necessary met files - met_files <- find_met_files(r_run_time, met_file_format, n_hours, met_path) + met_files <- find_met_files(r_run_time, n_hours, n_hours_per_met_file, + met_file_format, met_path) if (length(met_files) < n_met_min) { msg <- paste('Insufficient number of meteorological files found. Check', 'specifications in run_stilt.r') From 944e688e4cfe23e7df587c492764c91f59c43f59 Mon Sep 17 00:00:00 2001 From: James Mineau Date: Wed, 18 Sep 2024 12:34:02 -0600 Subject: [PATCH 2/6] only call find_met_files twice if met_subgrid is enabled --- r/src/simulation_step.r | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/r/src/simulation_step.r b/r/src/simulation_step.r index a209f14..e147ad8 100644 --- a/r/src/simulation_step.r +++ b/r/src/simulation_step.r @@ -267,26 +267,26 @@ simulation_step <- function(before_footprint = list(function() {output}), cat(msg, '\n', file = file.path(rundir, 'stilt.log'), append = T) return() } - + if (met_subgrid_enable) { met_path <- file.path(output_wd, 'met') calc_met_subgrids(met_files, met_path, exe, projection, xmn, xmx, ymn, ymx, levels = met_subgrid_levels, met_subgrid_buffer = met_subgrid_buffer) + + # Find necessary met files for subgrids + met_files <- find_met_files(r_run_time, n_hours, n_hours_per_met_file, + met_file_format, met_path) + if (length(met_files) < n_met_min) { + msg <- paste('Insufficient number of meteorological files found. Check', + 'specifications in run_stilt.r') + warning(msg) + cat(msg, '\n', file = file.path(rundir, 'stilt.log'), append = T) + return() + } } - - # Find necessary met files - met_files <- find_met_files(r_run_time, n_hours, n_hours_per_met_file, - met_file_format, met_path) - if (length(met_files) < n_met_min) { - msg <- paste('Insufficient number of meteorological files found. 
Check', - 'specifications in run_stilt.r') - warning(msg) - cat(msg, '\n', file = file.path(rundir, 'stilt.log'), append = T) - return() - } - + # Execute particle trajectory simulation, and read results into data frame output$receptor <- list(run_time = r_run_time, lati = r_lati, From e93e780b2d05099bd8ed5c53bdec2b2ad014f74d Mon Sep 17 00:00:00 2001 From: James Mineau Date: Thu, 19 Sep 2024 00:33:47 -0600 Subject: [PATCH 3/6] implement met_hours using n_hours_per_met_file --- r/src/find_met_files.r | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/r/src/find_met_files.r b/r/src/find_met_files.r index 6971ae7..2abebca 100644 --- a/r/src/find_met_files.r +++ b/r/src/find_met_files.r @@ -22,15 +22,26 @@ find_met_files <- function(t_start, n_hours, n_hours_per_met_file, require(dplyr) ts <- as.POSIXct(t_start, tz = 'UTC') - is_backward <- as.numeric(n_hours < 0) - ib <- ifelse(is_backward, 1, -1) - - # Generate the list of files to search for - request <- seq( - ts - (ib * as.difftime(abs(n_hours) + ib, units = 'hours')), - ts + (ib * as.difftime(n_hours_per_met_file - is_backward, units = 'hours')), - by = ib * 3600 - ) %>% + is_backward <- n_hours < 0 + met_bracket <- n_hours_per_met_file - 1 # ts can be in the middle of a met file + + # Generate the hours to search for + if (is_backward) { + met_hours <- seq( + ts - as.difftime(abs(n_hours) + met_bracket, units = 'hours'), + ts, + by = 3600 + ) + } else { + met_hours <- seq( + ts - as.difftime(met_bracket, units = 'hours'), + ts + as.difftime(n_hours, units = 'hours'), + by = 3600 + ) + } + + # Format the request and remove duplicates + request <- met_hours %>% strftime(tz = 'UTC', format = met_file_format) %>% unique() From f6128a985149c69c3ae9983be50cdcbb4fd27179 Mon Sep 17 00:00:00 2001 From: James Mineau Date: Thu, 19 Sep 2024 00:34:31 -0600 Subject: [PATCH 4/6] call unique on request and output --- r/src/find_met_files.r | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/r/src/find_met_files.r b/r/src/find_met_files.r index 2abebca..96f541d 100644 --- a/r/src/find_met_files.r +++ b/r/src/find_met_files.r @@ -57,5 +57,5 @@ find_met_files <- function(t_start, n_hours, n_hours_per_met_file, if (any(idx < 1)) return() - available[idx] # Available files that match the request + unique(available[idx]) # Available files that match the request } From da53c9df3bc674cc78e3a9000fe8d20ccf77f5c8 Mon Sep 17 00:00:00 2001 From: James Mineau Date: Tue, 24 Sep 2024 10:54:38 -0600 Subject: [PATCH 5/6] add n_hours_per_met_file param to stilt_cli --- r/stilt_cli.r | 1 + 1 file changed, 1 insertion(+) diff --git a/r/stilt_cli.r b/r/stilt_cli.r index 9a18ce8..a7d560d 100755 --- a/r/stilt_cli.r +++ b/r/stilt_cli.r @@ -125,6 +125,7 @@ stilt_args <- list( met_subgrid_levels = as.numeric(args$met_subgrid_levels), mgmin = as.numeric(args$mgmin), n_hours = as.numeric(args$n_hours), + n_hours_per_met_file = as.numeric(args$n_hours_per_met_file), n_met_min = as.numeric(args$n_met_min), ncycl = as.numeric(args$ncycl), ndump = as.numeric(args$ndump), From a92d9fd70262559a09aa846ced1b39dfa29937da Mon Sep 17 00:00:00 2001 From: James Mineau Date: Mon, 30 Sep 2024 16:08:06 -0600 Subject: [PATCH 6/6] n_hours_per_met_file guidance --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 7d432f8..8f7774a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -63,7 +63,7 @@ str(receptors) | -------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `met_path` | Absolute path to ARL compatible meteorological data files | | `met_file_format` | String detailing file naming convention for meteorological data files using a mixture of datetime and regex syntax. The formatting string accepts `grep` compatible regular expressions (`.\*.arl`), `strftime` compatible datetime strings (`%Y%m%d%H`) or any combination of the two. Datetime syntax is expanded to all unique combinations required for the receptor and simulation duration and the intersection between the requested files and files available in `met_path` is determined with `grep`, allowing partial matching and compatible regular expressions to be used to identify the relevant data. Matching does not require the full format to be specified - e.g. `\*.arl`, `%Y`, `%Y%m%d`, `%Y%m%d_d0.*.arl` would all match with a file named `20180130_d01.arl`. | -| `n_hours_per_met_file` | Number of hours per meteorological data file. Defaults to 6 | +| `n_hours_per_met_file` | Number of hours per meteorological data file. To determine the number of hours in an ARL compatible meteorological data file, refer to the README, including the file naming convention, provided by the data source. 
For example, the [NOAA HRRR README](https://www.ready.noaa.gov/data/archives/hrrr/README.TXT) specifies a "6 hour data file beginning with 00z - 05z in the first file of the day". Defaults to 6 | | `met_subgrid_buffer` | Percent to extend footprint area for meteorological subdomain when using `met_subgrid_enable`. Defaults to 0.1 (10%) | | `met_subgrid_enable` | Enables extraction of spatial subdomains from files in `met_path` using HYSPLIT's `xtrct_grid` binary prior to executing simulations. If enabled, will create files in `<output_wd>/met/`. This can substantially accelerate simulation speed at the cost of increased disk usage. Defaults to disabled | | `met_subgrid_levels` | If set, extracts the defined number of vertical levels from the meteorological data files to further accelerate simulations. Defaults to `NA`, which includes all vertical levels available |