From f4a0848f1255407feb6cbbb73500b8057fdbaf9c Mon Sep 17 00:00:00 2001 From: mikejohnson51 Date: Thu, 18 Jul 2024 11:35:16 -0600 Subject: [PATCH] Call on sys mapshaper for simplification for large datasets close NOAA-OWP/hydrofabric#128 --- DESCRIPTION | 7 +- NAMESPACE | 9 ++- R/catchment_geometry.R | 143 +++++++++++++++++++++-------------------- R/hyaggregate_utils.R | 3 + R/hydrofab.R | 37 +++++++---- R/refactor_wrapper.R | 4 -- 6 files changed, 106 insertions(+), 97 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e01a816..b96ac05 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,13 +15,12 @@ BugReports: https://github.com/mikejohnson51/hydrofab/issues Depends: R (>= 3.5.0) Imports: + arrow, climateR, data.table, - DBI, dplyr, glue, httr, - hydroloom, igraph, logger, lwgeom, @@ -31,14 +30,14 @@ Imports: pbapply, rlang, rmapshaper, - RSQLite, rvest, sf, terra, tibble, tidyr, units, - utils + utils, + yyjsonr Suggests: testthat Remotes: diff --git a/NAMESPACE b/NAMESPACE index 86c2fd0..db30bcb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -59,13 +59,12 @@ export(union_polygons) export(unpack_set) export(update_network_identifiers) export(write_hydrofabric) -importFrom(DBI,dbConnect) -importFrom(DBI,dbDisconnect) -importFrom(DBI,dbListTables) -importFrom(RSQLite,SQLite) +importFrom(arrow,open_dataset) +importFrom(climateR,dap) importFrom(data.table,rbindlist) importFrom(dplyr,"%>%") importFrom(dplyr,`%>%`) +importFrom(dplyr,across) importFrom(dplyr,add_count) importFrom(dplyr,arrange) importFrom(dplyr,bind_cols) @@ -178,8 +177,8 @@ importFrom(sf,st_touches) importFrom(sf,st_transform) importFrom(sf,st_union) importFrom(sf,st_within) -importFrom(sf,st_write) importFrom(sf,write_sf) +importFrom(stats,weighted.mean) importFrom(terra,aggregate) importFrom(terra,as.matrix) importFrom(terra,as.polygons) diff --git a/R/catchment_geometry.R b/R/catchment_geometry.R index 70786fd..f41ab2e 100644 --- a/R/catchment_geometry.R +++ b/R/catchment_geometry.R @@ -91,11 +91,10 @@ flowpaths_to_linestrings = function(flowpaths){ #' Clean Catchment Geometry -#' @description Fixes geometry issues present in catchments that originate in the -#' CatchmentSP layers, or from the reconcile_catchments hydrofab preocess. +#' @description Fixes geometry issues present in catchments derived from DEMs. #' These include, but are not limited to disjoint polygon fragments, artifacts -#' from the 30m DEM used to generate the catchments, and non-valid geometry topolgies. -#' A goal of this functions is also to provide means to reduce the data column +#' from the DEM used to generate the catchments, and non-valid geometry topologies. +#' A secondary goal of this functions is to provide a way to reduce the data column #' of the catchments by offering a topology preserving simplification #' through \code{\link[rmapshaper]{ms_simplify}}. #' Generally a "keep" parameter of .9 seems appropriate for the resolution of @@ -109,6 +108,8 @@ flowpaths_to_linestrings = function(flowpaths){ #' If NULL, then no simplification will be executed. #' @param crs integer or object compatible with sf::st_crs coordinate reference. #' Should be a projection that supports area-calculations. +#' @param force should the mapshaper/mapshaper-xl binaries be used directly for simplification? +#' @param gb The amount of heap memory to be allocated when force = TRUE #' @param sys logical should the mapshaper system library be used. If NULL #' the system library will be used if available. #' @return sf object @@ -121,6 +122,8 @@ clean_geometry <- function(catchments, keep = NULL, crs = 5070, grid = .0009, + gb = 8, + force = FALSE, sys = NULL) { # keep an original count of # rows in catchments @@ -137,7 +140,7 @@ clean_geometry <- function(catchments, # set crs variable to crs of catchments if(!is.null(crs)){ crs = st_crs(catchments) } - catchments = lwgeom::st_snap_to_grid(st_transform(catchments, 5070), size = grid) + catchments = lwgeom::st_snap_to_grid(st_transform(catchments, crs), size = grid) # cast MPs to POLYGONS and add featureid count column polygons = suppressWarnings({ @@ -165,7 +168,9 @@ clean_geometry <- function(catchments, if(!is.null(flowlines)){ - fl = filter(flowlines, .data[[fl_ID]] %in% extra_parts[[ID]]) + u = unique(extra_parts[[ID]]) + + fl = dplyr::filter(flowlines, .data[[fl_ID]] %in% u) imap = st_intersects(extra_parts, st_transform(st_zm(fl), st_crs(extra_parts))) @@ -273,28 +278,50 @@ clean_geometry <- function(catchments, } else { in_cat = fast_validity_check(main_parts) } + + - if(!is.null(keep)){ - in_cat = simplify_process(in_cat, keep, sys) + if(!is.null(keep)){ + in_cat = tryCatch({ + simplify_process(in_cat, keep, sys, force = force, gb = gb) + }, error = function(e){ + if(force){ + message("Even when using the system mapshaper, an error has been found.") + } else { + message("Try using `force=TRUE` to envoke the system mapshaper.") + } + }) } - - return( - mutate(in_cat, areasqkm = add_areasqkm(in_cat)) %>% - st_transform(crs) %>% - select("{ID}" := ID, areasqkm) %>% - left_join(st_drop_geometry(select(catchments, -any_of('areasqkm'))), by = ID) - ) + x = select(catchments, -any_of(c('areasqkm'))) %>% + st_drop_geometry() %>% + mutate(ID = as.numeric(ID)) + + x2 = mutate(in_cat, areasqkm = add_areasqkm(in_cat)) |> + st_transform(crs) |> + mutate(ID = as.numeric(ID)) |> + select("{ID}" := ID, areasqkm) |> + left_join(x, by = ID) + + return(x2) } else { if(!is.null(keep)){ - polygons = simplify_process(polygons, keep, sys) %>% - fast_validity_check() + polygons = tryCatch({ + simplify_process(polygons, keep, sys, force = force, gb = gb) + }, error = function(e){ + if(force){ + message("Even when using the system mapshaper, an error has been found.") + } else { + message("Try using `force=TRUE` to envoke the system mapshaper.") + } + }) + } return( - select(polygons, -n, -tmpID) + select(polygons, -any_of(c("n", 'tmpID'))) ) } } @@ -311,67 +338,41 @@ fast_validity_check <- function(x){ } -simplify_process = function(catchments, keep, sys){ +#' @importFrom yyjsonr write_geojson_file read_geojson_file + +simplify_process = function(catchments, keep, sys, gb = 8, force = TRUE){ - # simplify catchments - catchments = ms_simplify(catchments, - keep = keep, - keep_shapes = TRUE, - sys = sys) + if(force){ + tmp = tempfile(fileext = ".geojson") + tmp2 = tempfile(fileext = ".geojson") + crs = st_crs(catchments) + yyjsonr::write_geojson_file(st_transform(catchments, 4326), tmp) + + if(gb <= 8){ + system(glue("mapshaper {tmp} -simplify {keep} keep-shapes -o {tmp2}")) + } else { + system(glue("mapshaper-xl {gb}gb {tmp} -simplify {keep} keep-shapes -o {tmp2}")) + } + + cats = suppressWarnings({ + yyjsonr::read_geojson_file(tmp2) |> + st_transform(crs) + }) + + unlink(tmp) + unlink(tmp2) + + } else { + catchments = ms_simplify(catchments, keep = keep, keep_shapes = TRUE, sys = sys) + } - tt = fast_validity_check(catchments) + tt = fast_validity_check(cats) if(nrow(tt) != sum(st_is_valid(tt))){ stop("Invalid simplication") } else { tt } - # # mark valid/invalid geoms - # bool = st_is_valid(catchments) - # - # # make invalid geoms valid - # invalids = st_make_valid(filter(catchments, !bool)) - # - # # if not all polygons get returned, try different simplification keep value - # if(nrow(filter(invalids, st_geometry_type(invalids) != "POLYGON")) > 0){ - # - # warning("Invalid geometries found. Trying new keep of:", keep + ((1-keep) / 2) , call. = FALSE) - # - # # try simplification again - # catchments = ms_simplify(catchments, keep = keep + ((1-keep) / 2), keep_shapes = TRUE, sys = sys) - # - # # mark valid/invalid geoms - # bool = st_is_valid(catchments) - # - # # make invalid geoms valid - # invalids = st_make_valid(filter(catchments, !bool)) - # - # # if catchments still containing non POLYGON geometries, return original data - # if(nrow(filter(invalids, st_geometry_type(invalids) != "POLYGON")) > 0){ - # - # warning("Invalid geometries found. Original catchments returned." , call. = FALSE) - # - # return(catchments) - # - # } else { - # - # # combine corrected invalids with valids and recalc area - # return( - # mutate( - # bind_rows(invalids, filter(catchments, bool)), - # areasqkm = add_areasqkm(.) - # ) - # ) - # } - # - # } else { - # - # # combine corrected invalids with valids and recalc area - # return( - # bind_rows(invalids, filter(catchments, bool)) %>% - # mutate( areasqkm = add_areasqkm(.)) - # ) - # } } diff --git a/R/hyaggregate_utils.R b/R/hyaggregate_utils.R index f12d7c0..b367413 100644 --- a/R/hyaggregate_utils.R +++ b/R/hyaggregate_utils.R @@ -11,6 +11,9 @@ prepare_network = function(network_list) { + network_list$flowpaths = rename_geometry(network_list$flowpaths, "geom") + network_list$catchments = rename_geometry(network_list$catchments, "geom") + names(network_list$flowpaths) = tolower(names(network_list$flowpaths)) names(network_list$catchments) = tolower(names(network_list$catchments)) diff --git a/R/hydrofab.R b/R/hydrofab.R index c388d85..4a5df9a 100644 --- a/R/hydrofab.R +++ b/R/hydrofab.R @@ -1,13 +1,30 @@ -#' @importFrom dplyr right_join filter select rename mutate bind_rows group_by ungroup slice_max bind_rows n left_join rename slice_min add_count tbl collect everything distinct contains slice_max case_when bind_cols -#' @importFrom nhdplusTools get_vaa rename_geometry get_node get_flowline_index +#' @importFrom arrow open_dataset +#' +#' @importFrom climateR dap +#' +#' @importFrom dplyr across add_count bind_cols bind_rows collect case_when contains +#' @importFrom dplyr distinct everything filter group_by left_join mutate n rename +#' @importFrom dplyr right_join select slice_max slice_min summarize tbl ungroup `%>%` +#' +#' @importFrom nhdplusTools get_vaa rename_geometry get_node get_flowline_index get_streamorder +#' #' @importFrom rvest read_html html_nodes html_attr +#' #' @importFrom httr RETRY write_disk progress -#' @importFrom sf write_sf read_sf st_read st_write st_as_sf -#' @importFrom sf st_layers st_crs st_touches st_transform st_area st_make_valid st_intersection st_collection_extract st_cast st_intersects st_length st_filter st_union st_is_empty st_drop_geometry st_is_valid +#' +#' @importFrom sf read_sf st_area st_as_sf st_cast st_crs st_collection_extract +#' @importFrom sf st_drop_geometry st_filter st_layers st_length st_make_valid +#' @importFrom sf st_is_empty st_is_valid st_intersection st_intersects st_precision +#' @importFrom sf st_touches st_transform st_union write_sf +#' #' @importFrom rmapshaper ms_explode ms_dissolve ms_simplify check_sys_mapshaper +#' #' @importFrom rlang := sym -#' @importFrom RSQLite SQLite -#' @importFrom DBI dbListTables dbDisconnect dbConnect +#' +#' @importFrom stats weighted.mean +#' +#' @importFrom glue glue + #nolint start # NHDPlus Attributes @@ -147,11 +164,5 @@ get_ds_joined_fromcomid <- function(flines) { flines[["ds_joined_fromCOMID"]][match(flines$toCOMID, flines$COMID)] } -drop_geometry <- function(x) { - if("sf" %in% class(x)) { - sf::st_drop_geometry(x) - } else { - x - } -} + diff --git a/R/refactor_wrapper.R b/R/refactor_wrapper.R index ffca25f..138866a 100644 --- a/R/refactor_wrapper.R +++ b/R/refactor_wrapper.R @@ -16,10 +16,6 @@ #' @param outfile path to geopackage to write refactored_flowlines, and if facfdr != NULL, refactored catchments. #' @return data to the specified gpkg #' @export -#' @importFrom dplyr filter select rename -#' @importFrom sf read_sf st_transform st_drop_geometry write_sf st_crs st_precision -#' @importFrom nhdplusTools get_streamorder get_vaa - refactor = function (gpkg = NULL, flowpaths = NULL,