Skip to content

Commit

Permalink
Call on sys mapshaper for simplification for large datasets close NOA…
Browse files Browse the repository at this point in the history
  • Loading branch information
mikejohnson51 committed Jul 18, 2024
1 parent 905edf9 commit f4a0848
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 97 deletions.
7 changes: 3 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,12 @@ BugReports: https://github.com/mikejohnson51/hydrofab/issues
Depends:
R (>= 3.5.0)
Imports:
arrow,
climateR,
data.table,
DBI,
dplyr,
glue,
httr,
hydroloom,
igraph,
logger,
lwgeom,
Expand All @@ -31,14 +30,14 @@ Imports:
pbapply,
rlang,
rmapshaper,
RSQLite,
rvest,
sf,
terra,
tibble,
tidyr,
units,
utils
utils,
yyjsonr
Suggests:
testthat
Remotes:
Expand Down
9 changes: 4 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,12 @@ export(union_polygons)
export(unpack_set)
export(update_network_identifiers)
export(write_hydrofabric)
importFrom(DBI,dbConnect)
importFrom(DBI,dbDisconnect)
importFrom(DBI,dbListTables)
importFrom(RSQLite,SQLite)
importFrom(arrow,open_dataset)
importFrom(climateR,dap)
importFrom(data.table,rbindlist)
importFrom(dplyr,"%>%")
importFrom(dplyr,`%>%`)
importFrom(dplyr,across)
importFrom(dplyr,add_count)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
Expand Down Expand Up @@ -178,8 +177,8 @@ importFrom(sf,st_touches)
importFrom(sf,st_transform)
importFrom(sf,st_union)
importFrom(sf,st_within)
importFrom(sf,st_write)
importFrom(sf,write_sf)
importFrom(stats,weighted.mean)
importFrom(terra,aggregate)
importFrom(terra,as.matrix)
importFrom(terra,as.polygons)
Expand Down
143 changes: 72 additions & 71 deletions R/catchment_geometry.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,10 @@ flowpaths_to_linestrings = function(flowpaths){


#' Clean Catchment Geometry
#' @description Fixes geometry issues present in catchments that originate in the
#' CatchmentSP layers, or from the reconcile_catchments hydrofab preocess.
#' @description Fixes geometry issues present in catchments derived from DEMs.
#' These include, but are not limited to disjoint polygon fragments, artifacts
#' from the 30m DEM used to generate the catchments, and non-valid geometry topolgies.
#' A goal of this functions is also to provide means to reduce the data column
#' from the DEM used to generate the catchments, and non-valid geometry topologies.
#' A secondary goal of this functions is to provide a way to reduce the data column
#' of the catchments by offering a topology preserving simplification
#' through \code{\link[rmapshaper]{ms_simplify}}.
#' Generally a "keep" parameter of .9 seems appropriate for the resolution of
Expand All @@ -109,6 +108,8 @@ flowpaths_to_linestrings = function(flowpaths){
#' If NULL, then no simplification will be executed.
#' @param crs integer or object compatible with sf::st_crs coordinate reference.
#' Should be a projection that supports area-calculations.
#' @param force should the mapshaper/mapshaper-xl binaries be used directly for simplification?
#' @param gb The amount of heap memory to be allocated when force = TRUE
#' @param sys logical should the mapshaper system library be used. If NULL
#' the system library will be used if available.
#' @return sf object
Expand All @@ -121,6 +122,8 @@ clean_geometry <- function(catchments,
keep = NULL,
crs = 5070,
grid = .0009,
gb = 8,
force = FALSE,
sys = NULL) {

# keep an original count of # rows in catchments
Expand All @@ -137,7 +140,7 @@ clean_geometry <- function(catchments,
# set crs variable to crs of catchments
if(!is.null(crs)){ crs = st_crs(catchments) }

catchments = lwgeom::st_snap_to_grid(st_transform(catchments, 5070), size = grid)
catchments = lwgeom::st_snap_to_grid(st_transform(catchments, crs), size = grid)

# cast MPs to POLYGONS and add featureid count column
polygons = suppressWarnings({
Expand Down Expand Up @@ -165,7 +168,9 @@ clean_geometry <- function(catchments,

if(!is.null(flowlines)){

fl = filter(flowlines, .data[[fl_ID]] %in% extra_parts[[ID]])
u = unique(extra_parts[[ID]])

fl = dplyr::filter(flowlines, .data[[fl_ID]] %in% u)

imap = st_intersects(extra_parts, st_transform(st_zm(fl), st_crs(extra_parts)))

Expand Down Expand Up @@ -273,28 +278,50 @@ clean_geometry <- function(catchments,
} else {
in_cat = fast_validity_check(main_parts)
}



if(!is.null(keep)){
in_cat = simplify_process(in_cat, keep, sys)
if(!is.null(keep)){
in_cat = tryCatch({
simplify_process(in_cat, keep, sys, force = force, gb = gb)
}, error = function(e){
if(force){
message("Even when using the system mapshaper, an error has been found.")
} else {
message("Try using `force=TRUE` to envoke the system mapshaper.")
}
})
}


return(
mutate(in_cat, areasqkm = add_areasqkm(in_cat)) %>%
st_transform(crs) %>%
select("{ID}" := ID, areasqkm) %>%
left_join(st_drop_geometry(select(catchments, -any_of('areasqkm'))), by = ID)
)
x = select(catchments, -any_of(c('areasqkm'))) %>%
st_drop_geometry() %>%
mutate(ID = as.numeric(ID))

x2 = mutate(in_cat, areasqkm = add_areasqkm(in_cat)) |>
st_transform(crs) |>
mutate(ID = as.numeric(ID)) |>
select("{ID}" := ID, areasqkm) |>
left_join(x, by = ID)

return(x2)

} else {

if(!is.null(keep)){
polygons = simplify_process(polygons, keep, sys) %>%
fast_validity_check()
polygons = tryCatch({
simplify_process(polygons, keep, sys, force = force, gb = gb)
}, error = function(e){
if(force){
message("Even when using the system mapshaper, an error has been found.")
} else {
message("Try using `force=TRUE` to envoke the system mapshaper.")
}
})

}

return(
select(polygons, -n, -tmpID)
select(polygons, -any_of(c("n", 'tmpID')))
)
}
}
Expand All @@ -311,67 +338,41 @@ fast_validity_check <- function(x){

}

simplify_process = function(catchments, keep, sys){
#' @importFrom yyjsonr write_geojson_file read_geojson_file

simplify_process = function(catchments, keep, sys, gb = 8, force = TRUE){

# simplify catchments
catchments = ms_simplify(catchments,
keep = keep,
keep_shapes = TRUE,
sys = sys)
if(force){
tmp = tempfile(fileext = ".geojson")
tmp2 = tempfile(fileext = ".geojson")
crs = st_crs(catchments)
yyjsonr::write_geojson_file(st_transform(catchments, 4326), tmp)

if(gb <= 8){
system(glue("mapshaper {tmp} -simplify {keep} keep-shapes -o {tmp2}"))
} else {
system(glue("mapshaper-xl {gb}gb {tmp} -simplify {keep} keep-shapes -o {tmp2}"))
}

cats = suppressWarnings({
yyjsonr::read_geojson_file(tmp2) |>
st_transform(crs)
})

unlink(tmp)
unlink(tmp2)

} else {
catchments = ms_simplify(catchments, keep = keep, keep_shapes = TRUE, sys = sys)
}

tt = fast_validity_check(catchments)
tt = fast_validity_check(cats)

if(nrow(tt) != sum(st_is_valid(tt))){
stop("Invalid simplication")
} else {
tt
}
# # mark valid/invalid geoms
# bool = st_is_valid(catchments)
#
# # make invalid geoms valid
# invalids = st_make_valid(filter(catchments, !bool))
#
# # if not all polygons get returned, try different simplification keep value
# if(nrow(filter(invalids, st_geometry_type(invalids) != "POLYGON")) > 0){
#
# warning("Invalid geometries found. Trying new keep of:", keep + ((1-keep) / 2) , call. = FALSE)
#
# # try simplification again
# catchments = ms_simplify(catchments, keep = keep + ((1-keep) / 2), keep_shapes = TRUE, sys = sys)
#
# # mark valid/invalid geoms
# bool = st_is_valid(catchments)
#
# # make invalid geoms valid
# invalids = st_make_valid(filter(catchments, !bool))
#
# # if catchments still containing non POLYGON geometries, return original data
# if(nrow(filter(invalids, st_geometry_type(invalids) != "POLYGON")) > 0){
#
# warning("Invalid geometries found. Original catchments returned." , call. = FALSE)
#
# return(catchments)
#
# } else {
#
# # combine corrected invalids with valids and recalc area
# return(
# mutate(
# bind_rows(invalids, filter(catchments, bool)),
# areasqkm = add_areasqkm(.)
# )
# )
# }
#
# } else {
#
# # combine corrected invalids with valids and recalc area
# return(
# bind_rows(invalids, filter(catchments, bool)) %>%
# mutate( areasqkm = add_areasqkm(.))
# )
# }
}


Expand Down
3 changes: 3 additions & 0 deletions R/hyaggregate_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

prepare_network = function(network_list) {

network_list$flowpaths = rename_geometry(network_list$flowpaths, "geom")
network_list$catchments = rename_geometry(network_list$catchments, "geom")

names(network_list$flowpaths) = tolower(names(network_list$flowpaths))
names(network_list$catchments) = tolower(names(network_list$catchments))

Expand Down
37 changes: 24 additions & 13 deletions R/hydrofab.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,30 @@
#' @importFrom dplyr right_join filter select rename mutate bind_rows group_by ungroup slice_max bind_rows n left_join rename slice_min add_count tbl collect everything distinct contains slice_max case_when bind_cols
#' @importFrom nhdplusTools get_vaa rename_geometry get_node get_flowline_index
#' @importFrom arrow open_dataset
#'
#' @importFrom climateR dap
#'
#' @importFrom dplyr across add_count bind_cols bind_rows collect case_when contains
#' @importFrom dplyr distinct everything filter group_by left_join mutate n rename
#' @importFrom dplyr right_join select slice_max slice_min summarize tbl ungroup `%>%`
#'
#' @importFrom nhdplusTools get_vaa rename_geometry get_node get_flowline_index get_streamorder
#'
#' @importFrom rvest read_html html_nodes html_attr
#'
#' @importFrom httr RETRY write_disk progress
#' @importFrom sf write_sf read_sf st_read st_write st_as_sf
#' @importFrom sf st_layers st_crs st_touches st_transform st_area st_make_valid st_intersection st_collection_extract st_cast st_intersects st_length st_filter st_union st_is_empty st_drop_geometry st_is_valid
#'
#' @importFrom sf read_sf st_area st_as_sf st_cast st_crs st_collection_extract
#' @importFrom sf st_drop_geometry st_filter st_layers st_length st_make_valid
#' @importFrom sf st_is_empty st_is_valid st_intersection st_intersects st_precision
#' @importFrom sf st_touches st_transform st_union write_sf
#'
#' @importFrom rmapshaper ms_explode ms_dissolve ms_simplify check_sys_mapshaper
#'
#' @importFrom rlang := sym
#' @importFrom RSQLite SQLite
#' @importFrom DBI dbListTables dbDisconnect dbConnect
#'
#' @importFrom stats weighted.mean
#'
#' @importFrom glue glue


#nolint start
# NHDPlus Attributes
Expand Down Expand Up @@ -147,11 +164,5 @@ get_ds_joined_fromcomid <- function(flines) {
flines[["ds_joined_fromCOMID"]][match(flines$toCOMID, flines$COMID)]
}

drop_geometry <- function(x) {
if("sf" %in% class(x)) {
sf::st_drop_geometry(x)
} else {
x
}
}


4 changes: 0 additions & 4 deletions R/refactor_wrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
#' @param outfile path to geopackage to write refactored_flowlines, and if facfdr != NULL, refactored catchments.
#' @return data to the specified gpkg
#' @export
#' @importFrom dplyr filter select rename
#' @importFrom sf read_sf st_transform st_drop_geometry write_sf st_crs st_precision
#' @importFrom nhdplusTools get_streamorder get_vaa


refactor = function (gpkg = NULL,
flowpaths = NULL,
Expand Down

0 comments on commit f4a0848

Please sign in to comment.