From 1c5188e1fa8ff0bbd189ef237afea00f96d7ff06 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Mon, 6 May 2024 07:58:48 -0700 Subject: [PATCH 1/4] createSSURGO: add support for creating DuckDB files --- DESCRIPTION | 2 +- R/createSSURGO.R | 88 ++++++++++++++++++++++++++++++++++----------- man/createSSURGO.Rd | 3 ++ 3 files changed, 72 insertions(+), 21 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 76aa9936..ffac7b12 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,7 @@ License: GPL (>= 3) LazyLoad: yes Depends: R (>= 3.5.0) Imports: grDevices, graphics, stats, utils, methods, aqp (>= 2.0.2), data.table, DBI, curl -Suggests: jsonlite, xml2, httr, rvest, odbc, RSQLite, sf, wk, terra, raster, knitr, rmarkdown, testthat +Suggests: jsonlite, xml2, httr, rvest, odbc, RSQLite, duckdb, sf, wk, terra, raster, knitr, rmarkdown, testthat Repository: CRAN URL: https://ncss-tech.github.io/soilDB/, https://ncss-tech.github.io/AQP/ BugReports: https://github.com/ncss-tech/soilDB/issues diff --git a/R/createSSURGO.R b/R/createSSURGO.R index 3ededefb..bec4325e 100644 --- a/R/createSSURGO.R +++ b/R/createSSURGO.R @@ -121,6 +121,7 @@ downloadSSURGO <- function(WHERE = NULL, #' @param header Logical. Passed to `read.delim()` for reading pipe-delimited (`|`) text files containing tabular data. #' @param quiet Logical. Suppress messages and other output from database read/write operations? #' @param ... Additional arguments passed to `write_sf()` for writing spatial layers. +#' @param duckdb Logical. Create DuckDB rather than SQLite database? Default: `FALSE`. #' #' @return Character. Vector of layer/table names in `filename`. #' @export @@ -136,6 +137,7 @@ createSSURGO <- function(filename, overwrite = FALSE, header = FALSE, quiet = TRUE, + duckdb = FALSE, ...) { if (missing(filename) || length(filename) == 0) { @@ -143,19 +145,29 @@ createSSURGO <- function(filename, } IS_GPKG <- grepl("\\.gpkg$", filename, ignore.case = TRUE)[1] + IS_DUCKDB <- duckdb f <- list.files(exdir, recursive = TRUE, pattern = pattern, full.names = TRUE) if (!requireNamespace("sf")) stop("package `sf` is required to write spatial datasets to SSURGO SQLite databases", call. = FALSE) - if (!requireNamespace("RSQLite")) - stop("package `RSQLite` is required to write tabular datasets to SSURGO SQLite databases", call. = FALSE) - if (isTRUE(overwrite) && file.exists(filename)) { file.remove(filename) } + if (IS_DUCKDB) { + if (!requireNamespace("duckdb")) + stop("package `duckdb` is required to write datasets to SSURGO DuckDB databases", call. = FALSE) + con <- duckdb::dbConnect(duckdb::duckdb(filename), filename) + on.exit(duckdb::dbDisconnect(con, shutdown = TRUE)) + } else { + if (!requireNamespace("RSQLite")) + stop("package `RSQLite` is required to write datasets to SSURGO SQLite databases", call. = FALSE) + con <- RSQLite::dbConnect(RSQLite::SQLite(), filename, loadable.extensions = TRUE) + on.exit(RSQLite::dbDisconnect(con)) + } + # create and add combined vector datasets: # "soilmu_a", "soilmu_l", "soilmu_p", "soilsa_a", "soilsf_l", "soilsf_p" f.shp <- f[grepl(".*\\.shp$", f)] @@ -166,15 +178,28 @@ createSSURGO <- function(filename, if (nrow(shp.grp) >= 1 && ncol(shp.grp) == 3 && include_spatial) { f.shp.grp <- split(f.shp, list(feature = shp.grp[,1], geom = shp.grp[,2])) - + if (IS_DUCKDB) + duckdb::dbSendQuery(con, "INSTALL spatial; LOAD spatial;") lapply(seq_along(f.shp.grp), function(i) { lapply(seq_along(f.shp.grp[[i]]), function(j){ lnm <- layer_names[match(gsub(".*soil([musfa]{2}_[apl])_.*", "\\1", f.shp.grp[[i]][j]), names(layer_names))] - - if (overwrite && j == 1) { - sf::write_sf(sf::read_sf(f.shp.grp[[i]][j]), filename, layer = lnm, overwrite = TRUE, ...) - } else sf::write_sf(sf::read_sf(f.shp.grp[[i]][j]), filename, layer = lnm, append = TRUE, ...) + if (IS_DUCKDB) { + if (overwrite && j == 1) { + duckdb::dbSendQuery(con, sprintf("DROP TABLE IF EXISTS %s;", lnm)) + duckdb::dbSendQuery(con, sprintf("CREATE TABLE %s AS SELECT * FROM ST_Read('%s');", + lnm, f.shp.grp[[i]][j])) + } else { + duckdb::dbSendQuery(con, sprintf("INSERT INTO %s SELECT * FROM ST_Read('%s');", + lnm, f.shp.grp[[i]][j])) + } + } else { + if (overwrite && j == 1) { + sf::write_sf(sf::read_sf(f.shp.grp[[i]][j]), filename, layer = lnm, overwrite = TRUE, ...) + } else { + sf::write_sf(sf::read_sf(f.shp.grp[[i]][j]), filename, layer = lnm, append = TRUE, ...) + } + } NULL }) }) @@ -211,9 +236,6 @@ createSSURGO <- function(filename, msidxdet <- read.delim(msidxdn[1], sep = "|", stringsAsFactors = FALSE, header = header) } - con <- RSQLite::dbConnect(RSQLite::SQLite(), filename, loadable.extensions = TRUE) - on.exit(RSQLite::dbDisconnect(con)) - lapply(names(f.txt.grp), function(x) { if (!is.null(mstabcol)) { @@ -226,7 +248,7 @@ createSSURGO <- function(filename, } d <- try(as.data.frame(data.table::rbindlist(lapply(seq_along(f.txt.grp[[x]]), function(i) { - + # print(f.txt.grp[[x]][i]) y <- read.delim(f.txt.grp[[x]][i], sep = "|", stringsAsFactors = FALSE, header = header) if (length(y) == 1) { @@ -251,16 +273,32 @@ createSSURGO <- function(filename, # write tabular data to file try({ - if (overwrite) { - RSQLite::dbWriteTable(con, mstab_lut[x], d, overwrite = TRUE) - } else RSQLite::dbWriteTable(con, mstab_lut[x], d, append = TRUE) + if (IS_DUCKDB) { + if (overwrite) { + duckdb::dbWriteTable(con, mstab_lut[x], d, overwrite = TRUE) + } else { + duckdb::dbWriteTable(con, mstab_lut[x], d, append = TRUE) + } + } else { + if (overwrite) { + RSQLite::dbWriteTable(con, mstab_lut[x], d, overwrite = TRUE) + } else { + RSQLite::dbWriteTable(con, mstab_lut[x], d, append = TRUE) + } + } }, silent = quiet) # create pkey indices if (!is.null(indexPK) && length(indexPK) > 0) { try({ - RSQLite::dbExecute(con, sprintf("CREATE UNIQUE INDEX IF NOT EXISTS %s ON %s (%s)", - paste0('PK_', mstab_lut[x]), mstab_lut[x], paste0(indexPK, collapse = ","))) + q <- sprintf("CREATE UNIQUE INDEX IF NOT EXISTS %s ON %s (%s)", + paste0('PK_', mstab_lut[x]), mstab_lut[x], + paste0(indexPK, collapse = ",")) + if (IS_DUCKDB) { + duckdb::dbSendQuery(con, q) + } else { + RSQLite::dbExecute(con, q) + } }, silent = quiet) } @@ -268,8 +306,13 @@ createSSURGO <- function(filename, if (!is.null(indexDI) && length(indexDI) > 0) { for (i in seq_along(indexDI)) { try({ - RSQLite::dbExecute(con, sprintf("CREATE INDEX IF NOT EXISTS %s ON %s (%s)", - paste0('DI_', mstab_lut[x]), mstab_lut[x], indexDI[i])) + q <- sprintf("CREATE INDEX IF NOT EXISTS %s ON %s (%s)", + paste0('DI_', mstab_lut[x]), mstab_lut[x], indexDI[i]) + if (IS_DUCKDB) { + duckdb::dbSendQuery(con, q) + } else { + RSQLite::dbExecute(con, q) + } }, silent = quiet) } } @@ -291,7 +334,12 @@ createSSURGO <- function(filename, } }) - res <- RSQLite::dbListTables(con) + res <- NULL + if (IS_DUCKDB) { + res <- duckdb::dbListTables(con) + } else { + res <- RSQLite::dbListTables(con) + } invisible(res) } diff --git a/man/createSSURGO.Rd b/man/createSSURGO.Rd index b568093b..ec5ae16e 100644 --- a/man/createSSURGO.Rd +++ b/man/createSSURGO.Rd @@ -12,6 +12,7 @@ createSSURGO( overwrite = FALSE, header = FALSE, quiet = TRUE, + duckdb = FALSE, ... ) } @@ -30,6 +31,8 @@ createSSURGO( \item{quiet}{Logical. Suppress messages and other output from database read/write operations?} +\item{duckdb}{Logical. Create DuckDB rather than SQLite database? Default: \code{FALSE}.} + \item{...}{Additional arguments passed to \code{write_sf()} for writing spatial layers.} } \value{ From ed07805214f2d9e6ad812ace24fc20a1c6e0ed78 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Sat, 18 May 2024 11:04:37 -0700 Subject: [PATCH 2/4] simplify with DBI methods --- R/createSSURGO.R | 46 ++++++++++++++-------------------------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/R/createSSURGO.R b/R/createSSURGO.R index bec4325e..1bce1750 100644 --- a/R/createSSURGO.R +++ b/R/createSSURGO.R @@ -178,19 +178,22 @@ createSSURGO <- function(filename, if (nrow(shp.grp) >= 1 && ncol(shp.grp) == 3 && include_spatial) { f.shp.grp <- split(f.shp, list(feature = shp.grp[,1], geom = shp.grp[,2])) - if (IS_DUCKDB) - duckdb::dbSendQuery(con, "INSTALL spatial; LOAD spatial;") + + if (IS_DUCKDB) { + DBI::dbExecute(con, "INSTALL spatial; LOAD spatial;") + } + lapply(seq_along(f.shp.grp), function(i) { lapply(seq_along(f.shp.grp[[i]]), function(j){ lnm <- layer_names[match(gsub(".*soil([musfa]{2}_[apl])_.*", "\\1", f.shp.grp[[i]][j]), names(layer_names))] if (IS_DUCKDB) { if (overwrite && j == 1) { - duckdb::dbSendQuery(con, sprintf("DROP TABLE IF EXISTS %s;", lnm)) - duckdb::dbSendQuery(con, sprintf("CREATE TABLE %s AS SELECT * FROM ST_Read('%s');", + DBI::dbExecute(con, sprintf("DROP TABLE IF EXISTS %s;", lnm)) + DBI::dbExecute(con, sprintf("CREATE TABLE %s AS SELECT * FROM ST_Read('%s');", lnm, f.shp.grp[[i]][j])) } else { - duckdb::dbSendQuery(con, sprintf("INSERT INTO %s SELECT * FROM ST_Read('%s');", + DBI::dbExecute(con, sprintf("INSERT INTO %s SELECT * FROM ST_Read('%s');", lnm, f.shp.grp[[i]][j])) } } else { @@ -273,18 +276,10 @@ createSSURGO <- function(filename, # write tabular data to file try({ - if (IS_DUCKDB) { - if (overwrite) { - duckdb::dbWriteTable(con, mstab_lut[x], d, overwrite = TRUE) - } else { - duckdb::dbWriteTable(con, mstab_lut[x], d, append = TRUE) - } + if (overwrite) { + DBI::dbWriteTable(con, mstab_lut[x], d, overwrite = TRUE) } else { - if (overwrite) { - RSQLite::dbWriteTable(con, mstab_lut[x], d, overwrite = TRUE) - } else { - RSQLite::dbWriteTable(con, mstab_lut[x], d, append = TRUE) - } + DBI::dbWriteTable(con, mstab_lut[x], d, append = TRUE) } }, silent = quiet) @@ -294,11 +289,7 @@ createSSURGO <- function(filename, q <- sprintf("CREATE UNIQUE INDEX IF NOT EXISTS %s ON %s (%s)", paste0('PK_', mstab_lut[x]), mstab_lut[x], paste0(indexPK, collapse = ",")) - if (IS_DUCKDB) { - duckdb::dbSendQuery(con, q) - } else { - RSQLite::dbExecute(con, q) - } + DBI::dbExecute(con, q) }, silent = quiet) } @@ -308,11 +299,7 @@ createSSURGO <- function(filename, try({ q <- sprintf("CREATE INDEX IF NOT EXISTS %s ON %s (%s)", paste0('DI_', mstab_lut[x]), mstab_lut[x], indexDI[i]) - if (IS_DUCKDB) { - duckdb::dbSendQuery(con, q) - } else { - RSQLite::dbExecute(con, q) - } + DBI::dbExecute(con, q) }, silent = quiet) } } @@ -334,12 +321,7 @@ createSSURGO <- function(filename, } }) - res <- NULL - if (IS_DUCKDB) { - res <- duckdb::dbListTables(con) - } else { - res <- RSQLite::dbListTables(con) - } + res <- DBI::dbListTables(con, q) invisible(res) } From a0915164d58adb46d79f7c3ce642666ad5460e2e Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Thu, 30 May 2024 07:40:42 -0700 Subject: [PATCH 3/4] generalize for all DBI connection types --- R/createSSURGO.R | 104 ++++++++++++++++++++++++++---------------- man/createSSURGO.Rd | 24 +++++++--- man/downloadSSURGO.Rd | 3 ++ 3 files changed, 84 insertions(+), 47 deletions(-) diff --git a/R/createSSURGO.R b/R/createSSURGO.R index 1bce1750..d533bd2c 100644 --- a/R/createSSURGO.R +++ b/R/createSSURGO.R @@ -24,6 +24,7 @@ #' Several ESRI shapefiles are found in the _/spatial/_ folder extracted from a SSURGO ZIP. These have prefix `soilmu_` (mapunit), `soilsa_` (survey area), `soilsf_` (special features). There will also be a TXT file with prefix `soilsf_` describing any special features. Shapefile names then have an `a_` (polygon), `l_` (line), `p_` (point) followed by the soil survey area symbol. #' #' @return Character. Paths to downloaded ZIP files (invisibly). May not exist if `remove_zip = TRUE`. +#' @seealso [createSSURGO()] downloadSSURGO <- function(WHERE = NULL, areasymbols = NULL, destdir = tempdir(), @@ -111,19 +112,27 @@ downloadSSURGO <- function(WHERE = NULL, invisible(paths2) } -#' Create a SQLite database or GeoPackage from one or more SSURGO Exports +#' Create a database from SSURGO Exports +#' +#' The following database types are tested and fully supported: +#' - SQLite or Geopackage +#' - DuckDB +#' - Postgres or PostGIS +#' +#' In theory any other DBI-compatible data source can be used for output. See `conn` argument. If you encounter issues using specific DBI connection types, please report in the soilDB issue tracker. #' -#' @param filename Output file name (e.g. `'db.sqlite'` or `'db.gpkg'`) -#' @param exdir Path containing containing SSURGO spatial (.shp) and tabular (.txt) files. +#' @param filename Output file name (e.g. `'db.sqlite'` or `'db.gpkg'`). Only used when `con` is not specified by the user. +#' @param exdir Path containing containing input SSURGO spatial (.shp) and tabular (.txt) files, downloaded and extracted by `downloadSSURGO()` or similar. +#' @param conn A _DBIConnection_ object. Default is a `SQLiteConnection` used for writing .sqlite or .gpkg files. Alternate options are any DBI connection types. When `include_spatial=TRUE`, the sf package is used to write spatial data to the database. #' @param pattern Character. Optional regular expression to use to filter subdirectories of `exdir`. Default: `NULL` will search all subdirectories for SSURGO export files. #' @param include_spatial Logical. Include spatial data layers in database? Default: `TRUE`. #' @param overwrite Logical. Overwrite existing layers? Default `FALSE` will append to existing tables/layers. #' @param header Logical. Passed to `read.delim()` for reading pipe-delimited (`|`) text files containing tabular data. #' @param quiet Logical. Suppress messages and other output from database read/write operations? #' @param ... Additional arguments passed to `write_sf()` for writing spatial layers. -#' @param duckdb Logical. Create DuckDB rather than SQLite database? Default: `FALSE`. #' #' @return Character. Vector of layer/table names in `filename`. +#' @seealso [downloadSSURGO()] #' @export #' @examples #' \dontrun{ @@ -132,40 +141,44 @@ downloadSSURGO <- function(WHERE = NULL, #' } createSSURGO <- function(filename, exdir, + conn = DBI::dbConnect(DBI::dbDriver("SQLite"), + filename, + loadable.extensions = TRUE), pattern = NULL, include_spatial = TRUE, overwrite = FALSE, header = FALSE, quiet = TRUE, - duckdb = FALSE, ...) { - if (missing(filename) || length(filename) == 0) { - stop("`filename` should be a path to a .gpkg or .sqlite file to create or append to.") + if ((missing(filename) || length(filename) == 0) && missing(conn)) { + stop("`filename` should be a path to a .gpkg or .sqlite file to create or append to, or a DBIConnection should be provided via `conn`.") + } else { + filename <- NULL } - IS_GPKG <- grepl("\\.gpkg$", filename, ignore.case = TRUE)[1] - IS_DUCKDB <- duckdb + # DuckDB has special spatial format, so it gets custom handling for + IS_DUCKDB <- inherits(conn, "duckdb_connection") f <- list.files(exdir, recursive = TRUE, pattern = pattern, full.names = TRUE) - if (!requireNamespace("sf")) - stop("package `sf` is required to write spatial datasets to SSURGO SQLite databases", call. = FALSE) + if (!IS_DUCKDB) { + if (!requireNamespace("sf")) + stop("package `sf` is required to write spatial datasets to DBI data sources", call. = FALSE) + } - if (isTRUE(overwrite) && file.exists(filename)) { - file.remove(filename) - } - - if (IS_DUCKDB) { - if (!requireNamespace("duckdb")) - stop("package `duckdb` is required to write datasets to SSURGO DuckDB databases", call. = FALSE) - con <- duckdb::dbConnect(duckdb::duckdb(filename), filename) - on.exit(duckdb::dbDisconnect(con, shutdown = TRUE)) + if (missing(conn)) { + # delete existing file if overwrite=TRUE + if (isTRUE(overwrite) && file.exists(filename)) { + file.remove(filename) + } + + # if user did not specify their own connection, close on exit + on.exit(DBI::dbDisconnect(conn)) } else { - if (!requireNamespace("RSQLite")) - stop("package `RSQLite` is required to write datasets to SSURGO SQLite databases", call. = FALSE) - con <- RSQLite::dbConnect(RSQLite::SQLite(), filename, loadable.extensions = TRUE) - on.exit(RSQLite::dbDisconnect(con)) + if (isTRUE(overwrite)) { + message("`filename` and `overwrite` arguments ignored when `conn` is specified") + } } # create and add combined vector datasets: @@ -180,27 +193,32 @@ createSSURGO <- function(filename, f.shp.grp <- split(f.shp, list(feature = shp.grp[,1], geom = shp.grp[,2])) if (IS_DUCKDB) { - DBI::dbExecute(con, "INSTALL spatial; LOAD spatial;") + DBI::dbExecute(conn, "INSTALL spatial; LOAD spatial;") } lapply(seq_along(f.shp.grp), function(i) { - lapply(seq_along(f.shp.grp[[i]]), function(j){ + lapply(seq_along(f.shp.grp[[i]]), function(j) { lnm <- layer_names[match(gsub(".*soil([musfa]{2}_[apl])_.*", "\\1", f.shp.grp[[i]][j]), names(layer_names))] if (IS_DUCKDB) { if (overwrite && j == 1) { - DBI::dbExecute(con, sprintf("DROP TABLE IF EXISTS %s;", lnm)) - DBI::dbExecute(con, sprintf("CREATE TABLE %s AS SELECT * FROM ST_Read('%s');", + DBI::dbExecute(conn, sprintf("DROP TABLE IF EXISTS %s;", lnm)) + DBI::dbExecute(conn, sprintf("CREATE TABLE %s AS SELECT * FROM ST_Read('%s');", lnm, f.shp.grp[[i]][j])) } else { - DBI::dbExecute(con, sprintf("INSERT INTO %s SELECT * FROM ST_Read('%s');", + DBI::dbExecute(conn, sprintf("INSERT INTO %s SELECT * FROM ST_Read('%s');", lnm, f.shp.grp[[i]][j])) } } else { + shp <- sf::read_sf(f.shp.grp[[i]][j]) + + colnames(shp) <- tolower(colnames(shp)) + sf::st_geometry(shp) <- "geometry" + if (overwrite && j == 1) { - sf::write_sf(sf::read_sf(f.shp.grp[[i]][j]), filename, layer = lnm, overwrite = TRUE, ...) + sf::write_sf(shp, conn, layer = lnm, delete_layer = TRUE, ...) } else { - sf::write_sf(sf::read_sf(f.shp.grp[[i]][j]), filename, layer = lnm, append = TRUE, ...) + sf::write_sf(shp, conn, layer = lnm, append = TRUE, ...) } } NULL @@ -277,9 +295,9 @@ createSSURGO <- function(filename, # write tabular data to file try({ if (overwrite) { - DBI::dbWriteTable(con, mstab_lut[x], d, overwrite = TRUE) + DBI::dbWriteTable(conn, mstab_lut[x], d, overwrite = TRUE) } else { - DBI::dbWriteTable(con, mstab_lut[x], d, append = TRUE) + DBI::dbWriteTable(conn, mstab_lut[x], d, append = TRUE) } }, silent = quiet) @@ -289,7 +307,7 @@ createSSURGO <- function(filename, q <- sprintf("CREATE UNIQUE INDEX IF NOT EXISTS %s ON %s (%s)", paste0('PK_', mstab_lut[x]), mstab_lut[x], paste0(indexPK, collapse = ",")) - DBI::dbExecute(con, q) + DBI::dbExecute(conn, q) }, silent = quiet) } @@ -299,20 +317,26 @@ createSSURGO <- function(filename, try({ q <- sprintf("CREATE INDEX IF NOT EXISTS %s ON %s (%s)", paste0('DI_', mstab_lut[x]), mstab_lut[x], indexDI[i]) - DBI::dbExecute(con, q) + DBI::dbExecute(conn, q) }, silent = quiet) } } + if (inherits(conn, 'SQLiteConnection')) { + IS_GPKG <- grepl("\\.gpkg$", conn@dbname, ignore.case = TRUE)[1] + } else { + IS_GPKG <- FALSE + } + # for GPKG output, add gpkg_contents (metadata for features and attributes) if (IS_GPKG) { - if (!.gpkg_has_contents(con)) { + if (!.gpkg_has_contents(conn)) { # if no spatial data inserted, there will be no gpkg_contents table initally - try(.gpkg_create_contents(con)) + try(.gpkg_create_contents(conn)) } # update gpkg_contents table entry - try(.gpkg_delete_contents(con, mstab_lut[x])) - try(.gpkg_add_contents(con, mstab_lut[x])) + try(.gpkg_delete_contents(conn, mstab_lut[x])) + try(.gpkg_add_contents(conn, mstab_lut[x])) } # TODO: other foreign keys/relationships? ALTER TABLE/ADD CONSTRAINT not available in SQLite @@ -321,7 +345,7 @@ createSSURGO <- function(filename, } }) - res <- DBI::dbListTables(con, q) + res <- DBI::dbListTables(conn) invisible(res) } diff --git a/man/createSSURGO.Rd b/man/createSSURGO.Rd index ec5ae16e..2393cc4d 100644 --- a/man/createSSURGO.Rd +++ b/man/createSSURGO.Rd @@ -2,24 +2,26 @@ % Please edit documentation in R/createSSURGO.R \name{createSSURGO} \alias{createSSURGO} -\title{Create a SQLite database or GeoPackage from one or more SSURGO Exports} +\title{Create a database from SSURGO Exports} \usage{ createSSURGO( filename, exdir, + conn = DBI::dbConnect(DBI::dbDriver("SQLite"), filename, loadable.extensions = TRUE), pattern = NULL, include_spatial = TRUE, overwrite = FALSE, header = FALSE, quiet = TRUE, - duckdb = FALSE, ... ) } \arguments{ -\item{filename}{Output file name (e.g. \code{'db.sqlite'} or \code{'db.gpkg'})} +\item{filename}{Output file name (e.g. \code{'db.sqlite'} or \code{'db.gpkg'}). Only used when \code{con} is not specified by the user.} -\item{exdir}{Path containing containing SSURGO spatial (.shp) and tabular (.txt) files.} +\item{exdir}{Path containing containing input SSURGO spatial (.shp) and tabular (.txt) files, downloaded and extracted by \code{downloadSSURGO()} or similar.} + +\item{conn}{A \emph{DBIConnection} object. Default is a \code{SQLiteConnection} used for writing .sqlite or .gpkg files. Alternate options are any DBI connection types. When \code{include_spatial=TRUE}, the sf package is used to write spatial data to the database.} \item{pattern}{Character. Optional regular expression to use to filter subdirectories of \code{exdir}. Default: \code{NULL} will search all subdirectories for SSURGO export files.} @@ -31,15 +33,20 @@ createSSURGO( \item{quiet}{Logical. Suppress messages and other output from database read/write operations?} -\item{duckdb}{Logical. Create DuckDB rather than SQLite database? Default: \code{FALSE}.} - \item{...}{Additional arguments passed to \code{write_sf()} for writing spatial layers.} } \value{ Character. Vector of layer/table names in \code{filename}. } \description{ -Create a SQLite database or GeoPackage from one or more SSURGO Exports +The following database types are tested and fully supported: +\itemize{ +\item SQLite or Geopackage +\item DuckDB +\item Postgres or PostGIS +} + +In theory any other DBI-compatible data source can be used for output. See \code{conn} argument. If you encounter issues using specific DBI connection types, please report in the soilDB issue tracker. } \examples{ \dontrun{ @@ -47,3 +54,6 @@ Create a SQLite database or GeoPackage from one or more SSURGO Exports createSSURGO("test.gpkg", "SSURGO_test") } } +\seealso{ +\code{\link[=downloadSSURGO]{downloadSSURGO()}} +} diff --git a/man/downloadSSURGO.Rd b/man/downloadSSURGO.Rd index 2cdca851..651d8512 100644 --- a/man/downloadSSURGO.Rd +++ b/man/downloadSSURGO.Rd @@ -53,3 +53,6 @@ Pipe-delimited TXT files are found in \emph{/tabular/} folder extracted from a S Several ESRI shapefiles are found in the \emph{/spatial/} folder extracted from a SSURGO ZIP. These have prefix \code{soilmu_} (mapunit), \code{soilsa_} (survey area), \code{soilsf_} (special features). There will also be a TXT file with prefix \code{soilsf_} describing any special features. Shapefile names then have an \code{a_} (polygon), \code{l_} (line), \code{p_} (point) followed by the soil survey area symbol. } +\seealso{ +\code{\link[=createSSURGO]{createSSURGO()}} +} From 87ff3917642a403f9eadd07d617920bf1721a196 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Thu, 30 May 2024 07:40:56 -0700 Subject: [PATCH 4/4] add createSSURGO() PostGIS example --- misc/postgis-ssurgo.Rmd | 91 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 misc/postgis-ssurgo.Rmd diff --git a/misc/postgis-ssurgo.Rmd b/misc/postgis-ssurgo.Rmd new file mode 100644 index 00000000..9e115c81 --- /dev/null +++ b/misc/postgis-ssurgo.Rmd @@ -0,0 +1,91 @@ +--- +title: "`createSSURGO()` + PostGIS" +output: md_document +--- + +## Setup PostGIS Docker Container + +First, install [Docker](https://docs.docker.com/engine/install/). + +Then, pull the `postgis` image you want. + +Here, we get the latest version using tag `"latest"`: + +```sh +docker pull postgis/postgis:latest +``` + +Then run an instance of the image. + +```sh +docker run --name steep-piano -e POSTGRES_PASSWORD=mypassword -d -p 5432:5432 postgis/postgis +``` + +Give it a name (e.g. `steep-piano`). The default username is `postgres`. Set a password (e.g. `"mypassword"`) and port to forward (e.g. `5432`) from your docker container to your public host network address. + +In the simplest case you are just running `postgis` for local testing, so the host address will simply be `localhost`. + +You can then make a connection to the PostGIS instance running in the Docker container. + +To make sure everything is working open up the `psql` SQL prompt: + +```sh +psql -h localhost -p 5432 -U postgres +``` + +Then use command `\l` to list databases then `\q` to quit. + +# Connecting with R + +Create a database connection to your local PostGIS instance. To do this we use DBI and RPostgres packages. + +```{r} +library(soilDB) +library(RPostgres) + +con <- DBI::dbConnect(DBI::dbDriver("Postgres"), + host = "localhost", + port = 5432L, + user = "postgres", + password = "password") +``` + +Download some SSURGO data, if needed, and extract the files/folder structure to `"SSURGO_test"` subfolder of current working directory. + +```{r, eval = FALSE} +if (!dir.exists("SSURGO_test")) + downloadSSURGO(areasymbols = c("CA067", "CA077", "CA632"), + destdir = "SSURGO_test") +``` + +Pass the `con` argument to `createSSURGO()` to override the default SQLite connection that is created to `filename`. When `con` is not the default SQLite connection, the `filename` argument can take any value (including `NULL`). + +```{r, eval = FALSE} +createSSURGO(exdir = "SSURGO_test", + con = con) +``` + +Once the tables have been written to the database, you can write queries involving spatial and tabular data. +Perhaps the easiest way to do this is to use `sf::st_read()`. + +The `st_read()` function can take a DBIConnection as a data source, and takes a `query` argument. It will return an `sf` data frame if the result table contains a geometry column. Otherwise, it will return a data.frame (with a warning). + +```{r} +res <- sf::st_read(con, query = "SELECT + ST_Centroid(geometry) AS centroid, + ST_Area(geometry) AS polygon_area, + muaggatt.* + FROM mupolygon + LEFT JOIN muaggatt ON muaggatt.mukey = CAST(mupolygon.mukey AS integer) + LIMIT 2") + +res + +plot(res["musym"], pch = "+") +``` + +Close the connection when you are done. + +```{r} +DBI::dbDisconnect(con) +``` \ No newline at end of file