Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/ncss-tech/soilDB
Browse files Browse the repository at this point in the history
  • Loading branch information
dylanbeaudette committed Dec 21, 2023
2 parents b8caad8 + 521b740 commit 1487a64
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 43 deletions.
34 changes: 18 additions & 16 deletions R/fetchLDM.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' @param chunk.size Number of pedons per chunk (for queries that may exceed `maxJsonLength`)
#' @param ntries Number of tries (times to halve `chunk.size`) before returning `NULL`; default `3`
#' @param layer_type Default: `"horizon"`, `"layer"`, and `"reporting layer"`
#' @param area_type Default: `"ssa"` (Soil Survey Area). Other options include (choose one): `"country"`, `"state"`, `"county"`, `"mlra"` (Major Land Resource Area), `"nforest"` (National Forest), `"npark"` (National Park)
#' @param prep_code Default: `"S"` and `""`. May also include one or more of: `"F"`, `"HM"`, `"HM_SK"` `"GP"`, `"M"`, `"N"`, or `"S"`
#' @param analyzed_size_frac Default: `"<2 mm"` and `""`. May also include one or more of: `"<0.002 mm"`, `"0.02-0.05 mm"`, `"0.05-0.1 mm"`, `"0.1-0.25 mm"`, `"0.25-0.5 mm"`, `"0.5-1 mm"`, `"1-2 mm"`, `"0.02-2 mm"`, `"0.05-2 mm"`
#' @param dsn Data source name; either a path to a SQLite database, an open DBIConnection or (default) `NULL` (to use `soilDB::SDA_query`)
Expand All @@ -26,7 +27,6 @@
#' @return a `SoilProfileCollection` for a successful query, a `try-error` if no site/pedon locations can be found or `NULL` for an empty `lab_layer` (within sites/pedons) result
#' @export
#' @examplesIf curl::has_internet()
#' @examples
#' \dontrun{
#' # fetch by ssa_key
#' res <- fetchLDM(8297, what = "ssa_key")
Expand Down Expand Up @@ -58,7 +58,8 @@ fetchLDM <- function(x = NULL,
WHERE = NULL,
chunk.size = 1000,
ntries = 3,
layer_type = c("horizon","layer","reporting layer"),
layer_type = c("horizon", "layer", "reporting layer"),
area_type = c("ssa", "country", "state", "county", "mlra", "nforest", "npark"),
prep_code = c("S", ""), # , `"F"`, `"HM"`, `"HM_SK"` `"GP"`, `"M"`, `"N"`, or `"S"`
analyzed_size_frac = c("<2 mm", ""),# optional: "<0.002 mm", "0.02-0.05 mm", "0.05-0.1 mm", "0.1-0.25 mm", "0.25-0.5 mm", "0.5-1 mm", "1-2 mm", "0.02-2 mm", "0.05-2 mm"
dsn = NULL) {
Expand All @@ -76,6 +77,8 @@ fetchLDM <- function(x = NULL,
con <- NULL
}

area_type <- match.arg(tolower(area_type[1]), c("ssa", "country", "state", "county", "mlra", "nforest", "npark"))

lab_combine_nasis_ncss <- c("pedon_key", "site_key", "pedlabsampnum", "pedoniid", "upedonid",
"labdatadescflag", "priority", "priority2", "samp_name", "samp_class_type",
"samp_classdate", "samp_classification_name", "samp_taxorder",
Expand Down Expand Up @@ -120,9 +123,6 @@ fetchLDM <- function(x = NULL,
paste0("lab_site.", lab_site),
paste0("lab_pedon.", lab_pedon)))
}

# TODO: set up arbitrary area queries by putting area table into groups:
# country, state, county, mlra, ssa, npark, nforest

if (!is.null(x) && (missing(WHERE) || is.null(WHERE))) {
WHERE <- sprintf("LOWER(%s) IN %s", what, format_SQL_in_statement(tolower(x)))
Expand All @@ -149,17 +149,17 @@ fetchLDM <- function(x = NULL,
} else {
# the lab_area table allows for overlap with many different area types
# for now we only offer the "ssa" (soil survey area) area_type
site_query_ssaarea <- gsub("WHERE",
"LEFT JOIN lab_area ON
lab_combine_nasis_ncss.ssa_key = lab_area.area_key
WHERE", site_query)
sites <- suppressMessages(SDA_query(site_query_ssaarea))
site_query_byarea <- gsub("WHERE",
sprintf("LEFT JOIN lab_area ON
lab_combine_nasis_ncss.%s_key = lab_area.area_key
WHERE", area_type), site_query)
sites <- suppressMessages(SDA_query(site_query_byarea))
}

if (!inherits(sites, 'try-error') && !is.null(sites)) {

# TODO: this shouldn't be needed
sites <- sites[,unique(colnames(sites))]
sites <- sites[, unique(colnames(sites))]

if (is.null(chunk.size) || nrow(sites) < chunk.size) {
# get data for lab layers within pedon_key returned
Expand Down Expand Up @@ -312,14 +312,14 @@ fetchLDM <- function(x = NULL,
layer_type <- match.arg(layer_type, c("horizon", "layer", "reporting layer"), several.ok = TRUE)

if (any(tables %in% flattables)) {
nt <- flattables[flattables %in% tables[!tables %in% c("lab_rosetta_Key", "lab_mir")]]
layer_query <- sprintf(
"SELECT * FROM lab_layer %s WHERE lab_layer.layer_type IN %s %s AND %s",
"SELECT * FROM lab_layer %s WHERE lab_layer.layer_type IN %s %s %s",
paste0(sapply(flattables[flattables %in% tables], function(a) tablejoincriteria[[a]]), collapse = "\n"),
format_SQL_in_statement(layer_type),
ifelse(is.null(x), "", paste0(" AND ", bycol, " IN ", format_SQL_in_statement(x))),
paste0(paste0(sapply(flattables[flattables %in% tables[!tables %in% c("lab_rosetta_Key", "lab_mir")]],
function(b) paste0("IsNull(",b,".prep_code, '')")),
" IN ", format_SQL_in_statement(prep_code)), collapse = " AND "))
ifelse(length(nt) == 0, "", paste0(" AND ", paste0(sapply(nt, function(b) paste0("IsNull(",b,".prep_code, '')")),
" IN ", format_SQL_in_statement(prep_code)), collapse = " AND ")))
} else {
layer_query <- sprintf(
"SELECT * FROM lab_layer WHERE lab_layer.layer_type IN %s %s",
Expand Down Expand Up @@ -370,6 +370,8 @@ fetchLDM <- function(x = NULL,
layerdata <- merge(layerdata, layerfracdata[,c("labsampnum", colnames(layerfracdata)[!colnames(layerfracdata) %in% colnames(layerdata)])], by = "labsampnum", all.x = TRUE, incomparables = NA)
}
}
layerdata$prep_code[is.na(layerdata$prep_code)] <- ""
if (!is.null(layerdata$prep_code)) {
layerdata$prep_code[is.na(layerdata$prep_code)] <- ""
}
layerdata
}
158 changes: 138 additions & 20 deletions R/fetchSoilGrids.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#' Get SoilGrids 250m properties information from point locations
#' Get SoilGrids Properties Estimates for Points or Spatial Extent
#'
#' This function obtains SoilGrids properties information (250m raster resolution) given a \code{data.frame} containing site IDs, latitudes and longitudes. SoilGrids API and maps return values as whole (integer) numbers to minimize the storage space used. These values are converted by to produce conventional units by `fetchSoilGrids()``
#' This function obtains SoilGrids properties information (250m raster resolution) given a \code{data.frame} containing site IDs, latitudes and longitudes, or a spatial extent.
#'
#' SoilGrids API and maps return values as whole (integer) numbers to minimize the storage space used. These values are converted by to produce conventional units by `fetchSoilGrids()``
#'
#' @details
#'
#' ## Properties
#'
Expand Down Expand Up @@ -32,19 +36,26 @@
#'
#' @references Poggio, L., de Sousa, L. M., Batjes, N. H., Heuvelink, G. B. M., Kempen, B., Ribeiro, E., and Rossiter, D.: SoilGrids 2.0: producing soil information for the globe with quantified spatial uncertainty, SOIL, 7, 217-240, 2021. \doi{https://doi.org/10.5194/soil-7-217-2021}
#' @importFrom utils packageVersion
#' @param x A `data.frame` containing 3 columns referring to site ID, latitude and longitude.
#'
#' @param x A `data.frame` containing 3 columns referring to site ID, latitude and longitude. Or a spatial (sf, terra) object for which a bounding box can be calculated when `grid=TRUE`.
#' @param loc.names Optional: Column names referring to site ID, latitude and longitude. Default: `c("id", "lat", "lon")`
#' @param depth_intervals Default: `"0-5"`, `"5-15"`, `"15-30"`, `"30-60"`, `"60-100"`, `"100-200"`
#' @param variables Default: `"bdod"`, `"cec"`, `"cfvo"`, `"clay"`, `"nitrogen"`, `"phh2o"`, `"sand"`, `"silt"`, `"soc"`, `"ocd"`
#' @param variables Default: `"bdod"`, `"cec"`, `"cfvo"`, `"clay"`, `"nitrogen"`, `"phh2o"`, `"sand"`, `"silt"`, `"soc"`, `"ocd"`. Optionally `"ocs"` for 0 to 30 cm interval.
#' @param grid Download subset of SoilGrids Cloud Optimized GeoTIFF? Default: `FALSE`
#' @param filename Only used when `grid=TRUE`. If `NULL` defaults to an in-memory raster, or temporary file if result does not fit in memory.
#' @param overwrite Only used when `grid=TRUE`. Default: `FALSE`
#' @param target_resolution Only used when `grid=TRUE`. Default: `c(250, 250)` (250m x 250m pixels)
#' @param summary_type Only used when `grid=TRUE`. One or more of `"Q0.05"`, `"Q0.5"`, `"Q0.95"`, `"mean"`; these are summary statistics that
#' correspond to 5th, 50th, 95th percentiles, and mean value for selected `variables`.
#' @param ... Additional arguments passed to `terra::writeRaster()` when `grid=TRUE`.
#' @param verbose Print messages? Default: `FALSE`
#' @param progress logical, give progress when iterating over multiple requests; Default: `FALSE`
#'
#' @return A SoilProfileCollection
#' @return A SoilProfileCollection or SpatRaster when `grid=TRUE`
#' @export fetchSoilGrids
#'
#' @author Andrew G. Brown
#' @examplesIf curl::has_internet()
#' @examples
#' \dontrun{
#' library(aqp)
#'
Expand Down Expand Up @@ -87,6 +98,12 @@ fetchSoilGrids <- function(x,
variables = c("bdod", "cec", "cfvo", "clay",
"nitrogen", "phh2o", "sand", "silt",
"soc", "ocd"),
grid = FALSE,
filename = NULL,
overwrite = TRUE,
target_resolution = c(250, 250),
summary_type = c("Q0.05", "Q0.5", "Q0.95", "mean"),
...,
verbose = FALSE,
progress = FALSE) {

Expand All @@ -100,21 +117,33 @@ fetchSoilGrids <- function(x,
# convert sp -> sf & terra -> sf
locations <- sf::st_as_sf(locations)
}

# only supporting POINT geometry for now
if (inherits(sf::st_geometry(locations), 'sfc_POINT')) {
if (is.na(sf::st_crs(locations)$wkt)) {
message("CRS is missing; assuming WGS84 decimal degrees (EPSG:4326)")
sf::st_crs(locations) <- sf::st_crs(4326)
}
locations <- data.frame(id = 1:nrow(locations),
do.call('rbind', sf::st_geometry(locations)))
spatial_input <- TRUE
colnames(locations) <- c("id", "lon", "lat")
loc.names <- c("id", "lat", "lon")
} else {
stop("only POINT geometries are supported as input", call. = FALSE)
}
}

if (grid) {
return(.get_soilgrids(locations,
filename = filename,
overwrite = overwrite,
variables = variables,
target_resolution = target_resolution,
depth = depth_intervals,
summary_type = summary_type,
...,
verbose = verbose))
} else {
# only supporting POINT geometry for now
if (inherits(sf::st_geometry(locations), 'sfc_POINT')) {
if (is.na(sf::st_crs(locations)$wkt)) {
message("CRS is missing; assuming WGS84 decimal degrees (EPSG:4326)")
sf::st_crs(locations) <- sf::st_crs(4326)
}
locations <- data.frame(id = 1:nrow(locations),
do.call('rbind', sf::st_geometry(locations)))
spatial_input <- TRUE
colnames(locations) <- c("id", "lon", "lat")
loc.names <- c("id", "lat", "lon")
} else {
stop("only POINT geometries are supported as input", call. = FALSE)
}
}

Expand Down Expand Up @@ -264,3 +293,92 @@ fetchSoilGrids <- function(x,
colnames(out) <- gsub("\\.", "", colnames(out))
return(out)
}

.get_soilgrids <- function(x,
filename = NULL,
overwrite = TRUE,
target_resolution = c(250, 250),
depth = c("0-5", "5-15", "15-30", "30-60", "60-100", "100-200"),
summary_type = c("Q0.05", "Q0.5", "Q0.95", "mean"),
variables = c("bdod", "cec", "cfvo", "clay", "nitrogen",
"phh2o", "sand", "silt", "soc"),
...,
verbose = TRUE) {

stopifnot(requireNamespace("terra"))
stopifnot(requireNamespace("sf"))

if (!all(substr(depth, nchar(depth) - 1, nchar(depth)) == "cm")) {
depth <- paste0(depth, "cm")
}

if (inherits(x, 'Spatial')) {
x <- sf::st_as_sf(x) # sp support
}

if (!inherits(x, 'bbox')) {
# sf/sfc/stars/Raster*/SpatVector/SpatRaster support
xbbox <- sf::st_bbox(x)
} else xbbox <- x

sg_crs <- '+proj=igh'

# specifying vsicurl parameters appears to not work with GDAL 3.2.1
# sg_url <- paste0("/vsicurl?max_retry=", max_retry,
# "&retry_delay=", retry_delay,
# "&list_dir=no&url=https://files.isric.org/soilgrids/latest/data/")

# WORKS
sg_url <- "/vsicurl/https://files.isric.org/soilgrids/latest/data/"

# calculate homolosine bbox
xbbox <- sf::st_bbox(sf::st_transform(sf::st_as_sfc(xbbox), sg_crs))

# numeric values are returned as integers that need to be scaled to match typical measurement units
data.factor <- c(0.01, 0.1, 0.1, 0.1, 0.01, 0.1, 0.1, 0.1, 0.1)
names(data.factor) <- c("bdod", "cec", "cfvo", "clay", "nitrogen",
"phh2o", "sand", "silt", "soc")

# calculate temporary file names for each variable*depth*summary grid
vardepth <- apply(expand.grid(variables, summary_type, depth), 1, paste0, collapse = "_")
tfs <- tempfile(pattern = paste0(vardepth, "_"), fileext = ".tif")
names(tfs) <- vardepth

# helper function for gdal_utils options
.gdal_utils_opts <- function(lst) do.call('c', lapply(names(lst), function(y) c(y, lst[[y]])))

# iterate over variable*depth
for (i in seq_along(tfs)) {

# translate remote .vrt -> local .tif
vd <- strsplit(vardepth[i], "_")[[1]]
v <- vd[1]; d <- vd[2]; s <- vd[3]

sf::gdal_utils(
util = "translate",
source = paste0(sg_url, paste0(v, '/', v, '_', s, '_', d, '.vrt')),
destination = tfs[i],
options = .gdal_utils_opts(
list(
"-of" = "GTiff",
"-tr" = target_resolution,
"-projwin" = as.numeric(xbbox)[c(1, 4, 3, 2)],
"-projwin_srs" = sg_crs
)
),
quiet = !verbose
)
}

stk <- terra::rast(tfs)
names(stk) <- vardepth

if (!is.null(filename)) {
terra::writeRaster(stk, filename = filename, overwrite = overwrite, ...)
}

# apply conversion factor
stk2 <- terra::app(stk, function(x) x * data.factor[gsub("([a-z]+)_.*", "\\1", names(x))])

stk2
}
5 changes: 4 additions & 1 deletion man/fetchLDM.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1487a64

Please sign in to comment.