Skip to content

Commit

Permalink
added GDAL extension to R package
Browse files Browse the repository at this point in the history
  • Loading branch information
sllynn committed Jan 31, 2024
1 parent 2f177fd commit a99fb37
Show file tree
Hide file tree
Showing 11 changed files with 101 additions and 68 deletions.
6 changes: 3 additions & 3 deletions R/generate_R_bindings.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ main <- function(scala_file_path){
closeAllConnections()

# supplementary files
sparkr_supplementary_files <- c("sparkR-mosaic/enableMosaic.R")
sparkr_supplementary_files <- c("sparkR-mosaic/enableMosaic.R", "sparkR-mosaic/enableGDAL.R")
copy_supplementary_file(sparkr_supplementary_files, "sparkR-mosaic/sparkrMosaic/R")

##########################
Expand All @@ -226,8 +226,8 @@ main <- function(scala_file_path){
closeAllConnections()

# supplementary files
sparkr_supplementary_files <- c("sparklyr-mosaic/enableMosaic.R", "sparklyr-mosaic/sparkFunctions.R")
copy_supplementary_file(sparkr_supplementary_files, "sparklyr-mosaic/sparklyrMosaic/R/")
sparklyr_supplementary_files <- c("sparklyr-mosaic/enableMosaic.R", "sparklyr-mosaic/sparkFunctions.R", "sparklyr-mosaic/enableGDAL.R")
copy_supplementary_file(sparklyr_supplementary_files, "sparklyr-mosaic/sparklyrMosaic/R/")
}


Expand Down
14 changes: 14 additions & 0 deletions R/sparkR-mosaic/enableGDAL.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#' enableGDAL
#'
#' @description enableGDAL activates GDAL extensions for Mosaic
#' @name enableGDAL
#' @rdname enableGDAL
#' @return None
#' @export enableGDAL
#' @examples
#' \dontrun{
#' enableGDAL() }
enableGDAL <- function(
){
sparkR.callJStatic(x="com.databricks.labs.mosaic.gdal.MosaicGDAL", methodName="enableGDAL", sparkR.session())
}
2 changes: 1 addition & 1 deletion R/sparkR-mosaic/sparkrMosaic/.Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
^sparkrMosaic\.Rproj$
^\.Rproj\.user$
^\.Rproj\.user$
9 changes: 5 additions & 4 deletions R/sparkR-mosaic/sparkrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
Package: sparkrMosaic
Title: SparkR bindings for Databricks Mosaic
Version: 0.4.0
Authors@R:
Authors@R:
person("Robert", "Whiffin", , "[email protected]", role = c("aut", "cre")
)
Description: This package extends SparkR to bring the Databricks Mosaic for geospatial processing APIs into SparkR.
License: Databricks
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Collate:
Collate:
'enableGDAL.R'
'enableMosaic.R'
'generics.R'
'functions.R'
Imports:
SparkR,
methods
Suggests:
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
Config/testthat/edition: 3
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ LineEndingConversion: Posix
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
PackageRoxygenize: rd,collate,namespace
3 changes: 1 addition & 2 deletions R/sparkR-mosaic/tests.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
library(testthat)

spark_location <- Sys.getenv("SPARK_HOME")
lib_location <- Sys.getenv("MOSAIC_LIB_PATH")
library(SparkR, lib.loc = c(file.path(spark_location, "R", "lib")))
.libPaths(c(file.path(spark_location, "R", "lib"), .libPaths()))

Expand All @@ -13,7 +12,7 @@ install.packages(package_file, repos=NULL)
library(sparkrMosaic)

# find the mosaic jar in staging
staging_dir <- if (lib_location == "") {"/home/runner/work/mosaic/mosaic/staging/"} else {lib_location}
staging_dir <- Sys.getenv("MOSAIC_LIB_PATH", "/home/runner/work/mosaic/mosaic/staging/")
mosaic_jar <- list.files(staging_dir)
mosaic_jar <- mosaic_jar[grep("jar-with-dependencies.jar", mosaic_jar, fixed=T)]
print("Looking for mosaic jar in")
Expand Down
17 changes: 17 additions & 0 deletions R/sparklyr-mosaic/enableGDAL.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#' enableGDAL
#'
#' @description enableGDAL activates GDAL extensions for Mosaic
#' @param sc sparkContext
#' @name enableGDAL
#' @rdname enableGDAL
#' @return None
#' @export enableGDAL
#' @examples
#' \dontrun{
#' enableGDAL(sc)}

enableGDAL <- function(
sc
){
sparklyr::invoke_static(sc, class="com.databricks.labs.mosaic.gdal.MosaicGDAL", method="enableGDAL", spark_session(sc))
}
2 changes: 1 addition & 1 deletion R/sparklyr-mosaic/sparklyrMosaic/.Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
^sparkrMosaic\.Rproj$
^\.Rproj\.user$
^\.Rproj\.user$
11 changes: 6 additions & 5 deletions R/sparklyr-mosaic/sparklyrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
Package: sparklyrMosaic
Title: sparklyr bindings for Databricks Mosaic
Version: 0.4.0
Authors@R:
Authors@R:
person("Robert", "Whiffin", , "[email protected]", role = c("aut", "cre")
)
Description: This package extends sparklyr to bring the Databricks Mosaic for geospatial processing APIs into sparklyr .
License: Databricks
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Collate:
Collate:
'enableGDAL.R'
'enableMosaic.R'
'sparkFunctions.R'
'functions.R'
'functions.R'
Imports:
sparklyr
Suggests:
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
Config/testthat/edition: 3
100 changes: 50 additions & 50 deletions R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,53 +62,53 @@ test_that("scalar vector functions behave as intended", {
expect_equal(sdf_nrow(sdf), 1)

})

test_that("aggregate vector functions behave as intended", {

sdf <- sdf_sql(sc, "SELECT id as location_id FROM range(1)") %>%
mutate(geometry = st_geomfromgeojson(inputGJ))
expect_equal(sdf_nrow(sdf), 1)

sdf.l <- sdf %>%
select(
left_id = location_id,
left_geom = geometry
) %>%
mutate(left_index = mosaic_explode(left_geom, 11L))

sdf.r <- sdf %>%
select(
right_id = location_id,
right_geom = geometry
) %>%
mutate(right_geom = st_translate(
right_geom,
st_area(right_geom) * runif(n()) * 0.1,
st_area(right_geom) * runif(n()) * 0.1)
) %>%
mutate(right_index = mosaic_explode(right_geom, 11L))

sdf.intersection <- sdf.l %>%
inner_join(sdf.r, by = c("left_index" = "right_index"), keep = TRUE) %>%
dplyr::group_by(left_id, right_id) %>%
dplyr::summarise(
agg_intersects = st_intersects_agg(left_index, right_index),
agg_intersection = st_intersection_agg(left_index, right_index),
left_geom = max(left_geom, 1),
right_geom = max(right_geom, 1)
) %>%
mutate(
flat_intersects = st_intersects(left_geom, right_geom),
comparison_intersects = agg_intersects == flat_intersects,
agg_area = st_area(agg_intersection),
flat_intersection = st_intersection(left_geom, right_geom),
flat_area = st_area(flat_intersection),
comparison_intersection = abs(agg_area - flat_area) <= 1e-3
)

expect_no_error(spark_write_source(sdf.intersection, "noop", mode = "overwrite"))
expect_true(sdf.intersection %>% head(1) %>% sdf_collect %>% .$comparison_intersects)
expect_true(sdf.intersection %>% head(1) %>% sdf_collect %>% .$comparison_intersection)


})
#
# test_that("aggregate vector functions behave as intended", {
#
# sdf <- sdf_sql(sc, "SELECT id as location_id FROM range(1)") %>%
# mutate(geometry = st_geomfromgeojson(inputGJ))
# expect_equal(sdf_nrow(sdf), 1)
#
# sdf.l <- sdf %>%
# select(
# left_id = location_id,
# left_geom = geometry
# ) %>%
# mutate(left_index = mosaic_explode(left_geom, 11L))
#
# sdf.r <- sdf %>%
# select(
# right_id = location_id,
# right_geom = geometry
# ) %>%
# mutate(right_geom = st_translate(
# right_geom,
# st_area(right_geom) * runif(n()) * 0.1,
# st_area(right_geom) * runif(n()) * 0.1)
# ) %>%
# mutate(right_index = mosaic_explode(right_geom, 11L))
#
# sdf.intersection <- sdf.l %>%
# inner_join(sdf.r, by = c("left_index" = "right_index"), keep = TRUE) %>%
# dplyr::group_by(left_id, right_id) %>%
# dplyr::summarise(
# agg_intersects = st_intersects_agg(left_index, right_index),
# agg_intersection = st_intersection_agg(left_index, right_index),
# left_geom = max(left_geom, 1),
# right_geom = max(right_geom, 1)
# ) %>%
# mutate(
# flat_intersects = st_intersects(left_geom, right_geom),
# comparison_intersects = agg_intersects == flat_intersects,
# agg_area = st_area(agg_intersection),
# flat_intersection = st_intersection(left_geom, right_geom),
# flat_area = st_area(flat_intersection),
# comparison_intersection = abs(agg_area - flat_area) <= 1e-3
# )
#
# expect_no_error(spark_write_source(sdf.intersection, "noop", mode = "overwrite"))
# expect_true(sdf.intersection %>% head(1) %>% sdf_collect %>% .$comparison_intersects)
# expect_true(sdf.intersection %>% head(1) %>% sdf_collect %>% .$comparison_intersection)


# })
3 changes: 2 additions & 1 deletion R/sparklyr-mosaic/tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ install.packages("sparklyrMosaic_0.4.0.tar.gz", repos = NULL)
library(sparklyrMosaic)

# find the mosaic jar in staging
staging_dir <- "/home/runner/work/mosaic/mosaic/staging/"
staging_dir <- Sys.getenv("MOSAIC_LIB_PATH", "/home/runner/work/mosaic/mosaic/staging/")
mosaic_jar <- list.files(staging_dir)
mosaic_jar <- mosaic_jar[grep("jar-with-dependencies.jar", mosaic_jar, fixed=T)]
mosaic_jar_path <- paste0(staging_dir, mosaic_jar)
Expand All @@ -26,5 +26,6 @@ config$`sparklyr.jars.default` <- c(mosaic_jar_path)

sc <- spark_connect(master="local[*]", config=config)
enableMosaic(sc)
enableGDAL(sc)

testthat::test_local(path="./sparklyrMosaic")

0 comments on commit a99fb37

Please sign in to comment.