Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[draft] Implement SOZip storage of terra targets #62

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 38 additions & 10 deletions R/tar-terra-rast.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,18 @@
#' to [terra::writeRaster()]
#' @param gdal character. GDAL driver specific datasource creation options
#' passed to [terra::writeRaster()]
#' @param zipfile logical. Should the file in the target store be a ZIP archive?
#' Required for `filetype` formats that have sidecar files. Not all GDAL
#' drivers support directly generating SOZip-enabled files. Default: `FALSE`.
#' @param ... Additional arguments not yet used
#'
#' @note Although you may pass any supported GDAL raster driver to the
#' `filetype` argument, not all formats are guaranteed to work with
#' `geotargets`. At the moment, we have tested `GTiff` and `GPKG` and
#' they appear to work generally. Both `GTiff` and `GPKG` rasters can be
#' stored as ZIP files by setting `zipfile=TRUE`. To write a SOZip-enabled
#' `GTiff` target set `gdal=c("STREAMABLE_OUTPUT=YES", "COMPRESS=NONE")`.
#'
#' @inheritParams targets::tar_target
#' @importFrom rlang %||% arg_match0
#' @seealso [targets::tar_target_raw()]
Expand All @@ -33,6 +43,7 @@ tar_terra_rast <- function(name,
pattern = NULL,
filetype = geotargets_option_get("gdal.raster.driver"),
gdal = geotargets_option_get("gdal.raster.creation.options"),
zipfile = FALSE,
...,
tidy_eval = targets::tar_option_get("tidy_eval"),
packages = targets::tar_option_get("packages"),
Expand Down Expand Up @@ -73,23 +84,40 @@ tar_terra_rast <- function(name,
tidy_eval = tidy_eval
)

# Reader for raster targets.
# `zipfile` is not looked up at run time: substitute() replaces it with its
# literal value when the format is created.
# @param path Path of the target file in the store.
# @return The result of terra::rast() on the (possibly /vsizip/-wrapped) path.
.format_terra_rast_read <- eval(substitute(function(path) {
  # `zipfile` is a scalar flag, so use if/else rather than vectorised ifelse();
  # isTRUE() makes anything other than a literal TRUE fall back to the plain path.
  source_path <- if (isTRUE(zipfile)) {
    # ZIP-stored targets are opened via "/vsizip/{path to zipfile}"
    paste0("/vsizip/{", path, "}")
  } else {
    path
  }
  terra::rast(source_path)
}, list(zipfile = zipfile)))

# Writer for raster targets.
# `zipfile` is not looked up at run time: substitute() replaces it with its
# literal value when the format is created. The driver and creation options
# are read from the GEOTARGETS_GDAL_RASTER_* environment variables.
# @param object Raster object handed to terra::writeRaster().
# @param path Path of the target file in the store.
.format_terra_rast_write <- eval(substitute(function(object, path) {
  filetype <- Sys.getenv("GEOTARGETS_GDAL_RASTER_DRIVER")
  creation_options <- strsplit(
    Sys.getenv("GEOTARGETS_GDAL_RASTER_CREATION_OPTIONS", unset = ";"),
    ";"
  )[[1]]

  use_zip <- isTRUE(zipfile)
  # GPKG gets a ZIP by writing to a ".gpkg.zip" file name; only do this when a
  # ZIP was requested. Previously this branch ran for every GPKG target, so
  # `zipfile = FALSE` still produced a ZIP that the plain-path reader could
  # not be expected to open.
  direct_zip <- use_zip && identical(filetype, "GPKG")

  if (direct_zip) {
    destination <- paste0(path, ".gpkg.zip")
  } else if (use_zip) {
    # generic "/vsizip/{path to zipfile}/path/in/zipfile"
    destination <- paste0("/vsizip/{", path, "}/", basename(path))
  } else {
    destination <- path
  }

  terra::writeRaster(
    object,
    destination,
    filetype = filetype,
    overwrite = TRUE,
    gdal = creation_options
  )

  # The store expects the file at `path`, so move the temporary ".gpkg.zip"
  # file into place and fail loudly if that does not succeed.
  if (direct_zip && !file.rename(destination, path)) {
    stop("Failed to rename '", destination, "' to '", path, "'", call. = FALSE)
  }
}, list(zipfile = zipfile)))

targets::tar_target_raw(
name = name,
command = command,
pattern = pattern,
packages = packages,
library = library,
format = targets::tar_format(
read = function(path) terra::rast(path),
write = function(object, path) {
terra::writeRaster(
object,
path,
filetype = Sys.getenv("GEOTARGETS_GDAL_RASTER_DRIVER"),
overwrite = TRUE,
gdal = strsplit(Sys.getenv("GEOTARGETS_GDAL_RASTER_CREATION_OPTIONS", unset = ";"), ";")[[1]]
)
},
read = .format_terra_rast_read,
write = .format_terra_rast_write,
marshal = function(object) terra::wrap(object),
unmarshal = function(object) terra::unwrap(object)
),
Expand Down
61 changes: 41 additions & 20 deletions R/tar-terra-vect.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@
#' to [terra::writeVector()]. See 'Note' for more details
#' @param gdal character. GDAL driver specific datasource creation options
#' passed to [terra::writeVector()].
#' @param zipfile logical. Should the file in the target store be a ZIP archive?
#' Required for `filetype` formats that have sidecar files. Not all GDAL
#' drivers support directly generating SOZip-enabled files. Default: `FALSE`.
#' @param ... Additional arguments not yet used
#' @inheritParams targets::tar_target
#'
#' @note Although you may pass any supported GDAL vector driver to the
#' `filetype` argument, not all formats are guaranteed to work with
#' `geotargets`. At the moment, we have tested `GeoJSON` and `ESRI Shapefile`
#' which both appear to work generally.
#' `geotargets`. At the moment, we have tested `GeoJSON`, `ESRI Shapefile`,
#' `GPKG`, and `Parquet` which all appear to work generally. `ESRI Shapefile`
#' targets are always stored as a SOZip-enabled ZIP file (GDAL >= 3.7). `GPKG`
#' and `Parquet` can optionally be stored as ZIP files by setting `zipfile=TRUE`.
#' @export
#' @examples
#' if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") {
Expand Down Expand Up @@ -42,6 +47,7 @@ tar_terra_vect <- function(name,
pattern = NULL,
filetype = geotargets_option_get("gdal.vector.driver"),
gdal = geotargets_option_get("gdal.vector.creation.options"),
zipfile = FALSE,
...,
packages = targets::tar_option_get("packages"),
tidy_eval = targets::tar_option_get("tidy_eval"),
Expand Down Expand Up @@ -81,12 +87,16 @@ tar_terra_vect <- function(name,
tidy_eval = tidy_eval
)

format <- ifelse(
test = filetype == "ESRI Shapefile",
#special handling of ESRI shapefiles because the output is a dir of multiple files.
yes = create_format_terra_vect_shz(),
no = create_format_terra_vect()
)
# Special handling of drivers that get a dedicated file extension for direct
# write of SOZip (GDAL >= 3.7). Any other driver gets "", which makes the zip
# writer fall back to the generic /vsizip/ path.
extension <- switch(filetype,
  "ESRI Shapefile" = ".shz",
  "GPKG" = ".gpkg.zip",
  ""
)

# The condition is a scalar, so use if/else instead of vectorised ifelse()
# (which is meant for vectors and strips attributes from its result).
# ESRI Shapefile always takes the zip format, regardless of `zipfile`.
format <- if (isTRUE(zipfile) || filetype == "ESRI Shapefile") {
  create_format_terra_vect_zip(extension, filetype = filetype)
} else {
  create_format_terra_vect()
}

targets::tar_target_raw(
name = name,
Expand Down Expand Up @@ -137,24 +147,35 @@ create_format_terra_vect <- function() {
)
}

#' Special handling for ESRI Shapefiles
#' Handling for ZIP files (required for ESRI Shapefile)
#' @noRd
create_format_terra_vect_shz <- function() {
create_format_terra_vect_zip <- function(extension, filetype) {

check_pkg_installed("terra")

# Writer for zipped vector targets.
# `extension` is replaced by its literal value via substitute() when the
# format is created. The driver and creation options are read from the
# GEOTARGETS_GDAL_VECTOR_* environment variables.
# @param object Vector object handed to terra::writeVector().
# @param path Path of the target file in the store.
.format_terra_vect_write_zip <- eval(substitute(function(object, path) {
  has_extension <- nzchar(extension)

  destination <- if (has_extension) {
    # write next to the target using the driver-specific zip extension
    paste0(path, extension)
  } else {
    # no extension, use generic "/vsizip/{path to zipfile}/path/in/zipfile"
    paste0("/vsizip/{", path, "}/", basename(path))
  }

  creation_options <- strsplit(
    Sys.getenv("GEOTARGETS_GDAL_VECTOR_CREATION_OPTIONS", unset = ";"),
    ";"
  )[[1]]

  terra::writeVector(
    x = object,
    filename = destination,
    filetype = Sys.getenv("GEOTARGETS_GDAL_VECTOR_DRIVER"),
    overwrite = TRUE,
    options = creation_options
  )

  # move the extension-named file to the path the store expects
  if (has_extension) {
    file.rename(destination, path)
  }
}, list(extension = extension)))

targets::tar_format(
read = function(path) terra::vect(paste0("/vsizip/{", path, "}")),
write = function(object, path) {
terra::writeVector(
x = object,
filename = paste0(path, ".shz"),
filetype = "ESRI Shapefile",
overwrite = TRUE,
options = strsplit(Sys.getenv("GEOTARGETS_GDAL_VECTOR_CREATION_OPTIONS", unset = ";"), ";")[[1]]
)
file.rename(paste0(path, ".shz"), path)
},
write = .format_terra_vect_write_zip,
marshal = function(object) terra::wrap(object),
unmarshal = function(object) terra::unwrap(object)
)
Expand Down
13 changes: 13 additions & 0 deletions man/tar_terra_rast.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 9 additions & 2 deletions man/tar_terra_vect.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions tests/testthat/_snaps/tar-terra.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,19 @@
min value : 141
max value : 547

# tar_terra_rast(zipfile=TRUE) works

Code
x
Output
class : SpatRaster
dimensions : 90, 95, 1 (nrow, ncol, nlyr)
resolution : 0.008333333, 0.008333333 (x, y)
extent : 5.741667, 6.533333, 49.44167, 50.19167 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
source : test_terra_rast2}
name : elevation

# tar_terra_vect() works

Code
Expand Down Expand Up @@ -47,3 +60,20 @@
1 Diekirch 2 Diekirch 218 32543
1 Diekirch 3 Redange 259 18664

---

Code
z
Output
class : SpatVector
geometry : polygons
dimensions : 12, 6 (geometries, attributes)
extent : 5.74414, 6.528252, 49.44781, 50.18162 (xmin, xmax, ymin, ymax)
source : test_terra_vect_geobuf_zip} (test_terra_vect_geobuf_zip)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
names : ID_1 NAME_1 ID_2 NAME_2 AREA POP
type : <num> <chr> <num> <chr> <num> <int>
values : 3 Luxembourg 10 Luxembourg 237 182607
2 Grevenmacher 7 Remich 129 22366
2 Grevenmacher 6 Echternach 188 18899

34 changes: 34 additions & 0 deletions tests/testthat/test-tar-terra.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,30 @@ targets::tar_test("tar_terra_rast() works", {
)
})

# Builds two zipped raster targets (default driver with streamable,
# uncompressed output, and GPKG) and checks that both can be read back.
targets::tar_test("tar_terra_rast(zipfile=TRUE) works", {
  targets::tar_script({
    list(
      geotargets::tar_terra_rast(
        test_terra_rast2,
        terra::rast(system.file("ex/elev.tif", package = "terra")),
        gdal = c("STREAMABLE_OUTPUT=YES", "COMPRESS=NONE"),
        zipfile = TRUE
      ),
      geotargets::tar_terra_rast(
        test_terra_rast3,
        terra::rast(system.file("ex/elev.tif", package = "terra")),
        filetype = "GPKG",
        zipfile = TRUE
      )
    )
  })
  targets::tar_make()
  # no target may have recorded an error during the build
  expect_true(all(is.na(targets::tar_meta()$error)))
  x <- targets::tar_read(test_terra_rast2)
  expect_s4_class(x, "SpatRaster")
  expect_snapshot(x)
  # The GPKG target was previously built but never read back; check the
  # write/read round trip for it as well (no snapshot, so none is added).
  y <- targets::tar_read(test_terra_rast3)
  expect_s4_class(y, "SpatRaster")
})

targets::tar_test("tar_terra_rast() works with multiple workers (tests marshaling/unmarshaling)", {
targets::tar_script({
targets::tar_option_set(controller = crew::crew_controller_local(workers = 2))
Expand Down Expand Up @@ -55,17 +79,27 @@ targets::tar_test("tar_terra_vect() works", {
test_terra_vect_shz,
lux_area(),
filetype = "ESRI Shapefile"
),
geotargets::tar_terra_vect(
test_terra_vect_geobuf_zip,
lux_area(),
filetype = "FlatGeobuf",
zipfile = TRUE
)
)
})
targets::tar_make()
x <- targets::tar_read(test_terra_vect)
y <- targets::tar_read(test_terra_vect_shz)
z <- targets::tar_read(test_terra_vect_geobuf_zip)
expect_s4_class(x, "SpatVector")
expect_s4_class(y, "SpatVector")
expect_s4_class(z, "SpatVector")
expect_snapshot(x)
expect_snapshot(y)
expect_snapshot(z)
expect_equal(terra::values(x), terra::values(y))
# expect_equal(terra::values(y), terra::values(z)) # flatgeobuf in different order?
})

targets::tar_test("tar_terra_vect() works with multiple workers (tests marshaling/unmarshaling)", {
Expand Down
Loading