Skip to content

Commit

Permalink
Merge pull request #197 from ropensci/190
Browse files Browse the repository at this point in the history
nanoparquet format
  • Loading branch information
wlandau authored Sep 25, 2024
2 parents a0791a8 + 952db01 commit af7ce4b
Show file tree
Hide file tree
Showing 15 changed files with 398 additions and 11 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Description: Function-oriented Make-like declarative pipelines for
reproducible pipelines concisely and compactly.
The methods in this package were influenced by the 'drake' R package
by Will Landau (2018) <doi:10.21105/joss.00550>.
Version: 0.9.1.9002
Version: 0.10.0
License: MIT + file LICENSE
URL: https://docs.ropensci.org/tarchetypes/, https://github.com/ropensci/tarchetypes
BugReports: https://github.com/ropensci/tarchetypes/issues
Expand Down Expand Up @@ -54,6 +54,7 @@ Imports:
Suggests:
curl (>= 4.3),
knitr (>= 1.28),
nanoparquet,
quarto (>= 1.4),
rmarkdown (>= 2.1),
testthat (>= 3.0.0),
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ export(tar_force)
export(tar_force_change)
export(tar_format_aws_feather)
export(tar_format_feather)
export(tar_format_nanoparquet)
export(tar_fst)
export(tar_fst_dt)
export(tar_fst_tbl)
Expand Down Expand Up @@ -91,6 +92,10 @@ export(tar_map2_size)
export(tar_map2_size_raw)
export(tar_map_rep)
export(tar_map_rep_raw)
export(tar_nanoparquet)
export(tar_nanoparquet_convert)
export(tar_nanoparquet_read)
export(tar_nanoparquet_write)
export(tar_parquet)
export(tar_plan)
export(tar_qs)
Expand Down Expand Up @@ -201,6 +206,7 @@ importFrom(targets,tar_deparse_safe)
importFrom(targets,tar_dir)
importFrom(targets,tar_envir)
importFrom(targets,tar_exist_meta)
importFrom(targets,tar_format)
importFrom(targets,tar_group)
importFrom(targets,tar_load)
importFrom(targets,tar_meta)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# tarchetypes 0.9.1.9002 (development)
# tarchetypes 0.10.0

* Add a `delimiter` argument to `tar_map()` etc. for customizing separators in target names (#177, @psychelzh).
* Add "raw" hook functions (#185, @multimeric).
Expand Down
96 changes: 96 additions & 0 deletions R/tar_format_nanoparquet.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#' @title Nanoparquet format
#' @export
#' @keywords storage formats
#' @description Nanoparquet storage format for data frames.
#' Uses [nanoparquet::read_parquet()] and [nanoparquet::write_parquet()]
#' to read and write data frames returned by targets in a pipeline.
#' Note: attributes such as `dplyr` row groupings and `posterior`
#' draws info are dropped during the writing process.
#' @return A [targets::tar_format()] storage format specification string
#' that can be directly supplied to the `format` argument of
#' [targets::tar_target()] or [targets::tar_option_set()].
#' @param compression Character string, compression type for saving the
#' data. See the `compression` argument of [nanoparquet::write_parquet()]
#' for details.
#' @param class Character vector with the data frame subclasses to assign.
#' See the `class` argument of [nanoparquet::parquet_options()] for details.
#' @examples
#' if (identical(Sys.getenv("TAR_LONG_EXAMPLES"), "true")) {
#' targets::tar_dir({ # tar_dir() runs code from a temporary directory.
#' targets::tar_script({
#' library(targets)
#' library(tarchetypes)
#' list(
#' tar_target(
#' name = data,
#' command = data.frame(x = 1),
#' format = tar_format_nanoparquet()
#' )
#' )
#' })
#' targets::tar_make()
#' targets::tar_read(data)
#' })
#' }
tar_format_nanoparquet <- function(compression = "snappy", class = "tbl") {
  rlang::check_installed("nanoparquet")
  # Replace the body of a template function with a prepared call.
  with_body <- function(template, call) {
    body(template) <- call
    template
  }
  # The values of `class` and `compression` are spliced into each call as
  # literals, so the resulting methods carry the settings in their bodies
  # rather than looking them up in this function's environment.
  read <- with_body(
    function(path) {},
    bquote(tarchetypes::tar_nanoparquet_read(path, .(class)))
  )
  write <- with_body(
    function(object, path) {},
    bquote(tarchetypes::tar_nanoparquet_write(object, path, .(compression)))
  )
  convert <- with_body(
    function(object) {},
    bquote(tarchetypes::tar_nanoparquet_convert(object, .(class)))
  )
  targets::tar_format(read = read, write = write, convert = convert)
}

#' @title Nanoparquet read method
#' @export
#' @keywords internal
#' @description Internal function. Reads a parquet file with
#'   [nanoparquet::read_parquet()], passing `class` and
#'   `use_arrow_metadata = TRUE` through [nanoparquet::parquet_options()].
#' @return The object returned by [nanoparquet::read_parquet()].
#' @param path Path to the data.
#' @param class S3 classes to assign to the returned object.
tar_nanoparquet_read <- function(path, class) {
  read_options <- nanoparquet::parquet_options(
    class = class,
    use_arrow_metadata = TRUE
  )
  nanoparquet::read_parquet(file = path, options = read_options)
}

#' @title Nanoparquet write method
#' @export
#' @keywords internal
#' @description Internal function. Writes `object` to `path` with
#'   [nanoparquet::write_parquet()], passing `write_arrow_metadata = TRUE`
#'   through [nanoparquet::parquet_options()].
#' @return The value returned by [nanoparquet::write_parquet()].
#' @param object R object to save.
#' @param path Path to the data.
#' @param compression Compression type.
tar_nanoparquet_write <- function(object, path, compression) {
  write_options <- nanoparquet::parquet_options(write_arrow_metadata = TRUE)
  nanoparquet::write_parquet(
    x = object,
    file = path,
    compression = compression,
    options = write_options
  )
}

#' @title Nanoparquet convert method
#' @export
#' @keywords internal
#' @description Internal function. Overwrites the class of `object` with
#'   the requested classes followed by `"data.frame"`.
#' @return `object` with class `c(class, "data.frame")`.
#' @param object R object to convert.
#' @param class S3 classes to assign to the returned object.
tar_nanoparquet_convert <- function(object, class) {
  # "data.frame" always comes last, after the requested subclasses.
  new_classes <- c(class, "data.frame")
  class(object) <- new_classes
  object
}
79 changes: 75 additions & 4 deletions R/tar_formats.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
#' @title Target factories for storage formats
#' @name tar_formats
#' @family Formats
#' @family target factories for storage formats
#' @description Target factories for targets with
#' specialized storage formats. For example,
#' `tar_qs(name = data, command = get_data())` is shorthand for
#' `tar_target(name = data, command = get_data(), format = "qs")`.
#'
#' Most of the formats are shorthand for built-in formats in `targets`.
#' The only exception currently is the `nanoparquet` format:
#' `tar_nanoparquet(data, get_data())` is shorthand for
#' `tar_target(data, get_data(), format = tar_format_nanoparquet())`,
#' where [tar_format_nanoparquet()] resides in `tarchetypes`.
#'
#' [tar_format_feather()] is superseded in favor of [tar_arrow_feather()],
#' and all the `tar_aws_*()` functions are superseded because of the
#' introduction of the `aws` argument into [targets::tar_target()].
Expand All @@ -18,14 +24,18 @@
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritParams targets::tar_target
#' @inheritParams tar_format_nanoparquet
#' @examples
#' if (identical(Sys.getenv("TAR_LONG_EXAMPLES"), "true")) {
#' targets::tar_dir({ # tar_dir() runs code from a temporary directory.
#' targets::tar_script(
#' targets::tar_script({
#' library(targets)
#' library(tarchetypes)
#' list(
#' tarchetypes::tar_rds(x, 1)
#' tar_rds(name = x, command = 1),
#' tar_nanoparquet(name = y, command = data.frame(x = x))
#' )
#' )
#' })
#' targets::tar_make()
#' })
#' }
Expand Down Expand Up @@ -140,6 +150,67 @@ tar_fst_dt <- tar_format_alias("fst_dt")
#' @rdname tar_formats
tar_fst_tbl <- tar_format_alias("fst_tbl")

#' @export
#' @rdname tar_formats
tar_nanoparquet <- function(
  name,
  command,
  pattern = NULL,
  tidy_eval = targets::tar_option_get("tidy_eval"),
  packages = targets::tar_option_get("packages"),
  library = targets::tar_option_get("library"),
  repository = targets::tar_option_get("repository"),
  iteration = targets::tar_option_get("iteration"),
  error = targets::tar_option_get("error"),
  memory = targets::tar_option_get("memory"),
  garbage_collection = targets::tar_option_get("garbage_collection"),
  deployment = targets::tar_option_get("deployment"),
  priority = targets::tar_option_get("priority"),
  resources = targets::tar_option_get("resources"),
  storage = targets::tar_option_get("storage"),
  retrieval = targets::tar_option_get("retrieval"),
  cue = targets::tar_option_get("cue"),
  description = targets::tar_option_get("description"),
  compression = "snappy",
  class = "tbl"
) {
  # `name`, `command`, and `pattern` are captured unevaluated with
  # substitute(), so they must not be forced before this point.
  target_name <- targets::tar_deparse_language(substitute(name))
  envir <- targets::tar_option_get("envir")
  command_expr <- targets::tar_tidy_eval(
    as.expression(substitute(command)),
    envir,
    tidy_eval
  )
  pattern_expr <- targets::tar_tidy_eval(
    as.expression(substitute(pattern)),
    envir,
    tidy_eval
  )
  # Every other setting is forwarded unchanged; only the format is fixed to
  # the nanoparquet format built from `compression` and `class`.
  targets::tar_target_raw(
    name = target_name,
    command = command_expr,
    pattern = pattern_expr,
    packages = packages,
    library = library,
    format = tar_format_nanoparquet(
      compression = compression,
      class = class
    ),
    repository = repository,
    iteration = iteration,
    error = error,
    memory = memory,
    garbage_collection = garbage_collection,
    deployment = deployment,
    priority = priority,
    resources = resources,
    storage = storage,
    retrieval = retrieval,
    cue = cue,
    description = description
  )
}

#' @title Superseded target factories for storage formats
#' @name tar_formats_superseded
#' @keywords internal
Expand Down
2 changes: 1 addition & 1 deletion R/tar_package.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#' tar_assert_scalar tar_assert_target tar_assert_target_list
#' tar_assert_true tar_assert_unique tar_assert_unique_targets
#' tar_cue tar_definition tar_deparse_language tar_deparse_safe
#' tar_dir tar_envir tar_exist_meta tar_group tar_load tar_meta
#' tar_dir tar_envir tar_exist_meta tar_format tar_group tar_load tar_meta
#' tar_option_get tar_option_set tar_path_target tar_read
#' tar_runtime_object tar_script tar_seed_create tar_seed_set
#' tar_target tar_target_raw tar_test tar_tidy_eval
Expand Down
3 changes: 3 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ reference:
- title: Target factories for storage formats
contents:
- 'tar_formats'
- title: Storage formats
contents:
- 'tar_format_nanoparquet'
- title: Simple files
contents:
- 'tar_file_read'
Expand Down
2 changes: 2 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,5 @@ zeromq
ZeroMQ
jupyter
Jupyter
Nanoparquet

49 changes: 49 additions & 0 deletions man/tar_format_nanoparquet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit af7ce4b

Please sign in to comment.