[help] feeding information from helper table to select targets to purrr::reduce #148
-
Help
Description: I have read access to a series of CSVs that I want to read in, process, and then join. The file paths are horrid, so I've written code to create a helper table that finds the file paths and gives each CSV a "file_category" as an easy way for the user to refer to the tables. For different uses of the targets pipeline, the user should be able to select different tables (i.e. file categories) by filtering the helper table, and then read them in using a customised fread function and merge them. I've got tar_map working to read in the files with the help of the helper table. What I can't get to work, despite many, many hours of trying, is a version of purrr::reduce() and merge() that updates based on either the helper table or the last target of the tar_map. That is, this works: library(targets)
tar_script({
library(targets)
library(tarchetypes)
library(tibble)
# helper table, user can filter as needed
# Only targets, tarchetypes and tibble are attached in this script, so the
# dplyr/purrr verbs are namespace-qualified; a bare `filter()` would otherwise
# resolve to stats::filter().
iris_to_keep <- dplyr::filter(
  tribble(
    ~name, ~file_cat, ~split_name,
    "sepal", "Sepal", "split_sepal",
    "petal", "Petal", "split_petal",
    "species", "Species", "split_species"
  ),
  file_cat != "Petal"
)
list(
  # Static branching: one `split_<name>` target per remaining row of the helper
  # table. The bare symbol `file_cat` is what tar_map() fills in per row, so
  # each target keeps only the iris columns of its own category (plus the
  # `row_id` join key) rather than the columns of every kept category.
  tar_map(
    values = iris_to_keep,
    names = name,
    tar_target(
      split,
      iris |>
        dplyr::select(dplyr::starts_with(file_cat)) |>
        dplyr::mutate(row_id = dplyr::row_number())
    )
  ),
  # !!! need to manually comment out split_petal because
  # user has removed it above from the helper table
  tar_target(
    combine,
    purrr::reduce(
      list(
        split_species,
        # split_petal,
        split_sepal
      ),
      dplyr::full_join,
      by = "row_id"
    )
  )
)
}, ask = F) This works (but I want to merge targets that are data.tables, not bind_cols...) library(targets)
tar_script({
tar_option_set(
packages = c("tidyverse")
)
library(targets)
library(tarchetypes)
library(tidyverse)
# helper table, user can filter as needed
iris_to_keep <- tribble(
  ~name, ~file_cat, ~split_name,
  "sepal", "Sepal", "split_sepal",
  "petal", "Petal", "split_petal",
  "species", "Species", "split_species"
) %>%
  filter(file_cat != "Petal")
# Static branching: one `split_<name>` target per remaining row, each holding
# the iris columns whose names start with that row's `file_cat`.
# `unlist = FALSE` keeps the generated targets grouped under the template
# target's name, which is what makes `mapped$split` below usable.
mapped <- tar_map(
  values = iris_to_keep,
  names = name,
  unlist = FALSE,
  tar_target(
    split,
    iris %>%
      select(starts_with(file_cat))
  )
)
list(
  mapped,
  # tar_combine() splices every upstream `split_*` target into the command at
  # `!!!.x`, so the set of bound columns follows the helper table.
  tar_combine(
    name = combined,
    command = bind_cols(!!!.x),
    mapped$split
  )
)
}, ask = F) but no combination I've tried with library(targets)
tar_script({
library(targets)
library(tarchetypes)
library(tibble)
# helper table, user can filter as needed
iris_to_keep <- tribble(
~name, ~file_cat, ~split_name,
"sepal", "Sepal", "split_sepal",
"petal", "Petal", "split_petal",
"species", "Species", "split_species"
) %>%
filter(file_cat != "Petal")
# Static branching: one `split` target per remaining helper-table row. Each
# keeps the iris columns whose names start with that row's `file_cat` and adds
# a `row_id` column to merge on.
mapped <- tar_map(
values = iris_to_keep,
names = name,
unlist = F,
tar_target(split,
iris %>%
select(starts_with(file_cat)) %>%
mutate(row_id = row_number())
)
)
list(
mapped,
# NOTE(review): this is the call that does not behave as intended. `!!!.x` is
# spliced in as separate named arguments of reduce(), i.e. the generated
# command is
#   reduce(split_sepal = split_sepal, split_species = split_species,
#          merge, by = "row_id", all.x = T, all.y = T)
# so the data frames are not passed to reduce() as one list in `.x`.
tar_combine(
name = combined,
command = reduce(!!!.x, merge, by = "row_id", all.x = T, all.y = T),
mapped$split
)
)
}, ask = F) |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 1 reply
-
From
cat(tar_manifest()$command[[3]])
#> reduce(split_sepal = split_sepal, split_species = split_species,
#> merge, by = "row_id", all.x = T, all.y = T) What we really want is something like this: reduce(
.x = list(split_sepal = split_sepal, split_species = split_species),
.f = \(x, y) merge(x = x, y = y, by = "row_id", all.x = TRUE, all.y = TRUE)
) The following # _targets.R file:
library(targets)
library(tarchetypes)
library(tibble)
library(dplyr)
library(purrr)
# Helper table: one row per file category that should enter the pipeline.
iris_to_keep <- tribble(
  ~name, ~file_cat, ~split_name,
  "sepal", "Sepal", "split_sepal",
  "species", "Species", "split_species"
)
# Static branching: one `split_<name>` target per helper-table row, holding the
# iris columns for that row's `file_cat` plus a `row_id` merge key.
# `unlist = FALSE` returns the targets grouped under the template target's
# name, so `mapped$split` is the list of `split_*` targets. With the default
# flat list (named `split_sepal`, `split_species`), `mapped$split` is an
# ambiguous partial match and would not select them.
mapped <- tar_map(
  values = iris_to_keep,
  names = name,
  unlist = FALSE,
  tar_target(
    split,
    iris %>%
      select(starts_with(file_cat)) %>%
      mutate(row_id = row_number())
  )
)
list(
mapped,
tar_combine(
  name = combined,
  # `!!!.x` expands to the upstream targets as named arguments. Wrapping it in
  # list() hands them to reduce() as a single `.x`, and the merge options live
  # inside `.f` instead of being passed through reduce()'s `...`.
  command = reduce(
    .x = list(!!!.x),
    .f = function(left, right) {
      merge(x = left, y = right, by = "row_id", all.x = TRUE, all.y = TRUE)
    }
  ),
  mapped$split
)
) |
Beta Was this translation helpful? Give feedback.
From `?purrr::reduce`, it looks like informal `...` arguments are constant arguments to `.f` rather than part of the vector being reduced, and `...` is discouraged anyway.
We really want those data frames to be part of `.x`.
`tar_manifest()` shows that the command of the combined target looks like this:
What we really want is something like this:
The following `_targets.R` file seems to work:
# _tar…