Skip to content

Commit

Permalink
Improve transmute and mutate translations
Browse files Browse the repository at this point in the history
By using extended j expression as suggested by @MichaelChirico
  • Loading branch information
hadley committed Jul 2, 2019
1 parent 2967bc8 commit dbefca7
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 54 deletions.
51 changes: 16 additions & 35 deletions R/step-mutate.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
step_mutate <- function(parent, new_vars = list()) {
step_mutate <- function(parent, new_vars = list(), nested = FALSE) {
vars <- union(parent$vars, names(new_vars))

new_step(
Expand All @@ -7,13 +7,22 @@ step_mutate <- function(parent, new_vars = list()) {
groups = parent$groups,
needs_copy = !parent$implicit_copy,
new_vars = new_vars,
nested = nested,
class = "dtplyr_step_mutate"
)
}

dt_call.dtplyr_step_mutate <- function(x, needs_copy = x$needs_copy) {
# i is always empty because we never mutate a subset
j <- call2(":=", !!!x$new_vars)
if (!x$nested) {
j <- call2(":=", !!!x$new_vars)
} else {
assign <- Map(function(x, y) call2("<-", x, y), syms(names(x$new_vars)), x$new_vars)
output <- call2(".", !!!syms(names(x$new_vars)))
expr <- call2("{", !!!assign, output)
j <- call2(":=", call2("c", !!!names(x$new_vars)), expr)
}

out <- call2("[", dt_call(x$parent, needs_copy), , j)

if (length(x$groups) > 0) {
Expand All @@ -29,21 +38,11 @@ dt_call.dtplyr_step_mutate <- function(x, needs_copy = x$needs_copy) {
mutate.dtplyr_step <- function(.data, ...) {
dots <- capture_dots(.data, ...)

nest_vars(.data, dots, .data$vars, transmute = FALSE)
}

#' @importFrom dplyr transmute
#' @export
transmute.dtplyr_step <- function(.data, ...) {
dots <- capture_dots(.data, ...)

nest_vars(.data, dots, .data$vars, transmute = TRUE)
nested <- nested_vars(.data, dots, .data$vars)
step_mutate(.data, dots, nested)
}


# For each expression, check if it uses any newly created variables.
# If so, nest the mutate()
nest_vars <- function(.data, dots, all_vars, transmute = FALSE) {
nested_vars <- function(.data, dots, all_vars) {
new_vars <- character()
all_new_vars <- unique(names(dots))

Expand All @@ -53,37 +52,19 @@ nest_vars <- function(.data, dots, all_vars, transmute = FALSE) {
used_vars <- all_names(get_expr(dots[[i]]))

if (any(used_vars %in% new_vars)) {
.data <- step_mutate(.data, dots[new_vars])
all_vars <- c(all_vars, setdiff(new_vars, all_vars))
new_vars <- cur_var
init <- i
return(TRUE)
} else {
new_vars <- c(new_vars, cur_var)
}
}

if (init != 0L) {
dots <- dots[-seq2(1L, init - 1)]
}

if (transmute) {
# Final step needs to include all variable names
vars <- syms(set_names(all_new_vars))
vars[names(dots)] <- dots
names(vars)[!names(vars) %in% names(dots)] <- ""
vars <- simplify_names(vars)

step_subset(.data, vars = all_new_vars, j = call2(".", !!!vars))
} else {
step_mutate(.data, dots)
}
FALSE
}

# Helpers -----------------------------------------------------------------

all_names <- function(x) {
if (is.name(x)) return(as.character(x))
if (is_quosure(x)) return(all_names(quo_get_expr(x)))
if (!is.call(x)) return(NULL)

unique(unlist(lapply(x[-1], all_names), use.names = FALSE))
Expand Down
17 changes: 17 additions & 0 deletions R/step-subset.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ select.dtplyr_step <- function(.data, ...) {
step_group(out, groups)
}


#' @importFrom dplyr summarise
#' @export
summarise.dtplyr_step <- function(.data, ...) {
Expand All @@ -131,6 +132,22 @@ summarise.dtplyr_step <- function(.data, ...) {
step_group(out, groups = head(.data$groups, -1))
}

#' @importFrom dplyr transmute
#' @export
transmute.dtplyr_step <- function(.data, ...) {
dots <- capture_dots(.data, ...)
nested <- nested_vars(.data, dots, .data$vars)

if (!nested) {
j <- call2(".", !!!dots)
} else {
assign <- Map(function(x, y) call2("<-", x, y), syms(names(dots)), dots)
output <- call2(".", !!!syms(set_names(names(dots))))
j <- call2("{", !!!assign, output)
}
step_subset_j(.data, vars = names(dots), j = j)
}

# exported onLoad
filter.dtplyr_step <- function(.data, ...) {
dots <- capture_dots(.data, ..., .named = FALSE)
Expand Down
22 changes: 6 additions & 16 deletions tests/testthat/test-step-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,31 +48,21 @@ test_that("generates single calls as expect", {
expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)])
)

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x)])
)

expect_equal(
dt %>% transmute(x2 = x * 2) %>% show_query(),
expr(DT[, .(x2 = x * 2)])
)
})

test_that("mutate generates multiple steps if needed", {
test_that("mutate generates compound expression if needed", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

expect_equal(
dt %>% mutate(x2 = x * 2, x4 = x2 * 2) %>% show_query(),
expr(copy(DT)[, `:=`(x2 = x * 2)][, `:=`(x4 = x2 * 2)])
)
})

test_that("transmute generates multiple steps if needed", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

expect_equal(
dt %>% transmute(x2 = x * 2, x4 = x2 * 2) %>% show_query(),
expr(copy(DT)[, `:=`(x2 = x * 2)][, .(x2, x4 = x2 * 2)])
expr(copy(DT)[, c("x2", "x4") := {
x2 <- x * 2
x4 <- x2 * 2
.(x2, x4)
}])
)
})
21 changes: 21 additions & 0 deletions tests/testthat/test-step-subset.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ test_that("simple calls generate expected translations", {
expr(DT[, .(x = mean(x))])
)

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x = x)])
)

expect_equal(
dt %>% arrange(x) %>% show_query(),
expr(DT[order(x)])
Expand Down Expand Up @@ -222,3 +227,19 @@ test_that("basic operation as expected", {
)
})


# transmute ---------------------------------------------------------------

test_that("transmute generates compound expression if needed", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

expect_equal(
dt %>% transmute(x2 = x * 2, x4 = x2 * 2) %>% show_query(),
expr(DT[, {
x2 <- x * 2
x4 <- x2 * 2
.(x2 = x2, x4 = x4)
}])
)
})

6 changes: 3 additions & 3 deletions vignettes/translation.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ This document assumes that you're familiar with the basics of data.table; if you
To get started, I'll create a simple lazy frame. The actual data doesn't matter here since we're just looking at the translation:

```{r}
df <- data.frame(a = integer(), b = integer(), c = integer(), d = integer())
df <- data.frame(a = 1:5, b = 1:5, c = 1:5, d = 1:5)
dt <- lazy_dt(df)
```

Expand Down Expand Up @@ -80,9 +80,9 @@ dt %>% transmute(a2 = a * 2) %>% show_query()
dt %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query()
```

Note that dplyr will not doesnthe input data by default, see below for more details
Note that dplyr will not doesn't copy the input data by default, see below for more details

`mutate()` allows to refer to variables that you just created. data.tables `:=` doesn't support that out of the box, so we automatically chain together as many `[` as needed:
`mutate()` allows to refer to variables that you just created using an "extended `j` expression:

```{r}
dt %>% mutate(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>% show_query()

This comment has been minimized.

Copy link
@MichaelChirico

MichaelChirico Jul 3, 2019

Contributor

do you allow combining mutate+group_by to add columns to the main table? Just remembered this might not always play nicely with keyby: Rdatatable/data.table#2763

Conceptually it's a bit odd to be using := with keyby

This comment has been minimized.

Copy link
@hadley

hadley Jul 3, 2019

Author Member

Luckily I don't — I only ever keyby column names. Whenever a grouping variable combines multiple variables, I do an interim mutate first (this the same approach that leads to the supoptimal distinct translations)

Expand Down

0 comments on commit dbefca7

Please sign in to comment.