Skip to content

Commit

Permalink
Closes #3758 and completes coverage of data.table.R
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Chirico committed Aug 11, 2019
1 parent 59f49ab commit 45ce60e
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 22 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@
24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report.
25. Slight fix to the logic for auto-naming the `by` clause for using a custom function like `evaluate` to now be named `evaluate` instead of the name of the first symbolic argument, [#3758](https://github.com/Rdatatable/data.table/issues/3758).
#### NOTES
1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
Expand Down
2 changes: 1 addition & 1 deletion R/bmerge.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
}
}
if (xclass == iclass) {
if (verbose) cat("i.",names(i)[ic],"has same type (",xclass,") as x.",names(x)[xc],". No coercion needed.\n", sep="")
if (verbose) cat("i.", names(i)[ic], " has same type (", xclass, ") as x.", names(x)[xc], ". No coercion needed.\n", sep="")
next
}
if (xclass=="character" || iclass=="character" ||
Expand Down
40 changes: 20 additions & 20 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -841,8 +841,10 @@ replace_order = function(isub, verbose, env) {
byvars = all.vars(bysubl[[jj+1L]], functions = TRUE)
if (length(byvars) == 1L) tt = byvars
else {
tt = grep("^eval|^[^[:alpha:]. ]",byvars,invert=TRUE,value=TRUE)
if (length(tt)) tt = tt[1L] else all.vars(bysubl[[jj+1L]])[1L]
# take the first variable that is (1) not eval (#3758) and (2) starts with a character that can't start a variable name
tt = grep("^eval$|^[^[:alpha:]. ]", byvars, invert=TRUE, value=TRUE)
# byvars but exclude functions or `0`+`1` becomes `+`
tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L]
}
# fix for #497
if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) {
Expand Down Expand Up @@ -1087,9 +1089,9 @@ replace_order = function(isub, verbose, env) {
if (is.list(k)) {
origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame())
if (is.character(j)) {
if (length(j)!=1L) stop("L[[i]][,:=] syntax only valid when i is length 1, but it's length %d",length(j))
if (length(j)!=1L) stop("Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] syntax is only valid when i is length 1, but it's length ", length(j))
j = match(j, names(k))
if (is.na(j)) stop("Item '",origj,"' not found in names of list")
if (is.na(j)) stop("Internal error -- item '", origj, "' not found in names of list") # nocov
}
.Call(Csetlistelt,k,as.integer(j), x)
} else if (is.environment(k) && exists(as.character(name[[3L]]), k)) {
Expand Down Expand Up @@ -1118,7 +1120,7 @@ replace_order = function(isub, verbose, env) {
xcolsAns = seq_along(ansvars)
icols = icolsAns = integer()
} else {
if (!length(leftcols)) stop("column(s) not found: ", paste(ansvars[wna],collapse=", "))
if (!length(leftcols)) stop("Internal error -- column(s) not found: ", paste(ansvars[wna],collapse=", ")) # nocov
xcols = w[!wna]
xcolsAns = which(!wna)
map = c(seq_along(i), leftcols) # this map is to handle dups in leftcols, #3635
Expand All @@ -1131,7 +1133,7 @@ replace_order = function(isub, verbose, env) {
if (any(w2na <- is.na(w2))) {
ivars[leftcols] = paste0("i.",ivars[leftcols])
w2[w2na] = chmatch(ansvars[wna][w2na], ivars)
if (any(w2na <- is.na(w2))) stop("column(s) not found: ", paste(ansvars[wna][w2na],sep=", "))
if (any(w2na <- is.na(w2))) stop("Internal error -- column(s) not found: ", paste(ansvars[wna][w2na],sep=", ")) # nocov
}
}
icols = w2
Expand Down Expand Up @@ -1294,7 +1296,7 @@ replace_order = function(isub, verbose, env) {
identical(irows, integer(0L)) && !bynull,
length(irows) && !anyNA(irows) && all(irows==0L) ## anyNA() because all() returns NA (not FALSE) when irows is all-NA. TODO: any way to not check all 'irows' values?
))
if (is.atomic(jval)) jval = jval[0L] else jval = lapply(jval, `[`, 0L)
jval = lapply(jval, `[`, 0L)
if (is.atomic(jval)) {
setattr(jval,"names",NULL)
jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead?
Expand Down Expand Up @@ -1884,7 +1886,7 @@ as.matrix.data.table = function(x, rownames=NULL, rownames.value=NULL, ...) {
non.numeric = non.atomic = FALSE
all.logical = TRUE
for (j in seq_len(p)) {
if (is.ff(X[[j]])) X[[j]] = X[[j]][] # to bring the ff into memory, since we need to create a matrix in memory
if (is.ff(X[[j]])) X[[j]] = X[[j]][] # nocov to bring the ff into memory, since we need to create a matrix in memory
xj = X[[j]]
if (length(dj <- dim(xj)) == 2L && dj[2L] > 1L) {
if (inherits(xj, "data.table"))
Expand Down Expand Up @@ -1930,13 +1932,13 @@ as.matrix.data.table = function(x, rownames=NULL, rownames.value=NULL, ...) {

# bug #2375. fixed. same as head.data.frame and tail.data.frame to deal with negative indices
head.data.table = function(x, n=6L, ...) {
if (!cedta()) return(NextMethod())
if (!cedta()) return(NextMethod()) # nocov
stopifnot(length(n) == 1L)
i = seq_len(if (n<0L) max(nrow(x)+n, 0L) else min(n,nrow(x)))
x[i, , ]
}
tail.data.table = function(x, n=6L, ...) {
if (!cedta()) return(NextMethod())
if (!cedta()) return(NextMethod()) # nocov
stopifnot(length(n) == 1L)
n = if (n<0L) max(nrow(x) + n, 0L) else min(n, nrow(x))
i = seq.int(to=nrow(x), length.out=n)
Expand Down Expand Up @@ -2077,7 +2079,7 @@ within.data.table = function (data, expr, ...)
# basically within.list but retains key (if any)
# will be slower than using := or a regular query (see ?within for further info).
{
if (!cedta()) return(NextMethod())
if (!cedta()) return(NextMethod()) # nocov
parent = parent.frame()
e = evalq(environment(), data, parent)
eval(substitute(expr), e) # might (and it's known that some user code does) contain rm()
Expand All @@ -2101,7 +2103,7 @@ within.data.table = function (data, expr, ...)
transform.data.table = function (`_data`, ...)
# basically transform.data.frame with data.table instead of data.frame, and retains key
{
if (!cedta()) return(NextMethod())
if (!cedta()) return(NextMethod()) # nocov
e = eval(substitute(list(...)), `_data`, parent.frame())
tags = names(e)
inx = chmatch(tags, names(`_data`))
Expand Down Expand Up @@ -2176,7 +2178,7 @@ any_na = function(x, by=seq_along(x)) .Call(CanyNA, x, by)

na.omit.data.table = function (object, cols = seq_along(object), invert = FALSE, ...) {
# compare to stats:::na.omit.data.frame
if (!cedta()) return(NextMethod())
if (!cedta()) return(NextMethod()) # nocov
if ( !missing(invert) && is.na(as.logical(invert)) )
stop("Argument 'invert' must be logical TRUE/FALSE")
if (is.character(cols)) {
Expand Down Expand Up @@ -2761,15 +2763,15 @@ rowid = function(..., prefix=NULL) {

rowidv = function(x, cols=seq_along(x), prefix=NULL) {
if (!is.null(prefix) && (!is.character(prefix) || length(prefix) != 1L))
stop("prefix must be NULL or a character vector of length=1.")
stop("'prefix' must be NULL or a character vector of length 1.")
if (is.atomic(x)) {
if (!missing(cols) && !is.null(cols))
stop("x is a single vector, non-NULL 'cols' doesn't make sense.")
cols = 1L
x = as_list(x)
} else {
if (!length(cols))
stop("x is a list, 'cols' can not be on 0-length.")
stop("x is a list, 'cols' cannot be 0-length.")
if (is.character(cols))
cols = chmatch(cols, names(x))
cols = as.integer(cols)
Expand All @@ -2790,15 +2792,15 @@ rleid = function(..., prefix=NULL) {

rleidv = function(x, cols=seq_along(x), prefix=NULL) {
if (!is.null(prefix) && (!is.character(prefix) || length(prefix) != 1L))
stop("prefix must be NULL or a character vector of length=1.")
stop("'prefix' must be NULL or a character vector of length 1.")
if (is.atomic(x)) {
if (!missing(cols) && !is.null(cols))
stop("x is a single vector, non-NULL 'cols' doesn't make sense.")
cols = 1L
x = as_list(x)
} else {
if (!length(cols))
stop("x is a list, 'cols' can not be 0-length.")
stop("x is a list, 'cols' cannot be 0-length.")
if (is.character(cols))
cols = chmatch(cols, names(x))
cols = as.integer(cols)
Expand Down Expand Up @@ -2880,7 +2882,7 @@ isReallyReal = function(x) {
## redirect to normal DT[x == TRUE]
stub = call("==", as.symbol(col), TRUE)
}
if (length(stub[[1L]]) != 1) return(NULL) ## Whatever it is, definitely not one of the valid operators
if (length(stub[[1L]]) != 1) return(NULL) # nocov Whatever it is, definitely not one of the valid operators
operator = as.character(stub[[1L]])
if (!operator %chin% validOps$op) return(NULL) ## operator not supported
if (!is.name(stub[[2L]])) return(NULL)
Expand All @@ -2902,7 +2904,6 @@ isReallyReal = function(x) {
# the mode() checks also deals with NULL since mode(NULL)=="NULL" and causes this return, as one CRAN package (eplusr 0.9.1) relies on
return(NULL)
}
if(is.character(x[[col]]) && !operator %chin% c("==", "%in%", "%chin%")) return(NULL) ## base R allows for non-equi operators on character columns, but these can't be optimized.
if (!operator %chin% c("%in%", "%chin%")) {
# additional requirements for notjoin and NA values. Behaviour is different for %in%, %chin% compared to other operators
# RHS is of length=1 or n
Expand Down Expand Up @@ -2998,7 +2999,6 @@ isReallyReal = function(x) {
pat = paste0("(", ops, ")", collapse="|")
if (is.call(onsub) && onsub[[1L]] == "eval") {
onsub = eval(onsub[[2L]], parent.frame(2L), parent.frame(2L))
if (is.call(onsub) && onsub[[1L]] == "eval") { onsub = onsub[[2L]] }
}
if (is.call(onsub) && as.character(onsub[[1L]]) %chin% c("list", ".")) {
spat = paste0("[ ]+(", pat, ")[ ]+")
Expand Down
49 changes: 48 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -1653,6 +1653,7 @@ test(567, DT[,.N,list(a,b)][,N,by=a]$N, c(1L,1L,2L,1L))
test(568, DT[,.N,list(a,b)][,unique(N),by=a]$V1, c(1L,2L,1L))
test(569, DT[,list(.N=.N),list(a,b)][,.N,a], error="The column '.N' can't be grouped because")
test(570, DT[,list(.N=.N),list(a,b)][,unique(.N),a], error="The column '.N' can't be grouped because")
test(570.1, DT[,list(.I=.I),list(a,b)][,.I,a], error="The column '.I' can't be grouped because")

# Test spaces in by="..." format, datatable-help on 31 March
DT = data.table("a "=1:2, "b"=3:4," b"=5:6, v=1:6)
Expand Down Expand Up @@ -15001,7 +15002,7 @@ test(2050.6, rbind(DT[1], data.table(f=factor(letters[10:11]))[0])[,levels(f)],
test(2051.1, `-.IDate`(structure(0, class="Date"), 1L), structure(-1, class="Date"))
test(2051.2, `-.IDate`(1L, 1L), error = 'only subtract from "IDate"')
test(2051.3, format.data.table(1L), error = 'Possibly corrupt data.table')
test(2051.4, rleidv(prefix = 1L), error = 'prefix must be NULL or')
test(2051.4, rleidv(prefix = 1L), error = "'prefix' must be NULL or")
## passing Date to second argument of as.POSIXct.ITime
t = as.ITime(0L)
test(2051.5, as.POSIXct(t, structure(0L, class="Date")), .POSIXct(0, 'UTC'))
Expand Down Expand Up @@ -15522,6 +15523,52 @@ test(2073.7, transpose(L, make.names=NA), error="make.names=NA is out of range [
test(2073.8, transpose(L, make.names=2), list(A=INT(1,4), B=INT(2,5), C=INT(3,6)))
test(2073.9, transpose(L, make.names=2, keep.names='foo'), list(foo=c("a","b"), A=INT(1,4), B=INT(2,5), C=INT(3,6)))

# coverage of data.table.R
## i is NULL
test(2074.01, data.table(1:10)[NULL], data.table(NULL))
## auto-guessing of byvars when none of the columns have "normal" names
test(2074.02, data.table(`0`=0, `1`=1)[ , TRUE, by = .(`0` + `1`)], data.table(`0`=1, V1=TRUE))
## also eval.+ columns are OK, just not eval( patterns, #3758
evaluate = function(x) c('F', 'D', 'C', 'B', 'A')[findInterval(x, c(0, 60, 70, 80, 90, 100))]
test(2074.03, data.table(grade=c(50L, 91L, 95L, 51L, 89L))[ , .N, by=evaluate(grade)],
data.table(evaluate=c('F', 'A', 'B'), N=c(2L, 2L, 1L)))
## error: use recursive character list indexing to assign when also doing alloc.col()
opt = options(datatable.alloccol=1L)
l = list(foo = list(bar = data.table(a = 1:3, b = 4:6)))
test(2074.04, l[[c('foo', 'bar')]][ , (letters) := 16:18], error = 'under-allocated recursively indexed list')
options(opt)
## alloc.col when using 0-truelength j assigning to a subset
DT = data.table(a=1)
### construct incorrectly to have 0 truelength
zDT = structure(list(b=2), class = c('data.table', 'data.frame'))
test(2074.05, DT[1L, b := zDT], data.table(a=1, b=2))
## nested .SD in j
DT = data.table(a=1, b=2)
test(2074.06, DT[ , c(.SD[1], .SD[1, .SD[1]]), by=a], data.table(a=1, b=2, b=2))
## as.matrix.data.table when a column has columns (only possible when constructed incorrectly)
DT = structure(list(a=1:5, d=data.table(b=6:10, c=11:15), m=matrix(16:25, ncol=2L)), class = c('data.table', 'data.frame'))
test(2074.07, as.matrix(DT), matrix(1:25, ncol=5L, dimnames=list(NULL, c('a', 'd.b', 'd.c', 'm.1', 'm.2'))))
## can induce !cedta() from base::rownames to get this error
test(2074.08, rownames(structure(list(1:5), class='data.table')), error="Has it been created manually")
## default dimnames.data.table
test(2074.09, dimnames(data.table(a = 1)), list(NULL, 'a'))
## unlock argument of .shallow
DT = data.table(a = 1)
setattr(DT, '.data.table.locked', TRUE)
test(2074.10, attr(.shallow(DT, unlock=TRUE), '.data.table.locked'), NULL)
## coverage of rowidv & rleidv
test(2074.11, rowidv(1:10, cols=1), error="x is a single vector, non-NULL 'cols'")
test(2074.12, rowidv(1:10), rep(1L, 10L))
test(2074.13, rowidv(list(1:10), cols=integer()), error="x is a list, 'cols' cannot be 0-length")
test(2074.14, rleidv(1:10, cols=1), error="x is a single vector, non-NULL 'cols'")
test(2074.15, rleidv(list(1:10), cols=integer()), error="x is a list, 'cols' cannot be 0-length")
## coverage of .prepareFastSubset
DT = data.table(V1=c('a', 'b', 'a'), V2 = c('hello', 'ello', 'llo'), x=TRUE)
test(2074.16, nrow(DT[!(V1=='a' & V2 %like% 'll')]), 1L)
y = c(TRUE, FALSE, FALSE)
test(2074.17, nrow(DT[x & y]), 1L)
setkey(DT, V1)
test(2074.18, DT[V1=='a', verbose=TRUE], output='Optimized subsetting with key')

###################################
# Add new tests above this line #
Expand Down

0 comments on commit 45ce60e

Please sign in to comment.