Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Misc #938

Closed
wants to merge 1 commit into from
Closed

Misc #938

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ import(stats)
S3method(na.omit, data.table)

# IDateTime support:
export(as.IDate,as.ITime,IDateTime)
export(as.IDate,as.ITime,IDateTime, as.IMonth, as.IQuarter)
export(hour,yday,wday,mday,week,month,quarter,year)

importFrom(chron, chron, as.chron)
export(as.chron.IDate,as.chron.ITime)
export(as.chron.IDate,as.chron.ITime, as.chron.IQuarter, as.chron.IMonth)

S3method("[", ITime)
S3method(as.character, ITime)
Expand Down Expand Up @@ -115,6 +115,45 @@ S3method(split, IDate)
S3method(unique, IDate)
S3method(unique, ITime)

S3method(as.IQuarter, default)
S3method(as.IQuarter, POSIXlt)
S3method(as.IQuarter, numeric)
S3method(as.Date, IQuarter)
S3method(as.POSIXct, IQuarter)
S3method(as.POSIXlt, IQuarter)
S3method(as.character, IQuarter)
S3method(round, IQuarter)
S3method(print, IQuarter)
S3method(format, IQuarter)
S3method(as.data.frame, IQuarter)
S3method(c, IQuarter)
S3method(cut, IQuarter)
S3method(mean, IQuarter)
S3method(rep, IQuarter)
S3method(seq, IQuarter)
S3method(split, IQuarter)
S3method(as.list, IQuarter)
S3method(unique, IQuarter)

S3method(as.IMonth, default)
S3method(as.IMonth, POSIXlt)
S3method(as.IMonth, numeric)
S3method(as.Date, IMonth)
S3method(as.POSIXct, IMonth)
S3method(as.POSIXlt, IMonth)
S3method(as.character, IMonth)
S3method(round, IMonth)
S3method(print, IMonth)
S3method(format, IMonth)
S3method(as.data.frame, IMonth)
S3method(c, IMonth)
S3method(cut, IMonth)
S3method(mean, IMonth)
S3method(rep, IMonth)
S3method(seq, IMonth)
S3method(split, IMonth)
S3method(as.list, IMonth)
S3method(unique, IMonth)


# [.factor
Expand Down
119 changes: 118 additions & 1 deletion R/IDateTime.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,110 @@ round.IDate <- function (x, digits=c("weeks", "months", "quarters", "years"), ..
years = ISOdate(year(x), 1, 1)))
}

###################################################################
# IMonth -- Stored as elapsed months
###################################################################
as.IMonth <- function(x) {
UseMethod("as.IMonth")
}
as.IMonth.default <- function(x) as.IMonth(as.POSIXlt(x))
as.IMonth.POSIXlt <- function(x) {
structure(12L*(x$year-70L) + x$mon, class=c("IMonth"))
}
as.IMonth.numeric <- function(x){
class(x) <- "IMonth"
x
}
as.Date.IMonth <- function(x, ...){
attributes(x) <- NULL
date_origin <- as.Date("1970-01-01")
x <- date_origin + months(x)
as.Date(x, ...)
}
as.POSIXct.IMonth <- function(x, ...){as.POSIXct(as.Date(x,...))}
as.POSIXlt.IMonth <- function(x, ...){as.POSIXlt(as.Date(x,...))}
as.character.IMonth <- function(x, ...){
paste0(year(x),"m", month(x))
}
format.IMonth <- function(x, ...){
format(as.character(x),...)
}
print.IMonth <- function(x, ...){
print(format(x),...)
}
as.data.frame.IMonth <- function(...){
as.data.frame.vector(...)
}
mean.IMonth <- function(x, ...) {as.IMonth(NextMethod())}
cut.IMonth <- function(x, ...) {as.IMonth(NextMethod())}
seq.IMonth <- function(x, ...) {as.IMonth(NextMethod())}
c.IMonth <- function(x, ...) {as.IMonth(NextMethod())}
rep.IMonth <- function(x, ...) {as.IMonth(NextMethod())}
split.IMonth <- function(x, ...) {as.IMonth(NextMethod())}
as.list.IMonth <- function(x, ...) {as.IMonth(NextMethod())}
unique.IMonth <- function(x, ...) {as.IMonth(NextMethod())}
`[.IMonth` <- function(x, ...) {as.IMonth(NextMethod())}

round.IMonth <- function (x, digits=c("quarters", "years"), ...) {
units <- match.arg(digits)
as.IMonth(switch(units,
quarters = ISOdate(year(x), 3 * (quarter(x)-1) + 1, 1),
years = ISOdate(year(x), 1, 1)))
}

###################################################################
# IQuarter -- Stored as elapsed quarters
###################################################################

as.IQuarter <- function(x) {
UseMethod("as.IQuarter")
}
as.IQuarter.default <- function(x) as.IQuarter(as.POSIXlt(x))
as.IQuarter.POSIXlt <- function(x) {
structure(4L*(x$year-70L) + x$mon %/% 3, class=c("IQuarter"))
}
as.IQuarter.numeric <- function(x){
class(x) <- "IQuarter"
x
}
as.Date.IQuarter <- function(x, ...){
attributes(x) <- NULL
date_origin <- as.Date("1970-01-01")
x <- date_origin+ 3L*months(x)
as.Date(x, ...)
}
as.POSIXct.IQuarter <- function(x, ...){as.POSIXct(as.Date(x,...))}
as.POSIXlt.IQuarter <- function(x, ...){as.POSIXlt(as.Date(x,...))}
as.character.IQuarter <- function(x, ...){
paste0(year(x),"q", quarter(x))
}
format.IQuarter <- function(x, ...){
format(as.character(x),...)
}
print.IQuarter <- function(x, ...){
print(format(x),...)
}
as.data.frame.IQuarter <- function(...){
as.data.frame.vector(...)
}
mean.IQuarter <- function(x, ...) {as.IQuarter(NextMethod())}
cut.IQuarter <- function(x, ...) {as.IQuarter(NextMethod())}
seq.IQuarter <- function(x, ...) {as.IQuarter(NextMethod())}
c.IQuarter <- function(x, ...) {as.IQuarter(NextMethod())}
rep.IQuarter <- function(x, ...) {as.IQuarter(NextMethod())}
split.IQuarter <- function(x, ...) {as.IQuarter(NextMethod())}
as.list.IQuarter <- function(x, ...) {as.IQuarter(NextMethod())}
unique.IQuarter <- function(x, ...) {as.IQuarter(NextMethod())}
`[.IQuarter` <- function(x, ...) {as.IQuarter(NextMethod())}


round.IQuarter <- function (x, digits=c("years"), ...) {
units <- match.arg(digits)
as.IMonth(switch(units,
years = ISOdate(year(x), 1, 1)))
}


###################################################################
# ITime -- Integer time-of-day class
# Stored as seconds in the day
Expand Down Expand Up @@ -164,7 +268,21 @@ as.chron.IDate <- function(x, time = NULL, ...) {
chron(dates. = as.chron(as.Date(x)))
}
}
as.chron.IQuarter <- function(x, time = NULL, ...) {
if (!is.null(time)) {
chron(dates. = as.chron(as.Date(x)), times. = as.chron(time))
} else {
chron(dates. = as.chron(as.Date(x)))
}
}

as.chron.IMonth <- function(x, time = NULL, ...) {
if (!is.null(time)) {
chron(dates. = as.chron(as.Date(x)), times. = as.chron(time))
} else {
chron(dates. = as.chron(as.Date(x)))
}
}
as.chron.ITime <- function(x, date = NULL, ...) {
if (!is.null(date)) {
chron(dates. = as.chron(as.Date(date)), times. = as.chron(x))
Expand Down Expand Up @@ -200,4 +318,3 @@ month <- function(x) as.POSIXlt(x)$mon + 1L
quarter <- function(x) as.POSIXlt(x)$mon %/% 3L + 1L
year <- function(x) as.POSIXlt(x)$year + 1900L


156 changes: 102 additions & 54 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
bysub = eval(bysubl[[2]], parent.frame(), parent.frame())
if (is.expression(bysub)) bysub=bysub[[1L]]
bysubl = as.list.default(bysub)
} else if (is.call(bysub) && as.character(bysub[[1L]]) %chin% c("c","key","names", "intersect", "setdiff")) {
} else if (is.call(bysub) && as.character(bysub[[1L]]) %chin% c("c","key","names", "intersect", "setdiff", "(")) {
# catch common cases, so we don't have to copy x[irows] for all columns
# *** TO DO ***: try() this eval first (as long as not list() or .()) and see if it evaluates to column names
# to avoid the explicit c,key,names which already misses paste("V",1:10) for example
Expand Down Expand Up @@ -823,39 +823,62 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
} # else maybe a call to transform or something which returns a list.
av = all.vars(jsub,TRUE) # TRUE fixes bug #1294 which didn't see b in j=fns[[b]](c)
if (".SD" %chin% av) {
if (missing(.SDcols)) {
# here we need to use 'dupdiff' instead of 'setdiff'. Ex: setdiff(c("x", "x"), NULL) will give 'x'.
ansvars = dupdiff(names(x),union(bynames,allbyvars)) # TO DO: allbyvars here for vars used by 'by'. Document.
# just using .SD in j triggers all non-by columns in the subset even if some of
# those columns are not used. It would be tricky to detect whether the j expression
# really does use all of the .SD columns or not, hence .SDcols for grouping
# over a subset of columns

# all duplicate columns must be matched, because nothing is provided
ansvals = dupmatch(ansvars, names(x))
} else {
# FR #4979 - negative numeric and character indices for SDcols
colsub = substitute(.SDcols)
# fix for #5190. colsub[[1L]] gave error when it's a symbol.
if (is.call(colsub) && colsub[[1L]] == "-") {
colm = TRUE
.SDcols = eval(colsub[[2L]], parent.frame(), parent.frame())
} else colm = FALSE
# if .SDcols is numeric, use 'dupdiff' instead of 'setdiff'
if (is.numeric(.SDcols)) {
if (length(unique(sign(.SDcols))) != 1L) stop(".SDcols is numeric but has both +ve and -ve indices")
if (any(is.na(.SDcols)) || any(abs(.SDcols)>ncol(x)) || any(abs(.SDcols)<1L)) stop(".SDcols is numeric but out of bounds (or NA)")
if (colm) ansvars = dupdiff(names(x)[-.SDcols], bynames) else ansvars = names(x)[.SDcols]
ansvals = if (colm) setdiff(seq_along(names(x)), c(as.integer(.SDcols), which(names(x) %chin% bynames))) else as.integer(.SDcols)
} else {
if (!is.character(.SDcols)) stop(".SDcols should be column numbers or names")
if (any(is.na(.SDcols)) || any(!.SDcols %chin% names(x))) stop("Some items of .SDcols are not column names (or are NA)")
if (colm) ansvars = setdiff(setdiff(names(x), .SDcols), bynames) else ansvars = .SDcols
# dups = FALSE here. DT[, .SD, .SDcols=c("x", "x")] again doesn't really help with which 'x' to keep (and if '-' which x to remove)
ansvals = chmatch(ansvars, names(x))
if (missing(.SDcols)) {
# here we need to use 'dupdiff' instead of 'setdiff'. Ex: setdiff(c("x", "x"), NULL) will give 'x'.
ansvars = dupdiff(names(x),union(bynames,allbyvars)) # TO DO: allbyvars here for vars used by 'by'. Document.
# just using .SD in j triggers all non-by columns in the subset even if some of
# those columns are not used. It would be tricky to detect whether the j expression
# really does use all of the .SD columns or not, hence .SDcols for grouping
# over a subset of columns

# all duplicate columns must be matched, because nothing is provided
ansvals = dupmatch(ansvars, names(x))
} else {
# FR #4979 - negative numeric and character indices for SDcols
# Starts NSE
colsub = substitute(.SDcols)
# Starts with -. If present, set colm to False and drop it
if (is.call(colsub) && colsub[[1L]] == "-") {
colm = TRUE
colsub = colsub[[2L]]
} else colm = FALSE
# Then try r or w
if (is.call(colsub) && (colsub[[1L]] == "r" | colsub[[1L]] == "w")){
.SDcols <- eval(colsub[[2]], parent.frame())
if (!is.character(.SDcols)) stop("When r() or w() is specified, .SDcols should be a character vector")
.SDcols <- NULL
if (colsub[[1L]] == "r"){ # regex
for (i in 2:length(colsub)){
.SDcols <- c(.SDcols,grep(colsub[[i]],names(x),value=TRUE))
}
} else{ # wildcard
for (i in 2:length(colsub)){
.SDcols <- c(.SDcols,grep(glob2rx(colsub[[i]]),names(x),value=TRUE))
}
}
# .SDcols might include grouping columns if users wants that, but normally we expect user not to include them in .SDcols
# the output of r() or w() has unique name, always
.SDcols <- unique(.SDcols)
# no matching should be character(0)
if (is.null(.SDcols)) .SDcols <- character(0)
} else{
# in all other cases, evaluate (this handles things with - but not r/w)
.SDcols = eval(colsub, parent.frame())
}
# if .SDcols is numeric, use 'dupdiff' instead of 'setdiff'
if (is.numeric(.SDcols)) {
if (length(unique(sign(.SDcols))) != 1L) stop(".SDcols is numeric but has both +ve and -ve indices")
if (any(is.na(.SDcols)) || any(abs(.SDcols)>ncol(x)) || any(abs(.SDcols)<1L)) stop(".SDcols is numeric but out of bounds (or NA)")
if (colm) ansvars = dupdiff(names(x)[-.SDcols], bynames) else ansvars = names(x)[.SDcols]
ansvals = if (colm) setdiff(seq_along(names(x)), c(as.integer(.SDcols), which(names(x) %chin% bynames))) else as.integer(.SDcols)
} else {
if (!is.character(.SDcols)) stop(".SDcols should be column numbers, names, or names enclosed in r() or w()")
if (any(is.na(.SDcols)) || any(!.SDcols %chin% names(x))) stop("Some items of .SDcols are not column names (or are NA)")
if (colm) ansvars = setdiff(setdiff(names(x), .SDcols), bynames) else ansvars = .SDcols
# dups = FALSE here. DT[, .SD, .SDcols=c("x", "x")] again doesn't really help with which 'x' to keep (and if '-' which x to remove)
ansvals = chmatch(ansvars, names(x))
}
# .SDcols might include grouping columns if users wants that, but normally we expect user not to include them in .SDcols
}
} else {
if (!missing(.SDcols)) warning("This j doesn't use .SD but .SDcols has been supplied. Ignoring .SDcols. See ?data.table.")
ansvars = setdiff(intersect(av,c(names(x),names(i),paste("i.",names(i),sep=""))), bynames)
Expand Down Expand Up @@ -1969,19 +1992,19 @@ subset.data.table <- function (x, subset, select, ...)
# For internal use only. 'by' requires integer input. No argument checks here yet.
is_na <- function(x, by=seq_along(x)) .Call(Cdt_na, x, by)

na.omit.data.table <- function (object, by = seq_along(object), invert = FALSE, ...) {
na.omit.data.table <- function (object, cols = seq_along(object), invert = FALSE, ...) {
if (!cedta()) return(NextMethod())
if ( !missing(invert) && is.na(as.logical(invert)) )
stop("Argument 'invert' must be logical TRUE/FALSE")
if (is.character(by)) {
old = by
by = chmatch(by, names(object), nomatch=0L)
if (any(by==0L))
stop("Columns ", paste(old[by==0L], collapse=","),
if (is.character(cols)) {
old = cols
cols = chmatch(cols, names(object), nomatch=0L)
if (any(cols==0L))
stop("Columns ", paste(old[cols==0L], collapse=","),
" doesn't exist in the input data.table")
}
by = as.integer(by)
ix = .Call(Cdt_na, object, by)
cols = as.integer(cols)
ix = .Call(Cdt_na, object, cols)
if (!invert) ix = !ix
.Call(CsubsetDT, object, which(ix), seq_along(object))
# compare the above to stats:::na.omit.data.frame
Expand Down Expand Up @@ -2150,22 +2173,47 @@ setnames <- function(x,old,new) {
invisible(x)
}

setcolorder <- function(x,neworder)
{

setcolorder <- function(x, old, new){
if (!is.data.table(x)) stop("x is not a data.table")
if (length(neworder)!=length(x)) stop("neworder is length ",length(neworder)," but x has ",length(x)," columns.")
if (is.character(neworder)) {
if (any(duplicated(neworder))) stop("neworder contains duplicate column names")
if (any(duplicated(names(x)))) stop("x has some duplicated column name(s): ",paste(names(x)[duplicated(names(x))],collapse=","),". Please remove or rename the duplicate(s) and try again.")
o = as.integer(chmatch(neworder,names(x)))
if (any(is.na(o))) stop("Names in neworder not found in x: ",paste(neworder[is.na(o)],collapse=","))
} else {
if (!is.numeric(neworder)) stop("neworder is not a character or numeric vector")
o = as.integer(neworder)
m = !(o %in% seq_len(length(x)))
if (any(m)) stop("Column numbers in neworder out of bounds: ",paste(o[m],collapse=","))
if (missing(new)) {
if (length(old)!=length(x)) stop("Can't reorder ",length(old)," columns in a ",length(x)," column data.table")
if (is.character(old)) {
if (any(duplicated(old))) stop("old contains duplicate column names")
if (any(duplicated(names(x)))) stop("x has some duplicated column name(s): ",paste(names(x)[duplicated(names(x))],collapse=","),". Please remove or rename the duplicate(s) and try again.")
o = as.integer(chmatch(old,names(x)))
if (any(is.na(o))) stop("Names in old not found in x: ",paste(old[is.na(o)],collapse=","))
} else {
if (!is.numeric(old)) stop("old is not a character or numeric vector")
o = as.integer(old)
m = !(o %in% seq_len(length(x)))
if (any(m)) stop("Column numbers in old out of bounds: ",paste(o[m],collapse=","))
}
.Call(Csetcolorder,x,o)
return(invisible(x))
} else{
if (missing(old)) stop("When 'new' is provided, 'old' must be provided too")
if (length(new)!=length(old)) stop("'old' is length ",length(old)," but 'new' is length ",length(new))
}
.Call(Csetcolorder,x,o)
if (is.numeric(old)) {
tt = old<1L | old>length(x) | is.na(old)
if (any(tt)) stop("Items of 'old' either NA or outside range [1,",length(x),"]: ",paste(old[tt],collapse=","))
i = as.integer(old)
if (any(duplicated(i))) stop("Some duplicates exist in 'old': ",paste(i[duplicated(i)],collapse=","))
} else {
if (!is.character(old)) stop("'old' is type ",typeof(old)," but should be integer, double or character")
if (any(duplicated(old))) stop("Some duplicates exist in 'old': ", paste(old[duplicated(old)],collapse=","))
i = chmatch(old,names(x))
if (any(is.na(i))) stop("Items of 'old' not found in column names: ",paste(old[is.na(i)],collapse=","))
if (any(tt<-!is.na(chmatch(old,names(x)[-i])))) stop("Some items of 'old' are duplicated (ambiguous) in column names: ",paste(old[tt],collapse=","))
}
if (!is.numeric(new)) stop("'new' is not a numeric vector")
if (length(names(x)) != length(x)) stop("x is length ",length(dt)," but its names are length ",length(names(x)))

o <- rep(NA,length(x))
o[new] <- i
o[is.na(o)] <- setdiff(1:length(x),i)
.Call(Csetcolorder, x, o)
invisible(x)
}

Expand Down
Loading