diff --git a/NEWS.md b/NEWS.md index 9bf8cf3..b7e533f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # implyr (development version) * Fixed bugs in table creation (#47, @karoliskascenas) +* Allow user to set `copy_to()` size limit with option `implyr.copy_to_size_limit` (#49, @karoliskascenas) * Added more SQL translations * Fixed a DBI identifier quoting problem causing errors with dbplyr 2.0.0 (#48) diff --git a/R/src_impala.R b/R/src_impala.R index 6d7f8af..7ac5a87 100644 --- a/R/src_impala.R +++ b/R/src_impala.R @@ -275,9 +275,12 @@ tbl.src_impala <- function(src, from, ...) { #' @name copy_to #' @description #' \code{copy_to} inserts the contents of a local data frame into a new Impala -#' table. \code{copy_to} currently only supports very small data frames (1000 or -#' fewer row/column positions). It uses the SQL \code{INSERT ... VALUES()} -#' technique, which is not suitable for loading large amounts of data. +#' table. \code{copy_to} is intended to be used only with very small data +#' frames. It uses the SQL \code{INSERT ... VALUES()} technique, which is not +#' suitable for loading large amounts of data. By default, this function will +#' throw an error if you attempt to copy a data frame with more than 1000 +#' row/column positions. You can increase this limit at your own risk by setting +#' the \link{option} \code{implyr.copy_to_size_limit} to a higher number. #' #' This package does not provide tools for loading larger amounts of local data #' into Impala tables. This is because Impala can query data stored in several @@ -337,13 +340,12 @@ copy_to.src_impala <- file_format = NULL, ...) { # don't try to insert large data frames with INSERT ... VALUES() - if (prod(dim(df)) > 1e3L) { - # TBD: consider whether to make this limit configurable, possibly using - # options with the pkgconfig package + if (prod(dim(df)) > getOption("implyr.copy_to_size_limit", 1e3L)) { stop( "Data frame ", name, - " is too large. copy_to currently only supports very small data frames.", + " is too large. ", + "implyr::copy_to() is only for use with very small data frames.", call. = FALSE ) } diff --git a/man/copy_to.Rd b/man/copy_to.Rd index ec22e6e..385b10f 100644 --- a/man/copy_to.Rd +++ b/man/copy_to.Rd @@ -5,11 +5,23 @@ \alias{copy_to.src_impala} \title{Copy a (very small) local data frame to Impala} \usage{ -\method{copy_to}{src_impala}(dest, df, name = deparse(substitute(df)), - overwrite = FALSE, types = NULL, temporary = TRUE, - unique_indexes = NULL, indexes = NULL, analyze = TRUE, - external = FALSE, force = FALSE, field_terminator = NULL, - line_terminator = NULL, file_format = NULL, ...) +\method{copy_to}{src_impala}( + dest, + df, + name = deparse(substitute(df)), + overwrite = FALSE, + types = NULL, + temporary = TRUE, + unique_indexes = NULL, + indexes = NULL, + analyze = TRUE, + external = FALSE, + force = FALSE, + field_terminator = NULL, + line_terminator = NULL, + file_format = NULL, + ... +) } \arguments{ \item{dest}{an object with class with class \code{src_impala}} @@ -51,9 +63,12 @@ An object with class \code{tbl_impala}, \code{tbl_sql}, } \description{ \code{copy_to} inserts the contents of a local data frame into a new Impala -table. \code{copy_to} currently only supports very small data frames (1000 or -fewer row/column positions). It uses the SQL \code{INSERT ... VALUES()} -technique, which is not suitable for loading large amounts of data. +table. \code{copy_to} is intended to be used only with very small data +frames. It uses the SQL \code{INSERT ... VALUES()} technique, which is not +suitable for loading large amounts of data. By default, this function will +throw an error if you attempt to copy a data frame with more than 1000 +row/column positions. You can increase this limit at your own risk by setting +the \link{option} \code{implyr.copy_to_size_limit} to a higher number. This package does not provide tools for loading larger amounts of local data into Impala tables. This is because Impala can query data stored in several