apache · nealrichardson · May 29, 2024 · May 3, 2024 · May 4, 2024 · May 5, 2024
@@ -34,7 +34,11 @@ expand_across <- function(.data, quos_in, exclude_cols = NULL) {
       )
 
       if (!all(names(across_call[-1]) %in% c(".cols", ".fns", ".names"))) {
-        abort("`...` argument to `across()` is deprecated in dplyr and not supported in Arrow")
+        arrow_not_supported(
+          "`...` argument to `across()` is deprecated in dplyr and",
+          body = c(">" = "Convert your call into a function or formula including the arguments"),
+          call = rlang::caller_call()
+        )
       }
 
       if (!is.null(across_call[[".cols"]])) {

@@ -19,47 +19,46 @@
 # The following S3 methods are registered on load if dplyr is present
 
 arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) {
-  call <- match.call()
-  .data <- as_adq(.data)
-  exprs <- expand_across(.data, quos(...))
+  try_arrow_dplyr({
+    .data <- as_adq(.data)
+    exprs <- expand_across(.data, quos(...))
 
-  if (.by_group) {
-    # when the data is grouped and .by_group is TRUE, order the result by
-    # the grouping columns first
-    exprs <- c(quos(!!!dplyr::groups(.data)), exprs)
-  }
-  if (length(exprs) == 0) {
-    # Nothing to do
-    return(.data)
-  }
-  .data <- as_adq(.data)
-  # find and remove any dplyr::desc() and tidy-eval
-  # the arrange expressions inside an Arrow data_mask
-  sorts <- vector("list", length(exprs))
-  descs <- logical(0)
-  mask <- arrow_mask(.data)
-  for (i in seq_along(exprs)) {
-    x <- find_and_remove_desc(exprs[[i]])
-    exprs[[i]] <- x[["quos"]]
-    sorts[[i]] <- arrow_eval(exprs[[i]], mask)
-    names(sorts)[i] <- format_expr(exprs[[i]])
-    if (inherits(sorts[[i]], "try-error")) {
-      msg <- paste("Expression", names(sorts)[i], "not supported in Arrow")
-      return(abandon_ship(call, .data, msg))
+    if (.by_group) {
+      # when the data is grouped and .by_group is TRUE, order the result by
+      # the grouping columns first
+      exprs <- c(quos(!!!dplyr::groups(.data)), exprs)
     }
-    if (length(mask$.aggregations)) {
-      # dplyr lets you arrange on e.g. x < mean(x), but we haven't implemented it.
-      # But we could, the same way it works in mutate() via join, if someone asks.
-      # Until then, just error.
-      # TODO: add a test for this
-      msg <- paste("Expression", format_expr(expr), "not supported in arrange() in Arrow")
-      return(abandon_ship(call, .data, msg))
+    if (length(exprs) == 0) {
+      # Nothing to do
+      return(.data)
     }
-    descs[i] <- x[["desc"]]
-  }
-  .data$arrange_vars <- c(sorts, .data$arrange_vars)
-  .data$arrange_desc <- c(descs, .data$arrange_desc)
-  .data
+    .data <- as_adq(.data)
+    # find and remove any dplyr::desc() and tidy-eval
+    # the arrange expressions inside an Arrow data_mask
+    sorts <- vector("list", length(exprs))
+    descs <- logical(0)
+    mask <- arrow_mask(.data)
+    for (i in seq_along(exprs)) {
+      x <- find_and_remove_desc(exprs[[i]])
+      exprs[[i]] <- x[["quos"]]
+      sorts[[i]] <- arrow_eval(exprs[[i]], mask)
+      names(sorts)[i] <- format_expr(exprs[[i]])
+      if (length(mask$.aggregations)) {
+        # dplyr lets you arrange on e.g. x < mean(x), but we haven't implemented it.
+        # But we could, the same way it works in mutate() via join, if someone asks.
+        # Until then, just error.
+        # TODO: add a test for this
+        arrow_not_supported(
+          .actual_msg = "Expression not supported in arrange() in Arrow",
+          call = expr
+        )
+      }
+      descs[i] <- x[["desc"]]
+    }
+    .data$arrange_vars <- c(sorts, .data$arrange_vars)
+    .data$arrange_desc <- c(descs, .data$arrange_desc)
+    .data
+  })
 }
 arrange.Dataset <- arrange.ArrowTabular <- arrange.RecordBatchReader <- arrange.arrow_dplyr_query
 
@@ -73,10 +72,9 @@ find_and_remove_desc <- function(quosure) {
   expr <- quo_get_expr(quosure)
   descending <- FALSE
   if (length(all.vars(expr)) < 1L) {
-    stop(
-      "Expression in arrange() does not contain any field names: ",
-      deparse(expr),
-      call. = FALSE
+    validation_error(
+      "Expression in arrange() does not contain any field names",
+      call = quosure
     )
   }
   # Use a while loop to remove any number of nested pairs of enclosing
@@ -90,7 +88,10 @@ find_and_remove_desc <- function(quosure) {
       # ensure desc() has only one argument (when an R expression is a function
       # call, length == 2 means it has exactly one argument)
       if (length(expr) > 2) {
-        stop("desc() expects only one argument", call. = FALSE)
+        validation_error(
+          "desc() expects only one argument",
+          call = expr
+        )
       }
       # remove desc() and toggle descending
       expr <- expr[[2]]

@@ -18,10 +18,10 @@
 check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) {
   if (tolower(Sys.info()[["sysname"]]) == "windows" && locale != "C") {
     # MingW C++ std::locale only supports "C" and "POSIX"
-    stop(paste0(
-      "On Windows, time locales other than 'C' are not supported in Arrow. ",
-      "Consider setting `Sys.setlocale('LC_TIME', 'C')`"
-    ))
+    arrow_not_supported(
+      "On Windows, time locales other than 'C'",
+      body = c(">" = "Consider setting `Sys.setlocale('LC_TIME', 'C')`")
+    )
   }
   locale
 }
@@ -56,13 +56,15 @@ duration_from_chunks <- function(chunks) {
   matched_chunks <- accepted_chunks[pmatch(names(chunks), accepted_chunks, duplicates.ok = TRUE)]
 
   if (any(is.na(matched_chunks))) {
-    abort(
-      paste0(
-        "named `difftime` units other than: ",
-        oxford_paste(accepted_chunks, quote_symbol = "`"),
-        " not supported in Arrow. \nInvalid `difftime` parts: ",
+    arrow_not_supported(
+      paste(
+        "named `difftime` units other than:",
+        oxford_paste(accepted_chunks, quote_symbol = "`")
+      ),
+      body = c(i = paste(
+        "Invalid `difftime` parts:",
         oxford_paste(names(chunks[is.na(matched_chunks)]), quote_symbol = "`")
-      )
+      ))
     )
   }
 
@@ -114,7 +116,6 @@ binding_as_date_character <- function(x,
 }
 
 binding_as_date_numeric <- function(x, origin = "1970-01-01") {
-
   # Arrow does not support direct casting from double to date32(), but for
   # integer-like values we can go via int32()
   # TODO: revisit after ARROW-15798
@@ -442,7 +443,7 @@ parse_period_unit <- function(x) {
   unit <- as.integer(pmatch(str_unit_start, known_units)) - 1L
 
   if (any(is.na(unit))) {
-    abort(
+    validation_error(
       sprintf(
         "Invalid period name: '%s'",
         str_unit,
@@ -484,13 +485,13 @@ parse_period_unit <- function(x) {
   # more special cases: lubridate imposes sensible maximum
   # values on the number of seconds, minutes and hours
   if (unit == 3L && multiple > 60) {
-    abort("Rounding with second > 60 is not supported")
+    validation_error("Rounding with second > 60 is not supported")
   }
   if (unit == 4L && multiple > 60) {
-    abort("Rounding with minute > 60 is not supported")
+    validation_error("Rounding with minute > 60 is not supported")
   }
   if (unit == 5L && multiple > 24) {
-    abort("Rounding with hour > 24 is not supported")
+    validation_error("Rounding with hour > 24 is not supported")
   }
 
   list(unit = unit, multiple = multiple)