Skip to content

Commit

Permalink
Merge branch 'master' into feature/cap-alias-substitution
Browse files Browse the repository at this point in the history
  • Loading branch information
j-esse authored Mar 14, 2019
2 parents 22f594a + 8b52240 commit 11e2b8a
Show file tree
Hide file tree
Showing 1,046 changed files with 29,155 additions and 11,727 deletions.
12 changes: 9 additions & 3 deletions LICENSE-binary
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,6 @@ net.sf.supercsv:super-csv
org.apache.arrow:arrow-format
org.apache.arrow:arrow-memory
org.apache.arrow:arrow-vector
org.apache.calcite:calcite-avatica
org.apache.calcite:calcite-core
org.apache.calcite:calcite-linq4j
org.apache.commons:commons-crypto
org.apache.commons:commons-lang3
org.apache.hadoop:hadoop-annotations
Expand Down Expand Up @@ -487,6 +484,15 @@ org.glassfish.jersey.core:jersey-server
org.glassfish.jersey.media:jersey-media-jaxb


Eclipse Distribution License (EDL) 1.0
--------------------------------------

org.glassfish.jaxb:jaxb-runtime
jakarta.xml.bind:jakarta.xml.bind-api
com.sun.istack:istack-commons-runtime
jakarta.activation:jakarta.activation-api


Mozilla Public License (MPL) 1.1
--------------------------------

Expand Down
9 changes: 0 additions & 9 deletions NOTICE-binary
Original file line number Diff line number Diff line change
Expand Up @@ -792,15 +792,6 @@ Copyright 2005-2006 The Apache Software Foundation
Apache Jakarta HttpClient
Copyright 1999-2007 The Apache Software Foundation

Calcite Avatica
Copyright 2012-2015 The Apache Software Foundation

Calcite Core
Copyright 2012-2015 The Apache Software Foundation

Calcite Linq4j
Copyright 2012-2015 The Apache Software Foundation

Apache HttpClient
Copyright 1999-2017 The Apache Software Foundation

Expand Down
8 changes: 4 additions & 4 deletions R/pkg/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: SparkR
Type: Package
Version: 3.0.0
Title: R Frontend for Apache Spark
Description: Provides an R Frontend for Apache Spark.
Title: R Front end for 'Apache Spark'
Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>.
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
email = "[email protected]"),
person("Xiangrui", "Meng", role = "aut",
Expand All @@ -11,8 +11,8 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
email = "[email protected]"),
person(family = "The Apache Software Foundation", role = c("aut", "cph")))
License: Apache License (== 2.0)
URL: http://www.apache.org/ http://spark.apache.org/
BugReports: http://spark.apache.org/contributing.html
URL: https://www.apache.org/ https://spark.apache.org/
BugReports: https://spark.apache.org/contributing.html
SystemRequirements: Java (== 8)
Depends:
R (>= 3.1),
Expand Down
2 changes: 2 additions & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,10 @@ exportMethods("%<=>%",
"lower",
"lpad",
"ltrim",
"map_concat",
"map_entries",
"map_from_arrays",
"map_from_entries",
"map_keys",
"map_values",
"max",
Expand Down
53 changes: 52 additions & 1 deletion R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,7 @@ setMethod("write.parquet",
#'
#' Save the content of the SparkDataFrame in a text file at the specified path.
#' The SparkDataFrame must have only one column of string type with the name "value".
#' Each row becomes a new line in the output file.
#' Each row becomes a new line in the output file. The text files will be encoded as UTF-8.
#'
#' @param x A SparkDataFrame
#' @param path The directory where the file is saved
Expand Down Expand Up @@ -1177,11 +1177,50 @@ setMethod("dim",
setMethod("collect",
signature(x = "SparkDataFrame"),
function(x, stringsAsFactors = FALSE) {
connectionTimeout <- as.numeric(Sys.getenv("SPARKR_BACKEND_CONNECTION_TIMEOUT", "6000"))
useArrow <- FALSE
arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.enabled")[[1]] == "true"
if (arrowEnabled) {
useArrow <- tryCatch({
checkSchemaInArrow(schema(x))
TRUE
}, error = function(e) {
warning(paste0("The conversion from Spark DataFrame to R DataFrame was attempted ",
"with Arrow optimization because ",
"'spark.sql.execution.arrow.enabled' is set to true; however, ",
"failed, attempting non-optimization. Reason: ",
e))
FALSE
})
}

dtypes <- dtypes(x)
ncol <- length(dtypes)
if (ncol <= 0) {
# empty data.frame with 0 columns and 0 rows
data.frame()
} else if (useArrow) {
requireNamespace1 <- requireNamespace
if (requireNamespace1("arrow", quietly = TRUE)) {
read_arrow <- get("read_arrow", envir = asNamespace("arrow"), inherits = FALSE)
as_tibble <- get("as_tibble", envir = asNamespace("arrow"))

portAuth <- callJMethod(x@sdf, "collectAsArrowToR")
port <- portAuth[[1]]
authSecret <- portAuth[[2]]
conn <- socketConnection(
port = port, blocking = TRUE, open = "wb", timeout = connectionTimeout)
output <- tryCatch({
doServerAuth(conn, authSecret)
arrowTable <- read_arrow(readRaw(conn))
as.data.frame(as_tibble(arrowTable), stringsAsFactors = stringsAsFactors)
}, finally = {
close(conn)
})
return(output)
} else {
stop("'arrow' package should be installed.")
}
} else {
# listCols is a list of columns
listCols <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "dfToCols", x@sdf)
Expand Down Expand Up @@ -1437,6 +1476,18 @@ dapplyInternal <- function(x, func, schema) {
schema <- structType(schema)
}

arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.enabled")[[1]] == "true"
if (arrowEnabled) {
if (inherits(schema, "structType")) {
checkSchemaInArrow(schema)
} else if (is.null(schema)) {
stop(paste0("Arrow optimization does not support 'dapplyCollect' yet. Please disable ",
"Arrow optimization or use 'collect' and 'dapply' APIs instead."))
} else {
stop("'schema' should be DDL-formatted string or structType.")
}
}

packageNamesArr <- serialize(.sparkREnv[[".packages"]],
connection = NULL)

Expand Down
Loading

0 comments on commit 11e2b8a

Please sign in to comment.