[SPARK-12034][SPARKR] Eliminate warnings in SparkR test cases.
This PR:
1. Suppress all known warnings.
2. Clean up test cases and fix some errors in them.
3. Fix errors in HiveContext-related test cases. These test cases were previously not actually run, due to a bug in creating the TestHiveContext.
4. Support 'testthat' package version 0.11.0, which prefers that test cases live under 'tests/testthat'.
5. Make sure the default Hadoop file system is local when running test cases.
6. Turn warnings into errors.

Author: Sun Rui <[email protected]>

Closes apache#10030 from sun-rui/SPARK-12034.
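
Points 1 and 6 of the summary work together: once warnings are promoted to errors, any warning raised inside a test case fails the run, so the known, expected warnings have to be silenced explicitly at the call site. A minimal sketch of the pattern in plain R (no Spark needed; as.numeric() here is just a stand-in for any warning-producing call):

    # Promote every warning to an error, as run-all.R does below.
    options(warn = 2)

    # An unexpected warning now aborts instead of passing silently:
    # as.numeric("oops")   # Error: (converted from warning) NAs introduced by coercion

    # A known, expected warning must be silenced explicitly:
    x <- suppressWarnings(as.numeric("oops"))   # NA, no error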
Sun Rui authored and shivaram committed Dec 7, 2015
1 parent 9cde7d5 commit 39d677c
Showing 20 changed files with 50 additions and 39 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -20,7 +20,7 @@ runScript <- function() {
   sparkHome <- Sys.getenv("SPARK_HOME")
   sparkTestJarPath <- "R/lib/SparkR/test_support/sparktestjar_2.10-1.0.jar"
   jarPath <- paste("--jars", shQuote(file.path(sparkHome, sparkTestJarPath)))
-  scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/jarTest.R")
+  scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/testthat/jarTest.R")
   submitPath <- file.path(sparkHome, "bin/spark-submit")
   res <- system2(command = submitPath,
                  args = c(jarPath, scriptPath),
File renamed without changes.
@@ -26,7 +26,7 @@ sc <- sparkR.init()
 sqlContext <- sparkRSQL.init(sc)
 
 test_that("glm and predict", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   test <- select(training, "Sepal_Length")
   model <- glm(Sepal_Width ~ Sepal_Length, training, family = "gaussian")
   prediction <- predict(model, test)
@@ -39,7 +39,7 @@ test_that("glm and predict", {
 })
 
 test_that("glm should work with long formula", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   training$LongLongLongLongLongName <- training$Sepal_Width
   training$VeryLongLongLongLonLongName <- training$Sepal_Length
   training$AnotherLongLongLongLongName <- training$Species
@@ -51,31 +51,31 @@ test_that("glm should work with long formula", {
 })
 
 test_that("predictions match with native glm", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)
   vals <- collect(select(predict(model, training), "prediction"))
   rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
   expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
 })
 
 test_that("dot minus and intercept vs native glm", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   model <- glm(Sepal_Width ~ . - Species + 0, data = training)
   vals <- collect(select(predict(model, training), "prediction"))
   rVals <- predict(glm(Sepal.Width ~ . - Species + 0, data = iris), iris)
   expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
 })
 
 test_that("feature interaction vs native glm", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   model <- glm(Sepal_Width ~ Species:Sepal_Length, data = training)
   vals <- collect(select(predict(model, training), "prediction"))
   rVals <- predict(glm(Sepal.Width ~ Species:Sepal.Length, data = iris), iris)
   expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
 })
 
 test_that("summary coefficients match with native glm", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   stats <- summary(glm(Sepal_Width ~ Sepal_Length + Species, data = training, solver = "normal"))
   coefs <- unlist(stats$coefficients)
   devianceResiduals <- unlist(stats$devianceResiduals)
@@ -92,7 +92,7 @@ test_that("summary coefficients match with native glm", {
 })
 
 test_that("summary coefficients match with native glm of family 'binomial'", {
-  df <- createDataFrame(sqlContext, iris)
+  df <- suppressWarnings(createDataFrame(sqlContext, iris))
   training <- filter(df, df$Species != "setosa")
   stats <- summary(glm(Species ~ Sepal_Length + Sepal_Width, data = training,
                        family = "binomial"))
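
The suppressWarnings() wrappers added above are needed because iris has dotted column names (Sepal.Length and so on), which SparkR does not support: createDataFrame() substitutes underscores and emits a warning while doing so, and under options(warn = 2) that warning would abort the test. A sketch of the pattern, assuming a running 1.6-era SparkR session with sqlContext in scope:

    # createDataFrame() warns that Sepal.Length is used as Sepal_Length, etc.;
    # the warning is expected, so it is silenced rather than allowed to become
    # an error under options(warn = 2):
    training <- suppressWarnings(createDataFrame(sqlContext, iris))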
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -133,38 +133,45 @@ test_that("create DataFrame from RDD", {
   expect_equal(columns(df), c("a", "b"))
   expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
 
-  df <- jsonFile(sqlContext, jsonPathNa)
-  hiveCtx <- tryCatch({
-    newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
-  },
-  error = function(err) {
-    skip("Hive is not build with SparkSQL, skipped")
-  })
-  sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
-  insertInto(df, "people")
-  expect_equal(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"), c(16))
-  expect_equal(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"), c(176.5))
-
   schema <- structType(structField("name", "string"), structField("age", "integer"),
                        structField("height", "float"))
-  df2 <- createDataFrame(sqlContext, df.toRDD, schema)
-  df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema)
+  df <- read.df(sqlContext, jsonPathNa, "json", schema)
+  df2 <- createDataFrame(sqlContext, toRDD(df), schema)
+  df2AsDF <- as.DataFrame(sqlContext, toRDD(df), schema)
   expect_equal(columns(df2), c("name", "age", "height"))
   expect_equal(columns(df2AsDF), c("name", "age", "height"))
   expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
   expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
-  expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
-  expect_equal(collect(where(df2AsDF, df2$name == "Bob")), c("Bob", 16, 176.5))
+  expect_equal(as.list(collect(where(df2, df2$name == "Bob"))),
+               list(name = "Bob", age = 16, height = 176.5))
+  expect_equal(as.list(collect(where(df2AsDF, df2AsDF$name == "Bob"))),
+               list(name = "Bob", age = 16, height = 176.5))
 
   localDF <- data.frame(name=c("John", "Smith", "Sarah"),
-                        age=c(19, 23, 18),
-                        height=c(164.10, 181.4, 173.7))
+                        age=c(19L, 23L, 18L),
+                        height=c(176.5, 181.4, 173.7))
   df <- createDataFrame(sqlContext, localDF, schema)
   expect_is(df, "DataFrame")
   expect_equal(count(df), 3)
   expect_equal(columns(df), c("name", "age", "height"))
   expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
-  expect_equal(collect(where(df, df$name == "John")), c("John", 19, 164.10))
+  expect_equal(as.list(collect(where(df, df$name == "John"))),
+               list(name = "John", age = 19L, height = 176.5))
+
+  ssc <- callJMethod(sc, "sc")
+  hiveCtx <- tryCatch({
+    newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
+  },
+  error = function(err) {
+    skip("Hive is not build with SparkSQL, skipped")
+  })
+  sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
+  df <- read.df(hiveCtx, jsonPathNa, "json", schema)
+  invisible(insertInto(df, "people"))
+  expect_equal(collect(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"))$age,
+               c(16))
+  expect_equal(collect(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"))$height,
+               c(176.5))
 })
 
 test_that("convert NAs to null type in DataFrames", {
@@ -250,7 +257,7 @@ test_that("create DataFrame from list or data.frame", {
   ldf2 <- collect(df)
   expect_equal(ldf$a, ldf2$a)
 
-  irisdf <- createDataFrame(sqlContext, iris)
+  irisdf <- suppressWarnings(createDataFrame(sqlContext, iris))
   iris_collected <- collect(irisdf)
   expect_equivalent(iris_collected[,-5], iris[,-5])
   expect_equal(iris_collected$Species, as.character(iris$Species))
@@ -463,7 +470,7 @@ test_that("union on two RDDs created from DataFrames returns an RRDD", {
   RDD2 <- toRDD(df)
   unioned <- unionRDD(RDD1, RDD2)
   expect_is(unioned, "RDD")
-  expect_equal(SparkR:::getSerializedMode(unioned), "byte")
+  expect_equal(getSerializedMode(unioned), "byte")
   expect_equal(collect(unioned)[[2]]$name, "Andy")
 })
 
@@ -485,13 +492,13 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", {
 
   unionByte <- unionRDD(rdd, dfRDD)
   expect_is(unionByte, "RDD")
-  expect_equal(SparkR:::getSerializedMode(unionByte), "byte")
+  expect_equal(getSerializedMode(unionByte), "byte")
   expect_equal(collect(unionByte)[[1]], 1)
   expect_equal(collect(unionByte)[[12]]$name, "Andy")
 
   unionString <- unionRDD(textRDD, dfRDD)
   expect_is(unionString, "RDD")
-  expect_equal(SparkR:::getSerializedMode(unionString), "byte")
+  expect_equal(getSerializedMode(unionString), "byte")
   expect_equal(collect(unionString)[[1]], "Michael")
   expect_equal(collect(unionString)[[5]]$name, "Andy")
 })
@@ -504,7 +511,7 @@ test_that("objectFile() works with row serialization", {
   objectIn <- objectFile(sc, objectPath)
 
   expect_is(objectIn, "RDD")
-  expect_equal(SparkR:::getSerializedMode(objectIn), "byte")
+  expect_equal(getSerializedMode(objectIn), "byte")
   expect_equal(collect(objectIn)[[2]]$age, 30)
 })
 
@@ -849,6 +856,7 @@ test_that("write.df() as parquet file", {
 })
 
 test_that("test HiveContext", {
+  ssc <- callJMethod(sc, "sc")
   hiveCtx <- tryCatch({
     newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
   },
@@ -863,10 +871,10 @@ test_that("test HiveContext", {
   expect_equal(count(df2), 3)
 
   jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp")
-  saveAsTable(df, "json", "json", "append", path = jsonPath2)
-  df3 <- sql(hiveCtx, "select * from json")
+  invisible(saveAsTable(df, "json2", "json", "append", path = jsonPath2))
+  df3 <- sql(hiveCtx, "select * from json2")
   expect_is(df3, "DataFrame")
-  expect_equal(count(df3), 6)
+  expect_equal(count(df3), 3)
 })
 
 test_that("column operators", {
@@ -1311,7 +1319,7 @@ test_that("toJSON() returns an RDD of the correct values", {
   df <- jsonFile(sqlContext, jsonPath)
   testRDD <- toJSON(df)
   expect_is(testRDD, "RDD")
-  expect_equal(SparkR:::getSerializedMode(testRDD), "string")
+  expect_equal(getSerializedMode(testRDD), "string")
   expect_equal(collect(testRDD)[[1]], mockLines[1])
 })
 
@@ -1641,7 +1649,7 @@ test_that("SQL error message is returned from JVM", {
   expect_equal(grepl("Table not found: blah", retError), TRUE)
 })
 
-irisDF <- createDataFrame(sqlContext, iris)
+irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
 
 test_that("Method as.data.frame as a synonym for collect()", {
   expect_equal(as.data.frame(irisDF), collect(irisDF))
@@ -1670,7 +1678,7 @@ test_that("attach() on a DataFrame", {
 })
 
 test_that("with() on a DataFrame", {
-  df <- createDataFrame(sqlContext, iris)
+  df <- suppressWarnings(createDataFrame(sqlContext, iris))
   expect_error(Sepal_Length)
   sum1 <- with(df, list(summary(Sepal_Length), summary(Sepal_Width)))
   expect_equal(collect(sum1[[1]])[1, "Sepal_Length"], "150")
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions R/pkg/tests/run-all.R
@@ -18,4 +18,7 @@
 library(testthat)
 library(SparkR)
 
+# Turn all warnings into errors
+options("warn" = 2)
+
 test_package("SparkR")
2 changes: 1 addition & 1 deletion R/run-tests.sh
@@ -23,7 +23,7 @@ FAILED=0
 LOGFILE=$FWDIR/unit-tests.out
 rm -f $LOGFILE
 
-SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.default.name="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
 FAILED=$((PIPESTATUS[0]||$FAILED))
 
 if [[ $FAILED != 0 ]]; then
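
The added --conf forces Hadoop's default filesystem to the local one, so the temporary paths the tests write to resolve on local disk even when the environment is configured for an HDFS cluster (point 5 of the summary). For illustration only, the same property could plausibly be passed from R when initializing the context; this is a hypothetical sketch, not part of the diff:

    # Hypothetical: set the Hadoop default-FS property as a Spark conf entry
    # via sparkR.init() (1.6-era API), mirroring the --conf flag above.
    sc <- sparkR.init(master = "local[2]", appName = "SparkR-tests",
                      sparkEnvir = list(spark.hadoop.fs.default.name = "file:///"))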
