From 2727304660663fcf1e41f7b666978c1443262e4e Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 9 Jul 2015 19:08:33 -0700 Subject: [PATCH] [SPARK-8913] [ML] Simplify LogisticRegression suite to use Vector <-> Vector comparison Clean up tests from SPARK-8700. Author: Holden Karau Closes #7335 from holdenk/SPARK-8913-cleanup-tests-from-SPARK-8700-logistic-regression-r2-really-logistic-regression-this-time and squashes the following commits: e5e2c5f [Holden Karau] Simplify LogisticRegression suite to use Vector <-> Vector comparisons instead of comparing element by element --- .../LogisticRegressionSuite.scala | 135 +++++------------- 1 file changed, 39 insertions(+), 96 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 27253c1db2fff..b7dd44753896a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -234,20 +234,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.7996864 */ val interceptR = 2.8366423 - val weightsR = Array(-0.5895848, 0.8931147, -0.3925051, -0.7996864) + val weightsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864) assert(model1.intercept ~== interceptR relTol 1E-3) - assert(model1.weights(0) ~== weightsR(0) relTol 1E-3) - assert(model1.weights(1) ~== weightsR(1) relTol 1E-3) - assert(model1.weights(2) ~== weightsR(2) relTol 1E-3) - assert(model1.weights(3) ~== weightsR(3) relTol 1E-3) + assert(model1.weights ~= weightsR relTol 1E-3) // Without regularization, with or without standardization will converge to the same solution. 
assert(model2.intercept ~== interceptR relTol 1E-3) - assert(model2.weights(0) ~== weightsR(0) relTol 1E-3) - assert(model2.weights(1) ~== weightsR(1) relTol 1E-3) - assert(model2.weights(2) ~== weightsR(2) relTol 1E-3) - assert(model2.weights(3) ~== weightsR(3) relTol 1E-3) + assert(model2.weights ~= weightsR relTol 1E-3) } test("binary logistic regression without intercept without regularization") { @@ -277,20 +271,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.7407946 */ val interceptR = 0.0 - val weightsR = Array(-0.3534996, 1.2964482, -0.3571741, -0.7407946) + val weightsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946) assert(model1.intercept ~== interceptR relTol 1E-3) - assert(model1.weights(0) ~== weightsR(0) relTol 1E-2) - assert(model1.weights(1) ~== weightsR(1) relTol 1E-2) - assert(model1.weights(2) ~== weightsR(2) relTol 1E-3) - assert(model1.weights(3) ~== weightsR(3) relTol 1E-3) + assert(model1.weights ~= weightsR relTol 1E-2) // Without regularization, with or without standardization should converge to the same solution. 
assert(model2.intercept ~== interceptR relTol 1E-3) - assert(model2.weights(0) ~== weightsR(0) relTol 1E-2) - assert(model2.weights(1) ~== weightsR(1) relTol 1E-2) - assert(model2.weights(2) ~== weightsR(2) relTol 1E-3) - assert(model2.weights(3) ~== weightsR(3) relTol 1E-3) + assert(model2.weights ~= weightsR relTol 1E-2) } test("binary logistic regression with intercept with L1 regularization") { @@ -321,13 +309,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.02481551 */ val interceptR1 = -0.05627428 - val weightsR1 = Array(0.0, 0.0, -0.04325749, -0.02481551) + val weightsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551) assert(model1.intercept ~== interceptR1 relTol 1E-2) - assert(model1.weights(0) ~== weightsR1(0) absTol 1E-3) - assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3) - assert(model1.weights(2) ~== weightsR1(2) relTol 1E-2) - assert(model1.weights(3) ~== weightsR1(3) relTol 2E-2) + assert(model1.weights ~= weightsR1 absTol 2E-2) /* Using the following R code to load the data and train the model using glmnet package. @@ -349,13 +334,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 . 
*/ val interceptR2 = 0.3722152 - val weightsR2 = Array(0.0, 0.0, -0.1665453, 0.0) + val weightsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0) assert(model2.intercept ~== interceptR2 relTol 1E-2) - assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3) - assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3) - assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2) - assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3) + assert(model2.weights ~= weightsR2 absTol 1E-3) } test("binary logistic regression without intercept with L1 regularization") { @@ -387,13 +369,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.03891782 */ val interceptR1 = 0.0 - val weightsR1 = Array(0.0, 0.0, -0.05189203, -0.03891782) + val weightsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782) assert(model1.intercept ~== interceptR1 relTol 1E-3) - assert(model1.weights(0) ~== weightsR1(0) absTol 1E-3) - assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3) - assert(model1.weights(2) ~== weightsR1(2) relTol 1E-2) - assert(model1.weights(3) ~== weightsR1(3) relTol 1E-2) + assert(model1.weights ~= weightsR1 absTol 1E-3) /* Using the following R code to load the data and train the model using glmnet package. @@ -415,13 +394,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 . 
*/ val interceptR2 = 0.0 - val weightsR2 = Array(0.0, 0.0, -0.08420782, 0.0) + val weightsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0) - assert(model2.intercept ~== interceptR2 relTol 1E-3) - assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3) - assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3) - assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2) - assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3) + assert(model2.intercept ~== interceptR2 absTol 1E-3) + assert(model2.weights ~= weightsR2 absTol 1E-3) } test("binary logistic regression with intercept with L2 regularization") { @@ -452,13 +428,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.10062872 */ val interceptR1 = 0.15021751 - val weightsR1 = Array(-0.07251837, 0.10724191, -0.04865309, -0.10062872) + val weightsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872) assert(model1.intercept ~== interceptR1 relTol 1E-3) - assert(model1.weights(0) ~== weightsR1(0) relTol 1E-3) - assert(model1.weights(1) ~== weightsR1(1) relTol 1E-3) - assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3) - assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3) + assert(model1.weights ~= weightsR1 relTol 1E-3) /* Using the following R code to load the data and train the model using glmnet package. 
@@ -480,13 +453,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.06266838 */ val interceptR2 = 0.48657516 - val weightsR2 = Array(-0.05155371, 0.02301057, -0.11482896, -0.06266838) + val weightsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838) assert(model2.intercept ~== interceptR2 relTol 1E-3) - assert(model2.weights(0) ~== weightsR2(0) relTol 1E-3) - assert(model2.weights(1) ~== weightsR2(1) relTol 1E-3) - assert(model2.weights(2) ~== weightsR2(2) relTol 1E-3) - assert(model2.weights(3) ~== weightsR2(3) relTol 1E-3) + assert(model2.weights ~= weightsR2 relTol 1E-3) } test("binary logistic regression without intercept with L2 regularization") { @@ -518,13 +488,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.09799775 */ val interceptR1 = 0.0 - val weightsR1 = Array(-0.06099165, 0.12857058, -0.04708770, -0.09799775) + val weightsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775) - assert(model1.intercept ~== interceptR1 relTol 1E-3) - assert(model1.weights(0) ~== weightsR1(0) relTol 1E-2) - assert(model1.weights(1) ~== weightsR1(1) relTol 1E-2) - assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3) - assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3) + assert(model1.intercept ~== interceptR1 absTol 1E-3) + assert(model1.weights ~= weightsR1 relTol 1E-2) /* Using the following R code to load the data and train the model using glmnet package. 
@@ -546,13 +513,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.053314311 */ val interceptR2 = 0.0 - val weightsR2 = Array(-0.005679651, 0.048967094, -0.093714016, -0.053314311) + val weightsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311) - assert(model2.intercept ~== interceptR2 relTol 1E-3) - assert(model2.weights(0) ~== weightsR2(0) relTol 1E-2) - assert(model2.weights(1) ~== weightsR2(1) relTol 1E-2) - assert(model2.weights(2) ~== weightsR2(2) relTol 1E-3) - assert(model2.weights(3) ~== weightsR2(3) relTol 1E-3) + assert(model2.intercept ~== interceptR2 absTol 1E-3) + assert(model2.weights ~= weightsR2 relTol 1E-2) } test("binary logistic regression with intercept with ElasticNet regularization") { @@ -583,13 +547,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.15458796 */ val interceptR1 = 0.57734851 - val weightsR1 = Array(-0.05310287, 0.0, -0.08849250, -0.15458796) + val weightsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796) assert(model1.intercept ~== interceptR1 relTol 6E-3) - assert(model1.weights(0) ~== weightsR1(0) relTol 5E-3) - assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3) - assert(model1.weights(2) ~== weightsR1(2) relTol 5E-3) - assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3) + assert(model1.weights ~== weightsR1 absTol 5E-3) /* Using the following R code to load the data and train the model using glmnet package. 
@@ -611,13 +572,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.05350074 */ val interceptR2 = 0.51555993 - val weightsR2 = Array(0.0, 0.0, -0.18807395, -0.05350074) + val weightsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074) assert(model2.intercept ~== interceptR2 relTol 6E-3) - assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3) - assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3) - assert(model2.weights(2) ~== weightsR2(2) relTol 5E-3) - assert(model2.weights(3) ~== weightsR2(3) relTol 1E-2) + assert(model2.weights ~= weightsR2 absTol 1E-3) } test("binary logistic regression without intercept with ElasticNet regularization") { @@ -649,13 +607,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 -0.142534158 */ val interceptR1 = 0.0 - val weightsR1 = Array(-0.001005743, 0.072577857, -0.081203769, -0.142534158) + val weightsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158) assert(model1.intercept ~== interceptR1 relTol 1E-3) - assert(model1.weights(0) ~== weightsR1(0) absTol 1E-2) - assert(model1.weights(1) ~== weightsR1(1) absTol 1E-2) - assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3) - assert(model1.weights(3) ~== weightsR1(3) relTol 1E-2) + assert(model1.weights ~= weightsR1 absTol 1E-2) /* Using the following R code to load the data and train the model using glmnet package. @@ -677,13 +632,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 . 
*/ val interceptR2 = 0.0 - val weightsR2 = Array(0.0, 0.03345223, -0.11304532, 0.0) + val weightsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0) - assert(model2.intercept ~== interceptR2 relTol 1E-3) - assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3) - assert(model2.weights(1) ~== weightsR2(1) relTol 1E-2) - assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2) - assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3) + assert(model2.intercept ~== interceptR2 absTol 1E-3) + assert(model2.weights ~= weightsR2 absTol 1E-3) } test("binary logistic regression with intercept with strong L1 regularization") { @@ -717,19 +669,13 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { }}} */ val interceptTheory = math.log(histogram(1).toDouble / histogram(0).toDouble) - val weightsTheory = Array(0.0, 0.0, 0.0, 0.0) + val weightsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0) assert(model1.intercept ~== interceptTheory relTol 1E-5) - assert(model1.weights(0) ~== weightsTheory(0) absTol 1E-6) - assert(model1.weights(1) ~== weightsTheory(1) absTol 1E-6) - assert(model1.weights(2) ~== weightsTheory(2) absTol 1E-6) - assert(model1.weights(3) ~== weightsTheory(3) absTol 1E-6) + assert(model1.weights ~= weightsTheory absTol 1E-6) assert(model2.intercept ~== interceptTheory relTol 1E-5) - assert(model2.weights(0) ~== weightsTheory(0) absTol 1E-6) - assert(model2.weights(1) ~== weightsTheory(1) absTol 1E-6) - assert(model2.weights(2) ~== weightsTheory(2) absTol 1E-6) - assert(model2.weights(3) ~== weightsTheory(3) absTol 1E-6) + assert(model2.weights ~= weightsTheory absTol 1E-6) /* Using the following R code to load the data and train the model using glmnet package. @@ -750,12 +696,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { data.V5 . 
*/ val interceptR = -0.248065 - val weightsR = Array(0.0, 0.0, 0.0, 0.0) + val weightsR = Vectors.dense(0.0, 0.0, 0.0, 0.0) assert(model1.intercept ~== interceptR relTol 1E-5) - assert(model1.weights(0) ~== weightsR(0) absTol 1E-6) - assert(model1.weights(1) ~== weightsR(1) absTol 1E-6) - assert(model1.weights(2) ~== weightsR(2) absTol 1E-6) - assert(model1.weights(3) ~== weightsR(3) absTol 1E-6) + assert(model1.weights ~= weightsR absTol 1E-6) } }