Skip to content

Commit

Permalink
[SPARK-8913] [ML] Simplify LogisticRegression suite to use Vector Vec…
Browse files Browse the repository at this point in the history
…tor comparison

Clean up tests from SPARK-8700.

Author: Holden Karau <[email protected]>

Closes apache#7335 from holdenk/SPARK-8913-cleanup-tests-from-SPARK-8700-logistic-regression-r2-really-logistic-regression-this-time and squashes the following commits:

e5e2c5f [Holden Karau] Simplify LogisticRegression suite to use Vector <-> Vector comparisons instead of comparing element by element
  • Loading branch information
holdenk authored and DB Tsai committed Jul 10, 2015
1 parent 0e78e40 commit 2727304
Showing 1 changed file with 39 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -234,20 +234,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7996864
*/
val interceptR = 2.8366423
val weightsR = Array(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
val weightsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)

assert(model1.intercept ~== interceptR relTol 1E-3)
assert(model1.weights(0) ~== weightsR(0) relTol 1E-3)
assert(model1.weights(1) ~== weightsR(1) relTol 1E-3)
assert(model1.weights(2) ~== weightsR(2) relTol 1E-3)
assert(model1.weights(3) ~== weightsR(3) relTol 1E-3)
assert(model1.weights ~= weightsR relTol 1E-3)

// Without regularization, with or without standardization should converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
assert(model2.weights(0) ~== weightsR(0) relTol 1E-3)
assert(model2.weights(1) ~== weightsR(1) relTol 1E-3)
assert(model2.weights(2) ~== weightsR(2) relTol 1E-3)
assert(model2.weights(3) ~== weightsR(3) relTol 1E-3)
assert(model2.weights ~= weightsR relTol 1E-3)
}

test("binary logistic regression without intercept without regularization") {
Expand Down Expand Up @@ -277,20 +271,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7407946
*/
val interceptR = 0.0
val weightsR = Array(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
val weightsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)

assert(model1.intercept ~== interceptR relTol 1E-3)
assert(model1.weights(0) ~== weightsR(0) relTol 1E-2)
assert(model1.weights(1) ~== weightsR(1) relTol 1E-2)
assert(model1.weights(2) ~== weightsR(2) relTol 1E-3)
assert(model1.weights(3) ~== weightsR(3) relTol 1E-3)
assert(model1.weights ~= weightsR relTol 1E-2)

// Without regularization, with or without standardization should converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
assert(model2.weights(0) ~== weightsR(0) relTol 1E-2)
assert(model2.weights(1) ~== weightsR(1) relTol 1E-2)
assert(model2.weights(2) ~== weightsR(2) relTol 1E-3)
assert(model2.weights(3) ~== weightsR(3) relTol 1E-3)
assert(model2.weights ~= weightsR relTol 1E-2)
}

test("binary logistic regression with intercept with L1 regularization") {
Expand Down Expand Up @@ -321,13 +309,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.02481551
*/
val interceptR1 = -0.05627428
val weightsR1 = Array(0.0, 0.0, -0.04325749, -0.02481551)
val weightsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)

assert(model1.intercept ~== interceptR1 relTol 1E-2)
assert(model1.weights(0) ~== weightsR1(0) absTol 1E-3)
assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3)
assert(model1.weights(2) ~== weightsR1(2) relTol 1E-2)
assert(model1.weights(3) ~== weightsR1(3) relTol 2E-2)
assert(model1.weights ~= weightsR1 absTol 2E-2)

/*
Using the following R code to load the data and train the model using glmnet package.
Expand All @@ -349,13 +334,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.3722152
val weightsR2 = Array(0.0, 0.0, -0.1665453, 0.0)
val weightsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)

assert(model2.intercept ~== interceptR2 relTol 1E-2)
assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3)
assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2)
assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3)
assert(model2.weights ~= weightsR2 absTol 1E-3)
}

test("binary logistic regression without intercept with L1 regularization") {
Expand Down Expand Up @@ -387,13 +369,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.03891782
*/
val interceptR1 = 0.0
val weightsR1 = Array(0.0, 0.0, -0.05189203, -0.03891782)
val weightsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)

assert(model1.intercept ~== interceptR1 relTol 1E-3)
assert(model1.weights(0) ~== weightsR1(0) absTol 1E-3)
assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3)
assert(model1.weights(2) ~== weightsR1(2) relTol 1E-2)
assert(model1.weights(3) ~== weightsR1(3) relTol 1E-2)
assert(model1.weights ~= weightsR1 absTol 1E-3)

/*
Using the following R code to load the data and train the model using glmnet package.
Expand All @@ -415,13 +394,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
val weightsR2 = Array(0.0, 0.0, -0.08420782, 0.0)
val weightsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)

assert(model2.intercept ~== interceptR2 relTol 1E-3)
assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3)
assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2)
assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
assert(model2.weights ~= weightsR2 absTol 1E-3)
}

test("binary logistic regression with intercept with L2 regularization") {
Expand Down Expand Up @@ -452,13 +428,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.10062872
*/
val interceptR1 = 0.15021751
val weightsR1 = Array(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
val weightsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)

assert(model1.intercept ~== interceptR1 relTol 1E-3)
assert(model1.weights(0) ~== weightsR1(0) relTol 1E-3)
assert(model1.weights(1) ~== weightsR1(1) relTol 1E-3)
assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3)
assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3)
assert(model1.weights ~= weightsR1 relTol 1E-3)

/*
Using the following R code to load the data and train the model using glmnet package.
Expand All @@ -480,13 +453,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.06266838
*/
val interceptR2 = 0.48657516
val weightsR2 = Array(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
val weightsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)

assert(model2.intercept ~== interceptR2 relTol 1E-3)
assert(model2.weights(0) ~== weightsR2(0) relTol 1E-3)
assert(model2.weights(1) ~== weightsR2(1) relTol 1E-3)
assert(model2.weights(2) ~== weightsR2(2) relTol 1E-3)
assert(model2.weights(3) ~== weightsR2(3) relTol 1E-3)
assert(model2.weights ~= weightsR2 relTol 1E-3)
}

test("binary logistic regression without intercept with L2 regularization") {
Expand Down Expand Up @@ -518,13 +488,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.09799775
*/
val interceptR1 = 0.0
val weightsR1 = Array(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
val weightsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)

assert(model1.intercept ~== interceptR1 relTol 1E-3)
assert(model1.weights(0) ~== weightsR1(0) relTol 1E-2)
assert(model1.weights(1) ~== weightsR1(1) relTol 1E-2)
assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3)
assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
assert(model1.weights ~= weightsR1 relTol 1E-2)

/*
Using the following R code to load the data and train the model using glmnet package.
Expand All @@ -546,13 +513,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.053314311
*/
val interceptR2 = 0.0
val weightsR2 = Array(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
val weightsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)

assert(model2.intercept ~== interceptR2 relTol 1E-3)
assert(model2.weights(0) ~== weightsR2(0) relTol 1E-2)
assert(model2.weights(1) ~== weightsR2(1) relTol 1E-2)
assert(model2.weights(2) ~== weightsR2(2) relTol 1E-3)
assert(model2.weights(3) ~== weightsR2(3) relTol 1E-3)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
assert(model2.weights ~= weightsR2 relTol 1E-2)
}

test("binary logistic regression with intercept with ElasticNet regularization") {
Expand Down Expand Up @@ -583,13 +547,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.15458796
*/
val interceptR1 = 0.57734851
val weightsR1 = Array(-0.05310287, 0.0, -0.08849250, -0.15458796)
val weightsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)

assert(model1.intercept ~== interceptR1 relTol 6E-3)
assert(model1.weights(0) ~== weightsR1(0) relTol 5E-3)
assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3)
assert(model1.weights(2) ~== weightsR1(2) relTol 5E-3)
assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3)
assert(model1.weights ~== weightsR1 absTol 5E-3)

/*
Using the following R code to load the data and train the model using glmnet package.
Expand All @@ -611,13 +572,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.05350074
*/
val interceptR2 = 0.51555993
val weightsR2 = Array(0.0, 0.0, -0.18807395, -0.05350074)
val weightsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)

assert(model2.intercept ~== interceptR2 relTol 6E-3)
assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3)
assert(model2.weights(2) ~== weightsR2(2) relTol 5E-3)
assert(model2.weights(3) ~== weightsR2(3) relTol 1E-2)
assert(model2.weights ~= weightsR2 absTol 1E-3)
}

test("binary logistic regression without intercept with ElasticNet regularization") {
Expand Down Expand Up @@ -649,13 +607,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.142534158
*/
val interceptR1 = 0.0
val weightsR1 = Array(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
val weightsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)

assert(model1.intercept ~== interceptR1 relTol 1E-3)
assert(model1.weights(0) ~== weightsR1(0) absTol 1E-2)
assert(model1.weights(1) ~== weightsR1(1) absTol 1E-2)
assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3)
assert(model1.weights(3) ~== weightsR1(3) relTol 1E-2)
assert(model1.weights ~= weightsR1 absTol 1E-2)

/*
Using the following R code to load the data and train the model using glmnet package.
Expand All @@ -677,13 +632,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
val weightsR2 = Array(0.0, 0.03345223, -0.11304532, 0.0)
val weightsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)

assert(model2.intercept ~== interceptR2 relTol 1E-3)
assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
assert(model2.weights(1) ~== weightsR2(1) relTol 1E-2)
assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2)
assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
assert(model2.weights ~= weightsR2 absTol 1E-3)
}

test("binary logistic regression with intercept with strong L1 regularization") {
Expand Down Expand Up @@ -717,19 +669,13 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
}}}
*/
val interceptTheory = math.log(histogram(1).toDouble / histogram(0).toDouble)
val weightsTheory = Array(0.0, 0.0, 0.0, 0.0)
val weightsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)

assert(model1.intercept ~== interceptTheory relTol 1E-5)
assert(model1.weights(0) ~== weightsTheory(0) absTol 1E-6)
assert(model1.weights(1) ~== weightsTheory(1) absTol 1E-6)
assert(model1.weights(2) ~== weightsTheory(2) absTol 1E-6)
assert(model1.weights(3) ~== weightsTheory(3) absTol 1E-6)
assert(model1.weights ~= weightsTheory absTol 1E-6)

assert(model2.intercept ~== interceptTheory relTol 1E-5)
assert(model2.weights(0) ~== weightsTheory(0) absTol 1E-6)
assert(model2.weights(1) ~== weightsTheory(1) absTol 1E-6)
assert(model2.weights(2) ~== weightsTheory(2) absTol 1E-6)
assert(model2.weights(3) ~== weightsTheory(3) absTol 1E-6)
assert(model2.weights ~= weightsTheory absTol 1E-6)

/*
Using the following R code to load the data and train the model using glmnet package.
Expand All @@ -750,12 +696,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR = -0.248065
val weightsR = Array(0.0, 0.0, 0.0, 0.0)
val weightsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)

assert(model1.intercept ~== interceptR relTol 1E-5)
assert(model1.weights(0) ~== weightsR(0) absTol 1E-6)
assert(model1.weights(1) ~== weightsR(1) absTol 1E-6)
assert(model1.weights(2) ~== weightsR(2) absTol 1E-6)
assert(model1.weights(3) ~== weightsR(3) absTol 1E-6)
assert(model1.weights ~= weightsR absTol 1E-6)
}
}

0 comments on commit 2727304

Please sign in to comment.