Skip to content

Commit

Permalink
[MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.
Browse files Browse the repository at this point in the history
Before:
[info] GradientBoostingSuite:
[info] - Regression with continuous features: SquaredError (22 seconds, 115 milliseconds)
[info] - Regression with continuous features: Absolute Error (19 seconds, 330 milliseconds)
[info] - Binary classification with continuous features: Log Loss (19 seconds, 17 milliseconds)

After:
[info] - Regression with continuous features: SquaredError (7 seconds, 69 milliseconds)
[info] - Regression with continuous features: Absolute Error (4 seconds, 617 milliseconds)
[info] - Binary classification with continuous features: Log Loss (4 seconds, 658 milliseconds)

cc: mengxr, jkbradley

Author: Manish Amde <[email protected]>

Closes #3214 from manishamde/gbt_test_speedup and squashes the following commits:

8994552 [Manish Amde] reducing gbt test run times

(cherry picked from commit 2ef016b)
Signed-off-by: Xiangrui Meng <[email protected]>
  • Loading branch information
manishamde authored and mengxr committed Nov 12, 2014
1 parent 12f5633 commit c9bb5e4
Showing 1 changed file with 5 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Regression with continuous features: SquaredError") {
GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) =>
- val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+ val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int]

Expand All @@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
assert(gbt.weakHypotheses.size === numIterations)
val gbtTree = gbt.weakHypotheses(0)

- EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+ EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)

// Make sure trees are the same.
assert(gbtTree.toString == dt.toString)
Expand All @@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Regression with continuous features: Absolute Error") {
GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) =>
- val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+ val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int]

Expand All @@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
assert(gbt.weakHypotheses.size === numIterations)
val gbtTree = gbt.weakHypotheses(0)

- EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+ EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)

// Make sure trees are the same.
assert(gbtTree.toString == dt.toString)
Expand All @@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Binary classification with continuous features: Log Loss") {
GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) =>
- val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+ val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int]

Expand Down

0 comments on commit c9bb5e4

Please sign in to comment.