Skip to content

Commit

Permalink
[SPARK-7452] [MLLIB] fix bug in topByKey and update test
Browse files Browse the repository at this point in the history
The toArray method of BoundedPriorityQueue does not necessarily preserve order. Add a counter-example as a test, which would fail with the original implementation.

Author: Shuo Xiang <[email protected]>

Closes apache#5990 from coderxiang/topbykey-test and squashes the following commits:

98804c9 [Shuo Xiang] fix bug in topByKey and update test
  • Loading branch information
coderxiang authored and jeanlyn committed May 28, 2015
1 parent d8101f5 commit 8230da2
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se
combOp = (queue1, queue2) => {
queue1 ++= queue2
}
).mapValues(_.toArray.reverse) // This is an min-heap, so we reverse the order.
).mapValues(_.toArray.sorted(ord.reverse)) // This is an min-heap, so we reverse the order.
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ import org.apache.spark.mllib.rdd.MLPairRDDFunctions._

class MLPairRDDFunctionsSuite extends FunSuite with MLlibTestSparkContext {
test("topByKey") {
val topMap = sc.parallelize(Array((1, 1), (1, 2), (3, 2), (3, 7), (5, 1), (3, 5)), 2)
.topByKey(2)
val topMap = sc.parallelize(Array((1, 7), (1, 3), (1, 6), (1, 1), (1, 2), (3, 2), (3, 7), (5,
1), (3, 5)), 2)
.topByKey(5)
.collectAsMap()

assert(topMap.size === 3)
assert(topMap(1) === Array(2, 1))
assert(topMap(3) === Array(7, 5))
assert(topMap(1) === Array(7, 6, 3, 2, 1))
assert(topMap(3) === Array(7, 5, 2))
assert(topMap(5) === Array(1))
}
}

0 comments on commit 8230da2

Please sign in to comment.