Skip to content

Commit

Permalink
change first(), take(n) to has the same behavior as RDD
Browse files Browse the repository at this point in the history
  • Loading branch information
davies committed Sep 29, 2014
1 parent 98ac6c2 commit c40c52d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 12 deletions.
11 changes: 6 additions & 5 deletions python/pyspark/streaming/dstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,18 +174,19 @@ def take(self, n):
"""
Return the first `n` RDDs in the stream (will start and stop).
"""
rdds = []
results = []

def take(_, rdd):
if rdd and len(rdds) < n:
rdds.append(rdd)
if rdd and len(results) < n:
results.extend(rdd.take(n - len(results)))

self.foreachRDD(take)

self._ssc.start()
while len(rdds) < n:
while len(results) < n:
time.sleep(0.01)
self._ssc.stop(False, True)
return rdds
return results

def collect(self):
"""
Expand Down
10 changes: 3 additions & 7 deletions python/pyspark/streaming/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,12 @@ class TestBasicOperations(PySparkStreamingTestCase):
def test_take(self):
input = [range(i) for i in range(3)]
dstream = self.ssc.queueStream(input)
rdds = dstream.take(3)
self.assertEqual(3, len(rdds))
for d, rdd in zip(input, rdds):
self.assertEqual(d, rdd.collect())
self.assertEqual([0, 0, 1], dstream.take(3))

def test_first(self):
input = [range(10)]
dstream = self.ssc.queueStream(input)
rdd = dstream.first()
self.assertEqual(range(10), rdd.collect())
self.assertEqual(0, dstream)

def test_map(self):
"""Basic operation test for DStream.map."""
Expand Down Expand Up @@ -385,7 +381,7 @@ def func(rdds):

dstream = self.ssc.transform([dstream1, dstream2, dstream3], func)

self.assertEqual([2, 3, 1], dstream.first().collect())
self.assertEqual([2, 3, 1], dstream.take(3))


if __name__ == "__main__":
Expand Down

0 comments on commit c40c52d

Please sign in to comment.