
Refactored to address PR comments
tdas committed Dec 1, 2015
1 parent 0c5fe55 commit a1d3f75
Showing 3 changed files with 30 additions and 11 deletions.
@@ -138,18 +138,16 @@ class InternalTrackStateDStream[K: ClassTag, V: ClassTag, S: ClassTag, E: ClassTag]
     // If the RDD is not partitioned the right way, let us repartition it using the
     // partition index as the key. This is to ensure that state RDD is always partitioned
     // before creating another state RDD using it
-    val kvRDD = rdd.mapPartitions { iter =>
-      iter.map { x => (TaskContext.get().partitionId(), x)}
-    }
-    kvRDD.partitionBy(partitioner).mapPartitions(iter => iter.map { _._2 },
-      preservesPartitioning = true)
+    TrackStateRDD.createFromRDD[K, V, S, E](
+      rdd.flatMap { _.stateMap.getAll() }, partitioner, validTime)
   } else {
     rdd
   }
 case None =>
   TrackStateRDD.createFromPairRDD[K, V, S, E](
     spec.getInitialStateRDD().getOrElse(new EmptyRDD[(K, S)](ssc.sparkContext)),
-    partitioner, validTime
+    partitioner,
+    validTime
   )
 }
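
For context on the replaced branch above: the old code keyed every record by its current partition index before calling partitionBy, which shuffles whole partitions around rather than placing individual keys where the partitioner expects them. The new code instead flattens each partition's state map into (key, state, updateTime) triples and rebuilds the state RDD keyed by the actual keys. A minimal sketch of that data flow, assuming stateMap.getAll() yields an Iterator[(K, S, Long)] as the createFromRDD signature below suggests (val names are illustrative):

    // Each partition of the previous state RDD holds one TrackStateRDDRecord.
    // Flatten every record's state map into (key, state, updateTimeMs) triples.
    val triples: RDD[(K, S, Long)] = rdd.flatMap { record => record.stateMap.getAll() }
    // Rebuild a state RDD partitioned by the real keys, so each key lands in
    // the partition that `partitioner` assigns to it.
    val repartitionedStateRDD =
      TrackStateRDD.createFromRDD[K, V, S, E](triples, partitioner, validTime)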

@@ -179,22 +179,43 @@ private[streaming] class TrackStateRDD[K: ClassTag, V: ClassTag, S: ClassTag, E: ClassTag]

 private[streaming] object TrackStateRDD {

-  def createFromPairRDD[K: ClassTag, V: ClassTag, S: ClassTag, T: ClassTag](
+  def createFromPairRDD[K: ClassTag, V: ClassTag, S: ClassTag, E: ClassTag](
       pairRDD: RDD[(K, S)],
       partitioner: Partitioner,
-      updateTime: Time): TrackStateRDD[K, V, S, T] = {
+      updateTime: Time): TrackStateRDD[K, V, S, E] = {

     val rddOfTrackStateRecords = pairRDD.partitionBy(partitioner).mapPartitions ({ iterator =>
       val stateMap = StateMap.create[K, S](SparkEnv.get.conf)
       iterator.foreach { case (key, state) => stateMap.put(key, state, updateTime.milliseconds) }
-      Iterator(TrackStateRDDRecord(stateMap, Seq.empty[T]))
+      Iterator(TrackStateRDDRecord(stateMap, Seq.empty[E]))
     }, preservesPartitioning = true)

     val emptyDataRDD = pairRDD.sparkContext.emptyRDD[(K, V)].partitionBy(partitioner)

     val noOpFunc = (time: Time, key: K, value: Option[V], state: State[S]) => None

-    new TrackStateRDD[K, V, S, T](rddOfTrackStateRecords, emptyDataRDD, noOpFunc, updateTime, None)
+    new TrackStateRDD[K, V, S, E](rddOfTrackStateRecords, emptyDataRDD, noOpFunc, updateTime, None)
   }
+
+  def createFromRDD[K: ClassTag, V: ClassTag, S: ClassTag, E: ClassTag](
+      rdd: RDD[(K, S, Long)],
+      partitioner: Partitioner,
+      updateTime: Time): TrackStateRDD[K, V, S, E] = {
+
+    val pairRDD = rdd.map { x => (x._1, (x._2, x._3)) }
+    val rddOfTrackStateRecords = pairRDD.partitionBy(partitioner).mapPartitions({ iterator =>
+      val stateMap = StateMap.create[K, S](SparkEnv.get.conf)
+      iterator.foreach { case (key, (state, updateTime)) =>
+        stateMap.put(key, state, updateTime)
+      }
+      Iterator(TrackStateRDDRecord(stateMap, Seq.empty[E]))
+    }, preservesPartitioning = true)
+
+    val emptyDataRDD = pairRDD.sparkContext.emptyRDD[(K, V)].partitionBy(partitioner)
+
+    val noOpFunc = (time: Time, key: K, value: Option[V], state: State[S]) => None
+
+    new TrackStateRDD[K, V, S, E](rddOfTrackStateRecords, emptyDataRDD, noOpFunc, updateTime, None)
+  }
 }
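
A hedged usage sketch of the two factories (the SparkContext sc, the partitioner, and the batch time are hypothetical values, not part of this commit): createFromPairRDD stamps every seeded key with the single updateTime argument, whereas createFromRDD keeps the per-key timestamp carried in each triple, e.g. one recovered from an existing state RDD via stateMap.getAll().

    import org.apache.spark.HashPartitioner

    // Hypothetical seeding with K = String, V = String, S = Int, E = Int.
    val batchTime = Time(1000L)
    val partitioner = new HashPartitioner(2)

    // From (key, state) pairs: both keys get batchTime.milliseconds (1000 ms)
    // as their last-update timestamp.
    val fromPairs = TrackStateRDD.createFromPairRDD[String, String, Int, Int](
      sc.parallelize(Seq(("a", 1), ("b", 2))), partitioner, batchTime)

    // From (key, state, updateTimeMs) triples: "a" keeps 500 ms and "b" keeps
    // 900 ms, which matters when state timeouts compare against these values.
    val fromTriples = TrackStateRDD.createFromRDD[String, String, Int, Int](
      sc.parallelize(Seq(("a", 1, 500L), ("b", 2, 900L))), partitioner, batchTime)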

@@ -478,7 +478,7 @@ class TrackStateByKeySuite extends SparkFunSuite
 }


-  test("trackStateByKey - drivery failure recovery") {
+  test("trackStateByKey - driver failure recovery") {
     val inputData =
       Seq(
         Seq(),
