From 03cad516cd79fe925a6169075907e6f75bab199c Mon Sep 17 00:00:00 2001
From: Isaac Abraham
Date: Thu, 21 Apr 2016 00:35:44 +0100
Subject: [PATCH 01/20] Initial port from KVP to Tuple

---
 .../Core/Accumulator.cs                       |  14 +-
 .../Core/OrderedRDDFunctions.cs               |  18 +-
 .../Core/PairRDDFunctions.cs                  | 272 +++++++++---------
 .../Microsoft.Spark.CSharp/Core/RDD.cs        |  96 +++----
 .../Core/SparkContext.cs                      |   8 +-
 .../Interop/Ipc/JvmBridgeUtils.cs             |  13 +-
 .../Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs |  10 +-
 .../Proxy/ISparkContextProxy.cs               |   8 +-
 .../Proxy/IStreamingContextProxy.cs           |   8 +-
 .../Proxy/Ipc/RDDIpcProxy.cs                  |  10 +-
 .../Proxy/Ipc/SparkContextIpcProxy.cs         |   8 +-
 .../Proxy/Ipc/StreamingContextIpcProxy.cs     |  20 +-
 .../Streaming/DStream.cs                      |  14 +-
 .../Streaming/EventHubsUtils.cs               |   2 +-
 .../Microsoft.Spark.CSharp/Streaming/Kafka.cs |  26 +-
 .../Streaming/MapWithStateDStream.cs          |  30 +-
 .../Streaming/PairDStreamFunctions.cs         | 130 ++++-----
 .../Microsoft.Spark.CSharp.Adapter.Doc.XML    | 231 ++++++++-------
 .../documentation/Mobius_API_Documentation.md |  22 +-
 csharp/AdapterTest/AccumulatorTest.cs         |   8 +-
 csharp/AdapterTest/DStreamTest.cs             | 106 +++----
 csharp/AdapterTest/DoubleRDDTest.cs           |   2 +-
 csharp/AdapterTest/EventHubsUtilsTest.cs      |   4 +-
 csharp/AdapterTest/Mocks/MockRddProxy.cs      |  10 +-
 .../Mocks/MockSparkContextProxy.cs            |   8 +-
 .../Mocks/MockStreamingContextProxy.cs        |   8 +-
 csharp/AdapterTest/PairRDDTest.cs             |  64 ++---
 csharp/AdapterTest/RDDTest.cs                 |   8 +-
 csharp/AdapterTest/SparkContextTest.cs        |  12 +-
 csharp/AdapterTest/StreamingContextTest.cs    |   6 +-
 csharp/AdapterTest/TestWithMoqDemo.cs         |   6 +-
 .../FreebaseDeletionsBenchmark.cs             |   8 +-
 .../Microsoft.Spark.CSharp/DStreamSamples.cs  |   4 +-
 .../DStreamStateSample.cs                     |  14 +-
 .../Microsoft.Spark.CSharp/PairRDDSamples.cs  | 203 ++++++-------
 .../Microsoft.Spark.CSharp/RDDSamples.cs      |  74 ++---
 36 files changed, 757 insertions(+), 728 deletions(-)

diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/Accumulator.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/Accumulator.cs
index 816002ef..b07b138a 100644
--- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/Accumulator.cs
+++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/Accumulator.cs
@@ -215,19 +215,19 @@ internal int StartUpdateServer()
                 for (int i = 0; i < numUpdates; i++)
                 {
                     var ms = new MemoryStream(SerDe.ReadBytes(ns));
-                    KeyValuePair update = (KeyValuePair)formatter.Deserialize(ms);
+                    var update = (Tuple)formatter.Deserialize(ms);

-                    if (Accumulator.accumulatorRegistry.ContainsKey(update.Key))
+                    if (Accumulator.accumulatorRegistry.ContainsKey(update.Item1))
                     {
-                        Accumulator accumulator = Accumulator.accumulatorRegistry[update.Key];
-                        accumulator.GetType().GetMethod("Add").Invoke(accumulator, new object[] { update.Value });
+                        Accumulator accumulator = Accumulator.accumulatorRegistry[update.Item1];
+                        accumulator.GetType().GetMethod("Add").Invoke(accumulator, new object[] { update.Item2 });
                     }
                     else
                     {
-                        Console.Error.WriteLine("WARN: cann't find update.Key: {0} for accumulator, will create a new one", update.Key);
+                        Console.Error.WriteLine("WARN: cann't find update.Key: {0} for accumulator, will create a new one", update.Item1);
                         var genericAccumulatorType = typeof(Accumulator<>);
-                        var specificAccumulatorType = genericAccumulatorType.MakeGenericType(update.Value.GetType());
-                        Activator.CreateInstance(specificAccumulatorType, new object[] { update.Key, update.Value });
+                        var specificAccumulatorType = genericAccumulatorType.MakeGenericType(update.Item2.GetType());
+                        Activator.CreateInstance(specificAccumulatorType, new object[] {
update.Item1, update.Item2 }); } } ns.WriteByte((byte)1); // acknowledge byte other than -1 diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/OrderedRDDFunctions.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/OrderedRDDFunctions.cs index bb514604..23509e56 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/OrderedRDDFunctions.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/OrderedRDDFunctions.cs @@ -26,7 +26,7 @@ public static class OrderedRDDFunctions /// /// /// - public static RDD> SortByKey(this RDD> self, + public static RDD> SortByKey(this RDD> self, bool ascending = true, int? numPartitions = null) { return SortByKey(self, ascending, numPartitions, new DefaultSortKeyFuncHelper().Execute); @@ -42,7 +42,7 @@ public static RDD> SortByKey(this RDDNumber of partitions. Each partition of the sorted RDD contains a sorted range of the elements. /// RDD will sort by keyFunc(key) for every key in KeyValuePair. Must not be null. /// - public static RDD> SortByKey(this RDD> self, + public static RDD> SortByKey(this RDD> self, bool ascending, int? numPartitions, Func keyFunc) { if (keyFunc == null) @@ -73,7 +73,7 @@ public static RDD> SortByKey(this RDD kv.Key).Collect().Select(k => keyFunc(k)).ToArray(); + U[] samples = self.Sample(false, fraction, 1).Map(kv => kv.Item1).Collect().Select(k => keyFunc(k)).ToArray(); Array.Sort(samples, StringComparer.Ordinal); // case sensitive if key type is string List bounds = new List(); @@ -123,22 +123,22 @@ public SortByKeyHelper(Func f, bool ascending = true) this.ascending = ascending; } - public IEnumerable> Execute(int pid, IEnumerable> kvs) + public IEnumerable> Execute(int pid, IEnumerable> kvs) { - IEnumerable> ordered; + IEnumerable> ordered; if (ascending) { if (typeof(K) == typeof(string)) - ordered = kvs.OrderBy(k => func(k.Key).ToString(), StringComparer.Ordinal); + ordered = kvs.OrderBy(k => func(k.Item1).ToString(), StringComparer.Ordinal); else - ordered = kvs.OrderBy(k => func(k.Key)); + ordered = kvs.OrderBy(k => func(k.Item1)); } else { if (typeof(K) == typeof(string)) - ordered = kvs.OrderByDescending(k => func(k.Key).ToString(), StringComparer.Ordinal); + ordered = kvs.OrderByDescending(k => func(k.Item1).ToString(), StringComparer.Ordinal); else - ordered = kvs.OrderByDescending(k => func(k.Key)); + ordered = kvs.OrderByDescending(k => func(k.Item1)); } return ordered; } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs index deb5eee0..a40c6197 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs @@ -13,7 +13,7 @@ namespace Microsoft.Spark.CSharp.Core { /// - /// operations only available to KeyValuePair RDD + /// operations only available to Tuple RDD /// /// See also http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.rdd.PairRDDFunctions /// @@ -22,7 +22,7 @@ public static class PairRDDFunctions /// /// Return the key-value pairs in this RDD to the master as a dictionary. 
/// - /// var m = sc.Parallelize(new[] { new KeyValuePair<int, int>(1, 2), new KeyValuePair<int, int>(3, 4) }, 1).CollectAsMap() + /// var m = sc.Parallelize(new[] { new Tuple<int, int>(1, 2), new Tuple<int, int>(3, 4) }, 1).CollectAsMap() /// m[1] /// 2 /// m[3] @@ -33,30 +33,30 @@ public static class PairRDDFunctions /// /// /// - public static Dictionary CollectAsMap(this RDD> self) + public static IDictionary CollectAsMap(this RDD> self) { - return self.Collect().ToDictionary(kv => kv.Key, kv => kv.Value); + return self.Collect().ToDictionary(kv => kv.Item1, kv => kv.Item2); } /// /// Return an RDD with the keys of each tuple. /// - /// >>> m = sc.Parallelize(new[] { new KeyValuePair<int, int>(1, 2), new KeyValuePair<int, int>(3, 4) }, 1).Keys().Collect() + /// >>> m = sc.Parallelize(new[] { new Tuple<int, int>(1, 2), new Tuple<int, int>(3, 4) }, 1).Keys().Collect() /// [1, 3] /// /// /// /// /// - public static RDD Keys(this RDD> self) + public static RDD Keys(this RDD> self) { - return self.Map(kv => kv.Key); + return self.Map(kv => kv.Item1); } /// /// Return an RDD with the values of each tuple. /// - /// >>> m = sc.Parallelize(new[] { new KeyValuePair<int, int>(1, 2), new KeyValuePair<int, int>(3, 4) }, 1).Values().Collect() + /// >>> m = sc.Parallelize(new[] { new Tuple<int, int>(1, 2), new Tuple<int, int>(3, 4) }, 1).Values().Collect() /// [2, 4] /// /// @@ -64,9 +64,9 @@ public static RDD Keys(this RDD> self) /// /// /// - public static RDD Values(this RDD> self) + public static RDD Values(this RDD> self) { - return self.Map(kv => kv.Value); + return self.Map(kv => kv.Item2); } /// @@ -80,9 +80,9 @@ public static RDD Values(this RDD> self) /// /// sc.Parallelize(new[] /// { - /// new KeyValuePair<string, int>("a", 1), - /// new KeyValuePair<string, int>("b", 1), - /// new KeyValuePair<string, int>("a", 1) + /// new Tuple<string, int>("a", 1), + /// new Tuple<string, int>("b", 1), + /// new Tuple<string, int>("a", 1) /// }, 2) /// .ReduceByKey((x, y) => x + y).Collect() /// @@ -95,7 +95,7 @@ public static RDD Values(this RDD> self) /// /// /// - public static RDD> ReduceByKey(this RDD> self, Func reduceFunc, int numPartitions = 0) + public static RDD> ReduceByKey(this RDD> self, Func reduceFunc, int numPartitions = 0) { return CombineByKey(self, () => default(V), reduceFunc, reduceFunc, numPartitions); } @@ -109,9 +109,9 @@ public static RDD> ReduceByKey(this RDD("a", 1), - /// new KeyValuePair<string, int>("b", 1), - /// new KeyValuePair<string, int>("a", 1) + /// new Tuple<string, int>("a", 1), + /// new Tuple<string, int>("b", 1), + /// new Tuple<string, int>("a", 1) /// }, 2) /// .ReduceByKeyLocally((x, y) => x + y).Collect() /// @@ -123,7 +123,7 @@ public static RDD> ReduceByKey(this RDD /// /// - public static Dictionary ReduceByKeyLocally(this RDD> self, Func reduceFunc) + public static IDictionary ReduceByKeyLocally(this RDD> self, Func reduceFunc) { return ReduceByKey(self, reduceFunc).CollectAsMap(); } @@ -133,9 +133,9 @@ public static Dictionary ReduceByKeyLocally(this RDD("a", 1), - /// new KeyValuePair<string, int>("b", 1), - /// new KeyValuePair<string, int>("a", 1) + /// new Tuple<string, int>("a", 1), + /// new Tuple<string, int>("b", 1), + /// new Tuple<string, int>("a", 1) /// }, 2) /// .CountByKey((x, y) => x + y).Collect() /// @@ -146,7 +146,7 @@ public static Dictionary ReduceByKeyLocally(this RDD /// /// - public static Dictionary CountByKey(this RDD> self) + public static IDictionary CountByKey(this RDD> self) { return self.MapValues(v => 
1L).ReduceByKey((a, b) => a + b).CollectAsMap(); } @@ -159,9 +159,9 @@ public static Dictionary CountByKey(this RDD> /// Performs a hash join across the cluster. /// /// var l = sc.Parallelize( - /// new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 1); + /// new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); /// var r = sc.Parallelize( - /// new[] { new KeyValuePair<string, int>("a", 2), new KeyValuePair<string, int>("a", 3) }, 1); + /// new[] { new Tuple<string, int>("a", 2), new Tuple<string, int>("a", 3) }, 1); /// var m = l.Join(r, 2).Collect(); /// /// [('a', (1, 2)), ('a', (1, 3))] @@ -174,9 +174,9 @@ public static Dictionary CountByKey(this RDD> /// /// /// - public static RDD>> Join( - this RDD> self, - RDD> other, + public static RDD>> Join( + this RDD> self, + RDD> other, int numPartitions = 0) { return self.GroupWith(other, numPartitions).FlatMapValues( @@ -194,9 +194,9 @@ public static RDD>> Join( /// Hash-partitions the resulting RDD into the given number of partitions. /// /// var l = sc.Parallelize( - /// new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 1); + /// new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); /// var r = sc.Parallelize( - /// new[] { new KeyValuePair<string, int>("a", 2) }, 1); + /// new[] { new Tuple<string, int>("a", 2) }, 1); /// var m = l.LeftOuterJoin(r).Collect(); /// /// [('a', (1, 2)), ('b', (4, Option))] @@ -209,9 +209,9 @@ public static RDD>> Join( /// /// /// - public static RDD>>> LeftOuterJoin( - this RDD> self, - RDD> other, + public static RDD>>> LeftOuterJoin( + this RDD> self, + RDD> other, int numPartitions = 0) { return self.GroupWith(other, numPartitions).FlatMapValues( @@ -228,9 +228,9 @@ public static RDD>>> LeftOuterJoin( /// Hash-partitions the resulting RDD into the given number of partitions. /// /// var l = sc.Parallelize( - /// new[] { new KeyValuePair<string, int>("a", 2) }, 1); + /// new[] { new Tuple<string, int>("a", 2) }, 1); /// var r = sc.Parallelize( - /// new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 1); + /// new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); /// var m = l.RightOuterJoin(r).Collect(); /// /// [('a', (2, 1)), ('b', (Option, 4))] @@ -243,9 +243,9 @@ public static RDD>>> LeftOuterJoin( /// /// /// - public static RDD, W>>> RightOuterJoin( - this RDD> self, - RDD> other, + public static RDD, W>>> RightOuterJoin( + this RDD> self, + RDD> other, int numPartitions = 0) { return self.GroupWith(other, numPartitions).FlatMapValues( @@ -267,9 +267,9 @@ public static RDD, W>>> RightOuterJoin( /// Hash-partitions the resulting RDD into the given number of partitions. 
/// /// var l = sc.Parallelize( - /// new[] { new KeyValuePair<string, int>("a", 1), KeyValuePair<string, int>("b", 4) }, 1); + /// new[] { new Tuple<string, int>("a", 1), Tuple<string, int>("b", 4) }, 1); /// var r = sc.Parallelize( - /// new[] { new KeyValuePair<string, int>("a", 2), new KeyValuePair<string, int>("c", 8) }, 1); + /// new[] { new Tuple<string, int>("a", 2), new Tuple<string, int>("c", 8) }, 1); /// var m = l.FullOuterJoin(r).Collect(); /// /// [('a', (1, 2)), ('b', (4, None)), ('c', (None, 8))] @@ -282,9 +282,9 @@ public static RDD, W>>> RightOuterJoin( /// /// /// - public static RDD, Option>>> FullOuterJoin( - this RDD> self, - RDD> other, + public static RDD, Option>>> FullOuterJoin( + this RDD> self, + RDD> other, int numPartitions = 0) { return self.GroupWith(other, numPartitions).FlatMapValues( @@ -295,13 +295,13 @@ public static RDD, Option>>> FullOuterJoin /// Return a copy of the RDD partitioned using the specified partitioner. /// - /// sc.Parallelize(new[] { 1, 2, 3, 4, 2, 4, 1 }, 1).Map(x => new KeyValuePair<int, int>(x, x)).PartitionBy(3).Glom().Collect() + /// sc.Parallelize(new[] { 1, 2, 3, 4, 2, 4, 1 }, 1).Map(x => new Tuple<int, int>(x, x)).PartitionBy(3).Glom().Collect() /// /// /// /// /// - public static RDD> PartitionBy(this RDD> self, int numPartitions = 0, + public static RDD> PartitionBy(this RDD> self, int numPartitions = 0, Func partitionFunc = null) { if (numPartitions == 0) @@ -318,7 +318,7 @@ public static RDD> PartitionBy(this RDD>(self.sparkContext.SparkContextProxy.CreatePairwiseRDD(keyed.RddProxy, numPartitions, + var rdd = new RDD>(self.sparkContext.SparkContextProxy.CreatePairwiseRDD(keyed.RddProxy, numPartitions, GenerateObjectId(partitionFunc)), self.sparkContext); rdd.partitioner = partitioner; @@ -346,9 +346,9 @@ public static RDD> PartitionBy(this RDD("a", 1), - /// new KeyValuePair<string, int>("b", 1), - /// new KeyValuePair<string, int>("a", 1) + /// new Tuple<string, int>("a", 1), + /// new Tuple<string, int>("b", 1), + /// new Tuple<string, int>("a", 1) /// }, 2) /// .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() /// @@ -363,8 +363,8 @@ public static RDD> PartitionBy(this RDD /// /// - public static RDD> CombineByKey( - this RDD> self, + public static RDD> CombineByKey( + this RDD> self, Func createCombiner, Func mergeValue, Func mergeCombiners, @@ -389,9 +389,9 @@ public static RDD> CombineByKey( /// sc.Parallelize( /// new[] /// { - /// new KeyValuePair<string, int>("a", 1), - /// new KeyValuePair<string, int>("b", 1), - /// new KeyValuePair<string, int>("a", 1) + /// new Tuple<string, int>("a", 1), + /// new Tuple<string, int>("b", 1), + /// new Tuple<string, int>("a", 1) /// }, 2) /// .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() /// @@ -406,8 +406,8 @@ public static RDD> CombineByKey( /// /// /// - public static RDD> AggregateByKey( - this RDD> self, + public static RDD> AggregateByKey( + this RDD> self, Func zeroValue, Func seqOp, Func combOp, @@ -425,9 +425,9 @@ public static RDD> AggregateByKey( /// sc.Parallelize( /// new[] /// { - /// new KeyValuePair<string, int>("a", 1), - /// new KeyValuePair<string, int>("b", 1), - /// new KeyValuePair<string, int>("a", 1) + /// new Tuple<string, int>("a", 1), + /// new Tuple<string, int>("b", 1), + /// new Tuple<string, int>("a", 1) /// }, 2) /// .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() /// @@ -440,8 +440,8 @@ public static RDD> AggregateByKey( /// 
/// /// - public static RDD> FoldByKey( - this RDD> self, + public static RDD> FoldByKey( + this RDD> self, Func zeroValue, Func func, int numPartitions = 0) @@ -460,9 +460,9 @@ public static RDD> FoldByKey( /// sc.Parallelize( /// new[] /// { - /// new KeyValuePair<string, int>("a", 1), - /// new KeyValuePair<string, int>("b", 1), - /// new KeyValuePair<string, int>("a", 1) + /// new Tuple<string, int>("a", 1), + /// new Tuple<string, int>("b", 1), + /// new Tuple<string, int>("a", 1) /// }, 2) /// .GroupByKey().MapValues(l => string.Join(" ", l)).Collect() /// @@ -474,7 +474,7 @@ public static RDD> FoldByKey( /// /// /// - public static RDD>> GroupByKey(this RDD> self, int numPartitions = 0) + public static RDD>> GroupByKey(this RDD> self, int numPartitions = 0) { return CombineByKey(self, () => new List(), @@ -490,8 +490,8 @@ public static RDD>> GroupByKey(this RDD("a", new[]{"apple", "banana", "lemon"}), - /// new KeyValuePair<string, string[]>("b", new[]{"grapes"}) + /// new Tuple<string, string[]>("a", new[]{"apple", "banana", "lemon"}), + /// new Tuple<string, string[]>("b", new[]{"grapes"}) /// }, 2) /// .MapValues(x => x.Length).Collect() /// @@ -504,7 +504,7 @@ public static RDD>> GroupByKey(this RDD /// /// - public static RDD> MapValues(this RDD> self, Func func) + public static RDD> MapValues(this RDD> self, Func func) { return self.Map(new MapValuesHelper(func).Execute, true); } @@ -516,8 +516,8 @@ public static RDD> MapValues(this RDD("a", new[]{"x", "y", "z"}), - /// new KeyValuePair<string, string[]>("b", new[]{"p", "r"}) + /// new Tuple<string, string[]>("a", new[]{"x", "y", "z"}), + /// new Tuple<string, string[]>("b", new[]{"p", "r"}) /// }, 2) /// .FlatMapValues(x => x).Collect() /// @@ -530,13 +530,13 @@ public static RDD> MapValues(this RDD /// /// - public static RDD> FlatMapValues(this RDD> self, Func> func) + public static RDD> FlatMapValues(this RDD> self, Func> func) { return self.FlatMap(new FlatMapValuesHelper(func).Execute, true); } /// - /// explicitly convert KeyValuePair<K, V> to KeyValuePair<K, dynamic> + /// explicitly convert Tuple<K, V> to Tuple<K, dynamic> /// since they are incompatibles types unlike V to dynamic /// /// @@ -546,10 +546,10 @@ public static RDD> FlatMapValues(this RDD /// /// - private static RDD> MapPartitionsWithIndex(this RDD> self) + private static RDD> MapPartitionsWithIndex(this RDD> self) { CSharpWorkerFunc csharpWorkerFunc = new CSharpWorkerFunc(new DynamicTypingWrapper().Execute); - var pipelinedRDD = new PipelinedRDD> + var pipelinedRDD = new PipelinedRDD> { workerFunc = csharpWorkerFunc, preservesPartitioning = true, @@ -568,8 +568,8 @@ private static RDD> MapPartitionsWithIndex, return a resulting RDD that /// contains a tuple with the list of values for that key in this RDD as well as . 
/// - /// var x = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 2); - /// var y = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 2) }, 1); + /// var x = sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 2); + /// var y = sc.Parallelize(new[] { new Tuple<string, int>("a", 2) }, 1); /// x.GroupWith(y).Collect(); /// /// [('a', ([1], [2])), ('b', ([4], []))] @@ -582,16 +582,16 @@ private static RDD> MapPartitionsWithIndex /// /// - public static RDD, List>>> GroupWith( - this RDD> self, - RDD> other, + public static RDD, List>>> GroupWith( + this RDD> self, + RDD> other, int numPartitions = 0) { // MapValues, which introduces extra CSharpRDD, is not necessary when union different RDD types if (typeof(V) != typeof(W)) { - return self.ConvertTo>() - .Union(other.ConvertTo>()) + return self.ConvertTo>() + .Union(other.ConvertTo>()) .MapPartitionsWithIndex() .CombineByKey( () => new Tuple, List>(new List(), new List()), @@ -610,9 +610,9 @@ public static RDD, List>>> GroupWith( } /// - /// var x = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 5), new KeyValuePair<string, int>("b", 6) }, 2); - /// var y = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 2); - /// var z = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 2) }, 1); + /// var x = sc.Parallelize(new[] { new Tuple<string, int>("a", 5), new Tuple<string, int>("b", 6) }, 2); + /// var y = sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 2); + /// var z = sc.Parallelize(new[] { new Tuple<string, int>("a", 2) }, 1); /// x.GroupWith(y, z).Collect(); /// /// @@ -624,18 +624,18 @@ public static RDD, List>>> GroupWith( /// /// /// - public static RDD, List, List>>> GroupWith( - this RDD> self, - RDD> other1, - RDD> other2, + public static RDD, List, List>>> GroupWith( + this RDD> self, + RDD> other1, + RDD> other2, int numPartitions = 0) { // MapValues, which introduces extra CSharpRDD, is not necessary when union different RDD types if (!(typeof(V) == typeof(W1) && typeof(V) == typeof(W2))) { - return self.ConvertTo>() - .Union(other1.ConvertTo>()) - .Union(other2.ConvertTo>()) + return self.ConvertTo>() + .Union(other1.ConvertTo>()) + .Union(other2.ConvertTo>()) .MapPartitionsWithIndex() .CombineByKey( () => new Tuple, List, List>(new List(), new List(), new List()), @@ -655,10 +655,10 @@ public static RDD, List, List>>> GroupWith } /// - /// var x = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 5), new KeyValuePair<string, int>("b", 6) }, 2); - /// var y = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 2); - /// var z = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 2) }, 1); - /// var w = sc.Parallelize(new[] { new KeyValuePair<string, int>("b", 42) }, 1); + /// var x = sc.Parallelize(new[] { new Tuple<string, int>("a", 5), new Tuple<string, int>("b", 6) }, 2); + /// var y = sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 2); + /// var z = sc.Parallelize(new[] { new Tuple<string, int>("a", 2) }, 1); + /// var w = sc.Parallelize(new[] { new Tuple<string, int>("b", 42) }, 1); /// var m = x.GroupWith(y, z, w).MapValues(l => string.Join(" ", l.Item1) + " : " + string.Join(" ", l.Item2) + " : " + string.Join(" ", l.Item3) + " : " + string.Join(" ", l.Item4)).Collect(); /// /// @@ -672,20 +672,20 @@ public 
static RDD, List, List>>> GroupWith /// /// /// - public static RDD, List, List, List>>> GroupWith( - this RDD> self, - RDD> other1, - RDD> other2, - RDD> other3, + public static RDD, List, List, List>>> GroupWith( + this RDD> self, + RDD> other1, + RDD> other2, + RDD> other3, int numPartitions = 0) { // MapValues, which introduces extra CSharpRDD, is not necessary when union different RDD types if (!(typeof(V) == typeof(W1) && typeof(V) == typeof(W2))) { - return self.ConvertTo>() - .Union(other1.ConvertTo>()) - .Union(other2.ConvertTo>()) - .Union(other3.ConvertTo>()) + return self.ConvertTo>() + .Union(other1.ConvertTo>()) + .Union(other2.ConvertTo>()) + .Union(other3.ConvertTo>()) .MapPartitionsWithIndex() .CombineByKey( () => new Tuple, List, List, List>(new List(), new List(), new List(), new List()), @@ -713,7 +713,7 @@ public static RDD, List, List, List>>> // /// // /// var fractions = new { { "a", 0.2 }, { "b", 0.1 } }; // /// var rdd = sc.Parallelize(fractions.Keys.ToArray(), 2).Cartesian(sc.Parallelize(Enumerable.Range(0, 1000), 2)); - // /// var sample = rdd.Map(t => new KeyValuePair<string, int>(t.Item1, t.Item2)).SampleByKey(false, fractions, 2).GroupByKey().Collect(); + // /// var sample = rdd.Map(t => new Tuple<string, int>(t.Item1, t.Item2)).SampleByKey(false, fractions, 2).GroupByKey().Collect(); // /// // /// 100 < sample["a"].Length < 300 and 50 < sample["b"].Length < 150 // /// true @@ -730,8 +730,8 @@ public static RDD, List, List, List>>> // /// // /// // /// - //public static RDD> SampleByKey( - // this RDD> self, + //public static RDD> SampleByKey( + // this RDD> self, // bool withReplacement, // Dictionary fractions, // long seed) @@ -739,14 +739,14 @@ public static RDD, List, List, List>>> // if (fractions.Any(f => f.Value < 0.0)) // throw new ArgumentException(string.Format("Negative fraction value found in: {0}", string.Join(",", fractions.Values.ToArray()))); - // return new RDD>(self.RddProxy.SampleByKey(withReplacement, fractions, seed), self.sparkContext); + // return new RDD>(self.RddProxy.SampleByKey(withReplacement, fractions, seed), self.sparkContext); //} /// /// Return each (key, value) pair in this RDD that has no pair with matching key in . 
/// - /// var x = sc.Parallelize(new[] { new KeyValuePair<string, int?>("a", 1), new KeyValuePair<string, int?>("b", 4), new KeyValuePair<string, int?>("b", 5), new KeyValuePair<string, int?>("a", 2) }, 2); - /// var y = sc.Parallelize(new[] { new KeyValuePair<string, int?>("a", 3), new KeyValuePair<string, int?>("c", null) }, 2); + /// var x = sc.Parallelize(new[] { new Tuple<string, int?>("a", 1), new Tuple<string, int?>("b", 4), new Tuple<string, int?>("b", 5), new Tuple<string, int?>("a", 2) }, 2); + /// var y = sc.Parallelize(new[] { new Tuple<string, int?>("a", 3), new Tuple<string, int?>("c", null) }, 2); /// x.SubtractByKey(y).Collect(); /// /// [('b', 4), ('b', 5)] @@ -759,7 +759,7 @@ public static RDD, List, List, List>>> /// /// /// - public static RDD> SubtractByKey(this RDD> self, RDD> other, int numPartitions = 0) + public static RDD> SubtractByKey(this RDD> self, RDD> other, int numPartitions = 0) { return self.GroupWith(other, numPartitions).FlatMapValues(t => t.Item1.Where(v => t.Item2.Count == 0)); } @@ -770,7 +770,7 @@ public static RDD> SubtractByKey(this RDD>> l = range(1000) - /// >>> rdd = sc.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new KeyValuePair<int, int>(x, y)), 10) + /// >>> rdd = sc.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new Tuple<int, int>(x, y)), 10) /// >>> rdd.lookup(42) /// [42] /// @@ -780,7 +780,7 @@ public static RDD> SubtractByKey(this RDD /// /// - public static V[] Lookup(this RDD> self, K key) + public static V[] Lookup(this RDD> self, K key) { return self.Filter(new LookupHelper(key).Execute).Values().Collect(); } @@ -795,7 +795,7 @@ public static V[] Lookup(this RDD> self, K key) /// /// /// Hadoop job configuration, passed in as a dict - public static void SaveAsNewAPIHadoopDataset(this RDD> self, IEnumerable> conf) + public static void SaveAsNewAPIHadoopDataset(this RDD> self, IEnumerable> conf) { self.RddProxy.SaveAsNewAPIHadoopDataset(conf); } @@ -811,7 +811,7 @@ public static void SaveAsNewAPIHadoopDataset(this RDD> /// fully qualified classname of key Writable class (e.g. "org.apache.hadoop.io.IntWritable", None by default) /// fully qualified classname of value Writable class (e.g. "org.apache.hadoop.io.Text", None by default) /// Hadoop job configuration, passed in as a dict (None by default) - public static void SaveAsNewAPIHadoopFile(this RDD> self, string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf) + public static void SaveAsNewAPIHadoopFile(this RDD> self, string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf) { self.RddProxy.SaveAsNewAPIHadoopFile(path, outputFormatClass, keyClass, valueClass, conf); } @@ -826,7 +826,7 @@ public static void SaveAsNewAPIHadoopFile(this RDD> sel /// /// /// Hadoop job configuration, passed in as a dict - public static void SaveAsHadoopDataset(this RDD> self, IEnumerable> conf) + public static void SaveAsHadoopDataset(this RDD> self, IEnumerable> conf) { self.RddProxy.SaveAsHadoopDataset(conf); } @@ -848,7 +848,7 @@ public static void SaveAsHadoopDataset(this RDD> self, /// fully qualified classname of value Writable class (e.g. 
"org.apache.hadoop.io.Text", None by default) /// (None by default) /// (None by default) - public static void SaveAsHadoopFile(this RDD> self, string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf, string compressionCodecClass) + public static void SaveAsHadoopFile(this RDD> self, string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf, string compressionCodecClass) { self.RddProxy.SaveAsHadoopFile(path, outputFormatClass, keyClass, valueClass, conf, compressionCodecClass); } @@ -867,7 +867,7 @@ public static void SaveAsHadoopFile(this RDD> self, str /// /// path to sequence file /// (None by default) - public static void SaveAsSequenceFile(this RDD> self, string path, string compressionCodecClass) + public static void SaveAsSequenceFile(this RDD> self, string path, string compressionCodecClass) { self.RddProxy.SaveAsSequenceFile(path, compressionCodecClass); } @@ -887,12 +887,12 @@ public GroupByMergeHelper(Func mc) mergeCombiners = mc; } - public IEnumerable> Execute(int pid, IEnumerable> input) + public IEnumerable> Execute(int pid, IEnumerable> input) { return input.GroupBy( - kvp => kvp.Key, - kvp => kvp.Value, - (k, v) => new KeyValuePair(k, v.Aggregate(mergeCombiners)) + kvp => kvp.Item1, + kvp => kvp.Item2, + (k, v) => new Tuple(k, v.Aggregate(mergeCombiners)) ); } } @@ -908,12 +908,12 @@ public GroupByCombineHelper(Func createCombiner, Func mergeValue) this.mergeValue = mergeValue; } - public IEnumerable> Execute(int pid, IEnumerable> input) + public IEnumerable> Execute(int pid, IEnumerable> input) { return input.GroupBy( - kvp => kvp.Key, - kvp => kvp.Value, - (k, v) => new KeyValuePair(k, v.Aggregate(createCombiner(), mergeValue)) + kvp => kvp.Item1, + kvp => kvp.Item2, + (k, v) => new Tuple(k, v.Aggregate(createCombiner(), mergeValue)) ); } } @@ -932,7 +932,7 @@ public AddShuffleKeyHelper(int numPartitions, Func partitionFunc = this.partitionFunc = partitionFunc; } - public IEnumerable Execute(int split, IEnumerable> input) + public IEnumerable Execute(int split, IEnumerable> input) { // make sure that md5 is not null even if it is deseriazed in C# worker if (md5 == null) @@ -945,12 +945,12 @@ public IEnumerable Execute(int split, IEnumerable> in var ms = new MemoryStream(); if (partitionFunc == null) { - formatter.Serialize(ms, kv.Key); + formatter.Serialize(ms, kv.Item1); yield return md5.ComputeHash(ms.ToArray()).Take(8).ToArray(); } else { - long pid = (long)(partitionFunc(kv.Key) % numPartitions); + long pid = (long)(partitionFunc(kv.Item1) % numPartitions); yield return SerDe.ToBytes(pid); } ms = new MemoryStream(); @@ -969,12 +969,12 @@ public MapValuesHelper(Func f) func = f; } - public KeyValuePair Execute(KeyValuePair kvp) + public Tuple Execute(Tuple kvp) { - return new KeyValuePair + return new Tuple ( - kvp.Key, - func(kvp.Value) + kvp.Item1, + func(kvp.Item2) ); } } @@ -988,9 +988,9 @@ public FlatMapValuesHelper(Func> f) func = f; } - public IEnumerable> Execute(KeyValuePair kvp) + public IEnumerable> Execute(Tuple kvp) { - return func(kvp.Value).Select(v => new KeyValuePair(kvp.Key, v)); + return func(kvp.Item2).Select(v => new Tuple(kvp.Item1, v)); } } [Serializable] @@ -1001,9 +1001,9 @@ internal LookupHelper(K key) { this.key = key; } - internal bool Execute(KeyValuePair input) + internal bool Execute(Tuple input) { - return input.Key.ToString() == key.ToString(); + return input.Item1.ToString() == key.ToString(); } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs 
b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs index c7e75ee7..345672f7 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs @@ -189,7 +189,7 @@ public int GetNumPartitions() /// /// Return a new RDD by applying a function to each element of this RDD. /// - /// sc.Parallelize(new string[]{"b", "a", "c"}, 1).Map(x => new KeyValuePair<string, int>(x, 1)).Collect() + /// sc.Parallelize(new string[]{"b", "a", "c"}, 1).Map(x => new Tuple<string, int>(x, 1)).Collect() /// [('a', 1), ('b', 1), ('c', 1)] /// /// @@ -288,7 +288,7 @@ public RDD Filter(Func f) /// public RDD Distinct(int numPartitions = 0) { - return Map(x => new KeyValuePair(x, 0)).ReduceByKey((x, y) => x, numPartitions).Map(x => x.Key); + return Map(x => new Tuple(x, 0)).ReduceByKey((x, y) => x, numPartitions).Map(x => x.Item1); } /// @@ -461,9 +461,9 @@ public RDD Union(RDD other) /// public RDD Intersection(RDD other) { - return Map(v => new KeyValuePair(v, 0)) - .GroupWith(other.Map(v => new KeyValuePair(v, 0))) - .Filter(kv => kv.Value.Item1.Count > 0 && kv.Value.Item2.Count > 0) + return Map(v => new Tuple(v, 0)) + .GroupWith(other.Map(v => new Tuple(v, 0))) + .Filter(kv => kv.Item2.Item1.Count > 0 && kv.Item2.Item2.Count > 0) .Keys(); } @@ -533,7 +533,7 @@ public RDD> Cartesian(RDD other) /// /// /// - public RDD>> GroupBy(Func f, int numPartitions = 0) + public RDD>> GroupBy(Func f, int numPartitions = 0) { return KeyBy(f).GroupByKey(numPartitions); } @@ -639,14 +639,14 @@ public T TreeReduce(Func f, int depth = 2) if (depth < 1) throw new ArgumentException(string.Format("Depth cannot be smaller than 1 but got {0}.", depth)); - var zeroValue = new KeyValuePair(default(T), true); // Use the second entry to indicate whether this is a dummy value. + var zeroValue = new Tuple(default(T), true); // Use the second entry to indicate whether this is a dummy value. 
- Func, KeyValuePair, KeyValuePair> op = new TreeReduceHelper(f).Execute; + Func, Tuple, Tuple> op = new TreeReduceHelper(f).Execute; - var reduced = Map>(x => new KeyValuePair(x, false)).TreeAggregate(zeroValue, op, op, depth); - if (reduced.Value) + var reduced = Map>(x => new Tuple(x, false)).TreeAggregate(zeroValue, op, op, depth); + if (reduced.Item2) throw new ArgumentException("Cannot reduce empty RDD."); - return reduced.Key; + return reduced.Item1; } /// @@ -736,7 +736,7 @@ public U TreeAggregate(U zeroValue, Func seqOp, Func combOp numPartitions /= scale; partiallyAggregated = partiallyAggregated - .MapPartitionsWithIndex>(new TreeAggregateHelper(numPartitions).Execute) + .MapPartitionsWithIndex>(new TreeAggregateHelper(numPartitions).Execute) .ReduceByKey(combOp, numPartitions) .Values(); } @@ -762,9 +762,9 @@ public long Count() /// /// /// - public Dictionary CountByValue() + public IDictionary CountByValue() { - return Map>(v => new KeyValuePair(v, default(T))).CountByKey(); + return Map>(v => new Tuple(v, default(T))).CountByKey(); } /// @@ -872,9 +872,9 @@ public bool IsEmpty() /// public RDD Subtract(RDD other, int numPartitions = 0) { - return Map>(v => new KeyValuePair(v, default(T))).SubtractByKey + return Map>(v => new Tuple(v, default(T))).SubtractByKey ( - other.Map>(v => new KeyValuePair(v, default(T))), + other.Map>(v => new Tuple(v, default(T))), numPartitions ) .Keys(); @@ -890,9 +890,9 @@ public RDD Subtract(RDD other, int numPartitions = 0) /// /// /// - public RDD> KeyBy(Func f) + public RDD> KeyBy(Func f) { - return Map>(new KeyByHelper(f).Execute); + return Map>(new KeyByHelper(f).Execute); } /// @@ -950,9 +950,9 @@ public RDD Coalesce(int numPartitions, bool shuffle = false) /// /// /// - public RDD> Zip(RDD other) + public RDD> Zip(RDD other) { - return new RDD>(RddProxy.Zip(other.RddProxy), sparkContext, SerializedMode.Pair); + return new RDD>(RddProxy.Zip(other.RddProxy), sparkContext, SerializedMode.Pair); } /// @@ -971,7 +971,7 @@ public RDD> Zip(RDD other) /// /// /// - public RDD> ZipWithIndex() + public RDD> ZipWithIndex() { int num = GetNumPartitions(); int[] starts = new int[num]; @@ -981,7 +981,7 @@ public RDD> ZipWithIndex() for (int i = 0; i < nums.Length - 1; i++) starts[i + 1] = starts[i] + nums[i]; } - return MapPartitionsWithIndex>(new ZipWithIndexHelper(starts).Execute); + return MapPartitionsWithIndex>(new ZipWithIndexHelper(starts).Execute); } /// @@ -996,10 +996,10 @@ public RDD> ZipWithIndex() /// /// /// - public RDD> ZipWithUniqueId() + public RDD> ZipWithUniqueId() { int num = GetNumPartitions(); - return MapPartitionsWithIndex>(new ZipWithUniqueIdHelper(num).Execute); + return MapPartitionsWithIndex>(new ZipWithUniqueIdHelper(num).Execute); } /// @@ -1217,27 +1217,27 @@ internal IEnumerable Execute(int val, IEnumerable inputValues) { K key; dynamic value; - if (x is KeyValuePair) + if (x is Tuple) { - key = ((KeyValuePair)x).Key; - value = ((KeyValuePair)x).Value; + key = ((Tuple)x).Item1; + value = ((Tuple)x).Item2; } - else if (x is KeyValuePair) + else if (x is Tuple) { - key = ((KeyValuePair)x).Key; - value = ((KeyValuePair)x).Value; + key = ((Tuple)x).Item1; + value = ((Tuple)x).Item2; } - else if (x is KeyValuePair) + else if (x is Tuple) { - key = ((KeyValuePair)x).Key; - value = ((KeyValuePair)x).Value; + key = ((Tuple)x).Item1; + value = ((Tuple)x).Item2; } else { - key = ((KeyValuePair)x).Key; - value = ((KeyValuePair)x).Value; + key = ((Tuple)x).Item1; + value = ((Tuple)x).Item2; } - return new KeyValuePair(key, value); 
+ return new Tuple(key, value); }) .Cast(); } @@ -1397,9 +1397,9 @@ internal KeyByHelper(Func f) func = f; } - internal KeyValuePair Execute(T input) + internal Tuple Execute(T input) { - return new KeyValuePair(func(input), input); + return new Tuple(func(input), input); } } [Serializable] @@ -1426,9 +1426,9 @@ internal TreeAggregateHelper(int numPartitions) { this.numPartitions = numPartitions; } - internal IEnumerable> Execute(int pid, IEnumerable input) + internal IEnumerable> Execute(int pid, IEnumerable input) { - return input.Select(x => new KeyValuePair(pid % numPartitions, x)); + return input.Select(x => new Tuple(pid % numPartitions, x)); } } [Serializable] @@ -1439,14 +1439,14 @@ internal TreeReduceHelper(Func func) { this.func = func; } - internal KeyValuePair Execute(KeyValuePair x, KeyValuePair y) + internal Tuple Execute(Tuple x, Tuple y) { - if (x.Value) + if (x.Item2) return y; - else if (y.Value) + else if (y.Item2) return x; else - return new KeyValuePair(func(x.Key, y.Key), false); + return new Tuple(func(x.Item1, y.Item1), false); } } [Serializable] @@ -1498,12 +1498,12 @@ internal ZipWithUniqueIdHelper(int num) { this.num = num; } - internal IEnumerable> Execute(int pid, IEnumerable input) + internal IEnumerable> Execute(int pid, IEnumerable input) { long l = 0; foreach (var item in input) { - yield return new KeyValuePair(item, (l++) * num + pid); + yield return new Tuple(item, (l++) * num + pid); } } } @@ -1515,12 +1515,12 @@ internal ZipWithIndexHelper(int[] starts) { this.starts = starts; } - internal IEnumerable> Execute(int pid, IEnumerable input) + internal IEnumerable> Execute(int pid, IEnumerable input) { long l = 0; foreach (var item in input) { - yield return new KeyValuePair(item, (l++) + starts[pid]); + yield return new Tuple(item, (l++) + starts[pid]); } } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs index 442a11d4..f8a85865 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs @@ -297,7 +297,7 @@ public RDD SequenceFile(string filePath, string keyClass, string valueCl /// (None by default) /// Hadoop configuration, passed in as a dict (None by default) /// - public RDD NewAPIHadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass = null, string valueConverterClass = null, IEnumerable> conf = null) + public RDD NewAPIHadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass = null, string valueConverterClass = null, IEnumerable> conf = null) { return new RDD(SparkContextProxy.NewAPIHadoopFile(filePath, inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, conf, 1), this, SerializedMode.None); } @@ -316,7 +316,7 @@ public RDD NewAPIHadoopFile(string filePath, string inputFormatClass, st /// (None by default) /// Hadoop configuration, passed in as a dict (None by default) /// - public RDD NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass = null, string valueConverterClass = null, IEnumerable> conf = null) + public RDD NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass = null, string valueConverterClass = null, IEnumerable> conf = null) { return new RDD(SparkContextProxy.NewAPIHadoopRDD(inputFormatClass, keyClass, valueClass, keyConverterClass, 
valueConverterClass, conf, 1), this, SerializedMode.None); } @@ -337,7 +337,7 @@ public RDD NewAPIHadoopRDD(string inputFormatClass, string keyClass, str /// (None by default) /// Hadoop configuration, passed in as a dict (None by default) /// - public RDD HadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass = null, string valueConverterClass = null, IEnumerable> conf = null) + public RDD HadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass = null, string valueConverterClass = null, IEnumerable> conf = null) { return new RDD(SparkContextProxy.HadoopFile(filePath, inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, conf, 1), this, SerializedMode.None); } @@ -356,7 +356,7 @@ public RDD HadoopFile(string filePath, string inputFormatClass, string k /// (None by default) /// Hadoop configuration, passed in as a dict (None by default) /// - public RDD HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass = null, string valueConverterClass = null, IEnumerable> conf = null) + public RDD HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass = null, string valueConverterClass = null, IEnumerable> conf = null) { return new RDD(SparkContextProxy.HadoopRDD(inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, conf, 1), this, SerializedMode.None); } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridgeUtils.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridgeUtils.cs index cb250fee..d4e0002e 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridgeUtils.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/JvmBridgeUtils.cs @@ -7,6 +7,7 @@ using System.Text; using System.Threading.Tasks; using Microsoft.Spark.CSharp.Proxy.Ipc; +using System; namespace Microsoft.Spark.CSharp.Interop.Ipc { @@ -16,31 +17,31 @@ namespace Microsoft.Spark.CSharp.Interop.Ipc [ExcludeFromCodeCoverage] //IPC calls to JVM validated using validation-enabled samples - unit test coverage not reqiured internal static class JvmBridgeUtils { - public static JvmObjectReference GetJavaMap(IEnumerable> enumerable) + public static JvmObjectReference GetJavaMap(IEnumerable> enumerable) { var jmap = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { }); if (enumerable != null) { foreach (var item in enumerable) - SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jmap, "put", new object[] { item.Key, item.Value }); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jmap, "put", new object[] { item.Item1, item.Item2 }); } return jmap; } - public static JvmObjectReference GetJavaHashMap(IEnumerable> enumerable) + public static JvmObjectReference GetJavaHashMap(IEnumerable> enumerable) { var jmap = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.HashMap", new object[] { }); if (enumerable != null) { foreach (var item in enumerable) - SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jmap, "put", new object[] { item.Key, item.Value }); + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jmap, "put", new object[] { item.Item1, item.Item2 }); } return jmap; } - public static JvmObjectReference GetScalaMutableMap(Dictionary mapValues) + public static JvmObjectReference GetScalaMutableMap(IEnumerable> mapValues) { - var hashMapReference = GetJavaHashMap(mapValues.Select(kvp => kvp)); + var 
hashMapReference = GetJavaHashMap(mapValues); return new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.JvmBridgeUtils", "toMutableMap", new object[] { hashMapReference }).ToString()); } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs index ae8a18fa..e323cf47 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IRDDProxy.cs @@ -31,13 +31,13 @@ internal interface IRDDProxy string Name { get; } void SetName(string name); IRDDProxy RandomSampleWithRange(double lb, double ub, long seed); - IRDDProxy SampleByKey(bool withReplacement, Dictionary fractions, long seed); + IRDDProxy SampleByKey(bool withReplacement, IEnumerable> fractions, long seed); IRDDProxy Zip(IRDDProxy other); string ToDebugString(); - void SaveAsNewAPIHadoopDataset(IEnumerable> conf); - void SaveAsNewAPIHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf); - void SaveAsHadoopDataset(IEnumerable> conf); - void SaveAsHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf, string compressionCodecClass); + void SaveAsNewAPIHadoopDataset(IEnumerable> conf); + void SaveAsNewAPIHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf); + void SaveAsHadoopDataset(IEnumerable> conf); + void SaveAsHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf, string compressionCodecClass); void SaveAsSequenceFile(string path, string compressionCodecClass); void SaveAsTextFile(string path, string compressionCodecClass); long Count(); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs index d5b4ac0b..4764322e 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs @@ -33,10 +33,10 @@ internal interface ISparkContextProxy IRDDProxy WholeTextFiles(string filePath, int minPartitions); IRDDProxy BinaryFiles(string filePath, int minPartitions); IRDDProxy SequenceFile(string filePath, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, int minSplits, int batchSize); - IRDDProxy NewAPIHadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize); - IRDDProxy NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize); - IRDDProxy HadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize); - IRDDProxy HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize); + IRDDProxy NewAPIHadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize); + IRDDProxy NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> 
conf, int batchSize); + IRDDProxy HadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize); + IRDDProxy HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize); IRDDProxy CheckpointFile(string filePath); IRDDProxy Union(IEnumerable rdds); void AddFile(string path); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IStreamingContextProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IStreamingContextProxy.cs index 8af21d61..1e23f758 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IStreamingContextProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IStreamingContextProxy.cs @@ -20,9 +20,9 @@ internal interface IStreamingContextProxy void Checkpoint(string directory); IDStreamProxy TextFileStream(string directory); IDStreamProxy SocketTextStream(string hostname, int port, StorageLevelType storageLevelType); - IDStreamProxy KafkaStream(Dictionary topics, Dictionary kafkaParams, StorageLevelType storageLevelType); - IDStreamProxy DirectKafkaStream(List topics, Dictionary kafkaParams, Dictionary fromOffsets); - IDStreamProxy DirectKafkaStreamWithRepartition(List topics, Dictionary kafkaParams, Dictionary fromOffsets, int numPartitions); + IDStreamProxy KafkaStream(IEnumerable> topics, IEnumerable> kafkaParams, StorageLevelType storageLevelType); + IDStreamProxy DirectKafkaStream(List topics, IEnumerable> kafkaParams, IEnumerable> fromOffsets); + IDStreamProxy DirectKafkaStreamWithRepartition(List topics, IEnumerable> kafkaParams, IEnumerable> fromOffsets, int numPartitions); IDStreamProxy Union(IDStreamProxy firstDStreams, IDStreamProxy[] otherDStreams); void AwaitTermination(); void AwaitTermination(int timeout); @@ -31,7 +31,7 @@ internal interface IStreamingContextProxy IDStreamProxy CreateCSharpReducedWindowedDStream(IDStreamProxy jdstream, byte[] func, byte[] invFunc, int windowSeconds, int slideSeconds, string serializationMode); IDStreamProxy CreateCSharpStateDStream(IDStreamProxy jdstream, byte[] func, string className, string serializationMode, string serializationMode2); IDStreamProxy CreateConstantInputDStream(IRDDProxy rddProxy); - IDStreamProxy EventHubsUnionStream(Dictionary eventHubsParams, StorageLevelType storageLevelType); + IDStreamProxy EventHubsUnionStream(IEnumerable> eventHubsParams, StorageLevelType storageLevelType); } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs index 1433ac9d..9377c079 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/RDDIpcProxy.cs @@ -158,7 +158,7 @@ public void SetName(string name) SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "setName", new object[] { name }); } - public IRDDProxy SampleByKey(bool withReplacement, Dictionary fractions, long seed) + public IRDDProxy SampleByKey(bool withReplacement, IEnumerable> fractions, long seed) { var jfractions = JvmBridgeUtils.GetJavaMap(fractions) as JvmObjectReference; return new RDDIpcProxy(new JvmObjectReference((string) SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "sampleByKey", new object[] { withReplacement, jfractions, seed }))); @@ -176,25 +176,25 @@ public IRDDProxy Zip(IRDDProxy other) return new RDDIpcProxy(new 
JvmObjectReference((string) SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "zip", new object[] { (other as RDDIpcProxy).jvmRddReference }))); } - public void SaveAsNewAPIHadoopDataset(IEnumerable> conf) + public void SaveAsNewAPIHadoopDataset(IEnumerable> conf) { var jconf = JvmBridgeUtils.GetJavaMap(conf); SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsHadoopDataset", new object[] { jvmRddReference, false, jconf, null, null, true }); } - public void SaveAsNewAPIHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf) + public void SaveAsNewAPIHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf) { var jconf = JvmBridgeUtils.GetJavaMap(conf); SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsNewAPIHadoopFile", new object[] { jvmRddReference, false, path, outputFormatClass, keyClass, valueClass, null, null, jconf }); } - public void SaveAsHadoopDataset(IEnumerable> conf) + public void SaveAsHadoopDataset(IEnumerable> conf) { var jconf = JvmBridgeUtils.GetJavaMap(conf); SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsHadoopDataset", new object[] { jvmRddReference, false, jconf, null, null, false }); } - public void SaveAsHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf, string compressionCodecClass) + public void SaveAsHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf, string compressionCodecClass) { var jconf = JvmBridgeUtils.GetJavaMap(conf); SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsHadoopFile", new object[] { jvmRddReference, false, path, outputFormatClass, keyClass, valueClass, null, null, jconf, compressionCodecClass }); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs index c74df12d..bfdd686b 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs @@ -157,7 +157,7 @@ public IRDDProxy SequenceFile(string filePath, string keyClass, string valueClas return new RDDIpcProxy(jvmRddReference); } - public IRDDProxy NewAPIHadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) + public IRDDProxy NewAPIHadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) { var jconf = JvmBridgeUtils.GetJavaHashMap(conf); var jvmRddReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "newAPIHadoopFile", @@ -165,7 +165,7 @@ public IRDDProxy NewAPIHadoopFile(string filePath, string inputFormatClass, stri return new RDDIpcProxy(jvmRddReference); } - public IRDDProxy NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) + public IRDDProxy NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string 
valueConverterClass, IEnumerable> conf, int batchSize) { var jconf = JvmBridgeUtils.GetJavaHashMap(conf); var jvmRddReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "newAPIHadoopRDD", @@ -173,7 +173,7 @@ public IRDDProxy NewAPIHadoopRDD(string inputFormatClass, string keyClass, strin return new RDDIpcProxy(jvmRddReference); } - public IRDDProxy HadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) + public IRDDProxy HadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) { var jconf = JvmBridgeUtils.GetJavaHashMap(conf); var jvmRddReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "hadoopFile", @@ -181,7 +181,7 @@ public IRDDProxy HadoopFile(string filePath, string inputFormatClass, string key return new RDDIpcProxy(jvmRddReference); } - public IRDDProxy HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) + public IRDDProxy HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) { var jconf = JvmBridgeUtils.GetJavaHashMap(conf); var jvmRddReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "hadoopRDD", diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/StreamingContextIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/StreamingContextIpcProxy.cs index 91bae5e5..047169e3 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/StreamingContextIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/StreamingContextIpcProxy.cs @@ -173,7 +173,7 @@ public IDStreamProxy SocketTextStream(string hostname, int port, StorageLevelTyp return new DStreamIpcProxy(jstream); } - public IDStreamProxy KafkaStream(Dictionary topics, Dictionary kafkaParams, StorageLevelType storageLevelType) + public IDStreamProxy KafkaStream(IEnumerable> topics, IEnumerable> kafkaParams, StorageLevelType storageLevelType) { JvmObjectReference jtopics = JvmBridgeUtils.GetJavaMap(topics); JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap(kafkaParams); @@ -184,16 +184,16 @@ public IDStreamProxy KafkaStream(Dictionary topics, Dictionary topics, Dictionary kafkaParams, Dictionary fromOffsets) + public IDStreamProxy DirectKafkaStream(List topics, IEnumerable> kafkaParams, IEnumerable> fromOffsets) { JvmObjectReference jtopics = JvmBridgeUtils.GetJavaSet(topics); JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap(kafkaParams); var jTopicAndPartitions = fromOffsets.Select(x => - new KeyValuePair + new Tuple ( - SparkCLRIpcProxy.JvmBridge.CallConstructor("kafka.common.TopicAndPartition", new object[] { x.Key.Split(':')[0], int.Parse(x.Key.Split(':')[1]) }), - x.Value + SparkCLRIpcProxy.JvmBridge.CallConstructor("kafka.common.TopicAndPartition", new object[] { x.Item1.Split(':')[0], int.Parse(x.Item1.Split(':')[1]) }), + x.Item2 ) ); @@ -204,16 +204,16 @@ public IDStreamProxy DirectKafkaStream(List topics, Dictionary topics, Dictionary kafkaParams, Dictionary fromOffsets, int numPartitions) + 
public IDStreamProxy DirectKafkaStreamWithRepartition(List topics, IEnumerable> kafkaParams, IEnumerable> fromOffsets, int numPartitions) { JvmObjectReference jtopics = JvmBridgeUtils.GetJavaSet(topics); JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap(kafkaParams); var jTopicAndPartitions = fromOffsets.Select(x => - new KeyValuePair + new Tuple ( - SparkCLRIpcProxy.JvmBridge.CallConstructor("kafka.common.TopicAndPartition", new object[] { x.Key.Split(':')[0], int.Parse(x.Key.Split(':')[1]) }), - x.Value + SparkCLRIpcProxy.JvmBridge.CallConstructor("kafka.common.TopicAndPartition", new object[] { x.Item1.Split(':')[0], int.Parse(x.Item1.Split(':')[1]) }), + x.Item2 ) ); @@ -224,7 +224,7 @@ public IDStreamProxy DirectKafkaStreamWithRepartition(List topics, Dicti return new DStreamIpcProxy(jstream); } - public IDStreamProxy EventHubsUnionStream(Dictionary eventHubsParams, StorageLevelType storageLevelType) + public IDStreamProxy EventHubsUnionStream(IEnumerable> eventHubsParams, StorageLevelType storageLevelType) { JvmObjectReference eventHubsParamsReference = JvmBridgeUtils.GetScalaMutableMap(eventHubsParams); JvmObjectReference storageLevelTypeReference = SparkContextIpcProxy.GetJavaStorageLevel(storageLevelType); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/DStream.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/DStream.cs index 6cb6c49e..ed0634f6 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/DStream.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/DStream.cs @@ -153,7 +153,7 @@ public DStream MapPartitionsWithIndex(Func, IEnumerabl /// public DStream Reduce(Func f) { - return Map>(x => new KeyValuePair(string.Empty, x)).ReduceByKey(f, 1).Map(kvp => kvp.Value); + return Map>(x => new Tuple(string.Empty, x)).ReduceByKey(f, 1).Map(kvp => kvp.Item2); } /// @@ -235,9 +235,9 @@ public DStream Checkpoint(long intervalMs) /// distinct value in each RDD of this DStream. 
/// /// - public DStream> CountByValue(int numPartitions = 0) + public DStream> CountByValue(int numPartitions = 0) { - return Map(v => new KeyValuePair(v, 1L)).ReduceByKey((x, y) => x + y, numPartitions); + return Map(v => new Tuple(v, 1L)).ReduceByKey((x, y) => x + y, numPartitions); } /// @@ -427,9 +427,9 @@ public DStream Window(int windowSeconds, int slideSeconds) /// public DStream ReduceByWindow(Func reduceFunc, Func invReduceFunc, int windowSeconds, int slideSeconds = 0) { - var keyed = Map(v => new KeyValuePair(1, v)); + var keyed = Map(v => new Tuple(1, v)); var reduced = keyed.ReduceByKeyAndWindow(reduceFunc, invReduceFunc, windowSeconds, slideSeconds, 1); - return reduced.Map(kv => (T)kv.Value); + return reduced.Map(kv => (T)kv.Item2); } /// @@ -462,9 +462,9 @@ public DStream CountByWindow(int windowSeconds, int slideSeconds = 0) /// public DStream CountByValueAndWindow(int windowSeconds, int slideSeconds, int numPartitions = 0) { - var keyed = Map(v => new KeyValuePair(v, 1)); + var keyed = Map(v => new Tuple(v, 1)); var counted = keyed.ReduceByKeyAndWindow((x, y) => x + y, (x, y) => x - y, windowSeconds, slideSeconds, numPartitions); - return counted.Filter(kv => kv.Value > 0).Count(); + return counted.Filter(kv => kv.Item2 > 0).Count(); } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/EventHubsUtils.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/EventHubsUtils.cs index 53a46de0..44b959ec 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/EventHubsUtils.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/EventHubsUtils.cs @@ -35,7 +35,7 @@ public class EventHubsUtils /// /// Storage level, by default it is MEMORY_ONLY /// DStream with byte[] representing events from EventHub - public static DStream CreateUnionStream(StreamingContext ssc, Dictionary eventhubsParams, StorageLevelType storageLevelType = StorageLevelType.MEMORY_ONLY) + public static DStream CreateUnionStream(StreamingContext ssc, IEnumerable> eventhubsParams, StorageLevelType storageLevelType = StorageLevelType.MEMORY_ONLY) { return new DStream(ssc.streamingContextProxy.EventHubsUnionStream(eventhubsParams, storageLevelType), ssc, SerializedMode.None); } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/Kafka.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/Kafka.cs index 6f517810..c204f69d 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/Kafka.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/Kafka.cs @@ -25,7 +25,7 @@ public class KafkaUtils /// Dict of (topic_name -> numPartitions) to consume. Each partition is consumed in its own thread. /// Additional params for Kafka /// A DStream object - public static DStream> CreateStream(StreamingContext ssc, string zkQuorum, string groupId, Dictionary topics, Dictionary kafkaParams) + public static DStream> CreateStream(StreamingContext ssc, string zkQuorum, string groupId, IEnumerable> topics, IEnumerable> kafkaParams) { return CreateStream(ssc, zkQuorum, groupId, topics, kafkaParams, StorageLevelType.MEMORY_AND_DISK_SER_2); } @@ -40,19 +40,21 @@ public static DStream> CreateStream(StreamingContex /// Additional params for Kafka /// RDD storage level. 
/// A DStream object - public static DStream> CreateStream(StreamingContext ssc, string zkQuorum, string groupId, Dictionary topics, Dictionary kafkaParams, StorageLevelType storageLevelType) + public static DStream> CreateStream(StreamingContext ssc, string zkQuorum, string groupId, IEnumerable> topics, IEnumerable> kafkaParams, StorageLevelType storageLevelType) { if (kafkaParams == null) - kafkaParams = new Dictionary(); + kafkaParams = new List>(); + + var kafkaParamsMap = kafkaParams.ToDictionary(x => x.Item1, x => x.Item2); if (!string.IsNullOrEmpty(zkQuorum)) - kafkaParams["zookeeper.connect"] = zkQuorum; + kafkaParamsMap["zookeeper.connect"] = zkQuorum; if (groupId != null) - kafkaParams["group.id"] = groupId; - if (kafkaParams.ContainsKey("zookeeper.connection.timeout.ms")) - kafkaParams["zookeeper.connection.timeout.ms"] = "10000"; + kafkaParamsMap["group.id"] = groupId; + if (kafkaParamsMap.ContainsKey("zookeeper.connection.timeout.ms")) + kafkaParamsMap["zookeeper.connection.timeout.ms"] = "10000"; - return new DStream>(ssc.streamingContextProxy.KafkaStream(topics, kafkaParams, storageLevelType), ssc); + return new DStream>(ssc.streamingContextProxy.KafkaStream(topics, kafkaParamsMap.Select(x => Tuple.Create(x.Key, x.Value)), storageLevelType), ssc); } /// @@ -79,9 +81,9 @@ public static DStream> CreateStream(StreamingContex /// /// Per-topic/partition Kafka offsets defining the (inclusive) starting point of the stream. /// A DStream object - public static DStream> CreateDirectStream(StreamingContext ssc, List topics, Dictionary kafkaParams, Dictionary fromOffsets) + public static DStream> CreateDirectStream(StreamingContext ssc, List topics, IEnumerable> kafkaParams, IEnumerable> fromOffsets) { - return new DStream>(ssc.streamingContextProxy.DirectKafkaStream(topics, kafkaParams, fromOffsets), ssc, SerializedMode.Pair); + return new DStream>(ssc.streamingContextProxy.DirectKafkaStream(topics, kafkaParams, fromOffsets), ssc, SerializedMode.Pair); } /// @@ -116,9 +118,9 @@ public static DStream> CreateDirectStream(Streaming /// If numPartitions > 0, repartition using this parameter /// /// A DStream object - public static DStream> CreateDirectStreamWithRepartition(StreamingContext ssc, List topics, Dictionary kafkaParams, Dictionary fromOffsets, int numPartitions = -1) + public static DStream> CreateDirectStreamWithRepartition(StreamingContext ssc, List topics, IEnumerable> kafkaParams, IEnumerable> fromOffsets, int numPartitions = -1) { - return new DStream>(ssc.streamingContextProxy.DirectKafkaStreamWithRepartition(topics, kafkaParams, fromOffsets, numPartitions), ssc, SerializedMode.Pair); + return new DStream>(ssc.streamingContextProxy.DirectKafkaStreamWithRepartition(topics, kafkaParams, fromOffsets, numPartitions), ssc, SerializedMode.Pair); } } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/MapWithStateDStream.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/MapWithStateDStream.cs index d0907743..fe3de008 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/MapWithStateDStream.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/MapWithStateDStream.cs @@ -29,9 +29,9 @@ namespace Microsoft.Spark.CSharp.Streaming [Serializable] public class MapWithStateDStream : DStream { - internal DStream> snapshotsDStream; + internal DStream> snapshotsDStream; - internal MapWithStateDStream(DStream mappedDataDStream, DStream> snapshotsDStream) + internal MapWithStateDStream(DStream mappedDataDStream, DStream> snapshotsDStream) : 
base(mappedDataDStream.DStreamProxy, mappedDataDStream.streamingContext) { this.snapshotsDStream = snapshotsDStream; @@ -40,7 +40,7 @@ internal MapWithStateDStream(DStream mappedDataDStream, DStream /// Return a pair DStream where each RDD is the snapshot of the state of all the keys. /// - public DStream> StateSnapshots() + public DStream> StateSnapshots() { return snapshotsDStream; } @@ -87,11 +87,11 @@ public MapWithStateRDDRecord() { } - public MapWithStateRDDRecord(long t, IEnumerable> iter) + public MapWithStateRDDRecord(long t, IEnumerable> iter) { foreach (var p in iter) { - stateMap[p.Key] = new KeyedState(p.Value, t); + stateMap[p.Item1] = new KeyedState(p.Item2, t); } } } @@ -131,14 +131,14 @@ internal IEnumerable Execute(int pid, IEnumerable iter) while (enumerator.MoveNext()) { - KeyValuePair kv = enumerator.Current; + Tuple kv = enumerator.Current; KeyedState keyedState; - State wrappedState = stateRddRecord.stateMap.TryGetValue(kv.Key, out keyedState) ? new State(keyedState.state) : new State(default(S)); + State wrappedState = stateRddRecord.stateMap.TryGetValue(kv.Item1, out keyedState) ? new State(keyedState.state) : new State(default(S)); var mappedData = default(M); try { - mappedData = f(kv.Key, kv.Value, wrappedState); + mappedData = f(kv.Item1, kv.Item2, wrappedState); } catch (Exception e) { @@ -149,11 +149,11 @@ internal IEnumerable Execute(int pid, IEnumerable iter) if (wrappedState.removed) { - stateRddRecord.stateMap.Remove(kv.Key); + stateRddRecord.stateMap.Remove(kv.Item1); } else if (wrappedState.updated || wrappedState.defined) { - stateRddRecord.stateMap[kv.Key] = new KeyedState(wrappedState.state, ticks); + stateRddRecord.stateMap[kv.Item1] = new KeyedState(wrappedState.state, ticks); } } @@ -223,7 +223,7 @@ internal RDD Execute(double t, RDD stateRDD, RDD valu valuesRDD = prevFunc(t, valuesRDD); } - var values = valuesRDD.ConvertTo>().PartitionBy(stateSpec.numPartitions); + var values = valuesRDD.ConvertTo>().PartitionBy(stateSpec.numPartitions); if (stateRDD == null) { @@ -259,12 +259,12 @@ internal MapWithStateMapPartitionHelper(long ticks) this.ticks = ticks; } - internal IEnumerable> Execute(IEnumerable> iter) + internal IEnumerable> Execute(IEnumerable> iter) { return new[] {new MapWithStateRDDRecord(ticks, iter)}; } - internal IEnumerable> ExecuteWithoutInitialState(IEnumerable> iter) + internal IEnumerable> ExecuteWithoutInitialState(IEnumerable> iter) { return new[] { new MapWithStateRDDRecord() }; } @@ -283,7 +283,7 @@ public class StateSpec internal Func, M> mappingFunction; internal int numPartitions; internal TimeSpan idleDuration = TimeSpan.FromTicks(0); - internal RDD> initialState = null; + internal RDD> initialState = null; /// /// Create a StateSpec for setting all the specifications of the `mapWithState` operation on a pair DStream. 
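To illustrate the shape of the ported pair-DStream API, here is a minimal word-count sketch, assuming an existing DStream<string> named lines and only the extension methods touched by this patch (Map, FlatMap, ReduceByKey, UpdateStateByKey); the lines stream and the word-count scenario are hypothetical, not part of this change:

    // Pairs are now Tuple<K, V> rather than KeyValuePair<K, V>.
    DStream<Tuple<string, int>> pairs = lines
        .FlatMap(l => l.Split(' '))
        .Map(w => new Tuple<string, int>(w, 1));          // was: new KeyValuePair<string, int>(w, 1)

    // Per-batch counts, then running totals via UpdateStateByKey (System.Linq assumed for Sum()).
    DStream<Tuple<string, int>> counts = pairs.ReduceByKey((x, y) => x + y);
    DStream<Tuple<string, int>> totals =
        counts.UpdateStateByKey<string, int, int>((newValues, state) => state + newValues.Sum());

    // Values are read through Item1/Item2 instead of Key/Value.
    DStream<string> formatted = totals.Map(kv => kv.Item1 + ": " + kv.Item2);

The same Item1/Item2 access pattern applies to the state snapshots exposed by MapWithStateDStream.StateSnapshots() above.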
@@ -318,7 +318,7 @@ public StateSpec Timeout(TimeSpan idleDuration) return this; } - public StateSpec InitialState(RDD> initialState) + public StateSpec InitialState(RDD> initialState) { this.initialState = initialState; return this; diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/PairDStreamFunctions.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/PairDStreamFunctions.cs index 409a8b47..8a9244d2 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/PairDStreamFunctions.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Streaming/PairDStreamFunctions.cs @@ -15,7 +15,7 @@ namespace Microsoft.Spark.CSharp.Streaming { /// - /// operations only available to KeyValuePair RDD + /// operations only available to Tuple RDD /// public static class PairDStreamFunctions { @@ -28,7 +28,7 @@ public static class PairDStreamFunctions /// /// /// - public static DStream> ReduceByKey(this DStream> self, Func reduceFunc, int numPartitions = 0) + public static DStream> ReduceByKey(this DStream> self, Func reduceFunc, int numPartitions = 0) { return self.CombineByKey(() => default(V), reduceFunc, reduceFunc, numPartitions); } @@ -45,8 +45,8 @@ public static DStream> ReduceByKey(this DStream /// /// - public static DStream> CombineByKey( - this DStream> self, + public static DStream> CombineByKey( + this DStream> self, Func createCombiner, Func mergeValue, Func mergeCombiners, @@ -55,7 +55,7 @@ public static DStream> CombineByKey( if (numPartitions <= 0) numPartitions = self.streamingContext.SparkContext.DefaultParallelism; - return self.Transform>(new CombineByKeyHelper(createCombiner, mergeValue, mergeCombiners, numPartitions).Execute); + return self.Transform>(new CombineByKeyHelper(createCombiner, mergeValue, mergeCombiners, numPartitions).Execute); } /// @@ -66,12 +66,12 @@ public static DStream> CombineByKey( /// /// /// - public static DStream> PartitionBy(this DStream> self, int numPartitions = 0) + public static DStream> PartitionBy(this DStream> self, int numPartitions = 0) { if (numPartitions <= 0) numPartitions = self.streamingContext.SparkContext.DefaultParallelism; - return self.Transform>(new PartitionByHelper(numPartitions).Execute); + return self.Transform>(new PartitionByHelper(numPartitions).Execute); } /// @@ -84,7 +84,7 @@ public static DStream> PartitionBy(this DStream /// /// - public static DStream> MapValues(this DStream> self, Func func) + public static DStream> MapValues(this DStream> self, Func func) { return self.Map(new MapValuesHelper(func).Execute, true); } @@ -99,7 +99,7 @@ public static DStream> MapValues(this DStream /// /// - public static DStream> FlatMapValues(this DStream> self, Func> func) + public static DStream> FlatMapValues(this DStream> self, Func> func) { return self.FlatMap(new FlatMapValuesHelper(func).Execute, true); } @@ -112,9 +112,9 @@ public static DStream> FlatMapValues(this DStream /// /// - public static DStream>> GroupByKey(this DStream> self, int numPartitions = 0) + public static DStream>> GroupByKey(this DStream> self, int numPartitions = 0) { - return self.Transform>>(new GroupByKeyHelper(numPartitions).Execute); + return self.Transform>>(new GroupByKeyHelper(numPartitions).Execute); } /// @@ -128,12 +128,12 @@ public static DStream>> GroupByKey(this DStream /// /// - public static DStream, List>>> GroupWith(this DStream> self, DStream> other, int numPartitions = 0) + public static DStream, List>>> GroupWith(this DStream> self, DStream> other, int numPartitions = 0) { if (numPartitions <= 0) numPartitions = 
self.streamingContext.SparkContext.DefaultParallelism; - return self.TransformWith, KeyValuePair, List>>>(new GroupWithHelper(numPartitions).Execute, other); + return self.TransformWith, Tuple, List>>>(new GroupWithHelper(numPartitions).Execute, other); } /// @@ -147,12 +147,12 @@ public static DStream, List>>> GroupWith /// /// - public static DStream>> Join(this DStream> self, DStream> other, int numPartitions = 0) + public static DStream>> Join(this DStream> self, DStream> other, int numPartitions = 0) { if (numPartitions <= 0) numPartitions = self.streamingContext.SparkContext.DefaultParallelism; - return self.TransformWith, KeyValuePair>>(new JoinHelper(numPartitions).Execute, other); + return self.TransformWith, Tuple>>(new JoinHelper(numPartitions).Execute, other); } /// @@ -166,12 +166,12 @@ public static DStream>> Join(this DStream /// /// - public static DStream>>> LeftOuterJoin(this DStream> self, DStream> other, int numPartitions = 0) + public static DStream>>> LeftOuterJoin(this DStream> self, DStream> other, int numPartitions = 0) { if (numPartitions <= 0) numPartitions = self.streamingContext.SparkContext.DefaultParallelism; - return self.TransformWith, KeyValuePair>>>(new LeftOuterJoinHelper(numPartitions).Execute, other); + return self.TransformWith, Tuple>>>(new LeftOuterJoinHelper(numPartitions).Execute, other); } /// @@ -185,12 +185,12 @@ public static DStream>>> LeftOuterJoin /// /// - public static DStream, W>>> RightOuterJoin(this DStream> self, DStream> other, int numPartitions = 0) + public static DStream, W>>> RightOuterJoin(this DStream> self, DStream> other, int numPartitions = 0) { if (numPartitions <= 0) numPartitions = self.streamingContext.SparkContext.DefaultParallelism; - return self.TransformWith, KeyValuePair, W>>>(new RightOuterJoinHelper(numPartitions).Execute, other); + return self.TransformWith, Tuple, W>>>(new RightOuterJoinHelper(numPartitions).Execute, other); } /// @@ -204,12 +204,12 @@ public static DStream, W>>> RightOuterJoin /// /// - public static DStream, Option>>> FullOuterJoin(this DStream> self, DStream> other, int numPartitions = 0) + public static DStream, Option>>> FullOuterJoin(this DStream> self, DStream> other, int numPartitions = 0) { if (numPartitions <= 0) numPartitions = self.streamingContext.SparkContext.DefaultParallelism; - return self.TransformWith, KeyValuePair, Option>>>(new FullOuterJoinHelper(numPartitions).Execute, other); + return self.TransformWith, Tuple, Option>>>(new FullOuterJoinHelper(numPartitions).Execute, other); } /// @@ -227,7 +227,7 @@ public static DStream, Option>>> FullOuterJoi /// /// Number of partitions of each RDD in the new DStream. /// - public static DStream>> GroupByKeyAndWindow(this DStream> self, + public static DStream>> GroupByKeyAndWindow(this DStream> self, int windowSeconds, int slideSeconds, int numPartitions = 0) { var ls = self.MapValues(x => new List { x }); @@ -259,13 +259,13 @@ public static DStream>> GroupByKeyAndWindow /// number of partitions of each RDD in the new DStream. 
/// function to filter expired key-value pairs; only pairs that satisfy the function are retained set this to null if you do not want to filter /// - public static DStream> ReduceByKeyAndWindow(this DStream> self, + public static DStream> ReduceByKeyAndWindow(this DStream> self, Func reduceFunc, Func invReduceFunc, int windowSeconds, int slideSeconds = 0, int numPartitions = 0, - Func, bool> filterFunc = null) + Func, bool> filterFunc = null) { self.ValidateWindowParam(windowSeconds, slideSeconds); @@ -275,7 +275,7 @@ public static DStream> ReduceByKeyAndWindow(this DStrea // dstream to be transformed by substracting old RDDs and adding new RDDs based on the window var reduced = self.ReduceByKey(reduceFunc, numPartitions); - Func, RDD> prevFunc = reduced.Piplinable ? (reduced as TransformedDStream>).func : null; + Func, RDD> prevFunc = reduced.Piplinable ? (reduced as TransformedDStream>).func : null; var helper = new ReduceByKeyAndWindowHelper(reduceFunc, invReduceFunc, numPartitions, filterFunc, prevFunc); // function to reduce the new values that entered the window (e.g., adding new counts) @@ -295,7 +295,7 @@ public static DStream> ReduceByKeyAndWindow(this DStrea formatter.Serialize(stream, invReduceF); } - return new DStream>( + return new DStream>( SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpReducedWindowedDStream( reduced.Piplinable ? reduced.prevDStreamProxy : reduced.DStreamProxy, stream.ToArray(), @@ -321,7 +321,7 @@ public static DStream> ReduceByKeyAndWindow(this DStrea /// /// /// - public static DStream> UpdateStateByKey(this DStream> self, + public static DStream> UpdateStateByKey(this DStream> self, Func, S, S> updateFunc, int numPartitions = 0) { @@ -339,11 +339,11 @@ public static DStream> UpdateStateByKey(this DStream /// State update function - IEnumerable[K, [newValues, oldState]] => IEnumerable[K, newState] /// /// - public static DStream> UpdateStateByKey(this DStream> self, - Func, S>>>, IEnumerable>> updateFunc, + public static DStream> UpdateStateByKey(this DStream> self, + Func, S>>>, IEnumerable>> updateFunc, int numPartitions = 0) { - return UpdateStateByKey(self, new MapPartitionsHelper, S>>, KeyValuePair>(updateFunc).Execute, numPartitions); + return UpdateStateByKey(self, new MapPartitionsHelper, S>>, Tuple>(updateFunc).Execute, numPartitions); } /// @@ -357,14 +357,14 @@ public static DStream> UpdateStateByKey(this DStream /// State update function - (pid, IEnumerable[K, [newValues, oldState]]) => IEnumerable[K, newState] /// /// - public static DStream> UpdateStateByKey(this DStream> self, - Func, S>>>, IEnumerable>> updateFunc, + public static DStream> UpdateStateByKey(this DStream> self, + Func, S>>>, IEnumerable>> updateFunc, int numPartitions = 0) { if (numPartitions <= 0) numPartitions = self.streamingContext.SparkContext.DefaultParallelism; - Func, RDD> prevFunc = self.Piplinable ? (self as TransformedDStream>).func : null; + Func, RDD> prevFunc = self.Piplinable ? (self as TransformedDStream>).func : null; Func, RDD, RDD> func = new UpdateStateByKeysHelper(updateFunc, prevFunc, numPartitions).Execute; @@ -372,7 +372,7 @@ public static DStream> UpdateStateByKey(this DStream var stream = new MemoryStream(); formatter.Serialize(stream, func); - return new DStream>(SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpStateDStream( + return new DStream>(SparkCLREnvironment.SparkCLRProxy.StreamingContextProxy.CreateCSharpStateDStream( self.Piplinable ? 
self.prevDStreamProxy : self.DStreamProxy, stream.ToArray(), "CSharpStateDStream", @@ -385,14 +385,14 @@ public static DStream> UpdateStateByKey(this DStream /// Return a new "state" DStream where the state for each key is updated by applying /// the given function on the previous state of the key and the new values of the key. /// - public static MapWithStateDStream MapWithState(this DStream> self, StateSpec stateSpec) + public static MapWithStateDStream MapWithState(this DStream> self, StateSpec stateSpec) { if (stateSpec.numPartitions <= 0) { stateSpec = stateSpec.NumPartitions(self.streamingContext.SparkContext.DefaultParallelism); } - Func, RDD> prevFunc = self.Piplinable ? (self as TransformedDStream>).func : null; + Func, RDD> prevFunc = self.Piplinable ? (self as TransformedDStream>).func : null; Func, RDD, RDD> func = new MapWithStateHelper(prevFunc, stateSpec).Execute; @@ -409,8 +409,8 @@ public static MapWithStateDStream MapWithState(this DStr self.streamingContext); DStream mappedDataDStream = mapWithStateDStream.FlatMap(r => r.mappedData); - DStream> snapshotsDStream = mapWithStateDStream.FlatMap( - r => r.stateMap.Select(entry => new KeyValuePair(entry.Key, entry.Value.state))); + DStream> snapshotsDStream = mapWithStateDStream.FlatMap( + r => r.stateMap.Select(entry => new Tuple(entry.Key, entry.Value.state))); return new MapWithStateDStream(mappedDataDStream, snapshotsDStream); } @@ -438,7 +438,7 @@ internal CombineByKeyHelper(Func createCombiner, Func mergeValue, Fu this.numPartitions = numPartitions; } - internal RDD> Execute(RDD> rdd) + internal RDD> Execute(RDD> rdd) { return rdd.CombineByKey(createCombiner, mergeValue, mergeCombiners, numPartitions); } @@ -453,7 +453,7 @@ internal PartitionByHelper(int numPartitions = 0) this.numPartitions = numPartitions; } - internal RDD> Execute(RDD> rdd) + internal RDD> Execute(RDD> rdd) { return rdd.PartitionBy(numPartitions); } @@ -468,9 +468,9 @@ internal MapValuesHelper(Func f) func = f; } - internal KeyValuePair Execute(KeyValuePair kvp) + internal Tuple Execute(Tuple kvp) { - return new KeyValuePair(kvp.Key, func(kvp.Value)); + return new Tuple(kvp.Item1, func(kvp.Item2)); } } @@ -483,9 +483,9 @@ internal FlatMapValuesHelper(Func> f) func = f; } - internal IEnumerable> Execute(KeyValuePair kvp) + internal IEnumerable> Execute(Tuple kvp) { - return func(kvp.Value).Select(v => new KeyValuePair(kvp.Key, v)); + return func(kvp.Item2).Select(v => new Tuple(kvp.Item1, v)); } } @@ -498,7 +498,7 @@ internal GroupByKeyHelper(int numPartitions = 0) this.numPartitions = numPartitions; } - internal RDD>> Execute(RDD> rdd) + internal RDD>> Execute(RDD> rdd) { return rdd.GroupByKey(numPartitions); } @@ -513,7 +513,7 @@ internal GroupWithHelper(int numPartitions) this.numPartitions = numPartitions; } - internal RDD, List>>> Execute(RDD> l, RDD> r) + internal RDD, List>>> Execute(RDD> l, RDD> r) { return l.GroupWith(r, numPartitions); } @@ -528,7 +528,7 @@ internal JoinHelper(int numPartitions) this.numPartitions = numPartitions; } - internal RDD>> Execute(RDD> l, RDD> r) + internal RDD>> Execute(RDD> l, RDD> r) { return l.Join(r, numPartitions); } @@ -543,7 +543,7 @@ internal LeftOuterJoinHelper(int numPartitions) this.numPartitions = numPartitions; } - internal RDD>>> Execute(RDD> l, RDD> r) + internal RDD>>> Execute(RDD> l, RDD> r) { return l.LeftOuterJoin(r, numPartitions); } @@ -558,7 +558,7 @@ internal RightOuterJoinHelper(int numPartitions) this.numPartitions = numPartitions; } - internal RDD, W>>> Execute(RDD> l, RDD> r) + internal 
RDD, W>>> Execute(RDD> l, RDD> r) { return l.RightOuterJoin(r, numPartitions); } @@ -573,7 +573,7 @@ internal FullOuterJoinHelper(int numPartitions) this.numPartitions = numPartitions; } - internal RDD, Option>>> Execute(RDD> l, RDD> r) + internal RDD, Option>>> Execute(RDD> l, RDD> r) { return l.FullOuterJoin(r, numPartitions); } @@ -585,13 +585,13 @@ internal class ReduceByKeyAndWindowHelper private readonly Func reduceFunc; private readonly Func invReduceFunc; private readonly int numPartitions; - private readonly Func, bool> filterFunc; + private readonly Func, bool> filterFunc; private readonly Func, RDD> prevFunc; internal ReduceByKeyAndWindowHelper(Func reduceF, Func invReduceF, int numPartitions, - Func, bool> filterF, + Func, bool> filterF, Func, RDD> prevF) { reduceFunc = reduceF; @@ -606,13 +606,13 @@ internal RDD Reduce(double t, RDD a, RDD b) if (prevFunc != null) b = prevFunc(t, b); - var r = b.ConvertTo>().ReduceByKey(reduceFunc); + var r = b.ConvertTo>().ReduceByKey(reduceFunc); if (a != null) { if (prevFunc != null) a = prevFunc(t, a); - r = a.ConvertTo>().Union(r).ReduceByKey(reduceFunc); + r = a.ConvertTo>().Union(r).ReduceByKey(reduceFunc); } if (filterFunc != null) r.Filter(filterFunc); @@ -627,8 +627,8 @@ internal RDD InvReduce(double t, RDD a, RDD b) b = prevFunc(t, b); } - var rddb = b.ConvertTo>().ReduceByKey(reduceFunc); - var rdda = a.ConvertTo>(); + var rddb = b.ConvertTo>().ReduceByKey(reduceFunc); + var rdda = a.ConvertTo>(); var joined = rdda.Join(rddb, numPartitions); var r = joined.MapValues, V>(kv => kv.Item2 != null ? invReduceFunc(kv.Item1, kv.Item2) : kv.Item1); return r.ConvertTo(); @@ -645,20 +645,20 @@ internal UpdateStateByKeyHelper(Func, S, S> f) func = f; } - internal IEnumerable> Execute(IEnumerable, S>>> input) + internal IEnumerable> Execute(IEnumerable, S>>> input) { - return input.Select(x => new KeyValuePair(x.Key, func(x.Value.Item1, x.Value.Item2))); + return input.Select(x => new Tuple(x.Item1, func(x.Item2.Item1, x.Item2.Item2))); } } [Serializable] internal class UpdateStateByKeysHelper { - private readonly Func, S>>>, IEnumerable>> func; + private readonly Func, S>>>, IEnumerable>> func; private readonly Func, RDD> prevFunc; private readonly int numPartitions; internal UpdateStateByKeysHelper( - Func, S>>>, IEnumerable>> f, + Func, S>>>, IEnumerable>> f, Func, RDD> prevF, int numPartitions) { func = f; @@ -668,13 +668,13 @@ internal UpdateStateByKeysHelper( internal RDD Execute(double t, RDD stateRDD, RDD valuesRDD) { - RDD> state = null; - RDD, S>>> g = null; + RDD> state = null; + RDD, S>>> g = null; if (prevFunc != null) valuesRDD = prevFunc(t, valuesRDD); - var values = valuesRDD.ConvertTo>(); + var values = valuesRDD.ConvertTo>(); if (stateRDD == null) { @@ -682,13 +682,13 @@ internal RDD Execute(double t, RDD stateRDD, RDD valu } else { - state = stateRDD.ConvertTo>(); + state = stateRDD.ConvertTo>(); values = values.PartitionBy(numPartitions); state.partitioner = values.partitioner; g = state.GroupWith(values, numPartitions).MapValues(x => new Tuple, S>(new List(x.Item2), x.Item1.Count > 0 ? 
x.Item1[0] : default(S))); } - state = g.MapPartitionsWithIndex((pid, iter) => func(pid, iter), true).Filter(x => x.Value != null); + state = g.MapPartitionsWithIndex((pid, iter) => func(pid, iter), true).Filter(x => x.Item2 != null); return state.ConvertTo(); } diff --git a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML index 4ea048c2..a68abc7e 100644 --- a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML +++ b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML @@ -407,7 +407,7 @@ a function to sort the key. - + Sorts this RDD, which is assumed to consist of KeyValuePair pairs. @@ -418,7 +418,7 @@ - + Sorts this RDD, which is assumed to consist of KeyValuePairs. If key is type of string, case is sensitive. @@ -449,16 +449,16 @@ - operations only available to KeyValuePair RDD + operations only available to Tuple RDD See also http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.rdd.PairRDDFunctions - + Return the key-value pairs in this RDD to the master as a dictionary. - var m = sc.Parallelize(new[] { new KeyValuePair<int, int>(1, 2), new KeyValuePair<int, int>(3, 4) }, 1).CollectAsMap() + var m = sc.Parallelize(new[] { new Tuple<int, int>(1, 2), new Tuple<int, int>(3, 4) }, 1).CollectAsMap() m[1] 2 m[3] @@ -470,11 +470,11 @@ - + Return an RDD with the keys of each tuple. - >>> m = sc.Parallelize(new[] { new KeyValuePair<int, int>(1, 2), new KeyValuePair<int, int>(3, 4) }, 1).Keys().Collect() + >>> m = sc.Parallelize(new[] { new Tuple<int, int>(1, 2), new Tuple<int, int>(3, 4) }, 1).Keys().Collect() [1, 3] @@ -482,11 +482,11 @@ - + Return an RDD with the values of each tuple. - >>> m = sc.Parallelize(new[] { new KeyValuePair<int, int>(1, 2), new KeyValuePair<int, int>(3, 4) }, 1).Values().Collect() + >>> m = sc.Parallelize(new[] { new Tuple<int, int>(1, 2), new Tuple<int, int>(3, 4) }, 1).Values().Collect() [2, 4] @@ -495,7 +495,7 @@ - + Merge the values for each key using an associative reduce function. @@ -507,9 +507,9 @@ sc.Parallelize(new[] { - new KeyValuePair<string, int>("a", 1), - new KeyValuePair<string, int>("b", 1), - new KeyValuePair<string, int>("a", 1) + new Tuple<string, int>("a", 1), + new Tuple<string, int>("b", 1), + new Tuple<string, int>("a", 1) }, 2) .ReduceByKey((x, y) => x + y).Collect() @@ -523,7 +523,7 @@ - + Merge the values for each key using an associative reduce function, but return the results immediately to the master as a dictionary. @@ -533,9 +533,9 @@ sc.Parallelize(new[] { - new KeyValuePair<string, int>("a", 1), - new KeyValuePair<string, int>("b", 1), - new KeyValuePair<string, int>("a", 1) + new Tuple<string, int>("a", 1), + new Tuple<string, int>("b", 1), + new Tuple<string, int>("a", 1) }, 2) .ReduceByKeyLocally((x, y) => x + y).Collect() @@ -548,15 +548,15 @@ - + Count the number of elements for each key, and return the result to the master as a dictionary. sc.Parallelize(new[] { - new KeyValuePair<string, int>("a", 1), - new KeyValuePair<string, int>("b", 1), - new KeyValuePair<string, int>("a", 1) + new Tuple<string, int>("a", 1), + new Tuple<string, int>("b", 1), + new Tuple<string, int>("a", 1) }, 2) .CountByKey((x, y) => x + y).Collect() @@ -568,7 +568,7 @@ - + Return an RDD containing all pairs of elements with matching keys in this RDD and . @@ -577,9 +577,9 @@ Performs a hash join across the cluster. 
var l = sc.Parallelize( - new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 1); + new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); var r = sc.Parallelize( - new[] { new KeyValuePair<string, int>("a", 2), new KeyValuePair<string, int>("a", 3) }, 1); + new[] { new Tuple<string, int>("a", 2), new Tuple<string, int>("a", 3) }, 1); var m = l.Join(r, 2).Collect(); [('a', (1, 2)), ('a', (1, 3))] @@ -593,7 +593,7 @@ - + Perform a left outer join of this RDD and . @@ -604,9 +604,9 @@ Hash-partitions the resulting RDD into the given number of partitions. var l = sc.Parallelize( - new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 1); + new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); var r = sc.Parallelize( - new[] { new KeyValuePair<string, int>("a", 2) }, 1); + new[] { new Tuple<string, int>("a", 2) }, 1); var m = l.LeftOuterJoin(r).Collect(); [('a', (1, 2)), ('b', (4, Option))] @@ -620,7 +620,7 @@ - + Perform a right outer join of this RDD and . @@ -631,9 +631,9 @@ Hash-partitions the resulting RDD into the given number of partitions. var l = sc.Parallelize( - new[] { new KeyValuePair<string, int>("a", 2) }, 1); + new[] { new Tuple<string, int>("a", 2) }, 1); var r = sc.Parallelize( - new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 1); + new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); var m = l.RightOuterJoin(r).Collect(); [('a', (2, 1)), ('b', (Option, 4))] @@ -647,7 +647,7 @@ - + Perform a full outer join of this RDD and . @@ -662,9 +662,9 @@ Hash-partitions the resulting RDD into the given number of partitions. var l = sc.Parallelize( - new[] { new KeyValuePair<string, int>("a", 1), KeyValuePair<string, int>("b", 4) }, 1); + new[] { new Tuple<string, int>("a", 1), Tuple<string, int>("b", 4) }, 1); var r = sc.Parallelize( - new[] { new KeyValuePair<string, int>("a", 2), new KeyValuePair<string, int>("c", 8) }, 1); + new[] { new Tuple<string, int>("a", 2), new Tuple<string, int>("c", 8) }, 1); var m = l.FullOuterJoin(r).Collect(); [('a', (1, 2)), ('b', (4, None)), ('c', (None, 8))] @@ -678,18 +678,18 @@ - + Return a copy of the RDD partitioned using the specified partitioner. - sc.Parallelize(new[] { 1, 2, 3, 4, 2, 4, 1 }, 1).Map(x => new KeyValuePair<int, int>(x, x)).PartitionBy(3).Glom().Collect() + sc.Parallelize(new[] { 1, 2, 3, 4, 2, 4, 1 }, 1).Map(x => new Tuple<int, int>(x, x)).PartitionBy(3).Glom().Collect() - + # TODO: add control over map-side aggregation Generic function to combine the elements for each key using a custom @@ -711,9 +711,9 @@ sc.Parallelize( new[] { - new KeyValuePair<string, int>("a", 1), - new KeyValuePair<string, int>("b", 1), - new KeyValuePair<string, int>("a", 1) + new Tuple<string, int>("a", 1), + new Tuple<string, int>("b", 1), + new Tuple<string, int>("a", 1) }, 2) .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() @@ -729,7 +729,7 @@ - + Aggregate the values of each key, using given combine functions and a neutral "zero value". 
This function can return a different result type, U, than the type @@ -742,9 +742,9 @@ sc.Parallelize( new[] { - new KeyValuePair<string, int>("a", 1), - new KeyValuePair<string, int>("b", 1), - new KeyValuePair<string, int>("a", 1) + new Tuple<string, int>("a", 1), + new Tuple<string, int>("b", 1), + new Tuple<string, int>("a", 1) }, 2) .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() @@ -760,7 +760,7 @@ - + Merge the values for each key using an associative function "func" and a neutral "zeroValue" which may be added to the result an @@ -770,9 +770,9 @@ sc.Parallelize( new[] { - new KeyValuePair<string, int>("a", 1), - new KeyValuePair<string, int>("b", 1), - new KeyValuePair<string, int>("a", 1) + new Tuple<string, int>("a", 1), + new Tuple<string, int>("b", 1), + new Tuple<string, int>("a", 1) }, 2) .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() @@ -786,7 +786,7 @@ - + Group the values for each key in the RDD into a single sequence. Hash-partitions the resulting RDD with numPartitions partitions. @@ -798,9 +798,9 @@ sc.Parallelize( new[] { - new KeyValuePair<string, int>("a", 1), - new KeyValuePair<string, int>("b", 1), - new KeyValuePair<string, int>("a", 1) + new Tuple<string, int>("a", 1), + new Tuple<string, int>("b", 1), + new Tuple<string, int>("a", 1) }, 2) .GroupByKey().MapValues(l => string.Join(" ", l)).Collect() @@ -813,7 +813,7 @@ - + Pass each value in the key-value pair RDD through a map function without changing the keys; this also retains the original RDD's partitioning. @@ -821,8 +821,8 @@ sc.Parallelize( new[] { - new KeyValuePair<string, string[]>("a", new[]{"apple", "banana", "lemon"}), - new KeyValuePair<string, string[]>("b", new[]{"grapes"}) + new Tuple<string, string[]>("a", new[]{"apple", "banana", "lemon"}), + new Tuple<string, string[]>("b", new[]{"grapes"}) }, 2) .MapValues(x => x.Length).Collect() @@ -836,7 +836,7 @@ - + Pass each value in the key-value pair RDD through a flatMap function without changing the keys; this also retains the original RDD's partitioning. @@ -844,8 +844,8 @@ x = sc.Parallelize( new[] { - new KeyValuePair<string, string[]>("a", new[]{"x", "y", "z"}), - new KeyValuePair<string, string[]>("b", new[]{"p", "r"}) + new Tuple<string, string[]>("a", new[]{"x", "y", "z"}), + new Tuple<string, string[]>("b", new[]{"p", "r"}) }, 2) .FlatMapValues(x => x).Collect() @@ -859,9 +859,9 @@ - + - explicitly convert KeyValuePair<K, V> to KeyValuePair<K, dynamic> + explicitly convert Tuple<K, V> to Tuple<K, dynamic> since they are incompatibles types unlike V to dynamic @@ -872,13 +872,13 @@ - + For each key k in this RDD or , return a resulting RDD that contains a tuple with the list of values for that key in this RDD as well as . 
- var x = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 2); - var y = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 2) }, 1); + var x = sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 2); + var y = sc.Parallelize(new[] { new Tuple<string, int>("a", 2) }, 1); x.GroupWith(y).Collect(); [('a', ([1], [2])), ('b', ([4], []))] @@ -892,11 +892,11 @@ - + - var x = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 5), new KeyValuePair<string, int>("b", 6) }, 2); - var y = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 2); - var z = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 2) }, 1); + var x = sc.Parallelize(new[] { new Tuple<string, int>("a", 5), new Tuple<string, int>("b", 6) }, 2); + var y = sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 2); + var z = sc.Parallelize(new[] { new Tuple<string, int>("a", 2) }, 1); x.GroupWith(y, z).Collect(); @@ -909,12 +909,12 @@ - + - var x = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 5), new KeyValuePair<string, int>("b", 6) }, 2); - var y = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 2); - var z = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 2) }, 1); - var w = sc.Parallelize(new[] { new KeyValuePair<string, int>("b", 42) }, 1); + var x = sc.Parallelize(new[] { new Tuple<string, int>("a", 5), new Tuple<string, int>("b", 6) }, 2); + var y = sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 2); + var z = sc.Parallelize(new[] { new Tuple<string, int>("a", 2) }, 1); + var w = sc.Parallelize(new[] { new Tuple<string, int>("b", 42) }, 1); var m = x.GroupWith(y, z, w).MapValues(l => string.Join(" ", l.Item1) + " : " + string.Join(" ", l.Item2) + " : " + string.Join(" ", l.Item3) + " : " + string.Join(" ", l.Item4)).Collect(); @@ -929,12 +929,12 @@ - + Return each (key, value) pair in this RDD that has no pair with matching key in . - var x = sc.Parallelize(new[] { new KeyValuePair<string, int?>("a", 1), new KeyValuePair<string, int?>("b", 4), new KeyValuePair<string, int?>("b", 5), new KeyValuePair<string, int?>("a", 2) }, 2); - var y = sc.Parallelize(new[] { new KeyValuePair<string, int?>("a", 3), new KeyValuePair<string, int?>("c", null) }, 2); + var x = sc.Parallelize(new[] { new Tuple<string, int?>("a", 1), new Tuple<string, int?>("b", 4), new Tuple<string, int?>("b", 5), new Tuple<string, int?>("a", 2) }, 2); + var y = sc.Parallelize(new[] { new Tuple<string, int?>("a", 3), new Tuple<string, int?>("c", null) }, 2); x.SubtractByKey(y).Collect(); [('b', 4), ('b', 5)] @@ -948,14 +948,14 @@ - + Return the list of values in the RDD for key `key`. This operation is done efficiently if the RDD has a known partitioner by only searching the partition that the key maps to. >>> l = range(1000) - >>> rdd = sc.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new KeyValuePair<int, int>(x, y)), 10) + >>> rdd = sc.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new Tuple<int, int>(x, y)), 10) >>> rdd.lookup(42) [42] @@ -966,7 +966,7 @@ - + Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the new Hadoop OutputFormat API (mapreduce package). 
Keys/values are @@ -978,7 +978,7 @@ Hadoop job configuration, passed in as a dict - + @@ -991,7 +991,7 @@ fully qualified classname of value Writable class (e.g. "org.apache.hadoop.io.Text", None by default) Hadoop job configuration, passed in as a dict (None by default) - + Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package). Keys/values are @@ -1003,7 +1003,7 @@ Hadoop job configuration, passed in as a dict - + Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package). Key and value types @@ -1022,7 +1022,7 @@ (None by default) (None by default) - + Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the org.apache.hadoop.io.Writable types that we convert from the @@ -1166,7 +1166,7 @@ Return a new RDD by applying a function to each element of this RDD. - sc.Parallelize(new string[]{"b", "a", "c"}, 1).Map(x => new KeyValuePair<string, int>(x, 1)).Collect() + sc.Parallelize(new string[]{"b", "a", "c"}, 1).Map(x => new Tuple<string, int>(x, 1)).Collect() [('a', 1), ('b', 1), ('c', 1)] @@ -1772,7 +1772,7 @@ - + Get the N elements from a RDD ordered in ascending order or as specified by the optional key function. @@ -1784,6 +1784,7 @@ + @@ -2109,7 +2110,7 @@ minimum splits in dataset (default min(2, sc.defaultParallelism)) - + Read a 'new API' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. @@ -2127,7 +2128,7 @@ Hadoop configuration, passed in as a dict (None by default) - + Read a 'new API' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. @@ -2143,7 +2144,7 @@ Hadoop configuration, passed in as a dict (None by default) - + Read an 'old' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. @@ -2161,7 +2162,7 @@ Hadoop configuration, passed in as a dict (None by default) - + Read an 'old' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. @@ -4841,6 +4842,16 @@ The Json object used to construct a StructType A new StructType instance + + + An input stream that always returns the same RDD on each timestep. Useful for testing. + + + + + Construct a ConstantInputDStream instance. + + A Discretized Stream (DStream), the basic abstraction in Spark Streaming, @@ -5137,7 +5148,7 @@ Utility for creating streams from - + Create a unioned EventHubs stream that receives data from Microsoft Azure Eventhubs The unioned stream will receive message from all partitions of the EventHubs @@ -5167,7 +5178,7 @@ Utils for Kafka input stream. - + Create an input stream that pulls messages from a Kafka Broker. @@ -5178,7 +5189,7 @@ Additional params for Kafka A DStream object - + Create an input stream that pulls messages from a Kafka Broker. @@ -5190,7 +5201,7 @@ RDD storage level. A DStream object - + Create an input stream that directly pulls messages from a Kafka Broker and specific offset. @@ -5216,7 +5227,7 @@ Per-topic/partition Kafka offsets defining the (inclusive) starting point of the stream. A DStream object - + Create an input stream that directly pulls messages from a Kafka Broker and specific offset. 
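As a usage sketch of the ported Kafka entry point: topics and kafkaParams are now passed as sequences of Tuple instead of Dictionary, and the resulting stream yields Tuple<byte[], byte[]>. The Zookeeper quorum, group id and topic name below are hypothetical, ssc is an existing StreamingContext, and usings for System, System.Collections.Generic and System.Text are assumed:

    // Topic -> partition count and Kafka params, expressed as Tuples after this patch.
    var topics = new List<Tuple<string, int>> { Tuple.Create("sample-topic", 1) };
    var kafkaParams = new List<Tuple<string, string>>
    {
        Tuple.Create("zookeeper.connection.timeout.ms", "10000")
    };

    DStream<Tuple<byte[], byte[]>> stream =
        KafkaUtils.CreateStream(ssc, "localhost:2181", "sample-group", topics, kafkaParams);

    // Message payload is Item2 of the Tuple (was Value).
    DStream<string> messages = stream.Map(kv => Encoding.UTF8.GetString(kv.Item2));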
@@ -5331,10 +5342,10 @@ - operations only available to KeyValuePair RDD + operations only available to Tuple RDD - + Return a new DStream by applying ReduceByKey to each RDD. @@ -5345,7 +5356,7 @@ - + Return a new DStream by applying combineByKey to each RDD. @@ -5359,7 +5370,7 @@ - + Return a new DStream in which each RDD are partitioned by numPartitions. @@ -5369,7 +5380,7 @@ - + Return a new DStream by applying a map function to the value of each key-value pairs in this DStream without changing the key. @@ -5381,7 +5392,7 @@ - + Return a new DStream by applying a flatmap function to the value of each key-value pairs in this DStream without changing the key. @@ -5393,7 +5404,7 @@ - + Return a new DStream by applying groupByKey on each RDD. @@ -5403,7 +5414,7 @@ - + Return a new DStream by applying 'cogroup' between RDDs of this DStream and `other` DStream. Hash partitioning is used to generate the RDDs with `numPartitions` partitions. @@ -5416,7 +5427,7 @@ - + Return a new DStream by applying 'join' between RDDs of this DStream and `other` DStream. Hash partitioning is used to generate the RDDs with `numPartitions` partitions. @@ -5429,7 +5440,7 @@ - + Return a new DStream by applying 'left outer join' between RDDs of this DStream and `other` DStream. Hash partitioning is used to generate the RDDs with `numPartitions` partitions. @@ -5442,7 +5453,7 @@ - + Return a new DStream by applying 'right outer join' between RDDs of this DStream and `other` DStream. Hash partitioning is used to generate the RDDs with `numPartitions` partitions. @@ -5455,7 +5466,7 @@ - + Return a new DStream by applying 'full outer join' between RDDs of this DStream and `other` DStream. Hash partitioning is used to generate the RDDs with `numPartitions` partitions. @@ -5468,7 +5479,7 @@ - + Return a new DStream by applying `GroupByKey` over a sliding window. Similar to `DStream.GroupByKey()`, but applies it over a sliding window. @@ -5485,7 +5496,7 @@ Number of partitions of each RDD in the new DStream. - + Return a new DStream by applying incremental `reduceByKey` over a sliding window. @@ -5506,7 +5517,7 @@ function to filter expired key-value pairs; only pairs that satisfy the function are retained set this to null if you do not want to filter - + Return a new "state" DStream where the state for each key is updated by applying the given function on the previous state of the key and the new values of the key. @@ -5522,7 +5533,7 @@ - + Return a new "state" DStream where the state for each key is updated by applying the given function on the previous state of the key and the new values of the key. @@ -5535,7 +5546,7 @@ - + Return a new "state" DStream where the state for each key is updated by applying the given function on the previous state of the key and the new values of the key. @@ -5548,7 +5559,7 @@ - + Return a new "state" DStream where the state for each key is updated by applying the given function on the previous state of the key and the new values of the key. diff --git a/csharp/Adapter/documentation/Mobius_API_Documentation.md b/csharp/Adapter/documentation/Mobius_API_Documentation.md index 71bfc5d1..5d02396d 100644 --- a/csharp/Adapter/documentation/Mobius_API_Documentation.md +++ b/csharp/Adapter/documentation/Mobius_API_Documentation.md @@ -182,14 +182,14 @@ ####Summary - operations only available to KeyValuePair RDD + operations only available to Tuple RDD See also http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.rdd.PairRDDFunctions ####Methods -
NameDescription
CollectAsMap``2Return the key-value pairs in this RDD to the master as a dictionary. var m = sc.Parallelize(new[] { new KeyValuePair<int, int>(1, 2), new KeyValuePair<int, int>(3, 4) }, 1).CollectAsMap() m[1] 2 m[3] 4
Keys``2Return an RDD with the keys of each tuple. >>> m = sc.Parallelize(new[] { new KeyValuePair<int, int>(1, 2), new KeyValuePair<int, int>(3, 4) }, 1).Keys().Collect() [1, 3]
Values``2Return an RDD with the values of each tuple. >>> m = sc.Parallelize(new[] { new KeyValuePair<int, int>(1, 2), new KeyValuePair<int, int>(3, 4) }, 1).Values().Collect() [2, 4]
ReduceByKey``2Merge the values for each key using an associative reduce function. This will also perform the merging locally on each mapper before sending results to a reducer, similarly to a "combiner" in MapReduce. Output will be hash-partitioned with partitions, or the default parallelism level if is not specified. sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 1), new KeyValuePair<string, int>("a", 1) }, 2) .ReduceByKey((x, y) => x + y).Collect() [('a', 2), ('b', 1)]
ReduceByKeyLocally``2Merge the values for each key using an associative reduce function, but return the results immediately to the master as a dictionary. This will also perform the merging locally on each mapper before sending results to a reducer, similarly to a "combiner" in MapReduce. sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 1), new KeyValuePair<string, int>("a", 1) }, 2) .ReduceByKeyLocally((x, y) => x + y).Collect() [('a', 2), ('b', 1)]
CountByKey``2Count the number of elements for each key, and return the result to the master as a dictionary. sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 1), new KeyValuePair<string, int>("a", 1) }, 2) .CountByKey((x, y) => x + y).Collect() [('a', 2), ('b', 1)]
Join``3Return an RDD containing all pairs of elements with matching keys in this RDD and . Each pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in this RDD and (k, v2) is in . Performs a hash join across the cluster. var l = sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 1); var r = sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 2), new KeyValuePair<string, int>("a", 3) }, 1); var m = l.Join(r, 2).Collect(); [('a', (1, 2)), ('a', (1, 3))]
LeftOuterJoin``3Perform a left outer join of this RDD and . For each element (k, v) in this RDD, the resulting RDD will either contain all pairs (k, (v, Option)) for w in , where Option.IsDefined is TRUE, or the pair (k, (v, Option)) if no elements in have key k, where Option.IsDefined is FALSE. Hash-partitions the resulting RDD into the given number of partitions. var l = sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 1); var r = sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 2) }, 1); var m = l.LeftOuterJoin(r).Collect(); [('a', (1, 2)), ('b', (4, Option))] * Option.IsDefined = FALSE
RightOuterJoin``3Perform a right outer join of this RDD and . For each element (k, w) in , the resulting RDD will either contain all pairs (k, (Option, w)) for v in this, where Option.IsDefined is TRUE, or the pair (k, (Option, w)) if no elements in this RDD have key k, where Option.IsDefined is FALSE. Hash-partitions the resulting RDD into the given number of partitions. var l = sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 2) }, 1); var r = sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 1); var m = l.RightOuterJoin(r).Collect(); [('a', (2, 1)), ('b', (Option, 4))] * Option.IsDefined = FALSE
FullOuterJoin``3Perform a full outer join of this RDD and . For each element (k, v) in this RDD, the resulting RDD will either contain all pairs (k, (v, w)) for w in , or the pair (k, (v, None)) if no elements in have key k. Similarly, for each element (k, w) in , the resulting RDD will either contain all pairs (k, (v, w)) for v in this RDD, or the pair (k, (None, w)) if no elements in this RDD have key k. Hash-partitions the resulting RDD into the given number of partitions. var l = sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 1), KeyValuePair<string, int>("b", 4) }, 1); var r = sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 2), new KeyValuePair<string, int>("c", 8) }, 1); var m = l.FullOuterJoin(r).Collect(); [('a', (1, 2)), ('b', (4, None)), ('c', (None, 8))]
PartitionBy``2Return a copy of the RDD partitioned using the specified partitioner. sc.Parallelize(new[] { 1, 2, 3, 4, 2, 4, 1 }, 1).Map(x => new KeyValuePair<int, int>(x, x)).PartitionBy(3).Glom().Collect()
CombineByKey``3# TODO: add control over map-side aggregation Generic function to combine the elements for each key using a custom set of aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], for a "combined type" C. Note that V and C can be different -- for example, one might group an RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three functions: - , which turns a V into a C (e.g., creates a one-element list) - , to merge a V into a C (e.g., adds it to the end of a list) - , to combine two C's into a single one. In addition, users can control the partitioning of the output RDD. sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 1), new KeyValuePair<string, int>("a", 1) }, 2) .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() [('a', '11'), ('b', '1')]
AggregateByKey``3Aggregate the values of each key, using given combine functions and a neutral "zero value". This function can return a different result type, U, than the type of the values in this RDD, V. Thus, we need one operation for merging a V into a U and one operation for merging two U's, The former operation is used for merging values within a partition, and the latter is used for merging values between partitions. To avoid memory allocation, both of these functions are allowed to modify and return their first argument instead of creating a new U. sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 1), new KeyValuePair<string, int>("a", 1) }, 2) .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() [('a', 2), ('b', 1)]
FoldByKey``2Merge the values for each key using an associative function "func" and a neutral "zeroValue" which may be added to the result an arbitrary number of times, and must not change the result (e.g., 0 for addition, or 1 for multiplication.). sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 1), new KeyValuePair<string, int>("a", 1) }, 2) .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() [('a', 2), ('b', 1)]
GroupByKey``2Group the values for each key in the RDD into a single sequence. Hash-partitions the resulting RDD with numPartitions partitions. Note: If you are grouping in order to perform an aggregation (such as a sum or average) over each key, using reduceByKey or aggregateByKey will provide much better performance. sc.Parallelize( new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 1), new KeyValuePair<string, int>("a", 1) }, 2) .GroupByKey().MapValues(l => string.Join(" ", l)).Collect() [('a', [1, 1]), ('b', [1])]
MapValues``3Pass each value in the key-value pair RDD through a map function without changing the keys; this also retains the original RDD's partitioning. sc.Parallelize( new[] { new KeyValuePair<string, string[]>("a", new[]{"apple", "banana", "lemon"}), new KeyValuePair<string, string[]>("b", new[]{"grapes"}) }, 2) .MapValues(x => x.Length).Collect() [('a', 3), ('b', 1)]
FlatMapValues``3Pass each value in the key-value pair RDD through a flatMap function without changing the keys; this also retains the original RDD's partitioning. x = sc.Parallelize( new[] { new KeyValuePair<string, string[]>("a", new[]{"x", "y", "z"}), new KeyValuePair<string, string[]>("b", new[]{"p", "r"}) }, 2) .FlatMapValues(x => x).Collect() [('a', 'x'), ('a', 'y'), ('a', 'z'), ('b', 'p'), ('b', 'r')]
MapPartitionsWithIndex``5explicitly convert KeyValuePair<K, V> to KeyValuePair<K, dynamic> since they are incompatibles types unlike V to dynamic
GroupWith``3For each key k in this RDD or , return a resulting RDD that contains a tuple with the list of values for that key in this RDD as well as . var x = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 2); var y = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 2) }, 1); x.GroupWith(y).Collect(); [('a', ([1], [2])), ('b', ([4], []))]
GroupWith``4var x = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 5), new KeyValuePair<string, int>("b", 6) }, 2); var y = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 2); var z = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 2) }, 1); x.GroupWith(y, z).Collect();
GroupWith``5var x = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 5), new KeyValuePair<string, int>("b", 6) }, 2); var y = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 1), new KeyValuePair<string, int>("b", 4) }, 2); var z = sc.Parallelize(new[] { new KeyValuePair<string, int>("a", 2) }, 1); var w = sc.Parallelize(new[] { new KeyValuePair<string, int>("b", 42) }, 1); var m = x.GroupWith(y, z, w).MapValues(l => string.Join(" ", l.Item1) + " : " + string.Join(" ", l.Item2) + " : " + string.Join(" ", l.Item3) + " : " + string.Join(" ", l.Item4)).Collect();
SubtractByKey``3Return each (key, value) pair in this RDD that has no pair with matching key in . var x = sc.Parallelize(new[] { new KeyValuePair<string, int?>("a", 1), new KeyValuePair<string, int?>("b", 4), new KeyValuePair<string, int?>("b", 5), new KeyValuePair<string, int?>("a", 2) }, 2); var y = sc.Parallelize(new[] { new KeyValuePair<string, int?>("a", 3), new KeyValuePair<string, int?>("c", null) }, 2); x.SubtractByKey(y).Collect(); [('b', 4), ('b', 5)]
Lookup``2Return the list of values in the RDD for key `key`. This operation is done efficiently if the RDD has a known partitioner by only searching the partition that the key maps to. >>> l = range(1000) >>> rdd = sc.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new KeyValuePair<int, int>(x, y)), 10) >>> rdd.lookup(42) [42]
SaveAsNewAPIHadoopDataset``2Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the new Hadoop OutputFormat API (mapreduce package). Keys/values are converted for output using either user specified converters or, by default, org.apache.spark.api.python.JavaToWritableConverter.
SaveAsNewAPIHadoopFile``2
SaveAsHadoopDataset``2Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package). Keys/values are converted for output using either user specified converters or, by default, org.apache.spark.api.python.JavaToWritableConverter.
SaveAsHadoopFile``2Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package). Key and value types will be inferred if not specified. Keys and values are converted for output using either user specified converters or org.apache.spark.api.python.JavaToWritableConverter. The is applied on top of the base Hadoop conf associated with the SparkContext of this RDD to create a merged Hadoop MapReduce job configuration for saving the data.
SaveAsSequenceFile``2Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the org.apache.hadoop.io.Writable types that we convert from the RDD's key and value types. The mechanism is as follows: 1. Pyrolite is used to convert pickled Python RDD into RDD of Java objects. 2. Keys and values of this Java RDD are converted to Writables and written out.
NullIfEmpty``1Converts a collection to a list where the element type is Option(T) type. If the collection is empty, just returns the empty list.
+
NameDescription
CollectAsMap``2Return the key-value pairs in this RDD to the master as a dictionary. var m = sc.Parallelize(new[] { new Tuple<int, int>(1, 2), new Tuple<int, int>(3, 4) }, 1).CollectAsMap() m[1] 2 m[3] 4
Keys``2Return an RDD with the keys of each tuple. >>> m = sc.Parallelize(new[] { new Tuple<int, int>(1, 2), new Tuple<int, int>(3, 4) }, 1).Keys().Collect() [1, 3]
Values``2Return an RDD with the values of each tuple. >>> m = sc.Parallelize(new[] { new Tuple<int, int>(1, 2), new Tuple<int, int>(3, 4) }, 1).Values().Collect() [2, 4]
ReduceByKey``2Merge the values for each key using an associative reduce function. This will also perform the merging locally on each mapper before sending results to a reducer, similarly to a "combiner" in MapReduce. Output will be hash-partitioned with partitions, or the default parallelism level if is not specified. sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 1), new Tuple<string, int>("a", 1) }, 2) .ReduceByKey((x, y) => x + y).Collect() [('a', 2), ('b', 1)]
ReduceByKeyLocally``2Merge the values for each key using an associative reduce function, but return the results immediately to the master as a dictionary. This will also perform the merging locally on each mapper before sending results to a reducer, similarly to a "combiner" in MapReduce. sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 1), new Tuple<string, int>("a", 1) }, 2) .ReduceByKeyLocally((x, y) => x + y) [('a', 2), ('b', 1)]
CountByKey``2Count the number of elements for each key, and return the result to the master as a dictionary. sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 1), new Tuple<string, int>("a", 1) }, 2) .CountByKey() [('a', 2), ('b', 1)]
Join``3Return an RDD containing all pairs of elements with matching keys in this RDD and . Each pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in this RDD and (k, v2) is in . Performs a hash join across the cluster. var l = sc.Parallelize( new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); var r = sc.Parallelize( new[] { new Tuple<string, int>("a", 2), new Tuple<string, int>("a", 3) }, 1); var m = l.Join(r, 2).Collect(); [('a', (1, 2)), ('a', (1, 3))]
LeftOuterJoin``3Perform a left outer join of this RDD and . For each element (k, v) in this RDD, the resulting RDD will either contain all pairs (k, (v, Option)) for w in , where Option.IsDefined is TRUE, or the pair (k, (v, Option)) if no elements in have key k, where Option.IsDefined is FALSE. Hash-partitions the resulting RDD into the given number of partitions. var l = sc.Parallelize( new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); var r = sc.Parallelize( new[] { new Tuple<string, int>("a", 2) }, 1); var m = l.LeftOuterJoin(r).Collect(); [('a', (1, 2)), ('b', (4, Option))] * Option.IsDefined = FALSE
RightOuterJoin``3Perform a right outer join of this RDD and . For each element (k, w) in , the resulting RDD will either contain all pairs (k, (Option, w)) for v in this, where Option.IsDefined is TRUE, or the pair (k, (Option, w)) if no elements in this RDD have key k, where Option.IsDefined is FALSE. Hash-partitions the resulting RDD into the given number of partitions. var l = sc.Parallelize( new[] { new Tuple<string, int>("a", 2) }, 1); var r = sc.Parallelize( new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); var m = l.RightOuterJoin(r).Collect(); [('a', (2, 1)), ('b', (Option, 4))] * Option.IsDefined = FALSE
FullOuterJoin``3Perform a full outer join of this RDD and . For each element (k, v) in this RDD, the resulting RDD will either contain all pairs (k, (v, w)) for w in , or the pair (k, (v, None)) if no elements in have key k. Similarly, for each element (k, w) in , the resulting RDD will either contain all pairs (k, (v, w)) for v in this RDD, or the pair (k, (None, w)) if no elements in this RDD have key k. Hash-partitions the resulting RDD into the given number of partitions. var l = sc.Parallelize( new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 1); var r = sc.Parallelize( new[] { new Tuple<string, int>("a", 2), new Tuple<string, int>("c", 8) }, 1); var m = l.FullOuterJoin(r).Collect(); [('a', (1, 2)), ('b', (4, None)), ('c', (None, 8))]
PartitionBy``2Return a copy of the RDD partitioned using the specified partitioner. sc.Parallelize(new[] { 1, 2, 3, 4, 2, 4, 1 }, 1).Map(x => new Tuple<int, int>(x, x)).PartitionBy(3).Glom().Collect()
CombineByKey``3# TODO: add control over map-side aggregation Generic function to combine the elements for each key using a custom set of aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], for a "combined type" C. Note that V and C can be different -- for example, one might group an RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three functions: - , which turns a V into a C (e.g., creates a one-element list) - , to merge a V into a C (e.g., adds it to the end of a list) - , to combine two C's into a single one. In addition, users can control the partitioning of the output RDD. sc.Parallelize( new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 1), new Tuple<string, int>("a", 1) }, 2) .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(), (x, y) => x + y).Collect() [('a', '11'), ('b', '1')]
AggregateByKey``3Aggregate the values of each key, using given combine functions and a neutral "zero value". This function can return a different result type, U, than the type of the values in this RDD, V. Thus, we need one operation for merging a V into a U and one operation for merging two U's. The former operation is used for merging values within a partition, and the latter is used for merging values between partitions. To avoid memory allocation, both of these functions are allowed to modify and return their first argument instead of creating a new U. sc.Parallelize( new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 1), new Tuple<string, int>("a", 1) }, 2) .AggregateByKey(() => 0, (x, y) => x + y, (x, y) => x + y).Collect() [('a', 2), ('b', 1)]
FoldByKey``2Merge the values for each key using an associative function "func" and a neutral "zeroValue" which may be added to the result an arbitrary number of times, and must not change the result (e.g., 0 for addition, or 1 for multiplication). sc.Parallelize( new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 1), new Tuple<string, int>("a", 1) }, 2) .FoldByKey(() => 0, (x, y) => x + y).Collect() [('a', 2), ('b', 1)]
GroupByKey``2Group the values for each key in the RDD into a single sequence. Hash-partitions the resulting RDD with numPartitions partitions. Note: If you are grouping in order to perform an aggregation (such as a sum or average) over each key, using reduceByKey or aggregateByKey will provide much better performance. sc.Parallelize( new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 1), new Tuple<string, int>("a", 1) }, 2) .GroupByKey().MapValues(l => string.Join(" ", l)).Collect() [('a', [1, 1]), ('b', [1])]
MapValues``3Pass each value in the key-value pair RDD through a map function without changing the keys; this also retains the original RDD's partitioning. sc.Parallelize( new[] { new Tuple<string, string[]>("a", new[]{"apple", "banana", "lemon"}), new Tuple<string, string[]>("b", new[]{"grapes"}) }, 2) .MapValues(x => x.Length).Collect() [('a', 3), ('b', 1)]
FlatMapValues``3Pass each value in the key-value pair RDD through a flatMap function without changing the keys; this also retains the original RDD's partitioning. x = sc.Parallelize( new[] { new Tuple<string, string[]>("a", new[]{"x", "y", "z"}), new Tuple<string, string[]>("b", new[]{"p", "r"}) }, 2) .FlatMapValues(x => x).Collect() [('a', 'x'), ('a', 'y'), ('a', 'z'), ('b', 'p'), ('b', 'r')]
MapPartitionsWithIndex``5explicitly converts Tuple<K, V> to Tuple<K, dynamic> since they are incompatible types, unlike V to dynamic
GroupWith``3For each key k in this RDD or , return a resulting RDD that contains a tuple with the list of values for that key in this RDD as well as . var x = sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 2); var y = sc.Parallelize(new[] { new Tuple<string, int>("a", 2) }, 1); x.GroupWith(y).Collect(); [('a', ([1], [2])), ('b', ([4], []))]
GroupWith``4var x = sc.Parallelize(new[] { new Tuple<string, int>("a", 5), new Tuple<string, int>("b", 6) }, 2); var y = sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 2); var z = sc.Parallelize(new[] { new Tuple<string, int>("a", 2) }, 1); x.GroupWith(y, z).Collect();
GroupWith``5var x = sc.Parallelize(new[] { new Tuple<string, int>("a", 5), new Tuple<string, int>("b", 6) }, 2); var y = sc.Parallelize(new[] { new Tuple<string, int>("a", 1), new Tuple<string, int>("b", 4) }, 2); var z = sc.Parallelize(new[] { new Tuple<string, int>("a", 2) }, 1); var w = sc.Parallelize(new[] { new Tuple<string, int>("b", 42) }, 1); var m = x.GroupWith(y, z, w).MapValues(l => string.Join(" ", l.Item1) + " : " + string.Join(" ", l.Item2) + " : " + string.Join(" ", l.Item3) + " : " + string.Join(" ", l.Item4)).Collect();
SubtractByKey``3Return each (key, value) pair in this RDD that has no pair with matching key in . var x = sc.Parallelize(new[] { new Tuple<string, int?>("a", 1), new Tuple<string, int?>("b", 4), new Tuple<string, int?>("b", 5), new Tuple<string, int?>("a", 2) }, 2); var y = sc.Parallelize(new[] { new Tuple<string, int?>("a", 3), new Tuple<string, int?>("c", null) }, 2); x.SubtractByKey(y).Collect(); [('b', 4), ('b', 5)]
Lookup``2Return the list of values in the RDD for key `key`. This operation is done efficiently if the RDD has a known partitioner by only searching the partition that the key maps to. var rdd = sc.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new Tuple<int, int>(x, y)), 10) rdd.Lookup(42) [42]
SaveAsNewAPIHadoopDataset``2Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the new Hadoop OutputFormat API (mapreduce package). Keys/values are converted for output using either user specified converters or, by default, org.apache.spark.api.python.JavaToWritableConverter.
SaveAsNewAPIHadoopFile``2
SaveAsHadoopDataset``2Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package). Keys/values are converted for output using either user specified converters or, by default, org.apache.spark.api.python.JavaToWritableConverter.
SaveAsHadoopFile``2Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package). Key and value types will be inferred if not specified. Keys and values are converted for output using either user specified converters or org.apache.spark.api.python.JavaToWritableConverter. The is applied on top of the base Hadoop conf associated with the SparkContext of this RDD to create a merged Hadoop MapReduce job configuration for saving the data.
SaveAsSequenceFile``2Output a Python RDD of key-value pairs (of form RDD[(K, V)]) to any Hadoop file system, using the org.apache.hadoop.io.Writable types that we convert from the RDD's key and value types. The mechanism is as follows: 1. Pyrolite is used to convert pickled Python RDD into RDD of Java objects. 2. Keys and values of this Java RDD are converted to Writables and written out.
NullIfEmpty``1Converts a collection to a list where the element type is Option(T) type. If the collection is empty, just returns the empty list.
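As a quick orientation for reviewers, the following is a minimal word-count sketch of how pair-RDD call sites look once this port lands, using only the operations documented in the table above. It assumes an already-configured SparkContext `sc` (setup omitted); keys and values are now read via Item1/Item2 instead of Key/Value.

```csharp
// Minimal sketch, assuming an already-configured SparkContext `sc`.
// Pair elements are Tuple<K, V> after this change: keys via Item1, values via Item2.
using System;
using Microsoft.Spark.CSharp.Core;

static class WordCountSketch
{
    public static void Run(SparkContext sc)
    {
        var pairs = sc.Parallelize(new[] { "The quick brown fox", "The lazy dog" }, 2)
                      .FlatMap(line => line.Split(' '))
                      .Map(word => new Tuple<string, int>(word, 1));

        // ReduceByKey merges the per-key values, as documented in the table above.
        foreach (var count in pairs.ReduceByKey((x, y) => x + y).Collect())
        {
            Console.WriteLine("{0}: {1}", count.Item1, count.Item2); // Item1/Item2 replace Key/Value
        }
    }
}
```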
--- @@ -230,7 +230,7 @@ ####Methods -
NameDescription
CachePersist this RDD with the default storage level .
PersistSet this RDD's storage level to persist its values across operations after the first time it is computed. This can only be used to assign a new storage level if the RDD does not have a storage level set yet. If no storage level is specified defaults to . sc.Parallelize(new string[] {"b", "a", "c").Persist().isCached True
UnpersistMark the RDD as non-persistent, and remove all blocks for it from memory and disk.
CheckpointMark this RDD for checkpointing. It will be saved to a file inside the checkpoint directory set with ) and all references to its parent RDDs will be removed. This function must be called before any job has been executed on this RDD. It is strongly recommended that this RDD is persisted in memory, otherwise saving it on a file will require recomputation.
GetNumPartitionsReturns the number of partitions of this RDD.
Map``1Return a new RDD by applying a function to each element of this RDD. sc.Parallelize(new string[]{"b", "a", "c"}, 1).Map(x => new KeyValuePair<string, int>(x, 1)).Collect() [('a', 1), ('b', 1), ('c', 1)]
FlatMap``1Return a new RDD by first applying a function to all elements of this RDD, and then flattening the results. sc.Parallelize(new int[] {2, 3, 4}, 1).FlatMap(x => Enumerable.Range(1, x - 1)).Collect() [1, 1, 1, 2, 2, 3]
MapPartitions``1Return a new RDD by applying a function to each partition of this RDD. sc.Parallelize(new int[] {1, 2, 3, 4}, 2).MapPartitions(iter => new[]{iter.Sum(x => (x as decimal?))}).Collect() [3, 7]
MapPartitionsWithIndex``1Return a new RDD by applying a function to each partition of this RDD, while tracking the index of the original partition. sc.Parallelize(new int[]{1, 2, 3, 4}, 4).MapPartitionsWithIndex<double>((pid, iter) => (double)pid).Sum() 6
FilterReturn a new RDD containing only the elements that satisfy a predicate. sc.Parallelize(new int[]{1, 2, 3, 4, 5}, 1).Filter(x => x % 2 == 0).Collect() [2, 4]
DistinctReturn a new RDD containing the distinct elements in this RDD. >>> sc.Parallelize(new int[] {1, 1, 2, 3}, 1).Distinct().Collect() [1, 2, 3]
SampleReturn a sampled subset of this RDD. var rdd = sc.Parallelize(Enumerable.Range(0, 100), 4) 6 <= rdd.Sample(False, 0.1, 81).count() <= 14 true
RandomSplitRandomly splits this RDD with the provided weights. var rdd = sc.Parallelize(Enumerable.Range(0, 500), 1) var rdds = rdd.RandomSplit(new double[] {2, 3}, 17) 150 < rdds[0].Count() < 250 250 < rdds[1].Count() < 350
TakeSampleReturn a fixed-size sampled subset of this RDD. var rdd = sc.Parallelize(Enumerable.Range(0, 10), 2) rdd.TakeSample(true, 20, 1).Length 20 rdd.TakeSample(false, 5, 2).Length 5 rdd.TakeSample(false, 15, 3).Length 10
ComputeFractionForSampleSizeReturns a sampling rate that guarantees a sample of size >= sampleSizeLowerBound 99.99% of the time. How the sampling rate is determined: Let p = num / total, where num is the sample size and total is the total number of data points in the RDD. We're trying to compute q > p such that - when sampling with replacement, we're drawing each data point with prob_i ~ Pois(q), where we want to guarantee Pr[s < num] < 0.0001 for s = sum(prob_i for i from 0 to total), i.e. the failure rate of not having a sufficiently large sample < 0.0001. Setting q = p + 5 * sqrt(p/total) is sufficient to guarantee 0.9999 success rate for num > 12, but we need a slightly larger q (9 empirically determined). - when sampling without replacement, we're drawing each data point with prob_i ~ Binomial(total, fraction) and our choice of q guarantees 1-delta, or 0.9999 success rate, where success rate is defined the same as in sampling with replacement.
UnionReturn the union of this RDD and another one. var rdd = sc.Parallelize(new int[] { 1, 1, 2, 3 }, 1) rdd.union(rdd).collect() [1, 1, 2, 3, 1, 1, 2, 3]
IntersectionReturn the intersection of this RDD and another one. The output will not contain any duplicate elements, even if the input RDDs did. Note that this method performs a shuffle internally. var rdd1 = sc.Parallelize(new int[] { 1, 10, 2, 3, 4, 5 }, 1) var rdd2 = sc.Parallelize(new int[] { 1, 6, 2, 3, 7, 8 }, 1) var rdd1.Intersection(rdd2).Collect() [1, 2, 3]
GlomReturn an RDD created by coalescing all elements within each partition into a list. var rdd = sc.Parallelize(new int[] { 1, 2, 3, 4 }, 2) rdd.Glom().Collect() [[1, 2], [3, 4]]
Cartesian``1Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of elements (a, b) where a is in self and b is in other. rdd = sc.Parallelize(new int[] { 1, 2 }, 1) rdd.Cartesian(rdd).Collect() [(1, 1), (1, 2), (2, 1), (2, 2)]
GroupBy``1Return an RDD of grouped items. Each group consists of a key and a sequence of elements mapping to that key. The ordering of elements within each group is not guaranteed, and may even differ each time the resulting RDD is evaluated. Note: This operation may be very expensive. If you are grouping in order to perform an aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] or [[PairRDDFunctions.reduceByKey]] will provide much better performance. >>> rdd = sc.Parallelize(new int[] { 1, 1, 2, 3, 5, 8 }, 1) >>> result = rdd.GroupBy(lambda x: x % 2).Collect() [(0, [2, 8]), (1, [1, 1, 3, 5])]
PipeReturn an RDD created by piping elements to a forked external process. >>> sc.Parallelize(new char[] { '1', '2', '3', '4' }, 1).Pipe("cat").Collect() [u'1', u'2', u'3', u'4']
ForeachApplies a function to all elements of this RDD. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).Foreach(x => Console.Write(x))
ForeachPartitionApplies a function to each partition of this RDD. sc.parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).ForeachPartition(iter => { foreach (var x in iter) Console.Write(x + " "); })
CollectReturn a list that contains all of the elements in this RDD.
ReduceReduces the elements of this RDD using the specified commutative and associative binary operator. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).Reduce((x, y) => x + y) 15
TreeReduceReduces the elements of this RDD in a multi-level tree pattern. >>> add = lambda x, y: x + y >>> rdd = sc.Parallelize(new int[] { -5, -4, -3, -2, -1, 1, 2, 3, 4 }, 10).TreeReduce((x, y) => x + y)) >>> rdd.TreeReduce(add) -5 >>> rdd.TreeReduce(add, 1) -5 >>> rdd.TreeReduce(add, 2) -5 >>> rdd.TreeReduce(add, 5) -5 >>> rdd.TreeReduce(add, 10) -5
FoldAggregate the elements of each partition, and then the results for all the partitions, using a given associative and commutative function and a neutral "zero value." The function op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object allocation; however, it should not modify t2. This behaves somewhat differently from fold operations implemented for non-distributed collections in functional languages like Scala. This fold operation may be applied to partitions individually, and then fold those results into the final result, rather than apply the fold to each element sequentially in some defined ordering. For functions that are not commutative, the result may differ from that of a fold applied to a non-distributed collection. >>> from operator import add >>> sc.parallelize([1, 2, 3, 4, 5]).fold(0, add) 15
Aggregate``1Aggregate the elements of each partition, and then the results for all the partitions, using a given combine functions and a neutral "zero value." The functions op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object allocation; however, it should not modify t2. The first function (seqOp) can return a different result type, U, than the type of this RDD. Thus, we need one operation for merging a T into an U and one operation for merging two U >>> sc.parallelize(new int[] { 1, 2, 3, 4 }, 1).Aggregate(0, (x, y) => x + y, (x, y) => x + y)) 10
TreeAggregate``1Aggregates the elements of this RDD in a multi-level tree pattern. rdd = sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1).TreeAggregate(0, (x, y) => x + y, (x, y) => x + y)) 10
CountReturn the number of elements in this RDD.
CountByValueReturn the count of each unique value in this RDD as a dictionary of (value, count) pairs. sc.Parallelize(new int[] { 1, 2, 1, 2, 2 }, 2).CountByValue()) [(1, 2), (2, 3)]
TakeTake the first num elements of the RDD. It works by first scanning one partition, and use the results from that partition to estimate the number of additional partitions needed to satisfy the limit. Translated from the Scala implementation in RDD#take(). sc.Parallelize(new int[] { 2, 3, 4, 5, 6 }, 2).Cache().Take(2))) [2, 3] sc.Parallelize(new int[] { 2, 3, 4, 5, 6 }, 2).Take(10) [2, 3, 4, 5, 6] sc.Parallelize(Enumerable.Range(0, 100), 100).Filter(x => x > 90).Take(3) [91, 92, 93]
FirstReturn the first element in this RDD. >>> sc.Parallelize(new int[] { 2, 3, 4 }, 2).First() 2
IsEmptyReturns true if and only if the RDD contains no elements at all. Note that an RDD may be empty even when it has at least 1 partition. sc.Parallelize(new int[0], 1).isEmpty() true sc.Parallelize(new int[] {1}).isEmpty() false
SubtractReturn each value in this RDD that is not contained in . var x = sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1) var y = sc.Parallelize(new int[] { 3 }, 1) x.Subtract(y).Collect()) [1, 2, 4]
KeyBy``1Creates tuples of the elements in this RDD by applying . sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1).KeyBy(x => x * x).Collect()) (1, 1), (4, 2), (9, 3), (16, 4)
RepartitionReturn a new RDD that has exactly numPartitions partitions. Can increase or decrease the level of parallelism in this RDD. Internally, this uses a shuffle to redistribute data. If you are decreasing the number of partitions in this RDD, consider using `Coalesce`, which can avoid performing a shuffle. var rdd = sc.Parallelize(new int[] { 1, 2, 3, 4, 5, 6, 7 }, 4) rdd.Glom().Collect().Length 4 rdd.Repartition(2).Glom().Collect().Length 2
CoalesceReturn a new RDD that is reduced into `numPartitions` partitions. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 3).Glom().Collect().Length 3 >>> sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 3).Coalesce(1).Glom().Collect().Length 1
Zip``1Zips this RDD with another one, returning key-value pairs with the first element in each RDD second element in each RDD, etc. Assumes that the two RDDs have the same number of partitions and the same number of elements in each partition (e.g. one was made through a map on the other). var x = sc.parallelize(range(0,5)) var y = sc.parallelize(range(1000, 1005)) x.Zip(y).Collect() [(0, 1000), (1, 1001), (2, 1002), (3, 1003), (4, 1004)]
ZipWithIndexZips this RDD with its element indices. The ordering is first based on the partition index and then the ordering of items within each partition. So the first item in the first partition gets index 0, and the last item in the last partition receives the largest index. This method needs to trigger a spark job when this RDD contains more than one partitions. sc.Parallelize(new string[] { "a", "b", "c", "d" }, 3).ZipWithIndex().Collect() [('a', 0), ('b', 1), ('c', 2), ('d', 3)]
ZipWithUniqueIdZips this RDD with generated unique Long ids. Items in the kth partition will get ids k, n+k, 2*n+k, ..., where n is the number of partitions. So there may exist gaps, but this method won't trigger a spark job, which is different from >>> sc.Parallelize(new string[] { "a", "b", "c", "d" }, 1).ZipWithIndex().Collect() [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)]
SetNameAssign a name to this RDD. >>> rdd1 = sc.parallelize([1, 2]) >>> rdd1.setName('RDD1').name() u'RDD1'
ToDebugStringA description of this RDD and its recursive dependencies for debugging.
GetStorageLevelGet the RDD's current storage level. >>> rdd1 = sc.parallelize([1,2]) >>> rdd1.getStorageLevel() StorageLevel(False, False, False, False, 1) >>> print(rdd1.getStorageLevel()) Serialized 1x Replicated
ToLocalIteratorReturn an iterator that contains all of the elements in this RDD. The iterator will consume as much memory as the largest partition in this RDD. sc.Parallelize(Enumerable.Range(0, 10), 1).ToLocalIterator() [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
RandomSampleWithRangeInternal method exposed for Random Splits in DataFrames. Samples an RDD given a probability range.
+
NameDescription
CachePersist this RDD with the default storage level .
PersistSet this RDD's storage level to persist its values across operations after the first time it is computed. This can only be used to assign a new storage level if the RDD does not have a storage level set yet. If no storage level is specified defaults to . sc.Parallelize(new string[] {"b", "a", "c"}).Persist().isCached True
UnpersistMark the RDD as non-persistent, and remove all blocks for it from memory and disk.
CheckpointMark this RDD for checkpointing. It will be saved to a file inside the checkpoint directory set with ) and all references to its parent RDDs will be removed. This function must be called before any job has been executed on this RDD. It is strongly recommended that this RDD is persisted in memory, otherwise saving it on a file will require recomputation.
GetNumPartitionsReturns the number of partitions of this RDD.
Map``1Return a new RDD by applying a function to each element of this RDD. sc.Parallelize(new string[]{"b", "a", "c"}, 1).Map(x => new Tuple<string, int>(x, 1)).Collect() [('a', 1), ('b', 1), ('c', 1)]
FlatMap``1Return a new RDD by first applying a function to all elements of this RDD, and then flattening the results. sc.Parallelize(new int[] {2, 3, 4}, 1).FlatMap(x => Enumerable.Range(1, x - 1)).Collect() [1, 1, 1, 2, 2, 3]
MapPartitions``1Return a new RDD by applying a function to each partition of this RDD. sc.Parallelize(new int[] {1, 2, 3, 4}, 2).MapPartitions(iter => new[]{iter.Sum(x => (x as decimal?))}).Collect() [3, 7]
MapPartitionsWithIndex``1Return a new RDD by applying a function to each partition of this RDD, while tracking the index of the original partition. sc.Parallelize(new int[]{1, 2, 3, 4}, 4).MapPartitionsWithIndex<double>((pid, iter) => (double)pid).Sum() 6
FilterReturn a new RDD containing only the elements that satisfy a predicate. sc.Parallelize(new int[]{1, 2, 3, 4, 5}, 1).Filter(x => x % 2 == 0).Collect() [2, 4]
DistinctReturn a new RDD containing the distinct elements in this RDD. >>> sc.Parallelize(new int[] {1, 1, 2, 3}, 1).Distinct().Collect() [1, 2, 3]
SampleReturn a sampled subset of this RDD. var rdd = sc.Parallelize(Enumerable.Range(0, 100), 4) 6 <= rdd.Sample(False, 0.1, 81).count() <= 14 true
RandomSplitRandomly splits this RDD with the provided weights. var rdd = sc.Parallelize(Enumerable.Range(0, 500), 1) var rdds = rdd.RandomSplit(new double[] {2, 3}, 17) 150 < rdds[0].Count() < 250 250 < rdds[1].Count() < 350
TakeSampleReturn a fixed-size sampled subset of this RDD. var rdd = sc.Parallelize(Enumerable.Range(0, 10), 2) rdd.TakeSample(true, 20, 1).Length 20 rdd.TakeSample(false, 5, 2).Length 5 rdd.TakeSample(false, 15, 3).Length 10
ComputeFractionForSampleSizeReturns a sampling rate that guarantees a sample of size >= sampleSizeLowerBound 99.99% of the time. How the sampling rate is determined: Let p = num / total, where num is the sample size and total is the total number of data points in the RDD. We're trying to compute q > p such that - when sampling with replacement, we're drawing each data point with prob_i ~ Pois(q), where we want to guarantee Pr[s < num] < 0.0001 for s = sum(prob_i for i from 0 to total), i.e. the failure rate of not having a sufficiently large sample < 0.0001. Setting q = p + 5 * sqrt(p/total) is sufficient to guarantee 0.9999 success rate for num > 12, but we need a slightly larger q (9 empirically determined). - when sampling without replacement, we're drawing each data point with prob_i ~ Binomial(total, fraction) and our choice of q guarantees 1-delta, or 0.9999 success rate, where success rate is defined the same as in sampling with replacement.
UnionReturn the union of this RDD and another one. var rdd = sc.Parallelize(new int[] { 1, 1, 2, 3 }, 1) rdd.union(rdd).collect() [1, 1, 2, 3, 1, 1, 2, 3]
IntersectionReturn the intersection of this RDD and another one. The output will not contain any duplicate elements, even if the input RDDs did. Note that this method performs a shuffle internally. var rdd1 = sc.Parallelize(new int[] { 1, 10, 2, 3, 4, 5 }, 1) var rdd2 = sc.Parallelize(new int[] { 1, 6, 2, 3, 7, 8 }, 1) rdd1.Intersection(rdd2).Collect() [1, 2, 3]
GlomReturn an RDD created by coalescing all elements within each partition into a list. var rdd = sc.Parallelize(new int[] { 1, 2, 3, 4 }, 2) rdd.Glom().Collect() [[1, 2], [3, 4]]
Cartesian``1Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of elements (a, b) where a is in self and b is in other. rdd = sc.Parallelize(new int[] { 1, 2 }, 1) rdd.Cartesian(rdd).Collect() [(1, 1), (1, 2), (2, 1), (2, 2)]
GroupBy``1Return an RDD of grouped items. Each group consists of a key and a sequence of elements mapping to that key. The ordering of elements within each group is not guaranteed, and may even differ each time the resulting RDD is evaluated. Note: This operation may be very expensive. If you are grouping in order to perform an aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] or [[PairRDDFunctions.reduceByKey]] will provide much better performance. var rdd = sc.Parallelize(new int[] { 1, 1, 2, 3, 5, 8 }, 1) var result = rdd.GroupBy(x => x % 2).Collect() [(0, [2, 8]), (1, [1, 1, 3, 5])]
PipeReturn an RDD created by piping elements to a forked external process. >>> sc.Parallelize(new char[] { '1', '2', '3', '4' }, 1).Pipe("cat").Collect() [u'1', u'2', u'3', u'4']
ForeachApplies a function to all elements of this RDD. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).Foreach(x => Console.Write(x))
ForeachPartitionApplies a function to each partition of this RDD. sc.parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).ForeachPartition(iter => { foreach (var x in iter) Console.Write(x + " "); })
CollectReturn a list that contains all of the elements in this RDD.
ReduceReduces the elements of this RDD using the specified commutative and associative binary operator. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).Reduce((x, y) => x + y) 15
TreeReduceReduces the elements of this RDD in a multi-level tree pattern. var rdd = sc.Parallelize(new int[] { -5, -4, -3, -2, -1, 1, 2, 3, 4 }, 10) rdd.TreeReduce((x, y) => x + y) -5
FoldAggregate the elements of each partition, and then the results for all the partitions, using a given associative and commutative function and a neutral "zero value." The function op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object allocation; however, it should not modify t2. This behaves somewhat differently from fold operations implemented for non-distributed collections in functional languages like Scala. This fold operation may be applied to partitions individually, and then fold those results into the final result, rather than apply the fold to each element sequentially in some defined ordering. For functions that are not commutative, the result may differ from that of a fold applied to a non-distributed collection. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).Fold(0, (x, y) => x + y) 15
Aggregate``1Aggregate the elements of each partition, and then the results for all the partitions, using a given combine functions and a neutral "zero value." The function op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object allocation; however, it should not modify t2. The first function (seqOp) can return a different result type, U, than the type of this RDD. Thus, we need one operation for merging a T into an U and one operation for merging two U's. sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1).Aggregate(0, (x, y) => x + y, (x, y) => x + y) 10
TreeAggregate``1Aggregates the elements of this RDD in a multi-level tree pattern. var rdd = sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1) rdd.TreeAggregate(0, (x, y) => x + y, (x, y) => x + y) 10
CountReturn the number of elements in this RDD.
CountByValueReturn the count of each unique value in this RDD as a dictionary of (value, count) pairs. sc.Parallelize(new int[] { 1, 2, 1, 2, 2 }, 2).CountByValue()) [(1, 2), (2, 3)]
TakeTake the first num elements of the RDD. It works by first scanning one partition, and use the results from that partition to estimate the number of additional partitions needed to satisfy the limit. Translated from the Scala implementation in RDD#take(). sc.Parallelize(new int[] { 2, 3, 4, 5, 6 }, 2).Cache().Take(2))) [2, 3] sc.Parallelize(new int[] { 2, 3, 4, 5, 6 }, 2).Take(10) [2, 3, 4, 5, 6] sc.Parallelize(Enumerable.Range(0, 100), 100).Filter(x => x > 90).Take(3) [91, 92, 93]
FirstReturn the first element in this RDD. >>> sc.Parallelize(new int[] { 2, 3, 4 }, 2).First() 2
IsEmptyReturns true if and only if the RDD contains no elements at all. Note that an RDD may be empty even when it has at least 1 partition. sc.Parallelize(new int[0], 1).IsEmpty() true sc.Parallelize(new int[] { 1 }, 1).IsEmpty() false
SubtractReturn each value in this RDD that is not contained in . var x = sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1) var y = sc.Parallelize(new int[] { 3 }, 1) x.Subtract(y).Collect()) [1, 2, 4]
KeyBy``1Creates tuples of the elements in this RDD by applying . sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1).KeyBy(x => x * x).Collect()) (1, 1), (4, 2), (9, 3), (16, 4)
RepartitionReturn a new RDD that has exactly numPartitions partitions. Can increase or decrease the level of parallelism in this RDD. Internally, this uses a shuffle to redistribute data. If you are decreasing the number of partitions in this RDD, consider using `Coalesce`, which can avoid performing a shuffle. var rdd = sc.Parallelize(new int[] { 1, 2, 3, 4, 5, 6, 7 }, 4) rdd.Glom().Collect().Length 4 rdd.Repartition(2).Glom().Collect().Length 2
CoalesceReturn a new RDD that is reduced into `numPartitions` partitions. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 3).Glom().Collect().Length 3 >>> sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 3).Coalesce(1).Glom().Collect().Length 1
Zip``1Zips this RDD with another one, returning pairs of the first element in each RDD, the second element in each RDD, etc. Assumes that the two RDDs have the same number of partitions and the same number of elements in each partition (e.g. one was made through a map on the other). var x = sc.Parallelize(Enumerable.Range(0, 5), 1) var y = sc.Parallelize(Enumerable.Range(1000, 5), 1) x.Zip(y).Collect() [(0, 1000), (1, 1001), (2, 1002), (3, 1003), (4, 1004)]
ZipWithIndexZips this RDD with its element indices. The ordering is first based on the partition index and then the ordering of items within each partition. So the first item in the first partition gets index 0, and the last item in the last partition receives the largest index. This method needs to trigger a spark job when this RDD contains more than one partitions. sc.Parallelize(new string[] { "a", "b", "c", "d" }, 3).ZipWithIndex().Collect() [('a', 0), ('b', 1), ('c', 2), ('d', 3)]
ZipWithUniqueIdZips this RDD with generated unique Long ids. Items in the kth partition will get ids k, n+k, 2*n+k, ..., where n is the number of partitions. So there may exist gaps, but this method won't trigger a spark job, which is different from ZipWithIndex. sc.Parallelize(new string[] { "a", "b", "c", "d", "e" }, 3).ZipWithUniqueId().Collect() [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)]
SetNameAssign a name to this RDD. >>> rdd1 = sc.parallelize([1, 2]) >>> rdd1.setName('RDD1').name() u'RDD1'
ToDebugStringA description of this RDD and its recursive dependencies for debugging.
GetStorageLevelGet the RDD's current storage level. >>> rdd1 = sc.parallelize([1,2]) >>> rdd1.getStorageLevel() StorageLevel(False, False, False, False, 1) >>> print(rdd1.getStorageLevel()) Serialized 1x Replicated
ToLocalIteratorReturn an iterator that contains all of the elements in this RDD. The iterator will consume as much memory as the largest partition in this RDD. sc.Parallelize(Enumerable.Range(0, 10), 1).ToLocalIterator() [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
RandomSampleWithRangeInternal method exposed for Random Splits in DataFrames. Samples an RDD given a probability range.
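The Aggregate``1 entry above notes that the seqOp may return a result type U different from the element type T. Here is a minimal sketch of that shape; the accumulator-first parameter order Aggregate<U>(U zeroValue, Func<U, T, U> seqOp, Func<U, U, U> combOp) is assumed rather than confirmed by this patch, and an existing SparkContext `sc` is presumed.

```csharp
// Illustrative sketch only; assumes Aggregate<U>(U zeroValue, Func<U, T, U> seqOp, Func<U, U, U> combOp)
// and an already-configured SparkContext `sc`.
using System;
using Microsoft.Spark.CSharp.Core;

static class AggregateSketch
{
    public static void Run(SparkContext sc)
    {
        var rdd = sc.Parallelize(new[] { 1, 2, 3, 4 }, 2);

        // seqOp folds each int into a per-partition string; combOp concatenates partition results.
        var digits = rdd.Aggregate(string.Empty,
                                   (acc, x) => acc + x,   // merge a T (int) into a U (string)
                                   (a, b) => a + b);      // merge two U's

        Console.WriteLine(digits); // "1234" if partitions combine in order; ordering is not guaranteed
    }
}
```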
--- @@ -703,6 +703,20 @@ --- +###Microsoft.Spark.CSharp.Streaming.ConstantInputDStream`1 +####Summary + + + An input stream that always returns the same RDD on each timestep. Useful for testing. + + +####Methods + +
NameDescription
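The new ConstantInputDStream`1 section above only says that the stream re-emits the same RDD every batch, which is mainly useful for tests. A hedged usage sketch follows; the constructor shape ConstantInputDStream<T>(RDD<T>, StreamingContext) is an assumption, not something confirmed by this patch, so check Streaming/ConstantInputDStream.cs for the actual signature.

```csharp
// Hedged sketch: the ConstantInputDStream<T>(RDD<T>, StreamingContext) constructor is assumed.
// StreamingContext(sparkContext, durationMs) and ForeachRDD((time, rdd) => ...) match usages in this patch.
using System;
using Microsoft.Spark.CSharp.Core;
using Microsoft.Spark.CSharp.Streaming;

static class ConstantInputDStreamSketch
{
    public static void Run(SparkContext sc)
    {
        var ssc = new StreamingContext(sc, 1000);        // batch interval (milliseconds assumed)
        var rdd = sc.Parallelize(new[] { 1, 2, 3 }, 1);

        // Every batch re-emits the same RDD, which keeps streaming tests deterministic.
        var constantStream = new ConstantInputDStream<int>(rdd, ssc);
        constantStream.ForeachRDD((time, batchRdd) => Console.WriteLine("batch count: " + batchRdd.Count()));

        ssc.Start();
        ssc.AwaitTermination();
    }
}
```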
+ +--- + + ###Microsoft.Spark.CSharp.Streaming.DStream`1 ####Summary @@ -831,7 +845,7 @@ ####Summary - operations only available to KeyValuePair RDD + operations only available to Tuple RDD ####Methods diff --git a/csharp/AdapterTest/AccumulatorTest.cs b/csharp/AdapterTest/AccumulatorTest.cs index f5998f4d..a6b2d7f7 100644 --- a/csharp/AdapterTest/AccumulatorTest.cs +++ b/csharp/AdapterTest/AccumulatorTest.cs @@ -80,7 +80,7 @@ public void TestAccumuatorSuccess() // write update int key = 0; int value = 100; - KeyValuePair update = new KeyValuePair(key, value); + Tuple update = new Tuple(key, value); var ms = new MemoryStream(); var formatter = new BinaryFormatter(); formatter.Serialize(ms, update); @@ -111,7 +111,7 @@ public void TestUndefinedAccumuator() // write update int key = 1; int value = 1000; - KeyValuePair update = new KeyValuePair(key, value); + Tuple update = new Tuple(key, value); var ms = new MemoryStream(); var formatter = new BinaryFormatter(); formatter.Serialize(ms, update); @@ -123,8 +123,8 @@ public void TestUndefinedAccumuator() byte[] receiveBuffer = new byte[1]; s.Read(receiveBuffer, 0, 1); - Assert.IsTrue(Accumulator.accumulatorRegistry.ContainsKey(update.Key)); - var accumulator = Accumulator.accumulatorRegistry[update.Key] as Accumulator; + Assert.IsTrue(Accumulator.accumulatorRegistry.ContainsKey(update.Item1)); + var accumulator = Accumulator.accumulatorRegistry[update.Item1] as Accumulator; Assert.AreEqual(accumulator.Value, value); } } diff --git a/csharp/AdapterTest/DStreamTest.cs b/csharp/AdapterTest/DStreamTest.cs index a46ff061..e829a611 100644 --- a/csharp/AdapterTest/DStreamTest.cs +++ b/csharp/AdapterTest/DStreamTest.cs @@ -47,8 +47,8 @@ public void TestDStreamMapReduce() foreach (object record in taken) { - KeyValuePair countByWord = (KeyValuePair)record; - Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22); + Tuple countByWord = (Tuple)record; + Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 23 : 22); } }); @@ -90,7 +90,7 @@ public void TestDStreamTransform() var words = lines.FlatMap(l => l.Split(' ')); - var pairs = words.Map(w => new KeyValuePair(w, 1)); + var pairs = words.Map(w => new Tuple(w, 1)); var wordCounts = pairs.PartitionBy().ReduceByKey((x, y) => x + y); @@ -101,8 +101,8 @@ public void TestDStreamTransform() foreach (object record in taken) { - KeyValuePair countByWord = (KeyValuePair)record; - Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22); + Tuple countByWord = (Tuple)record; + Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 23 : 22); } }); @@ -115,8 +115,8 @@ public void TestDStreamTransform() foreach (object record in taken) { - KeyValuePair> countByWord = (KeyValuePair>)record; - Assert.AreEqual(countByWord.Value.Count, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22); + Tuple> countByWord = (Tuple>)record; + Assert.AreEqual(countByWord.Item2.Count, countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 
23 : 22); } }); @@ -129,8 +129,8 @@ public void TestDStreamTransform() foreach (object record in taken) { - KeyValuePair countByWord = (KeyValuePair)record; - Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 46 : 44); + Tuple countByWord = (Tuple)record; + Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 46 : 44); } }); } @@ -146,12 +146,12 @@ public void TestDStreamJoin() var words = lines.FlatMap(l => l.Split(' ')); - var pairs = words.Map(w => new KeyValuePair(w, 1)); + var pairs = words.Map(w => new Tuple(w, 1)); var wordCounts = pairs.ReduceByKey((x, y) => x + y); - var left = wordCounts.Filter(x => x.Key != "quick" && x.Key != "lazy"); - var right = wordCounts.Filter(x => x.Key != "brown"); + var left = wordCounts.Filter(x => x.Item1 != "quick" && x.Item1 != "lazy"); + var right = wordCounts.Filter(x => x.Item1 != "brown"); var groupWith = left.GroupWith(right); groupWith.ForeachRDD((time, rdd) => @@ -161,15 +161,15 @@ public void TestDStreamJoin() foreach (object record in taken) { - KeyValuePair, List>> countByWord = (KeyValuePair, List>>)record; - if (countByWord.Key == "quick" || countByWord.Key == "lazy") - Assert.AreEqual(countByWord.Value.Item1.Count, 0); - else if (countByWord.Key == "brown") - Assert.AreEqual(countByWord.Value.Item2.Count, 0); + Tuple, List>> countByWord = (Tuple, List>>)record; + if (countByWord.Item1 == "quick" || countByWord.Item1 == "lazy") + Assert.AreEqual(countByWord.Item2.Item1.Count, 0); + else if (countByWord.Item1 == "brown") + Assert.AreEqual(countByWord.Item2.Item2.Count, 0); else { - Assert.AreEqual(countByWord.Value.Item1[0], countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22); - Assert.AreEqual(countByWord.Value.Item2[0], countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22); + Assert.AreEqual(countByWord.Item2.Item1[0], countByWord.Item1 == "The" || countByWord.Item1 == "dog" ? 23 : 22); + Assert.AreEqual(countByWord.Item2.Item2[0], countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 23 : 22); } } }); @@ -182,9 +182,9 @@ public void TestDStreamJoin() foreach (object record in taken) { - KeyValuePair> countByWord = (KeyValuePair>)record; - Assert.AreEqual(countByWord.Value.Item1, countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22); - Assert.AreEqual(countByWord.Value.Item2, countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22); + Tuple> countByWord = (Tuple>)record; + Assert.AreEqual(countByWord.Item2.Item1, countByWord.Item1 == "The" || countByWord.Item1 == "dog" ? 23 : 22); + Assert.AreEqual(countByWord.Item2.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "dog" ? 23 : 22); } }); @@ -196,11 +196,11 @@ public void TestDStreamJoin() foreach (object record in taken) { - KeyValuePair>> countByWord = (KeyValuePair>>)record; - Assert.AreEqual(countByWord.Value.Item1, countByWord.Key == "The" || countByWord.Key == "dog" ? 23 : 22); - Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog" ? - countByWord.Value.Item2.IsDefined == true && countByWord.Value.Item2.GetValue() == 23 : (countByWord.Key == "brown" ? 
- countByWord.Value.Item2.IsDefined == true == false : countByWord.Value.Item2.IsDefined == true && countByWord.Value.Item2.GetValue() == 22)); + Tuple>> countByWord = (Tuple>>)record; + Assert.AreEqual(countByWord.Item2.Item1, countByWord.Item1 == "The" || countByWord.Item1 == "dog" ? 23 : 22); + Assert.IsTrue(countByWord.Item1 == "The" || countByWord.Item1 == "dog" ? + countByWord.Item2.Item2.IsDefined == true && countByWord.Item2.Item2.GetValue() == 23 : (countByWord.Item1 == "brown" ? + countByWord.Item2.Item2.IsDefined == true == false : countByWord.Item2.Item2.IsDefined == true && countByWord.Item2.Item2.GetValue() == 22)); } }); @@ -212,12 +212,12 @@ public void TestDStreamJoin() foreach (object record in taken) { - KeyValuePair, int>> countByWord = (KeyValuePair, int>>)record; - Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog" ? - countByWord.Value.Item1.IsDefined == true && countByWord.Value.Item1.GetValue() == 23 : - (countByWord.Key == "quick" || countByWord.Key == "lazy" ? countByWord.Value.Item1.IsDefined == false : - countByWord.Value.Item1.IsDefined == true && countByWord.Value.Item1.GetValue() == 22)); - Assert.AreEqual(countByWord.Value.Item2, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22); + Tuple, int>> countByWord = (Tuple, int>>)record; + Assert.IsTrue(countByWord.Item1 == "The" || countByWord.Item1 == "dog" ? + countByWord.Item2.Item1.IsDefined == true && countByWord.Item2.Item1.GetValue() == 23 : + (countByWord.Item1 == "quick" || countByWord.Item1 == "lazy" ? countByWord.Item2.Item1.IsDefined == false : + countByWord.Item2.Item1.IsDefined == true && countByWord.Item2.Item1.GetValue() == 22)); + Assert.AreEqual(countByWord.Item2.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 23 : 22); } }); @@ -229,15 +229,15 @@ public void TestDStreamJoin() foreach (object record in taken) { - KeyValuePair, Option>> countByWord = (KeyValuePair, Option>>)record; - Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog" ? - countByWord.Value.Item1.IsDefined == true && countByWord.Value.Item1.GetValue() == 23 : - (countByWord.Key == "quick" || countByWord.Key == "lazy" ? countByWord.Value.Item1.IsDefined == false : - countByWord.Value.Item1.IsDefined == true && countByWord.Value.Item1.GetValue() == 22)); - - Assert.IsTrue(countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? - countByWord.Value.Item2.IsDefined == true && countByWord.Value.Item2.GetValue() == 23 : - (countByWord.Key == "brown" ? countByWord.Value.Item2.IsDefined == false : countByWord.Value.Item2.IsDefined == true && countByWord.Value.Item2.GetValue() == 22)); + Tuple, Option>> countByWord = (Tuple, Option>>)record; + Assert.IsTrue(countByWord.Item1 == "The" || countByWord.Item1 == "dog" ? + countByWord.Item2.Item1.IsDefined == true && countByWord.Item2.Item1.GetValue() == 23 : + (countByWord.Item1 == "quick" || countByWord.Item1 == "lazy" ? countByWord.Item2.Item1.IsDefined == false : + countByWord.Item2.Item1.IsDefined == true && countByWord.Item2.Item1.GetValue() == 22)); + + Assert.IsTrue(countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? + countByWord.Item2.Item2.IsDefined == true && countByWord.Item2.Item2.GetValue() == 23 : + (countByWord.Item1 == "brown" ? 
countByWord.Item2.Item2.IsDefined == false : countByWord.Item2.Item2.IsDefined == true && countByWord.Item2.Item2.GetValue() == 22)); } }); } @@ -253,7 +253,7 @@ public void TestDStreamUpdateStateByKey() var words = lines.FlatMap(l => l.Split(' ')); - var pairs = words.Map(w => new KeyValuePair(w, 1)); + var pairs = words.Map(w => new Tuple(w, 1)); var doubleCounts = pairs.GroupByKey().FlatMapValues(vs => vs).MapValues(v => 2 * v).ReduceByKey((x, y) => x + y); doubleCounts.ForeachRDD((time, rdd) => @@ -263,8 +263,8 @@ public void TestDStreamUpdateStateByKey() foreach (object record in taken) { - KeyValuePair countByWord = (KeyValuePair)record; - Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 2 * 23 : 2 * 22); + Tuple countByWord = (Tuple)record; + Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 2 * 23 : 2 * 22); } }); @@ -279,8 +279,8 @@ public void TestDStreamUpdateStateByKey() foreach (object record in taken) { - KeyValuePair countByWord = (KeyValuePair)record; - Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 24 : 23); + Tuple countByWord = (Tuple)record; + Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 24 : 23); } }); } @@ -314,7 +314,7 @@ public void TestDStreamMapWithState() var ssc = new StreamingContext(new SparkContext(sparkContextProxy.Object, sparkConf), 10000); var dstreamProxy = new Mock(); - var pairDStream = new DStream>(dstreamProxy.Object, ssc); + var pairDStream = new DStream>(dstreamProxy.Object, ssc); var stateSpec = new StateSpec((k, v, s) => v); var stateDStream = pairDStream.MapWithState(stateSpec); @@ -357,7 +357,7 @@ public void TestDStreamMapWithStateMapWithStateHelper() Assert.IsNotNull(resultRdd); // test when initialStateRdd is not null - var initialStateRdd = new RDD>(new Mock().Object, null); + var initialStateRdd = new RDD>(new Mock().Object, null); var stateSpec2 = new StateSpec((k, v, s) => v).InitialState(initialStateRdd).NumPartitions(2); var helper2 = new MapWithStateHelper((t, rdd) => rdd, stateSpec2); @@ -388,13 +388,13 @@ public void TestDStreamMapWithStateUpdateStateHelper() var input = new dynamic[4]; - var preStateRddRecord = new MapWithStateRDDRecord(ticks - TimeSpan.FromSeconds(2).Ticks, new [] { new KeyValuePair("1", 1), new KeyValuePair("2", 2)}); + var preStateRddRecord = new MapWithStateRDDRecord(ticks - TimeSpan.FromSeconds(2).Ticks, new [] { new Tuple("1", 1), new Tuple("2", 2)}); preStateRddRecord.stateMap.Add("expired", new KeyedState(0, ticks - TimeSpan.FromSeconds(60).Ticks)); input[0] = preStateRddRecord; - input[1] = new KeyValuePair("1", -1); - input[2] = new KeyValuePair("2", 2); - input[3] = new KeyValuePair("3", 3); + input[1] = new Tuple("1", -1); + input[2] = new Tuple("2", 2); + input[3] = new Tuple("3", 3); var result = helper.Execute(1, input).GetEnumerator(); Assert.IsNotNull(result); diff --git a/csharp/AdapterTest/DoubleRDDTest.cs b/csharp/AdapterTest/DoubleRDDTest.cs index d61ad2d7..5b773809 100644 --- a/csharp/AdapterTest/DoubleRDDTest.cs +++ b/csharp/AdapterTest/DoubleRDDTest.cs @@ -19,7 +19,7 @@ public static void Initialize() var sparkContext = new SparkContext(null); var lines = sparkContext.TextFile(Path.GetTempFileName()); var words = lines.FlatMap(l => l.Split(' ')); - doubles = words.Map(w => new KeyValuePair(w, 
1)).ReduceByKey((x, y) => x + y).Map(kv => (double)kv.Value); + doubles = words.Map(w => new Tuple(w, 1)).ReduceByKey((x, y) => x + y).Map(kv => (double)kv.Item2); } [Test] diff --git a/csharp/AdapterTest/EventHubsUtilsTest.cs b/csharp/AdapterTest/EventHubsUtilsTest.cs index 428d4b6f..44f2fa2b 100644 --- a/csharp/AdapterTest/EventHubsUtilsTest.cs +++ b/csharp/AdapterTest/EventHubsUtilsTest.cs @@ -22,7 +22,7 @@ public void TestCreateUnionStream() var streamingContextProxy = new Mock(); var mockDstreamProxy = new Mock().Object; streamingContextProxy.Setup( - m => m.EventHubsUnionStream(It.IsAny>(), It.IsAny())) + m => m.EventHubsUnionStream(It.IsAny>>(), It.IsAny())) .Returns(mockDstreamProxy); var mockSparkClrProxy = new Mock(); @@ -32,7 +32,7 @@ public void TestCreateUnionStream() var sparkContext = new SparkContext(SparkCLREnvironment.SparkCLRProxy.SparkContextProxy, new SparkConf(new Mock().Object)); var streamingContext = new StreamingContext(sparkContext, 123); - var dstream = EventHubsUtils.CreateUnionStream(streamingContext, new Dictionary()); + var dstream = EventHubsUtils.CreateUnionStream(streamingContext, new List>()); Assert.AreEqual(mockDstreamProxy, dstream.DStreamProxy); } } diff --git a/csharp/AdapterTest/Mocks/MockRddProxy.cs b/csharp/AdapterTest/Mocks/MockRddProxy.cs index d9baa284..1a3cfe4c 100644 --- a/csharp/AdapterTest/Mocks/MockRddProxy.cs +++ b/csharp/AdapterTest/Mocks/MockRddProxy.cs @@ -136,7 +136,7 @@ public IRDDProxy Coalesce(int numPartitions, bool shuffle) return this; } - public IRDDProxy SampleByKey(bool withReplacement, Dictionary fractions, long seed) + public IRDDProxy SampleByKey(bool withReplacement, IEnumerable> fractions, long seed) { return this; } @@ -151,13 +151,13 @@ public string ToDebugString() return null; } - public void SaveAsNewAPIHadoopDataset(IEnumerable> conf) + public void SaveAsNewAPIHadoopDataset(IEnumerable> conf) { } - public void SaveAsNewAPIHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf) + public void SaveAsNewAPIHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf) { } - public void SaveAsHadoopDataset(IEnumerable> conf) + public void SaveAsHadoopDataset(IEnumerable> conf) { } public void SaveAsSequenceFile(string path, string compressionCodecClass) @@ -167,7 +167,7 @@ public void SaveAsTextFile(string path, string compressionCodecClass) { } - public void SaveAsHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf, string compressionCodecClass) + public void SaveAsHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable> conf, string compressionCodecClass) { } diff --git a/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs b/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs index 3b783cda..6813daeb 100644 --- a/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs +++ b/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs @@ -133,22 +133,22 @@ public IRDDProxy SequenceFile(string filePath, string keyClass, string valueClas return new MockRddProxy(null); } - public IRDDProxy NewAPIHadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) + public IRDDProxy NewAPIHadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int 
batchSize) { return new MockRddProxy(null); } - public IRDDProxy NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) + public IRDDProxy NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) { return new MockRddProxy(null); } - public IRDDProxy HadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) + public IRDDProxy HadoopFile(string filePath, string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) { return new MockRddProxy(null); } - public IRDDProxy HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) + public IRDDProxy HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable> conf, int batchSize) { return new MockRddProxy(null); } diff --git a/csharp/AdapterTest/Mocks/MockStreamingContextProxy.cs b/csharp/AdapterTest/Mocks/MockStreamingContextProxy.cs index 9df6c506..fee98275 100644 --- a/csharp/AdapterTest/Mocks/MockStreamingContextProxy.cs +++ b/csharp/AdapterTest/Mocks/MockStreamingContextProxy.cs @@ -39,17 +39,17 @@ public IDStreamProxy SocketTextStream(string hostname, int port, Microsoft.Spark return new MockDStreamProxy(); } - public IDStreamProxy KafkaStream(Dictionary topics, Dictionary kafkaParams, Microsoft.Spark.CSharp.Core.StorageLevelType storageLevelType) + public IDStreamProxy KafkaStream(IEnumerable> topics, IEnumerable> kafkaParams, Microsoft.Spark.CSharp.Core.StorageLevelType storageLevelType) { return new MockDStreamProxy(); } - public IDStreamProxy DirectKafkaStream(List topics, Dictionary kafkaParams, Dictionary fromOffsets) + public IDStreamProxy DirectKafkaStream(List topics, IEnumerable> kafkaParams, IEnumerable> fromOffsets) { return new MockDStreamProxy(); } - public IDStreamProxy DirectKafkaStreamWithRepartition(List topics, Dictionary kafkaParams, Dictionary fromOffsets, int numPartitions) + public IDStreamProxy DirectKafkaStreamWithRepartition(List topics, IEnumerable> kafkaParams, IEnumerable> fromOffsets, int numPartitions) { return new MockDStreamProxy(); } @@ -113,7 +113,7 @@ public IDStreamProxy CreateConstantInputDStream(IRDDProxy rddProxy) return new MockDStreamProxy(); } - public IDStreamProxy EventHubsUnionStream(Dictionary eventHubsParams, StorageLevelType storageLevelType) + public IDStreamProxy EventHubsUnionStream(IEnumerable> eventHubsParams, StorageLevelType storageLevelType) { throw new NotImplementedException(); } diff --git a/csharp/AdapterTest/PairRDDTest.cs b/csharp/AdapterTest/PairRDDTest.cs index f8ba3847..00dd3e99 100644 --- a/csharp/AdapterTest/PairRDDTest.cs +++ b/csharp/AdapterTest/PairRDDTest.cs @@ -10,7 +10,7 @@ namespace AdapterTest [TestFixture] public class PairRDDTest { - private static RDD> pairs; + private static RDD> pairs; [OneTimeSetUp] public static void Initialize() @@ -18,7 +18,7 @@ public static void Initialize() var sparkContext = new SparkContext(null); var lines = sparkContext.TextFile(Path.GetTempFileName()); var words = lines.FlatMap(l => l.Split(' ')); - pairs = words.Map(w => new KeyValuePair(w, 
1)); + pairs = words.Map(w => new Tuple(w, 1)); } [Test] @@ -36,53 +36,53 @@ public void TestPairRddGroupWith() { foreach (var record in pairs.GroupWith(pairs).Collect()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item1.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item2.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item1.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item2.Count); } foreach (var record in pairs.GroupWith(pairs, pairs).Collect()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item1.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item2.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item3.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item1.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item2.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item3.Count); } foreach (var record in pairs.GroupWith(pairs, pairs, pairs).Collect()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item1.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item2.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item3.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item4.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item1.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item2.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item3.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item4.Count); } } /// - /// Test RDD.GroupWith() method with different KeyValuePair types. + /// Test RDD.GroupWith() method with different Tuple types. /// [Test] public void TestPairRddGroupWith2() { - var pairs1 = pairs.Map(p => new KeyValuePair(p.Key, Convert.ToDouble(p.Value))); - var pairs2 = pairs.Map(p => new KeyValuePair(p.Key, p.Value.ToString())); - var pairs3 = pairs.Map(p => new KeyValuePair(p.Key, Convert.ToInt64(p.Value))); + var pairs1 = pairs.Map(p => new Tuple(p.Item1, Convert.ToDouble(p.Item2))); + var pairs2 = pairs.Map(p => new Tuple(p.Item1, p.Item2.ToString())); + var pairs3 = pairs.Map(p => new Tuple(p.Item1, Convert.ToInt64(p.Item2))); foreach (var record in pairs.GroupWith(pairs1).Collect()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item1.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 
23 : 22, record.Value.Item2.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item1.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item2.Count); } foreach (var record in pairs.GroupWith(pairs1, pairs2).Collect()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item1.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item2.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item3.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item1.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item2.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item3.Count); } foreach (var record in pairs.GroupWith(pairs1, pairs2, pairs3).Collect()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item1.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item2.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item3.Count); - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Item4.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item1.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item2.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item3.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Item4.Count); } } @@ -90,10 +90,10 @@ public void TestPairRddGroupWith2() public void TestPairRddSubtractByKey() { var reduce = pairs.ReduceByKey((x, y) => x + y); - var records = reduce.SubtractByKey(reduce.Filter(kvp => kvp.Key != "The")).Collect(); + var records = reduce.SubtractByKey(reduce.Filter(kvp => kvp.Item1 != "The")).Collect(); Assert.AreEqual(1, records.Length); - Assert.AreEqual("The", records[0].Key); - Assert.AreEqual(23, records[0].Value); + Assert.AreEqual("The", records[0].Item1); + Assert.AreEqual(23, records[0].Item2); } [Test] @@ -110,7 +110,7 @@ public void TestPairRddFoldByKey() { foreach (var record in pairs.FoldByKey(() => 0, (x, y) => x + y).Collect()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2); } } @@ -119,7 +119,7 @@ public void TestPairRddAggregateByKey() { foreach (var record in pairs.AggregateByKey(() => 0, (x, y) => x + y, (x, y) => x + y).Collect()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 
23 : 22, record.Item2); } } @@ -128,7 +128,7 @@ public void TestPairRddGroupByKey() { foreach (var record in pairs.GroupByKey().Collect()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Count); } } @@ -165,7 +165,7 @@ public void TestPairRddPartitionBy() [Test] public void TestPairRddSortByKey() { - var expectedSortedRdd = pairs.Collect().OrderBy(kv => kv.Key, StringComparer.OrdinalIgnoreCase).ToArray(); + var expectedSortedRdd = pairs.Collect().OrderBy(kv => kv.Item1, StringComparer.OrdinalIgnoreCase).ToArray(); var rddSortByKey = pairs.SortByKey(true, null, key => key.ToLowerInvariant()).Collect(); CollectionAssert.AreEqual(expectedSortedRdd, rddSortByKey); } @@ -173,7 +173,7 @@ public void TestPairRddSortByKey() [Test] public void TestPairRddSortByKey2() { - var expectedSortedRdd = pairs.Collect().OrderBy(kv => kv.Key, StringComparer.OrdinalIgnoreCase).ToArray(); + var expectedSortedRdd = pairs.Collect().OrderBy(kv => kv.Item1, StringComparer.OrdinalIgnoreCase).ToArray(); var rddSortByKey = pairs.SortByKey(true, 1, key => key.ToLowerInvariant()).Collect(); CollectionAssert.AreEqual(expectedSortedRdd, rddSortByKey); } diff --git a/csharp/AdapterTest/RDDTest.cs b/csharp/AdapterTest/RDDTest.cs index 2d3049df..23bbdc11 100644 --- a/csharp/AdapterTest/RDDTest.cs +++ b/csharp/AdapterTest/RDDTest.cs @@ -119,14 +119,14 @@ public void TestRddGroupBy() { words.GroupBy(w => w).Foreach(record => { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2.Count); }); words.GroupBy(w => w).ForeachPartition(iter => { foreach (var record in iter) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value.Count); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 
23 : 22, record.Item2.Count); } }); } @@ -144,7 +144,7 @@ public void TestRddZipWithIndex() int index = 0; foreach(var record in words.ZipWithIndex().Collect()) { - Assert.AreEqual(index++, record.Value); + Assert.AreEqual(index++, record.Item2); } } @@ -155,7 +155,7 @@ public void TestRddZipWithUniqueId() int num = words.GetNumPartitions(); foreach (var record in words.ZipWithUniqueId().Collect()) { - Assert.AreEqual(num * index++, record.Value); + Assert.AreEqual(num * index++, record.Item2); } } diff --git a/csharp/AdapterTest/SparkContextTest.cs b/csharp/AdapterTest/SparkContextTest.cs index 8b4f45f5..719ad3e0 100644 --- a/csharp/AdapterTest/SparkContextTest.cs +++ b/csharp/AdapterTest/SparkContextTest.cs @@ -412,7 +412,7 @@ public void TestNewAPIHadoopFile() Mock rddProxy = new Mock(); Mock sparkContextProxy = new Mock(); - sparkContextProxy.Setup(m => m.NewAPIHadoopFile(filePath, It.IsAny(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny>>(), It.IsAny())) + sparkContextProxy.Setup(m => m.NewAPIHadoopFile(filePath, It.IsAny(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny>>(), It.IsAny())) .Returns(rddProxy.Object); SparkContext sc = new SparkContext(sparkContextProxy.Object, null); @@ -440,7 +440,7 @@ public void TestHadoopFile() Mock rddProxy = new Mock(); Mock sparkContextProxy = new Mock(); - sparkContextProxy.Setup(m => m.HadoopFile(filePath, It.IsAny(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny>>(), It.IsAny())) + sparkContextProxy.Setup(m => m.HadoopFile(filePath, It.IsAny(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny>>(), It.IsAny())) .Returns(rddProxy.Object); SparkContext sc = new SparkContext(sparkContextProxy.Object, null); @@ -466,12 +466,12 @@ public void TestNewAPIHadoopRDD() Mock rddProxy = new Mock(); Mock sparkContextProxy = new Mock(); - sparkContextProxy.Setup(m => m.NewAPIHadoopRDD(It.IsAny(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny>>(), It.IsAny())) + sparkContextProxy.Setup(m => m.NewAPIHadoopRDD(It.IsAny(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny>>(), It.IsAny())) .Returns(rddProxy.Object); SparkContext sc = new SparkContext(sparkContextProxy.Object, null); const string inputFormatClass = "org.apache.hadoop.mapreduce.lib.input.TextInputFormat"; - var conf = new KeyValuePair[] { }; + var conf = new Tuple[] { }; // Act RDD rdd = sc.NewAPIHadoopRDD(inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, conf); @@ -493,12 +493,12 @@ public void TestHadoopRDD() Mock rddProxy = new Mock(); Mock sparkContextProxy = new Mock(); - sparkContextProxy.Setup(m => m.HadoopRDD(It.IsAny(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny>>(), It.IsAny())) + sparkContextProxy.Setup(m => m.HadoopRDD(It.IsAny(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny>>(), It.IsAny())) .Returns(rddProxy.Object); SparkContext sc = new SparkContext(sparkContextProxy.Object, null); const string inputFormatClass = "org.apache.hadoop.mapreduce.lib.input.TextInputFormat"; - var conf = new KeyValuePair[] { }; + var conf = new Tuple[] { }; // Act RDD rdd = sc.HadoopRDD(inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, conf); diff --git a/csharp/AdapterTest/StreamingContextTest.cs b/csharp/AdapterTest/StreamingContextTest.cs index af07c48e..b2628bf2 100644 --- a/csharp/AdapterTest/StreamingContextTest.cs +++ 
b/csharp/AdapterTest/StreamingContextTest.cs @@ -31,13 +31,13 @@ public void TestStreamingContext() var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345); Assert.IsNotNull(socketStream.DStreamProxy); - var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new Dictionary { { "testTopic1", 1 } }, new Dictionary()); + var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new [] { Tuple.Create("testTopic1", 1) }, new List>()); Assert.IsNotNull(kafkaStream.DStreamProxy); - var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List { "testTopic2" }, new Dictionary(), new Dictionary()); + var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List { "testTopic2" }, new List>(), new List>()); Assert.IsNotNull(directKafkaStream.DStreamProxy); - var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStreamWithRepartition(ssc, new List { "testTopic3" }, new Dictionary(), new Dictionary(), 10); + var directKafkaStreamWithRepartition = KafkaUtils.CreateDirectStreamWithRepartition(ssc, new List { "testTopic3" }, new List>(), new List>(), 10); Assert.IsNotNull(directKafkaStreamWithRepartition.DStreamProxy); var union = ssc.Union(textFile, socketStream); diff --git a/csharp/AdapterTest/TestWithMoqDemo.cs b/csharp/AdapterTest/TestWithMoqDemo.cs index 70c4f8d6..fda808f7 100644 --- a/csharp/AdapterTest/TestWithMoqDemo.cs +++ b/csharp/AdapterTest/TestWithMoqDemo.cs @@ -166,7 +166,7 @@ public void TestDStreamTransform_Moq() // Act var lines = _streamingContext.TextFileStream(Path.GetTempPath()); var words = lines.FlatMap(l => l.Split(' ')); - var pairs = words.Map(w => new KeyValuePair(w, 1)); + var pairs = words.Map(w => new Tuple(w, 1)); var wordCounts = pairs.ReduceByKey((x, y) => x + y); // Assert @@ -177,8 +177,8 @@ public void TestDStreamTransform_Moq() foreach (object record in taken) { - KeyValuePair countByWord = (KeyValuePair)record; - Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22); + Tuple countByWord = (Tuple)record; + Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 
23 : 22); } }); // Use Verify to verify if a method to mock was invoked diff --git a/csharp/Perf/Microsoft.Spark.CSharp/FreebaseDeletionsBenchmark.cs b/csharp/Perf/Microsoft.Spark.CSharp/FreebaseDeletionsBenchmark.cs index c76fc51e..e001a990 100644 --- a/csharp/Perf/Microsoft.Spark.CSharp/FreebaseDeletionsBenchmark.cs +++ b/csharp/Perf/Microsoft.Spark.CSharp/FreebaseDeletionsBenchmark.cs @@ -63,11 +63,11 @@ internal static void RunRDDMaxDeletionsByUser(string[] args) var flaggedRows = parsedRows.Filter(s => s.Item1); //select good rows var selectedDeletions = flaggedRows.Filter(s => s.Item3.Equals(s.Item5)); //select deletions made by same creators - var userDeletions = selectedDeletions.Map(s => new KeyValuePair(s.Item3, 1)); + var userDeletions = selectedDeletions.Map(s => new Tuple(s.Item3, 1)); var userDeletionCount = userDeletions.ReduceByKey((x, y) => x + y); - var userWithMaxDeletions = userDeletionCount.Fold(new KeyValuePair("zerovalue", 0), (kvp1, kvp2) => + var userWithMaxDeletions = userDeletionCount.Fold(new Tuple("zerovalue", 0), (kvp1, kvp2) => { - if (kvp1.Value > kvp2.Value) + if (kvp1.Item2 > kvp2.Item2) return kvp1; else return kvp2; @@ -76,7 +76,7 @@ internal static void RunRDDMaxDeletionsByUser(string[] args) stopwatch.Stop(); PerfBenchmark.ExecutionTimeList.Add(stopwatch.Elapsed); - Console.WriteLine("User with max deletions is {0}, count of deletions={1}. Elapsed time={2}", userWithMaxDeletions.Key, userWithMaxDeletions.Value, stopwatch.Elapsed); + Console.WriteLine("User with max deletions is {0}, count of deletions={1}. Elapsed time={2}", userWithMaxDeletions.Item1, userWithMaxDeletions.Item2, stopwatch.Elapsed); } [PerfSuite] diff --git a/csharp/Samples/Microsoft.Spark.CSharp/DStreamSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/DStreamSamples.cs index 3d47966c..488a46ef 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/DStreamSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/DStreamSamples.cs @@ -69,7 +69,7 @@ internal static void DStreamTextFileSamples() var lines = context.TextFileStream(Path.Combine(directory, "test")); lines = context.Union(lines, lines); var words = lines.FlatMap(l => l.Split(' ')); - var pairs = words.Map(w => new KeyValuePair(w, 1)); + var pairs = words.Map(w => new Tuple(w, 1)); // since operations like ReduceByKey, Join and UpdateStateByKey are // separate dstream transformations defined in CSharpDStream.scala @@ -140,7 +140,7 @@ internal static void DStreamDirectKafkaWithRepartitionSample() {"auto.offset.reset", "smallest"} }; - var dstream = KafkaUtils.CreateDirectStreamWithRepartition(context, new List { topic }, kafkaParams, new Dictionary(), partitions); + var dstream = KafkaUtils.CreateDirectStreamWithRepartition(context, new List { topic }, kafkaParams.Select(kv => Tuple.Create(kv.Key, kv.Value)), new List>(), partitions); dstream.ForeachRDD((time, rdd) => { diff --git a/csharp/Samples/Microsoft.Spark.CSharp/DStreamStateSample.cs b/csharp/Samples/Microsoft.Spark.CSharp/DStreamStateSample.cs index 0c0e3c28..aef5fc6a 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/DStreamStateSample.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/DStreamStateSample.cs @@ -61,16 +61,16 @@ internal static void DStreamMapWithStateSample() var lines = context.TextFileStream(Path.Combine(directory, "test1")); lines = context.Union(lines, lines); var words = lines.FlatMap(l => l.Split(' ')); - var pairs = words.Map(w => new KeyValuePair(w, 1)); + var pairs = words.Map(w => new Tuple(w, 1)); var wordCounts = pairs.ReduceByKey((x, y) => x + y); - 
var initialState = sc.Parallelize(new[] { new KeyValuePair("NOT_A_WORD", 1024), new KeyValuePair("dog", 10000), }, 1); - var stateSpec = new StateSpec>((word, count, state) => + var initialState = sc.Parallelize(new[] { new Tuple("NOT_A_WORD", 1024), new Tuple("dog", 10000), }, 1); + var stateSpec = new StateSpec>((word, count, state) => { if (state.IsTimingOut()) { Console.WriteLine("Found timing out word: {0}", word); - return new KeyValuePair(word, state.Get()); + return new Tuple(word, state.Get()); } var sum = 0; @@ -80,7 +80,7 @@ internal static void DStreamMapWithStateSample() } state.Update(sum + count); Console.WriteLine("word: {0}, count: {1}", word, sum + count); - return new KeyValuePair(word, sum + count); + return new Tuple(word, sum + count); }).NumPartitions(1).InitialState(initialState).Timeout(TimeSpan.FromSeconds(30)); var snapshots = wordCounts.MapWithState(stateSpec).StateSnapshots(); @@ -90,9 +90,9 @@ internal static void DStreamMapWithStateSample() Console.WriteLine("Snapshots @ Time: {0}", time); Console.WriteLine("-------------------------------------------"); - foreach (KeyValuePair record in rdd.Collect()) + foreach (Tuple record in rdd.Collect()) { - Console.WriteLine("[{0}, {1}]", record.Key, record.Value); + Console.WriteLine("[{0}, {1}]", record.Item1, record.Item2); } Console.WriteLine(); }); diff --git a/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs index ac63d312..97604337 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs @@ -15,7 +15,7 @@ class PairRDDSamples [Sample] internal static void PairRDDCollectAsMapSample() { - var map = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair(1, 2), new KeyValuePair(3, 4) }, 1).CollectAsMap(); + var map = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple(1, 2), new Tuple(3, 4) }, 1).CollectAsMap(); foreach (var kv in map) Console.WriteLine(kv); @@ -30,7 +30,7 @@ internal static void PairRDDCollectAsMapSample() [Sample] internal static void PairRDDKeysSample() { - var keys = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair(1, 2), new KeyValuePair(3, 4) }, 1).Keys().Collect(); + var keys = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple(1, 2), new Tuple(3, 4) }, 1).Keys().Collect(); Console.WriteLine(keys[0]); Console.WriteLine(keys[1]); @@ -45,7 +45,7 @@ internal static void PairRDDKeysSample() [Sample] internal static void PairRDDValuesSample() { - var values = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair(1, 2), new KeyValuePair(3, 4) }, 1).Values().Collect(); + var values = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple(1, 2), new Tuple(3, 4) }, 1).Values().Collect(); Console.WriteLine(values[0]); Console.WriteLine(values[1]); @@ -63,9 +63,9 @@ internal static void PairRDDReduceByKeySample() var reduced = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new KeyValuePair("b", 1), - new KeyValuePair("a", 1) + new Tuple("a", 1), + new Tuple("b", 1), + new Tuple("a", 1) }, 2) .ReduceByKey((x, y) => x + y).Collect(); @@ -74,8 +74,8 @@ internal static void PairRDDReduceByKeySample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(reduced.Contains(new KeyValuePair("a", 2))); - Assert.IsTrue(reduced.Contains(new KeyValuePair("b", 1))); + Assert.IsTrue(reduced.Contains(new Tuple("a", 2))); + Assert.IsTrue(reduced.Contains(new Tuple("b", 
1))); } } @@ -85,9 +85,9 @@ internal static void PairRDDReduceByKeyLocallySample() var reduced = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new KeyValuePair("b", 1), - new KeyValuePair("a", 1) + new Tuple("a", 1), + new Tuple("b", 1), + new Tuple("a", 1) }, 2) .ReduceByKeyLocally((x, y) => x + y); @@ -105,13 +105,14 @@ internal static void PairRDDReduceByKeyLocallySample() internal static void PairRDDCountByKeySample() { var countByKey = SparkCLRSamples.SparkContext.Parallelize( - new[] - { - new KeyValuePair("a", 1), - new KeyValuePair("b", 1), - new KeyValuePair("a", 1) + new[] + { + new Tuple("a", 1), + new Tuple("b", 1), + new Tuple("a", 1) }, 2) - .CountByKey(); + .CountByKey() + .ToLookup(x => x.Key); foreach (var kv in countByKey) Console.WriteLine(kv); @@ -129,15 +130,15 @@ internal static void PairRDDJoinSample() var l = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new KeyValuePair("b", 4), + new Tuple("a", 1), + new Tuple("b", 4), }, 1); var r = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 2), - new KeyValuePair("a", 3), + new Tuple("a", 2), + new Tuple("a", 3), }, 1); var joined = l.Join(r, 2).Collect(); @@ -147,8 +148,8 @@ internal static void PairRDDJoinSample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(joined.Contains(new KeyValuePair>("a", new Tuple(1, 2)))); - Assert.IsTrue(joined.Contains(new KeyValuePair>("a", new Tuple(1, 3)))); + Assert.IsTrue(joined.Contains(new Tuple>("a", new Tuple(1, 2)))); + Assert.IsTrue(joined.Contains(new Tuple>("a", new Tuple(1, 3)))); } } @@ -158,14 +159,14 @@ internal static void PairRDDLeftOuterJoinSample() var l = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new KeyValuePair("b", 4), + new Tuple("a", 1), + new Tuple("b", 4), }, 2); var r = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 2), + new Tuple("a", 2), }, 1); var joined = l.LeftOuterJoin(r).Collect(); @@ -175,8 +176,8 @@ internal static void PairRDDLeftOuterJoinSample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(joined.Any(kv => kv.Key == "a" && kv.Value.Item1 == 1 && kv.Value.Item2.IsDefined && kv.Value.Item2.GetValue() == 2)); - Assert.IsTrue(joined.Any(kv => kv.Key == "b" && kv.Value.Item1 == 4 && !kv.Value.Item2.IsDefined)); + Assert.IsTrue(joined.Any(kv => kv.Item1 == "a" && kv.Item2.Item1 == 1 && kv.Item2.Item2.IsDefined && kv.Item2.Item2.GetValue() == 2)); + Assert.IsTrue(joined.Any(kv => kv.Item1 == "b" && kv.Item2.Item1 == 4 && !kv.Item2.Item2.IsDefined)); } } @@ -186,14 +187,14 @@ internal static void PairRDDRightOuterJoinSample() var l = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 2), + new Tuple("a", 2), }, 1); var r = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new KeyValuePair("b", 4), + new Tuple("a", 1), + new Tuple("b", 4), }, 2); var joined = l.RightOuterJoin(r).Collect(); @@ -203,8 +204,8 @@ internal static void PairRDDRightOuterJoinSample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(joined.Any(kv => kv.Key == "a" && kv.Value.Item1.IsDefined && kv.Value.Item1.GetValue() == 2 && kv.Value.Item2 == 1)); - Assert.IsTrue(joined.Any(kv => kv.Key == "b" && !kv.Value.Item1.IsDefined && kv.Value.Item2 == 4)); + Assert.IsTrue(joined.Any(kv => kv.Item1 == "a" && kv.Item2.Item1.IsDefined && kv.Item2.Item1.GetValue() == 2 && kv.Item2.Item2 == 1)); + 
Assert.IsTrue(joined.Any(kv => kv.Item1 == "b" && !kv.Item2.Item1.IsDefined && kv.Item2.Item2 == 4)); } } @@ -214,15 +215,15 @@ internal static void PairRDDFullOuterJoinSample() var l = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new KeyValuePair("b", 4), + new Tuple("a", 1), + new Tuple("b", 4), }, 2); var r = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 2), - new KeyValuePair("c", 8), + new Tuple("a", 2), + new Tuple("c", 8), }, 2); var joined = l.FullOuterJoin(r).Collect(); @@ -232,12 +233,12 @@ internal static void PairRDDFullOuterJoinSample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(joined.Any(kv => kv.Key == "a" && kv.Value.Item1.IsDefined && kv.Value.Item1.GetValue() == 1 && - kv.Value.Item2.IsDefined && kv.Value.Item2.GetValue() == 2)); - Assert.IsTrue(joined.Any(kv => kv.Key == "b" && kv.Value.Item1.IsDefined && kv.Value.Item1.GetValue() == 4 && - !kv.Value.Item2.IsDefined)); - Assert.IsTrue(joined.Any(kv => kv.Key == "c" && !kv.Value.Item1.IsDefined && - kv.Value.Item2.IsDefined && kv.Value.Item2.GetValue() == 8)); + Assert.IsTrue(joined.Any(kv => kv.Item1 == "a" && kv.Item2.Item1.IsDefined && kv.Item2.Item1.GetValue() == 1 && + kv.Item2.Item2.IsDefined && kv.Item2.Item2.GetValue() == 2)); + Assert.IsTrue(joined.Any(kv => kv.Item1 == "b" && kv.Item2.Item1.IsDefined && kv.Item2.Item1.GetValue() == 4 && + !kv.Item2.Item2.IsDefined)); + Assert.IsTrue(joined.Any(kv => kv.Item1 == "c" && !kv.Item2.Item1.IsDefined && + kv.Item2.Item2.IsDefined && kv.Item2.Item2.GetValue() == 8)); } } @@ -252,7 +253,7 @@ internal static void PairRDDPartitionBySample() }; var partitioned = SparkCLRSamples.SparkContext.Parallelize(new[] { 1, 2, 3, 4, 5, 6, 1 }, 3) - .Map(x => new KeyValuePair(x, x + 100)) + .Map(x => new Tuple(x, x + 100)) .PartitionBy(3, partitionFunc) .Glom() .Collect(); @@ -270,9 +271,9 @@ internal static void PairRDDPartitionBySample() { Assert.AreEqual(3, partitioned.Length); // Assert that the partition distribution is correct with partitionFunc - Assert.IsTrue(partitioned.Count(p => p.All(key => key.Key < 3)) == 1); - Assert.IsTrue(partitioned.Count(p => p.All(key => key.Key >= 3 && key.Key < 6)) == 1); - Assert.IsTrue(partitioned.Count(p => p.All(key => key.Key >= 6)) == 1); + Assert.IsTrue(partitioned.Count(p => p.All(key => key.Item1 < 3)) == 1); + Assert.IsTrue(partitioned.Count(p => p.All(key => key.Item1 >= 3 && key.Item1 < 6)) == 1); + Assert.IsTrue(partitioned.Count(p => p.All(key => key.Item1 >= 6)) == 1); } } @@ -282,9 +283,9 @@ internal static void PairRDDCombineByKeySample() var combineByKey = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new KeyValuePair("b", 1), - new KeyValuePair("a", 1) + new Tuple("a", 1), + new Tuple("b", 1), + new Tuple("a", 1) }, 2) .CombineByKey(() => string.Empty, (x, y) => x + y.ToString(CultureInfo.InvariantCulture), (x, y) => x + y).Collect(); @@ -293,8 +294,8 @@ internal static void PairRDDCombineByKeySample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(combineByKey.Contains(new KeyValuePair("a", "11"))); - Assert.IsTrue(combineByKey.Contains(new KeyValuePair("b", "1"))); + Assert.IsTrue(combineByKey.Contains(new Tuple("a", "11"))); + Assert.IsTrue(combineByKey.Contains(new Tuple("b", "1"))); } } @@ -304,9 +305,9 @@ internal static void PairRDDAggregateByKeySample() var aggregateByKey = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new 
KeyValuePair("b", 1), - new KeyValuePair("a", 1) + new Tuple("a", 1), + new Tuple("b", 1), + new Tuple("a", 1) }, 2) .AggregateByKey(() => 0, (x, y) => x + y, (x, y) => x + y).Collect(); @@ -315,8 +316,8 @@ internal static void PairRDDAggregateByKeySample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(aggregateByKey.Contains(new KeyValuePair("a", 2))); - Assert.IsTrue(aggregateByKey.Contains(new KeyValuePair("b", 1))); + Assert.IsTrue(aggregateByKey.Contains(new Tuple("a", 2))); + Assert.IsTrue(aggregateByKey.Contains(new Tuple("b", 1))); } } @@ -326,9 +327,9 @@ internal static void PairRDDFoldByKeySample() var FoldByKey = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new KeyValuePair("b", 1), - new KeyValuePair("a", 1) + new Tuple("a", 1), + new Tuple("b", 1), + new Tuple("a", 1) }, 2) .FoldByKey(() => 0, (x, y) => x + y).Collect(); @@ -337,8 +338,8 @@ internal static void PairRDDFoldByKeySample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(FoldByKey.Contains(new KeyValuePair("a", 2))); - Assert.IsTrue(FoldByKey.Contains(new KeyValuePair("b", 1))); + Assert.IsTrue(FoldByKey.Contains(new Tuple("a", 2))); + Assert.IsTrue(FoldByKey.Contains(new Tuple("b", 1))); } } @@ -348,19 +349,19 @@ internal static void PairRDDGroupByKeySample() var groupByKey = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", 1), - new KeyValuePair("b", 1), - new KeyValuePair("a", 1) + new Tuple("a", 1), + new Tuple("b", 1), + new Tuple("a", 1) }, 2) .GroupByKey().Collect(); foreach (var kv in groupByKey) - Console.WriteLine(kv.Key + ", " + "(" + string.Join(",", kv.Value) + ")"); + Console.WriteLine(kv.Item1 + ", " + "(" + string.Join(",", kv.Item2) + ")"); if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(groupByKey.Any(kv => kv.Key == "a" && kv.Value.Count == 2 && kv.Value[0] == 1 && kv.Value[1] == 1)); - Assert.IsTrue(groupByKey.Any(kv => kv.Key == "b" && kv.Value.Count == 1 && kv.Value[0] == 1)); + Assert.IsTrue(groupByKey.Any(kv => kv.Item1 == "a" && kv.Item2.Count == 2 && kv.Item2[0] == 1 && kv.Item2[1] == 1)); + Assert.IsTrue(groupByKey.Any(kv => kv.Item1 == "b" && kv.Item2.Count == 1 && kv.Item2[0] == 1)); } } @@ -370,8 +371,8 @@ internal static void PairRDDMapValuesSample() var mapValues = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", new[]{"apple", "banana", "lemon"}), - new KeyValuePair("b", new[]{"grapes"}) + new Tuple("a", new[]{"apple", "banana", "lemon"}), + new Tuple("b", new[]{"grapes"}) }, 2) .MapValues(x => x.Length).Collect(); @@ -380,8 +381,8 @@ internal static void PairRDDMapValuesSample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(mapValues.Any(kv => kv.Key == "a" && kv.Value == 3)); - Assert.IsTrue(mapValues.Any(kv => kv.Key == "b" && kv.Value == 1)); + Assert.IsTrue(mapValues.Any(kv => kv.Item1 == "a" && kv.Item2 == 3)); + Assert.IsTrue(mapValues.Any(kv => kv.Item1 == "b" && kv.Item2 == 1)); } } @@ -391,8 +392,8 @@ internal static void PairRDDFlatMapValuesSample() var flatMapValues = SparkCLRSamples.SparkContext.Parallelize( new[] { - new KeyValuePair("a", new[]{"x", "y", "z"}), - new KeyValuePair("b", new[]{"p", "r"}) + new Tuple("a", new[]{"x", "y", "z"}), + new Tuple("b", new[]{"p", "r"}) }, 2) .FlatMapValues(x => x).Collect(); @@ -401,48 +402,48 @@ internal static void PairRDDFlatMapValuesSample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(flatMapValues.Any(kv => 
kv.Key == "a" && kv.Value == "x")); - Assert.IsTrue(flatMapValues.Any(kv => kv.Key == "a" && kv.Value == "y")); - Assert.IsTrue(flatMapValues.Any(kv => kv.Key == "a" && kv.Value == "z")); - Assert.IsTrue(flatMapValues.Any(kv => kv.Key == "b" && kv.Value == "p")); - Assert.IsTrue(flatMapValues.Any(kv => kv.Key == "b" && kv.Value == "r")); + Assert.IsTrue(flatMapValues.Any(kv => kv.Item1 == "a" && kv.Item2 == "x")); + Assert.IsTrue(flatMapValues.Any(kv => kv.Item1 == "a" && kv.Item2 == "y")); + Assert.IsTrue(flatMapValues.Any(kv => kv.Item1 == "a" && kv.Item2 == "z")); + Assert.IsTrue(flatMapValues.Any(kv => kv.Item1 == "b" && kv.Item2 == "p")); + Assert.IsTrue(flatMapValues.Any(kv => kv.Item1 == "b" && kv.Item2 == "r")); } } [Sample] internal static void PairRDDGroupWithSample() { - var x = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair("a", 1), new KeyValuePair("b", 4)}, 2); - var y = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair("a", 2)}, 1); + var x = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple("a", 1), new Tuple("b", 4)}, 2); + var y = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple("a", 2)}, 1); var groupWith = x.GroupWith(y).Collect(); foreach (var kv in groupWith) - Console.WriteLine(kv.Key + ", " + "(" + string.Join(",", kv.Value) + ")"); + Console.WriteLine(kv.Item1 + ", " + "(" + string.Join(",", kv.Item2) + ")"); if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(groupWith.Any(kv => kv.Key == "a" && kv.Value.Item1[0] == 1 && kv.Value.Item2[0] == 2)); - Assert.IsTrue(groupWith.Any(kv => kv.Key == "b" && kv.Value.Item1[0] == 4 && !kv.Value.Item2.Any())); + Assert.IsTrue(groupWith.Any(kv => kv.Item1 == "a" && kv.Item2.Item1[0] == 1 && kv.Item2.Item2[0] == 2)); + Assert.IsTrue(groupWith.Any(kv => kv.Item1 == "b" && kv.Item2.Item1[0] == 4 && !kv.Item2.Item2.Any())); } } [Sample] internal static void PairRDDGroupWithSample2() { - var x = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair("a", 5), new KeyValuePair("b", 6) }, 2); - var y = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair("a", 1), new KeyValuePair("b", 4) }, 2); - var z = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair("a", 2) }, 1); + var x = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple("a", 5), new Tuple("b", 6) }, 2); + var y = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple("a", 1), new Tuple("b", 4) }, 2); + var z = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple("a", 2) }, 1); var groupWith = x.GroupWith(y, z).Collect(); foreach (var kv in groupWith) - Console.WriteLine(kv.Key + ", " + "(" + string.Join(",", kv.Value) + ")"); + Console.WriteLine(kv.Item1 + ", " + "(" + string.Join(",", kv.Item2) + ")"); if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(groupWith.Any(kv => kv.Key == "a" && kv.Value.Item1[0] == 5 && kv.Value.Item2[0] == 1 && kv.Value.Item3[0] == 2)); - Assert.IsTrue(groupWith.Any(kv => kv.Key == "b" && kv.Value.Item1[0] == 6 && kv.Value.Item2[0] == 4 && !kv.Value.Item3.Any())); + Assert.IsTrue(groupWith.Any(kv => kv.Item1 == "a" && kv.Item2.Item1[0] == 5 && kv.Item2.Item2[0] == 1 && kv.Item2.Item3[0] == 2)); + Assert.IsTrue(groupWith.Any(kv => kv.Item1 == "b" && kv.Item2.Item1[0] == 6 && kv.Item2.Item2[0] == 4 && !kv.Item2.Item3.Any())); } } @@ -452,7 +453,7 @@ internal static void PairRDDGroupWithSample2() //{ // var fractions = new Dictionary { { "a", 0.2 }, { "b", 0.1 } }; // var rdd = 
SparkCLRSamples.SparkContext.Parallelize(fractions.Keys.ToArray(), 2).Cartesian(SparkCLRSamples.SparkContext.Parallelize(Enumerable.Range(0, 1000), 2)); - // var sample = rdd.Map(t => new KeyValuePair(t.Item1, t.Item2)).SampleByKey(false, fractions, 2).GroupByKey().Collect(); + // var sample = rdd.Map(t => new Tuple(t.Item1, t.Item2)).SampleByKey(false, fractions, 2).GroupByKey().Collect(); // Console.WriteLine(sample); //} @@ -460,8 +461,8 @@ internal static void PairRDDGroupWithSample2() [Sample] internal static void PairRDDSubtractByKeySample() { - var x = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair("a", 1), new KeyValuePair("b", 4), new KeyValuePair("b", 5), new KeyValuePair("a", 2) }, 2); - var y = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair("a", 3), new KeyValuePair("c", null) }, 2); + var x = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple("a", 1), new Tuple("b", 4), new Tuple("b", 5), new Tuple("a", 2) }, 2); + var y = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple("a", 3), new Tuple("c", null) }, 2); var subtractByKey = x.SubtractByKey(y).Collect(); @@ -471,15 +472,15 @@ internal static void PairRDDSubtractByKeySample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { Assert.AreEqual(2, subtractByKey.Length); - subtractByKey.Contains(new KeyValuePair("b", 4)); - subtractByKey.Contains(new KeyValuePair("b", 5)); + subtractByKey.Contains(new Tuple("b", 4)); + subtractByKey.Contains(new Tuple("b", 5)); } } [Sample] internal static void PairRDDLookupSample() { - var rdd = SparkCLRSamples.SparkContext.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new KeyValuePair(x, y)), 10); + var rdd = SparkCLRSamples.SparkContext.Parallelize(Enumerable.Range(0, 1000).Zip(Enumerable.Range(0, 1000), (x, y) => new Tuple(x, y)), 10); var lookup42 = rdd.Lookup(42); var lookup1024 = rdd.Lookup(1024); Console.WriteLine(string.Join(",", lookup42)); @@ -495,9 +496,9 @@ internal static void PairRDDLookupSample() [Sample] internal static void PairRDDSortByKeySample() { - var rdd = SparkCLRSamples.SparkContext.Parallelize(new[] { new KeyValuePair("B", 2), - new KeyValuePair("a", 1), new KeyValuePair("c", 3), - new KeyValuePair("E", 5), new KeyValuePair("D", 4)}, 3); + var rdd = SparkCLRSamples.SparkContext.Parallelize(new[] { new Tuple("B", 2), + new Tuple("a", 1), new Tuple("c", 3), + new Tuple("E", 5), new Tuple("D", 4)}, 3); var sortedRdd = rdd.SortByKey(true, 2); var sortedInTotal = sortedRdd.Collect(); @@ -507,7 +508,7 @@ internal static void PairRDDSortByKeySample() { Assert.AreEqual(2, sortedPartitions.Length); // by default SortByKey is case sensitive - CollectionAssert.AreEqual(new[] { "B", "D", "E", "a", "c" }, sortedInTotal.Select(kv => kv.Key).ToArray()); + CollectionAssert.AreEqual(new[] { "B", "D", "E", "a", "c" }, sortedInTotal.Select(kv => kv.Item1).ToArray()); } // convert the keys to lower case in order to sort with case insensitive @@ -518,7 +519,7 @@ internal static void PairRDDSortByKeySample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { Assert.AreEqual(2, sortedPartitions.Length); - CollectionAssert.AreEqual(new[] { "a", "B", "c", "D", "E" }, sortedInTotal.Select(kv => kv.Key).ToArray()); + CollectionAssert.AreEqual(new[] { "a", "B", "c", "D", "E" }, sortedInTotal.Select(kv => kv.Item1).ToArray()); } } } diff --git a/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs index bc201f82..37943c26 100644 --- 
a/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs @@ -129,7 +129,7 @@ internal static void RDDGroupBySample() var rdd = SparkCLRSamples.SparkContext.Parallelize(new int[] { 1, 1, 2, 3, 5, 8 }, 1); var groups = rdd.GroupBy(x => x % 2).Collect(); foreach (var kv in groups) - Console.WriteLine(kv.Key + ", " + string.Join(",", kv.Value)); + Console.WriteLine(kv.Item1 + ", " + string.Join(",", kv.Item2)); if (SparkCLRSamples.Configuration.IsValidationEnabled) { @@ -137,9 +137,9 @@ internal static void RDDGroupBySample() foreach (var kv in groups) { // the group with key=1 is odd numbers - if (kv.Key == 1) CollectionAssert.AreEquivalent(new[] { 1, 1, 3, 5 }, kv.Value); + if (kv.Item1 == 1) CollectionAssert.AreEquivalent(new[] { 1, 1, 3, 5 }, kv.Item2); // the group with key=0 is even numbers - else if (kv.Key == 0) CollectionAssert.AreEquivalent(new[] { 2, 8 }, kv.Value); + else if (kv.Item1 == 0) CollectionAssert.AreEquivalent(new[] { 2, 8 }, kv.Item2); } } } @@ -292,10 +292,10 @@ internal static void RDDKeyBySample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(keyBy.Contains(new KeyValuePair(1, 1))); - Assert.IsTrue(keyBy.Contains(new KeyValuePair(4, 2))); - Assert.IsTrue(keyBy.Contains(new KeyValuePair(9, 3))); - Assert.IsTrue(keyBy.Contains(new KeyValuePair(16, 4))); + Assert.IsTrue(keyBy.Contains(new Tuple(1, 1))); + Assert.IsTrue(keyBy.Contains(new Tuple(4, 2))); + Assert.IsTrue(keyBy.Contains(new Tuple(9, 3))); + Assert.IsTrue(keyBy.Contains(new Tuple(16, 4))); } } @@ -344,7 +344,7 @@ internal static void RDDZipSample() { for (int i = 0; i < 5; i++) { - Assert.IsTrue(zip.Contains(new KeyValuePair(i, 1000 + i))); + Assert.IsTrue(zip.Contains(new Tuple(i, 1000 + i))); } } } @@ -358,10 +358,10 @@ internal static void RDDZipWithIndexSample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(zipWithIndex.Contains(new KeyValuePair("a", 0))); - Assert.IsTrue(zipWithIndex.Contains(new KeyValuePair("b", 1))); - Assert.IsTrue(zipWithIndex.Contains(new KeyValuePair("c", 2))); - Assert.IsTrue(zipWithIndex.Contains(new KeyValuePair("d", 3))); + Assert.IsTrue(zipWithIndex.Contains(new Tuple("a", 0))); + Assert.IsTrue(zipWithIndex.Contains(new Tuple("b", 1))); + Assert.IsTrue(zipWithIndex.Contains(new Tuple("c", 2))); + Assert.IsTrue(zipWithIndex.Contains(new Tuple("d", 3))); } } @@ -374,11 +374,11 @@ internal static void RDDZipWithUniqueIdSample() if (SparkCLRSamples.Configuration.IsValidationEnabled) { - Assert.IsTrue(zipWithUniqueId.Contains(new KeyValuePair("a", 0))); - Assert.IsTrue(zipWithUniqueId.Contains(new KeyValuePair("b", 1))); - Assert.IsTrue(zipWithUniqueId.Contains(new KeyValuePair("c", 4))); - Assert.IsTrue(zipWithUniqueId.Contains(new KeyValuePair("d", 2))); - Assert.IsTrue(zipWithUniqueId.Contains(new KeyValuePair("e", 5))); + Assert.IsTrue(zipWithUniqueId.Contains(new Tuple("a", 0))); + Assert.IsTrue(zipWithUniqueId.Contains(new Tuple("b", 1))); + Assert.IsTrue(zipWithUniqueId.Contains(new Tuple("c", 4))); + Assert.IsTrue(zipWithUniqueId.Contains(new Tuple("d", 2))); + Assert.IsTrue(zipWithUniqueId.Contains(new Tuple("e", 5))); } } @@ -530,22 +530,22 @@ internal static void RDDWordCountSample() var words = lines.FlatMap(s => s.Split(' ')); - var wordCounts = words.Map(w => new KeyValuePair(w.Trim(), 1)) + var wordCounts = words.Map(w => new Tuple(w.Trim(), 1)) .ReduceByKey((x, y) => x + y).Collect(); Console.WriteLine("*** Printing words and their counts ***"); foreach 
(var kvp in wordCounts) { - Console.WriteLine("'{0}':{1}", kvp.Key, kvp.Value); + Console.WriteLine("'{0}':{1}", kvp.Item1, kvp.Item2); } - var wordCountsCaseInsensitve = words.Map(w => new KeyValuePair(w.ToLower().Trim(), 1)) + var wordCountsCaseInsensitve = words.Map(w => new Tuple(w.ToLower().Trim(), 1)) .ReduceByKey((x, y) => x + y).Collect(); Console.WriteLine("*** Printing words and their counts ignoring case ***"); foreach (var kvp in wordCountsCaseInsensitve) { - Console.WriteLine("'{0}':{1}", kvp.Key, kvp.Value); + Console.WriteLine("'{0}':{1}", kvp.Item1, kvp.Item2); } if (SparkCLRSamples.Configuration.IsValidationEnabled) @@ -553,7 +553,7 @@ internal static void RDDWordCountSample() var dictionary = new Dictionary(); foreach (var kvp in wordCounts) { - dictionary[kvp.Key] = kvp.Value; + dictionary[kvp.Item1] = kvp.Item2; } Assert.AreEqual(22, dictionary["the"]); @@ -563,7 +563,7 @@ internal static void RDDWordCountSample() var caseInsenstiveWordCountDictionary = new Dictionary(); foreach (var kvp in wordCountsCaseInsensitve) { - caseInsenstiveWordCountDictionary[kvp.Key] = kvp.Value; + caseInsenstiveWordCountDictionary[kvp.Item1] = kvp.Item2; } Assert.AreEqual(45, caseInsenstiveWordCountDictionary["the"]); @@ -584,12 +584,12 @@ internal static void RDDJoinSample() var requestsColumns = requests.Map(s => { var columns = s.Split(','); - return new KeyValuePair(columns[0], new[] { columns[1], columns[2], columns[3] }); + return new Tuple(columns[0], new[] { columns[1], columns[2], columns[3] }); }); var metricsColumns = metrics.Map(s => { var columns = s.Split(','); - return new KeyValuePair(columns[3], new[] { columns[4], columns[5], columns[6] }); + return new Tuple(columns[3], new[] { columns[4], columns[5], columns[6] }); }); var requestsJoinedWithMetrics = requestsColumns.Join(metricsColumns) @@ -597,29 +597,29 @@ internal static void RDDJoinSample() s => new [] { - s.Key, //guid - s.Value.Item1[0], s.Value.Item1[1], s.Value.Item1[2], //dc, abtestid, traffictype - s.Value.Item2[0],s.Value.Item2[1], s.Value.Item2[2] //lang, country, metric + s.Item1, //guid + s.Item2.Item1[0], s.Item2.Item1[1], s.Item2.Item1[2], //dc, abtestid, traffictype + s.Item2.Item2[0],s.Item2.Item2[1], s.Item2.Item2[2] //lang, country, metric }); - var latencyByDatacenter = requestsJoinedWithMetrics.Map(i => new KeyValuePair (i[1], int.Parse(i[6]))); //key is "datacenter" + var latencyByDatacenter = requestsJoinedWithMetrics.Map(i => new Tuple (i[1], int.Parse(i[6]))); //key is "datacenter" var maxLatencyByDataCenterList = latencyByDatacenter.ReduceByKey(Math.Max).Collect(); Console.WriteLine("***** Max latency metrics by DC *****"); - foreach (var keyValuePair in maxLatencyByDataCenterList) + foreach (var Tuple in maxLatencyByDataCenterList) { - Console.WriteLine("Datacenter={0}, Max latency={1}", keyValuePair.Key, keyValuePair.Value); + Console.WriteLine("Datacenter={0}, Max latency={1}", Tuple.Item1, Tuple.Item2); } - var latencyAndCountByDatacenter = requestsJoinedWithMetrics.Map(i => new KeyValuePair> (i[1], new Tuple(int.Parse(i[6]), 1))); + var latencyAndCountByDatacenter = requestsJoinedWithMetrics.Map(i => new Tuple> (i[1], new Tuple(int.Parse(i[6]), 1))); var sumLatencyAndCountByDatacenter = latencyAndCountByDatacenter.ReduceByKey((tuple, tuple1) => new Tuple((tuple == null ? 0 : tuple.Item1) + tuple1.Item1, (tuple == null ? 
0 : tuple.Item2) + tuple1.Item2)); var sumLatencyAndCountByDatacenterList = sumLatencyAndCountByDatacenter.Collect(); Console.WriteLine("***** Mean latency metrics by DC *****"); - foreach (var keyValuePair in sumLatencyAndCountByDatacenterList) + foreach (var Tuple in sumLatencyAndCountByDatacenterList) { - Console.WriteLine("Datacenter={0}, Mean latency={1}", keyValuePair.Key, keyValuePair.Value.Item1/keyValuePair.Value.Item2); + Console.WriteLine("Datacenter={0}, Mean latency={1}", Tuple.Item1, Tuple.Item2.Item1/Tuple.Item2.Item2); } if (SparkCLRSamples.Configuration.IsValidationEnabled) @@ -627,7 +627,7 @@ internal static void RDDJoinSample() var dictionary = new Dictionary(); foreach (var kvp in maxLatencyByDataCenterList) { - dictionary[kvp.Key] = kvp.Value; + dictionary[kvp.Item1] = kvp.Item2; } Assert.AreEqual(835, dictionary["iowa"]); @@ -636,7 +636,7 @@ internal static void RDDJoinSample() var meanDictionary = new Dictionary>(); foreach (var kvp in sumLatencyAndCountByDatacenterList) { - meanDictionary[kvp.Key] = new Tuple(kvp.Value.Item1, kvp.Value.Item2); + meanDictionary[kvp.Item1] = new Tuple(kvp.Item2.Item1, kvp.Item2.Item2); } Assert.AreEqual(1621, meanDictionary["iowa"].Item1); @@ -737,7 +737,7 @@ internal static void RDDCombineBySample() var markets = SparkCLRSamples.SparkContext.TextFile(SparkCLRSamples.Configuration.GetInputDataPath("market.tab"), 1); long totalMarketsCount = markets.Count(); - var marketsByKey = markets.Map(x => new KeyValuePair(x.Substring(0, x.IndexOf('-')), x)); + var marketsByKey = markets.Map(x => new Tuple(x.Substring(0, x.IndexOf('-')), x)); var categories = marketsByKey.PartitionBy(2) .CombineByKey(() => "", (c, v) => v.Substring(0, v.IndexOf('-')), (c1, c2) => c1, 2); var categoriesCollectedCount = categories.Collect().Count(); From 873286cb15add1a83140724931c8e86b103db5ee Mon Sep 17 00:00:00 2001 From: Isaac Abraham Date: Thu, 21 Apr 2016 00:54:33 +0100 Subject: [PATCH 02/20] Updated a few more methods. 
--- .../Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs | 4 ++-- csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs | 4 ++-- csharp/AdapterTest/PairRDDTest.cs | 2 +- csharp/AdapterTest/RDDTest.cs | 2 +- csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs | 2 +- csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs index a40c6197..8600e498 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs @@ -146,9 +146,9 @@ public static IDictionary ReduceByKeyLocally(this RDD> s /// /// /// - public static IDictionary CountByKey(this RDD> self) + public static IEnumerable> CountByKey(this RDD> self) { - return self.MapValues(v => 1L).ReduceByKey((a, b) => a + b).CollectAsMap(); + return self.MapValues(v => 1L).ReduceByKey((a, b) => a + b).Collect(); } /// diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs index 345672f7..f1af3edd 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs @@ -762,9 +762,9 @@ public long Count() /// /// /// - public IDictionary CountByValue() + public IEnumerable> CountByValue() { - return Map>(v => new Tuple(v, default(T))).CountByKey(); + return Map(v => new Tuple(v, default(T))).CountByKey(); } /// diff --git a/csharp/AdapterTest/PairRDDTest.cs b/csharp/AdapterTest/PairRDDTest.cs index 00dd3e99..7fe06083 100644 --- a/csharp/AdapterTest/PairRDDTest.cs +++ b/csharp/AdapterTest/PairRDDTest.cs @@ -27,7 +27,7 @@ public void TestPairRddCountByKey() foreach (var record in pairs.CountByKey()) { // the 1st paramter of AreEqual() method is the expected value, the 2nd one is the acutal value. - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 23 : 22, record.Item2); } } diff --git a/csharp/AdapterTest/RDDTest.cs b/csharp/AdapterTest/RDDTest.cs index 23bbdc11..ab406b32 100644 --- a/csharp/AdapterTest/RDDTest.cs +++ b/csharp/AdapterTest/RDDTest.cs @@ -42,7 +42,7 @@ public void TestRddCountByValue() { foreach (var record in words.CountByValue()) { - Assert.AreEqual(record.Key == "The" || record.Key == "dog" || record.Key == "lazy" ? 23 : 22, record.Value); + Assert.AreEqual(record.Item1 == "The" || record.Item1 == "dog" || record.Item1 == "lazy" ? 
23 : 22, record.Item2); } } diff --git a/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs index 97604337..22de846f 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs @@ -112,7 +112,7 @@ internal static void PairRDDCountByKeySample() new Tuple("a", 1) }, 2) .CountByKey() - .ToLookup(x => x.Key); + .ToLookup(x => x.Item1); foreach (var kv in countByKey) Console.WriteLine(kv); diff --git a/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs index 37943c26..62477c3f 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs @@ -221,7 +221,7 @@ internal static void RDDTreeAggregateSample() [Sample] internal static void RDDCountByValueSample() { - var countByValue = SparkCLRSamples.SparkContext.Parallelize(new int[] { 1, 2, 1, 2, 2 }, 2).CountByValue(); + var countByValue = SparkCLRSamples.SparkContext.Parallelize(new int[] { 1, 2, 1, 2, 2 }, 2).CountByValue().ToArray(); foreach (var item in countByValue) Console.WriteLine(item); From 34567724738ce095c268c8e1408a48504cb440bc Mon Sep 17 00:00:00 2001 From: Kaarthik Sivashanmugam Date: Fri, 7 Oct 2016 17:40:49 -0700 Subject: [PATCH 03/20] updating HiveExample to use SparkSession --- examples/Sql/HiveDataFrame/Program.cs | 38 ++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/examples/Sql/HiveDataFrame/Program.cs b/examples/Sql/HiveDataFrame/Program.cs index 101dd206..d488cfa3 100644 --- a/examples/Sql/HiveDataFrame/Program.cs +++ b/examples/Sql/HiveDataFrame/Program.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft. All rights reserved. +// Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. 
using System; @@ -20,21 +20,40 @@ static void Main(string[] args) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set var logger = LoggerServiceFactory.GetLogger(typeof(HiveDataFrameExample)); + var jsonFilePath = args[0]; + const string dbName = "SampleHiveDataBaseForMobius"; + const string tableName = "people"; + + var builder = SparkSession.Builder().EnableHiveSupport(); + // The following setting is required to use Spark 2.0 in Windows + // It may be provided in command line when running Mobius app + //builder = builder.Config("spark.sql.warehouse.dir", ""); + var session = builder.GetOrCreate(); + var peopleDataFrame = session.Read().Json(jsonFilePath); + session.Sql(string.Format("CREATE DATABASE IF NOT EXISTS {0}", dbName)); // create database if not exists + session.Sql(string.Format("USE {0}", dbName)); + //hiveContext.Sql(string.Format("DROP TABLE {0}", tableName)); // drop table if exists + + peopleDataFrame.Write().Mode(SaveMode.Overwrite).SaveAsTable(tableName); // create table + var tablesDataFrame = session.Table(tableName); // get all tables in database + logger.LogInfo(string.Format("table count in database {0}: {1}", dbName, tablesDataFrame.Count())); + tablesDataFrame.Show(); + + session.Sql(string.Format("SELECT * FROM {0}", tableName)).Show(); // select from table + // Following example is for the deprecated API + /* var sparkConf = new SparkConf(); + // The following setting is required to use Spark 2.0 in Windows + // It may be provided in command line when running Mobius app + //sparkConf.Set("spark.sql.warehouse.dir", @""); var sparkContext = new SparkContext(sparkConf); - var hiveContext = new HiveContext(sparkContext); - - // please give the path to input json file - var jsonFilePath = args[0]; + var hiveContext = new HiveContext(sparkContext); var peopleDataFrame = hiveContext.Read().Json(jsonFilePath); - - const string dbName = "SampleHiveDataBaseForMobius"; - const string tableName = "people"; hiveContext.Sql(string.Format("CREATE DATABASE IF NOT EXISTS {0}", dbName)); // create database if not exists hiveContext.Sql(string.Format("USE {0}", dbName)); - hiveContext.Sql(string.Format("DROP TABLE {0}", tableName)); // drop table if exists + //hiveContext.Sql(string.Format("DROP TABLE {0}", tableName)); // drop table if exists peopleDataFrame.Write().Mode(SaveMode.Overwrite).SaveAsTable(tableName); // create table var tablesDataFrame = hiveContext.Tables(dbName); // get all tables in database @@ -42,6 +61,7 @@ static void Main(string[] args) tablesDataFrame.Show(); hiveContext.Sql(string.Format("SELECT * FROM {0}", tableName)).Show(); // select from table + */ } } } From 8597df2d0f4a9ac823cff138e256361fda184054 Mon Sep 17 00:00:00 2001 From: Kaarthik Sivashanmugam Date: Tue, 11 Oct 2016 16:20:41 -0700 Subject: [PATCH 04/20] updating examples to use latest preview release --- dev/scripts/SetSparkClrJarVersion.ps1 | 4 ++-- examples/Batch/WordCount/WordCount.csproj | 10 +++++----- examples/Batch/WordCount/packages.config | 2 +- examples/Batch/pi/Pi.csproj | 12 ++++++------ examples/Batch/pi/packages.config | 2 +- .../Sql/CassandraDataFrame/CassandraDataFrame.csproj | 4 ++-- examples/Sql/CassandraDataFrame/packages.config | 2 +- examples/Sql/HiveDataFrame/HiveDataFrame.csproj | 8 ++++---- examples/Sql/HiveDataFrame/packages.config | 2 +- examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj | 10 +++++----- examples/Sql/JdbcDataFrame/packages.config | 2 +- 
examples/Sql/SparkXml/SparkXml.csproj | 10 +++++----- examples/Sql/SparkXml/packages.config | 2 +- examples/Streaming/EventHub/EventHub.csproj | 6 +++--- examples/Streaming/EventHub/packages.config | 2 +- .../Streaming/HdfsWordCount/HdfsWordCount.csproj | 6 +++--- examples/Streaming/HdfsWordCount/packages.config | 2 +- examples/Streaming/Kafka/Kafka.csproj | 4 ++-- examples/Streaming/Kafka/packages.config | 2 +- examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj | 4 ++-- examples/fsharp/JsonDataFrame/packages.config | 2 +- examples/fsharp/WordCount/WordCountFSharp.fsproj | 4 ++-- examples/fsharp/WordCount/packages.config | 2 +- 23 files changed, 52 insertions(+), 52 deletions(-) diff --git a/dev/scripts/SetSparkClrJarVersion.ps1 b/dev/scripts/SetSparkClrJarVersion.ps1 index f8d06ead..604ff548 100644 --- a/dev/scripts/SetSparkClrJarVersion.ps1 +++ b/dev/scripts/SetSparkClrJarVersion.ps1 @@ -45,7 +45,7 @@ function Update-SparkClrSubmit($targetDir, $version) # Get-ChildItem $targetDir -filter "sparkclr-submit.cmd" -recurs | % { Write-Output "[SetSparkClrJarVersion.Update-SparkClrSubmit] updating $($_.FullName)" - ((Get-Content $_.FullName) -replace "\(set SPARKCLR_JAR=.*\)", "(set SPARKCLR_JAR=spark-clr_2.10-$version.jar)") | Set-Content $_.FullName -force + ((Get-Content $_.FullName) -replace "\(set SPARKCLR_JAR=.*\)", "(set SPARKCLR_JAR=spark-clr_2.11-$version.jar)") | Set-Content $_.FullName -force } Write-Output "[SetSparkClrJarVersion.Update-SparkClrSubmit] Done setting sparkclr-submit.cmd under $targetDir to version=$version" @@ -59,7 +59,7 @@ function Update-SparkClrSubmit($targetDir, $version) # Get-ChildItem $targetDir -filter "sparkclr-submit.sh" -recurs | % { Write-Output "[SetSparkClrJarVersion.Update-SparkClrSubmit] updating $($_.FullName)" - ((Get-Content $_.FullName) -replace "export SPARKCLR_JAR=.*", "export SPARKCLR_JAR=spark-clr_2.10-$version.jar") | Set-Content $_.FullName -force + ((Get-Content $_.FullName) -replace "export SPARKCLR_JAR=.*", "export SPARKCLR_JAR=spark-clr_2.11-$version.jar") | Set-Content $_.FullName -force } Write-Output "[SetSparkClrJarVersion.Update-SparkClrSubmit] Done setting sparkclr-submit.sh under $targetDir to version=$version" diff --git a/examples/Batch/WordCount/WordCount.csproj b/examples/Batch/WordCount/WordCount.csproj index 40b8f52a..054cbbb3 100644 --- a/examples/Batch/WordCount/WordCount.csproj +++ b/examples/Batch/WordCount/WordCount.csproj @@ -32,17 +32,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe False ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -67,7 +67,7 @@ - + CSharpWorker.exe.config diff --git a/examples/Batch/WordCount/packages.config b/examples/Batch/WordCount/packages.config index 293105d3..e8bf6251 100644 --- a/examples/Batch/WordCount/packages.config +++ b/examples/Batch/WordCount/packages.config @@ -4,5 +4,5 @@ - + diff --git a/examples/Batch/pi/Pi.csproj b/examples/Batch/pi/Pi.csproj index 751852f8..0417fe0a 100644 --- a/examples/Batch/pi/Pi.csproj +++ b/examples/Batch/pi/Pi.csproj @@ -35,17 +35,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe False 
..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -66,7 +66,7 @@ - + CSharpWorker.exe.config @@ -84,4 +84,4 @@ --> - + \ No newline at end of file diff --git a/examples/Batch/pi/packages.config b/examples/Batch/pi/packages.config index eaa63869..dec53cfc 100644 --- a/examples/Batch/pi/packages.config +++ b/examples/Batch/pi/packages.config @@ -1,7 +1,7 @@  - + diff --git a/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj b/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj index 228764bd..87638435 100644 --- a/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj +++ b/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj @@ -35,13 +35,13 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False diff --git a/examples/Sql/CassandraDataFrame/packages.config b/examples/Sql/CassandraDataFrame/packages.config index 293105d3..e8bf6251 100644 --- a/examples/Sql/CassandraDataFrame/packages.config +++ b/examples/Sql/CassandraDataFrame/packages.config @@ -4,5 +4,5 @@ - + diff --git a/examples/Sql/HiveDataFrame/HiveDataFrame.csproj b/examples/Sql/HiveDataFrame/HiveDataFrame.csproj index 6c81a1d4..f682b02c 100644 --- a/examples/Sql/HiveDataFrame/HiveDataFrame.csproj +++ b/examples/Sql/HiveDataFrame/HiveDataFrame.csproj @@ -37,12 +37,12 @@ False ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe True - - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll True diff --git a/examples/Sql/HiveDataFrame/packages.config b/examples/Sql/HiveDataFrame/packages.config index 218d018f..fc7565bb 100644 --- a/examples/Sql/HiveDataFrame/packages.config +++ b/examples/Sql/HiveDataFrame/packages.config @@ -1,7 +1,7 @@  - + diff --git a/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj b/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj index f19954c4..28f78836 100644 --- a/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj +++ b/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj @@ -34,17 +34,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe False ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -65,7 +65,7 @@ - + CSharpWorker.exe.config diff --git a/examples/Sql/JdbcDataFrame/packages.config b/examples/Sql/JdbcDataFrame/packages.config index 293105d3..e8bf6251 100644 --- a/examples/Sql/JdbcDataFrame/packages.config +++ b/examples/Sql/JdbcDataFrame/packages.config @@ -4,5 +4,5 @@ - + diff --git 
a/examples/Sql/SparkXml/SparkXml.csproj b/examples/Sql/SparkXml/SparkXml.csproj index 381ec5db..63407574 100644 --- a/examples/Sql/SparkXml/SparkXml.csproj +++ b/examples/Sql/SparkXml/SparkXml.csproj @@ -34,17 +34,17 @@ 4 - + False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe False ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - + False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -65,7 +65,7 @@ - + CSharpWorker.exe.config diff --git a/examples/Sql/SparkXml/packages.config b/examples/Sql/SparkXml/packages.config index 293105d3..e8bf6251 100644 --- a/examples/Sql/SparkXml/packages.config +++ b/examples/Sql/SparkXml/packages.config @@ -4,5 +4,5 @@ - + diff --git a/examples/Streaming/EventHub/EventHub.csproj b/examples/Streaming/EventHub/EventHub.csproj index 631b2c3d..dec83bcb 100644 --- a/examples/Streaming/EventHub/EventHub.csproj +++ b/examples/Streaming/EventHub/EventHub.csproj @@ -35,7 +35,7 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe False @@ -43,7 +43,7 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -68,7 +68,7 @@ - + CSharpWorker.exe.config diff --git a/examples/Streaming/EventHub/packages.config b/examples/Streaming/EventHub/packages.config index abe733c5..4deb4954 100644 --- a/examples/Streaming/EventHub/packages.config +++ b/examples/Streaming/EventHub/packages.config @@ -4,7 +4,7 @@ - + diff --git a/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj b/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj index 277fa405..22763af3 100644 --- a/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj +++ b/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj @@ -38,7 +38,7 @@ False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe False @@ -46,7 +46,7 @@ False - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -64,7 +64,7 @@ - + PreserveNewest diff --git a/examples/Streaming/HdfsWordCount/packages.config b/examples/Streaming/HdfsWordCount/packages.config index eaa63869..dec53cfc 100644 --- a/examples/Streaming/HdfsWordCount/packages.config +++ b/examples/Streaming/HdfsWordCount/packages.config @@ -1,7 +1,7 @@  - + diff --git a/examples/Streaming/Kafka/Kafka.csproj b/examples/Streaming/Kafka/Kafka.csproj index c221fa81..f05b5b94 100644 --- a/examples/Streaming/Kafka/Kafka.csproj +++ b/examples/Streaming/Kafka/Kafka.csproj @@ -33,14 +33,14 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe False ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False diff --git 
a/examples/Streaming/Kafka/packages.config b/examples/Streaming/Kafka/packages.config index eaa63869..dec53cfc 100644 --- a/examples/Streaming/Kafka/packages.config +++ b/examples/Streaming/Kafka/packages.config @@ -1,7 +1,7 @@  - + diff --git a/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj b/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj index b49e9de0..1e38738c 100644 --- a/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj +++ b/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj @@ -66,13 +66,13 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll diff --git a/examples/fsharp/JsonDataFrame/packages.config b/examples/fsharp/JsonDataFrame/packages.config index 941d504a..e0e1e271 100644 --- a/examples/fsharp/JsonDataFrame/packages.config +++ b/examples/fsharp/JsonDataFrame/packages.config @@ -2,7 +2,7 @@ - + diff --git a/examples/fsharp/WordCount/WordCountFSharp.fsproj b/examples/fsharp/WordCount/WordCountFSharp.fsproj index 3b40aad9..facc6813 100644 --- a/examples/fsharp/WordCount/WordCountFSharp.fsproj +++ b/examples/fsharp/WordCount/WordCountFSharp.fsproj @@ -71,7 +71,7 @@ - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\CSharpWorker.exe True @@ -83,7 +83,7 @@ True - ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-2\lib\net45\Microsoft.Spark.CSharp.Adapter.dll True diff --git a/examples/fsharp/WordCount/packages.config b/examples/fsharp/WordCount/packages.config index 941d504a..e0e1e271 100644 --- a/examples/fsharp/WordCount/packages.config +++ b/examples/fsharp/WordCount/packages.config @@ -2,7 +2,7 @@ - + From 142701109b6056973f486ffd7b8a043d987bd6dd Mon Sep 17 00:00:00 2001 From: Kaarthik Sivashanmugam Date: Mon, 17 Oct 2016 11:58:54 -0700 Subject: [PATCH 05/20] documentation for implementing Spark Apps in F# using Mobius --- README.md | 1 + notes/mobius-init.fsx | 31 +++++++++++++++++++++++++++++++ notes/spark-fsharp-mobius.md | 17 +++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 notes/mobius-init.fsx create mode 100644 notes/spark-fsharp-mobius.md diff --git a/README.md b/README.md index 55b3d28c..ff598731 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ Refer to the [docs folder](docs) for design overview and other info on Mobius * [Configuration parameters in Mobius](./notes/configuration-mobius.md) * [Troubleshoot errors in Mobius](./notes/troubleshooting-mobius.md) * [Debug Mobius apps](./notes/running-mobius-app.md#debug-mode) +* [Implementing Spark Apps in F# using Mobius](./notes/spark-fsharp-mobius.md) ## Supported Spark Versions diff --git a/notes/mobius-init.fsx b/notes/mobius-init.fsx new file mode 100644 index 00000000..11e28e10 --- /dev/null +++ b/notes/mobius-init.fsx @@ -0,0 +1,31 @@ +// *** Replace the paths below to point to correct location of Mobius binaries *** +#r @"C:\spark-clr_2.11-2.0.000\runtime\bin\Microsoft.Spark.CSharp.Adapter.dll" +#r @"C:\spark-clr_2.11-2.0.000\runtime\bin\log4net.dll" +#r @"C:\spark-clr_2.11-2.0.000\runtime\bin\Newtonsoft.Json.dll" +#r 
@"C:\spark-clr_2.11-2.0.000\runtime\bin\Razorvine.Pyrolite.dll" +#r @"C:\spark-clr_2.11-2.0.000\runtime\bin\Razorvine.Serpent.dll" +#r @"C:\spark-clr_2.11-2.0.000\runtime\bin\CSharpWorker.exe" +open Microsoft.Spark.CSharp.Core +open Microsoft.Spark.CSharp.Services +open Microsoft.Spark.CSharp.Sql +open System.Reflection +open System.Collections.Generic +LoggerServiceFactory.SetLoggerService Log4NetLoggerService.Instance + +// *** Uncomment & use the following code block to use SqlContext API *** +//let conf = SparkConf().SetAppName "FSharpInteractiveShell" +// *** uncomment & update master URL if running in non-local mode *** +//conf.Master "spark://sparkmaster:7077" +// *** Spark 2.0 in Windows requires the following config *** +//conf.Set("spark.sql.warehouse.dir", @"file:///C:/sparktemp") +//let sc = SparkContext conf +//let sqlContext = SqlContext sc + +// *** Uncomment & use the following code block to use SparkSession API *** +let builder = SparkSession.Builder() +builder = builder.AppName "FSharpInteractiveShell" +// *** uncomment & update master URL if running in non-local mode *** +//builder = builder.Master "spark://sparkmaster:7077" +// *** Spark 2.0 in Windows requires the following config *** +builder = builder.Config("spark.sql.warehouse.dir", "file:///C:/sparktemp") +let session = builder.GetOrCreate() diff --git a/notes/spark-fsharp-mobius.md b/notes/spark-fsharp-mobius.md new file mode 100644 index 00000000..7ff6372c --- /dev/null +++ b/notes/spark-fsharp-mobius.md @@ -0,0 +1,17 @@ +# Implementing Spark Apps in F# using Mobius + +## Non-Interactive Apps +1. Develop your application in a F# IDE using Mobius API. Refer to [F# examples](../examples/fsharp) for sample code +2. Use [`sparkclr-submit.cmd`](running-mobius-app.md) to run your Mobius-based Spark application implemented in F# + +## Interactive Apps +### Using F# Interactive (fsi.exe) +1. Run `sparkclr-submit.cmd debug` in a command prompt after setting necessary [environment variables](running-mobius-app.md#pre-requisites). Note that this `debug` parameter is a misnomer in this context and this command initializes .NET-JVM bridge similiar to [running Mobius apps in debug mode](./running-mobius-app.md#debug-mode). +2. In Developer Command Prompt for VS, run `fsi.exe --use:c:\temp\mobius-init.fsx`. [mobius-init.fsx](mobius-init.fsx) has the initialization code that can be used to create `SparkContext`, `SqlContext` or `SparkSession`. You need to update the location of Mobius binaries referenced in the beginning of the script file. You may also need to update other configuration settings in the script. +3. When the F# command prompt is available, Spark functionality can be invoked using Mobius API. For example, the following code can be used process JSON file. 
+``` +let dataframe = sparkSession.Read().Json @"C:\temp\data.json";; +dataframe.Show();; +dataframe.ShowSchema();; +dataframe.Count();; +``` From 5311e3027a119aa3b8b08f047902925efedc1675 Mon Sep 17 00:00:00 2001 From: Kai Zeng Date: Tue, 25 Oct 2016 14:06:42 -0700 Subject: [PATCH 06/20] add RunJob method to SparkContext; add SparkContext property to RDD (#590) * add RunJob method to SparkContext; add SparkContext property to RDD * test RDD.SparkContext --- .../Adapter/Microsoft.Spark.CSharp/Core/RDD.cs | 11 +++++++++++ .../Microsoft.Spark.CSharp/Core/SparkContext.cs | 11 +++++++++++ csharp/AdapterTest/SparkContextTest.cs | 17 +++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs index 44111778..31a0edc7 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/RDD.cs @@ -51,6 +51,17 @@ internal virtual IRDDProxy RddProxy } } + /// + /// Return the SparkContext that created this RDD + /// + public SparkContext SparkContext + { + get + { + return sparkContext; + } + } + /// /// Return whether this RDD has been cached or not /// diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs index bc8faac4..f6d8a1a5 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs @@ -571,6 +571,17 @@ public void SetLogLevel(string logLevel) SparkContextProxy.SetLogLevel(logLevel); } + /// + /// Run a job on a given set of partitions of an RDD. + /// + /// + /// + /// + public void RunJob(RDD rdd, IEnumerable partitions) + { + SparkContextProxy.RunJob(rdd.RddProxy, partitions); + } + /// /// Cancel active jobs for the specified group. See for more information. /// diff --git a/csharp/AdapterTest/SparkContextTest.cs b/csharp/AdapterTest/SparkContextTest.cs index d0005683..c7e2803a 100644 --- a/csharp/AdapterTest/SparkContextTest.cs +++ b/csharp/AdapterTest/SparkContextTest.cs @@ -152,6 +152,23 @@ public void TestSparkContextHadoopConfigurationProperty() Assert.IsNotNull(hadoopConf); } + [Test] + public void TestRunJob() + { + // Arrange + Mock sparkContextProxy = new Mock(); + SparkContext sc = new SparkContext(sparkContextProxy.Object, null); + RDD rdd = sc.Parallelize(new int[] {0, 1, 2, 3, 4, 5}, 2); + sparkContextProxy.Setup(m => m.RunJob(It.IsAny(), It.IsAny>())); + + // Act + int[] partitions = new int[] { 0, 1 }; + rdd.SparkContext.RunJob(rdd, partitions); + + // Assert + sparkContextProxy.Verify(m => m.RunJob(rdd.RddProxy, partitions), Times.Once); + } + [Test] public void TestCancelAllJobs() { From 6e2b820524c8184b19d2650094480c7c3ae0229c Mon Sep 17 00:00:00 2001 From: Jorgen Thelin Date: Tue, 1 Nov 2016 22:41:17 -0700 Subject: [PATCH 07/20] Fix Spark version build problems * Check that Download-File function got a real file. - Check in the `Download-File` function that the current downloaded copy of the file is not empty [zero length]. * archive.apache.org - Use `archive.apache.org` as the default server location for downloading Spark [and Maven] binary distributions. - Previous location was `www.us.apache.org` but that only supports the most recent distributions for each major release [ie currently `2.0.1` but not `2.0.0` which Mobius is using at the moment.] * The Apache distribution server is used during both build and run phases. 
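For readers following this build-script change from the C# side, the guard added to `Download-File` amounts to the logic sketched below. This is an illustrative translation only, with a hypothetical class name; the shipped fix is the PowerShell in `downloadtools.ps1` shown in the diff that follows.

```c#
using System;
using System.IO;
using System.Net;

// Hypothetical C# rendering of the empty-file guard added to Download-File.
static class DownloadGuardSketch
{
    public static void Download(string url, string output)
    {
        if (File.Exists(output))
        {
            if (new FileInfo(output).Length > 0)
            {
                Console.WriteLine("{0} exists. No need to download.", output);
                return;
            }

            // A zero-length file left by an earlier failed run is discarded and re-downloaded.
            Console.WriteLine("WARNING: {0} exists but is empty. Downloading a new copy.", output);
            File.Delete(output);
        }

        using (var client = new WebClient())
        {
            client.DownloadFile(url, output);
        }

        // Fail loudly if the download still produced no file or an empty file.
        if (!File.Exists(output) || new FileInfo(output).Length == 0)
        {
            throw new FileNotFoundException(string.Format("Failed to download file {0} from {1}", output, url));
        }
    }
}
```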
--- build/localmode/RunSamples.cmd | 3 ++- build/localmode/downloadtools.ps1 | 28 ++++++++++++++++++++++------ build/localmode/run-samples.sh | 5 +++-- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/build/localmode/RunSamples.cmd b/build/localmode/RunSamples.cmd index 3b75e857..d99b39bd 100644 --- a/build/localmode/RunSamples.cmd +++ b/build/localmode/RunSamples.cmd @@ -49,7 +49,8 @@ if "%precheck%" == "bad" (goto :EOF) @rem set SPARK_VERSION=2.0.0 set HADOOP_VERSION=2.6 -@echo [RunSamples.cmd] SPARK_VERSION=%SPARK_VERSION%, HADOOP_VERSION=%HADOOP_VERSION% +set APACHE_DIST_SERVER=archive.apache.org +@echo [RunSamples.cmd] SPARK_VERSION=%SPARK_VERSION%, HADOOP_VERSION=%HADOOP_VERSION%, APACHE_DIST_SERVER=%APACHE_DIST_SERVER% @rem download runtime dependencies pushd "%CMDHOME%" diff --git a/build/localmode/downloadtools.ps1 b/build/localmode/downloadtools.ps1 index 78d20a91..e3609a56 100644 --- a/build/localmode/downloadtools.ps1 +++ b/build/localmode/downloadtools.ps1 @@ -10,6 +10,9 @@ # Param([string] $stage, [string] $verbose) +$envValue = [Environment]::GetEnvironmentVariable("APACHE_DIST_SERVER") +$apacheDistServer = if ($envValue -eq $null) { "archive.apache.org" } else { $envValue } + if ($stage.ToLower() -eq "run") { # retrieve hadoop and spark versions from environment variables @@ -18,8 +21,8 @@ if ($stage.ToLower() -eq "run") $envValue = [Environment]::GetEnvironmentVariable("SPARK_VERSION") $sparkVersion = if ($envValue -eq $null) { "1.6.1" } else { $envValue } - - Write-Output "[downloadtools] hadoopVersion=$hadoopVersion, sparkVersion=$sparkVersion" + + Write-Output "[downloadtools] hadoopVersion=$hadoopVersion, sparkVersion=$sparkVersion, apacheDistServer=$apacheDistServer" } function Get-ScriptDirectory @@ -73,8 +76,16 @@ function Download-File($url, $output) $output = [System.IO.Path]::GetFullPath($output) if (test-path $output) { - Write-Output "[downloadtools.Download-File] $output exists. No need to download." - return + if ((Get-Item $output).Length -gt 0) + { + Write-Output "[downloadtools.Download-File] $output exists. No need to download." + return + } + else + { + Write-Output "[downloadtools.Download-File] [WARNING] $output exists but is empty. We need to download a new copy of the file." + Remove-Item $output + } } $start_time = Get-Date @@ -122,6 +133,11 @@ function Download-File($url, $output) } Write-Output "[downloadtools.Download-File] Download completed. 
Time taken: $howlong" + + if ( !(test-path $output) -or (Get-Item $output).Length -eq 0) + { + throw [System.IO.FileNotFoundException] "Failed to download file $output from $url" + } } function Unzip-File($zipFile, $targetDir) @@ -252,7 +268,7 @@ function Download-BuildTools $mvnCmd = "$toolsDir\$mvnVer\bin\mvn.cmd" if (!(test-path $mvnCmd)) { - $url = "http://www.us.apache.org/dist/maven/maven-3/3.3.9/binaries/$mvnVer-bin.tar.gz" + $url = "http://$apacheDistServer/dist/maven/maven-3/3.3.9/binaries/$mvnVer-bin.tar.gz" $output="$toolsDir\$mvnVer-bin.tar.gz" Download-File $url $output Untar-File $output $toolsDir @@ -402,7 +418,7 @@ function Download-RuntimeDependencies $sparkSubmit="$S_HOME\bin\spark-submit.cmd" if (!(test-path $sparkSubmit)) { - $url = "http://www.us.apache.org/dist/spark/spark-$sparkVersion/spark-$sparkVersion-bin-hadoop$hadoopVersion.tgz" + $url = "http://$apacheDistServer/dist/spark/spark-$sparkVersion/spark-$sparkVersion-bin-hadoop$hadoopVersion.tgz" $output = "$toolsDir\spark-$sparkVersion-bin-hadoop$hadoopVersion.tgz" Download-File $url $output Untar-File $output $toolsDir diff --git a/build/localmode/run-samples.sh b/build/localmode/run-samples.sh index 4e94bca2..73d15cf3 100755 --- a/build/localmode/run-samples.sh +++ b/build/localmode/run-samples.sh @@ -18,7 +18,8 @@ done # setup Hadoop and Spark versions export SPARK_VERSION=2.0.0 export HADOOP_VERSION=2.6 -echo "[run-samples.sh] SPARK_VERSION=$SPARK_VERSION, HADOOP_VERSION=$HADOOP_VERSION" +export APACHE_DIST_SERVER=archive.apache.org +echo "[run-samples.sh] SPARK_VERSION=$SPARK_VERSION, HADOOP_VERSION=$HADOOP_VERSION, APACHE_DIST_SERVER=$APACHE_DIST_SERVER" export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" @@ -30,7 +31,7 @@ export SPARK=spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION export SPARK_HOME="$TOOLS_DIR/$SPARK" if [ ! 
-d "$SPARK_HOME" ]; then - wget "http://www.us.apache.org/dist/spark/spark-$SPARK_VERSION/$SPARK.tgz" -O "$TOOLS_DIR/$SPARK.tgz" + wget "http://$APACHE_DIST_SERVER/dist/spark/spark-$SPARK_VERSION/$SPARK.tgz" -O "$TOOLS_DIR/$SPARK.tgz" tar xfz "$TOOLS_DIR/$SPARK.tgz" -C "$TOOLS_DIR" fi export PATH="$SPARK_HOME/bin:$PATH" From 12c46c8ed129783e66f8e2c21ee50c76d1596052 Mon Sep 17 00:00:00 2001 From: Kaarthik Sivashanmugam Date: Sat, 3 Dec 2016 22:28:59 -0800 Subject: [PATCH 08/20] fixing Travis build issue * using openjdk7 --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index 986efb1a..2e56c979 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,8 @@ language: csharp solution: csharp/SparkCLR.sln sudo: required dist: trusty +env: + - JDK=openjdk7 before_install: - sudo apt-get install xsltproc - nuget install NUnit.Runners -Version 3.0.0 -OutputDirectory testrunner @@ -12,6 +14,8 @@ before_install: - export M2="$M2_HOME/bin" - export PATH="$M2:$PATH" - hash -r +before_script: + - jdk_switcher use $JDK script: - export MAVEN_OPTS="-XX:MaxPermSize=2g -Xmx4g" - export JAVA_OPTS="-XX:MaxPermSize=2g -Xmx4g" From 88f76af038b303ebd871c006b3234549f981c7f2 Mon Sep 17 00:00:00 2001 From: Attila Szucs Date: Sun, 4 Dec 2016 07:49:00 +0100 Subject: [PATCH 09/20] Fix ReduceByKey failure when the value type is a reference type --- .../Core/PairRDDFunctions.cs | 6 +++- csharp/AdapterTest/PairRDDTest.cs | 33 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs index 5fe3c679..133b2f9d 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/PairRDDFunctions.cs @@ -97,7 +97,11 @@ public static RDD Values(this RDD> self) /// public static RDD> ReduceByKey(this RDD> self, Func reduceFunc, int numPartitions = 0) { - return CombineByKey(self, () => default(V), reduceFunc, reduceFunc, numPartitions); + var locallyCombined = self.MapPartitionsWithIndex(new GroupByMergeHelper(reduceFunc).Execute, true); + + var shuffled = locallyCombined.PartitionBy(numPartitions); + + return shuffled.MapPartitionsWithIndex(new GroupByMergeHelper(reduceFunc).Execute, true); } /// diff --git a/csharp/AdapterTest/PairRDDTest.cs b/csharp/AdapterTest/PairRDDTest.cs index f8ba3847..15054af5 100644 --- a/csharp/AdapterTest/PairRDDTest.cs +++ b/csharp/AdapterTest/PairRDDTest.cs @@ -105,6 +105,39 @@ public void TestPairRddReduceByKeyLocally() } } + [Serializable] + private class IntWrapper + { + public IntWrapper(int value) + { + Value = value; + } + + public int Value { get; } + } + + [Test] + public void TestPairRddReduceByKeyWithObjects() + { + // The ReduceByKey method below fails with NPE if ReduceByKey + // calls CombineByKey with () => default(V) as seed generator + var sums = pairs + .MapValues(value => new IntWrapper(value)) + .ReduceByKey((x, y) => new IntWrapper(x.Value + y.Value)); + + var result = sums + .CollectAsMap() + .Select(pair => new KeyValuePair(pair.Key, pair.Value.Value)) + .ToList(); + + var expectedResult = pairs + .ReduceByKey((x, y) => x + y) + .CollectAsMap() + .ToList(); + + Assert.That(result, Is.EquivalentTo(expectedResult)); + } + [Test] public void TestPairRddFoldByKey() { From 66fc5123d01d25a54f02cbf867f5e8313c9c350f Mon Sep 17 00:00:00 2001 From: dwnichols Date: Tue, 13 Dec 2016 16:08:02 -0500 Subject: [PATCH 10/20] Fix test 
TestDStreamUpdateStateByKey to use Tuples --- csharp/AdapterTest/DStreamTest.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csharp/AdapterTest/DStreamTest.cs b/csharp/AdapterTest/DStreamTest.cs index 90281eb3..c6a363bf 100644 --- a/csharp/AdapterTest/DStreamTest.cs +++ b/csharp/AdapterTest/DStreamTest.cs @@ -281,8 +281,8 @@ public void TestDStreamUpdateStateByKey() foreach (object record in taken) { - KeyValuePair countByWord = (KeyValuePair)record; - Assert.AreEqual(countByWord.Key == "The" || countByWord.Key == "dog" || countByWord.Key == "lazy" ? 23 : 22, countByWord.Value); + Tuple countByWord = (Tuple)record; + Assert.AreEqual(countByWord.Item1 == "The" || countByWord.Item1 == "dog" || countByWord.Item1 == "lazy" ? 23 : 22, countByWord.Item2); } }); From a36b16009ed0c1f66d5bf90f95fe094dac17cfb6 Mon Sep 17 00:00:00 2001 From: dwnichols Date: Tue, 13 Dec 2016 16:24:59 -0500 Subject: [PATCH 11/20] Further work on Tuple change --- .../Core/OrderedRDDFunctions.cs | 12 ++++++------ .../Microsoft.Spark.CSharp/Core/SparkContext.cs | 12 ++++++------ csharp/AdapterTest/SparkContextTest.cs | 4 ++-- .../Samples/Microsoft.Spark.CSharp/DStreamSamples.cs | 10 +++++----- csharp/Worker/Microsoft.Spark.CSharp/Worker.cs | 4 ++-- csharp/WorkerTest/WorkerTest.cs | 10 +++++----- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/OrderedRDDFunctions.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/OrderedRDDFunctions.cs index 23509e56..e5deb09a 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/OrderedRDDFunctions.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/OrderedRDDFunctions.cs @@ -18,7 +18,7 @@ public static class OrderedRDDFunctions { /// - /// Sorts this RDD, which is assumed to consist of KeyValuePair pairs. + /// Sorts this RDD, which is assumed to consist of Tuple pairs. /// /// /// @@ -32,7 +32,7 @@ public static RDD> SortByKey(this RDD> self, return SortByKey(self, ascending, numPartitions, new DefaultSortKeyFuncHelper().Execute); } /// - /// Sorts this RDD, which is assumed to consist of KeyValuePairs. If key is type of string, case is sensitive. + /// Sorts this RDD, which is assumed to consist of Tuples. If Item1 is type of string, case is sensitive. /// /// /// @@ -40,7 +40,7 @@ public static RDD> SortByKey(this RDD> self, /// /// /// Number of partitions. Each partition of the sorted RDD contains a sorted range of the elements. - /// RDD will sort by keyFunc(key) for every key in KeyValuePair. Must not be null. + /// RDD will sort by keyFunc(key) for every Item1 in Tuple. Must not be null. /// public static RDD> SortByKey(this RDD> self, bool ascending, int? numPartitions, Func keyFunc) @@ -103,13 +103,13 @@ public static RDD> SortByKey(this RDD> self, /// /// /// - public static RDD> repartitionAndSortWithinPartitions( - this RDD> self, + public static RDD> repartitionAndSortWithinPartitions( + this RDD> self, int? numPartitions = null, Func partitionFunc = null, bool ascending = true) { - return self.MapPartitionsWithIndex>((pid, iter) => ascending ? iter.OrderBy(kv => kv.Key) : iter.OrderByDescending(kv => kv.Key)); + return self.MapPartitionsWithIndex>((pid, iter) => ascending ? 
iter.OrderBy(kv => kv.Item1) : iter.OrderByDescending(kv => kv.Item1)); } [Serializable] diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs index 20861c2b..9add6da9 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs @@ -241,7 +241,7 @@ public RDD EmptyRDD() /// /// Do /// {{{ - /// RDD<KeyValuePair<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path") + /// RDD<Tuple<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path") /// }}} /// /// then `rdd` contains @@ -259,9 +259,9 @@ public RDD EmptyRDD() /// /// /// - public RDD> WholeTextFiles(string filePath, int? minPartitions = null) + public RDD> WholeTextFiles(string filePath, int? minPartitions = null) { - return new RDD>(SparkContextProxy.WholeTextFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair); + return new RDD>(SparkContextProxy.WholeTextFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair); } /// @@ -279,7 +279,7 @@ public RDD> WholeTextFiles(string filePath, int? mi /// }}} /// /// Do - /// RDD<KeyValuePair<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, + /// RDD<Tuple<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, /// /// then `rdd` contains /// {{{ @@ -296,9 +296,9 @@ public RDD> WholeTextFiles(string filePath, int? mi /// /// /// - public RDD> BinaryFiles(string filePath, int? minPartitions) + public RDD> BinaryFiles(string filePath, int? minPartitions) { - return new RDD>(SparkContextProxy.BinaryFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair); + return new RDD>(SparkContextProxy.BinaryFiles(filePath, minPartitions ?? DefaultMinPartitions), this, SerializedMode.Pair); } /// diff --git a/csharp/AdapterTest/SparkContextTest.cs b/csharp/AdapterTest/SparkContextTest.cs index 223f99c3..cd5b03f0 100644 --- a/csharp/AdapterTest/SparkContextTest.cs +++ b/csharp/AdapterTest/SparkContextTest.cs @@ -372,7 +372,7 @@ public void TestWholeTextFiles() SparkContext sc = new SparkContext(sparkContextProxy.Object, null); // Act - RDD> rdd = sc.WholeTextFiles(filePath, null); + RDD> rdd = sc.WholeTextFiles(filePath, null); // Assert Assert.IsNotNull(rdd); @@ -394,7 +394,7 @@ public void TestBinaryFiles() SparkContext sc = new SparkContext(sparkContextProxy.Object, null); // Act - RDD> rdd = sc.BinaryFiles(filePath, null); + RDD> rdd = sc.BinaryFiles(filePath, null); // Assert Assert.IsNotNull(rdd); diff --git a/csharp/Samples/Microsoft.Spark.CSharp/DStreamSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/DStreamSamples.cs index c20c1855..f4978e0c 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/DStreamSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/DStreamSamples.cs @@ -99,8 +99,8 @@ internal static void DStreamTextFileSample() { Console.WriteLine(record); - var countByWord = (KeyValuePair)record; - Assert.AreEqual(countByWord.Value, countByWord.Key == "The" || countByWord.Key == "lazy" || countByWord.Key == "dog" ? 92 : 88); + var countByWord = (Tuple)record; + Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "lazy" || countByWord.Item1 == "dog" ? 
92 : 88); } Console.WriteLine(); @@ -283,10 +283,10 @@ private static void DStreamReduceByKeyAndWindowSample() foreach (object record in taken) { - KeyValuePair sum = (KeyValuePair)record; - Console.WriteLine("Key: {0}, Value: {1}", sum.Key, sum.Value); + Tuple sum = (Tuple)record; + Console.WriteLine("Key: {0}, Value: {1}", sum.Item1, sum.Item2); // when batch count reaches window size, sum of even/odd number stay at windowDuration / slideDuration * (2450, 2500) respectively - Assert.AreEqual(sum.Value, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (bacthIntervalMs / 1000) * (sum.Key == 0 ? 2450 : 2500)); + Assert.AreEqual(sum.Item2, (count > windowDuration / slideDuration ? windowDuration : count * slideDuration) / (bacthIntervalMs / 1000) * (sum.Item1 == 0 ? 2450 : 2500)); } }); diff --git a/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs b/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs index 9c95d112..486a1bc7 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs +++ b/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs @@ -533,7 +533,7 @@ private static void WriteAccumulatorValues(Stream networkStream, IFormatter form .GetField("value", BindingFlags.NonPublic | BindingFlags.Instance) .GetValue(item.Value); logger.LogDebug("({0}, {1})", item.Key, value); - formatter.Serialize(ms, new KeyValuePair(item.Key, value)); + formatter.Serialize(ms, new Tuple(item.Key, value)); byte[] buffer = ms.ToArray(); SerDe.Write(networkStream, buffer.Length); SerDe.Write(networkStream, buffer); @@ -649,7 +649,7 @@ private static IEnumerable GetIterator(Stream inputStream, string seria } watch.Stop(); - yield return new KeyValuePair(pairKey, pairValue); + yield return new Tuple(pairKey, pairValue); break; } diff --git a/csharp/WorkerTest/WorkerTest.cs b/csharp/WorkerTest/WorkerTest.cs index 19df98d5..18264375 100644 --- a/csharp/WorkerTest/WorkerTest.cs +++ b/csharp/WorkerTest/WorkerTest.cs @@ -573,7 +573,7 @@ public void TestWorkerWithPairDeserializedModeAndNoneSerializedMode() { WritePayloadHeaderToWorker(s); byte[] command = SparkContext.BuildCommand( - new CSharpWorkerFunc((pid, iter) => iter.Cast>().Select(pair => pair.Key)), + new CSharpWorkerFunc((pid, iter) => iter.Cast>().Select(pair => pair.Item1)), SerializedMode.Pair, SerializedMode.None); SerDe.Write(s, command.Length); @@ -713,7 +713,7 @@ private IEnumerable ReadDataSection(Stream s, int expectedCount = 0) /// /// read accumulator /// - private IEnumerable> ReadAccumulator(Stream s, int expectedCount = 0) + private IEnumerable> ReadAccumulator(Stream s, int expectedCount = 0) { int count = 0; var formatter = new BinaryFormatter(); @@ -723,7 +723,7 @@ private IEnumerable> ReadAccumulator(Stream s, int ex if (length > 0) { var ms = new MemoryStream(SerDe.ReadBytes(s, length)); - yield return (KeyValuePair)formatter.Deserialize(ms); + yield return (Tuple)formatter.Deserialize(ms); if (expectedCount > 0 && ++count >= expectedCount) { @@ -780,8 +780,8 @@ public void TestAccumulatorInWorker() int accumulatorsCount = SerDe.ReadInt(s); Assert.IsTrue(accumulatorsCount == 1); var accumulatorFromWorker = ReadAccumulator(s, accumulatorsCount).First(); - Assert.AreEqual(accumulatorId, accumulatorFromWorker.Key); - Assert.AreEqual(expectedCount, accumulatorFromWorker.Value); + Assert.AreEqual(accumulatorId, accumulatorFromWorker.Item1); + Assert.AreEqual(expectedCount, accumulatorFromWorker.Item2); SerDe.ReadInt(s); } From 2fe5ec409ec00ba812f1947c5648a82527c3b516 Mon Sep 17 00:00:00 2001 From: dwnichols Date: Tue, 13 
Dec 2016 16:25:23 -0500 Subject: [PATCH 12/20] Update documatation for tuples change --- README.md | 4 ++-- .../Microsoft.Spark.CSharp.Adapter.Doc.XML | 12 ++++++------ .../documentation/Mobius_API_Documentation.md | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ff598731..f318e52c 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ For example, the word count sample in Apache Spark can be implemented in C# as f ```c# var lines = sparkContext.TextFile(@"hdfs://path/to/input.txt"); var words = lines.FlatMap(s => s.Split(' ')); -var wordCounts = words.Map(w => new KeyValuePair(w.Trim(), 1)) +var wordCounts = words.Map(w => new Tuple(w.Trim(), 1)) .ReduceByKey((x, y) => x + y); var wordCountCollection = wordCounts.Collect(); wordCounts.SaveAsTextFile(@"hdfs://path/to/wordcount.txt"); @@ -63,7 +63,7 @@ StreamingContext sparkStreamingContext = StreamingContext.GetOrCreate(checkpoint .Map(kvp => Encoding.UTF8.GetString(kvp.Value)) .Filter(line => line.Contains(",")) .Map(line => line.Split(',')) - .Map(columns => new KeyValuePair( + .Map(columns => new Tuple( string.Format("{0},{1}", columns[0], columns[1]), 1)) .ReduceByKeyAndWindow((x, y) => x + y, (x, y) => x - y, windowDurationInSecs, slideDurationInSecs, 3) diff --git a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML index 2923020b..0f4c49ce 100644 --- a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML +++ b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML @@ -453,7 +453,7 @@ - Sorts this RDD, which is assumed to consist of KeyValuePair pairs. + Sorts this RDD, which is assumed to consist of Tuple pairs. @@ -464,7 +464,7 @@ - Sorts this RDD, which is assumed to consist of KeyValuePairs. If key is type of string, case is sensitive. + Sorts this RDD, which is assumed to consist of Tuples. If Item1 is type of string, case is sensitive. @@ -472,10 +472,10 @@ Number of partitions. Each partition of the sorted RDD contains a sorted range of the elements. - RDD will sort by keyFunc(key) for every key in KeyValuePair. Must not be null. + RDD will sort by keyFunc(key) for every Item1 in Tuple. Must not be null. - + Repartition the RDD according to the given partitioner and, within each resulting partition, sort records by their keys. @@ -2137,7 +2137,7 @@ Do {{{ - RDD<KeyValuePair<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path") + RDD<Tuple<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path") }}} then `rdd` contains @@ -2172,7 +2172,7 @@ }}} Do - RDD<KeyValuePair<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, + RDD<Tuple<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, then `rdd` contains {{{ diff --git a/csharp/Adapter/documentation/Mobius_API_Documentation.md b/csharp/Adapter/documentation/Mobius_API_Documentation.md index 92d5d764..c6d235b1 100644 --- a/csharp/Adapter/documentation/Mobius_API_Documentation.md +++ b/csharp/Adapter/documentation/Mobius_API_Documentation.md @@ -187,7 +187,7 @@ ####Methods -
Name | Description
SortByKey``2 | Sorts this RDD, which is assumed to consist of KeyValuePair pairs.
SortByKey``3 | Sorts this RDD, which is assumed to consist of KeyValuePairs. If key is type of string, case is sensitive.
repartitionAndSortWithinPartitions``2 | Repartition the RDD according to the given partitioner and, within each resulting partition, sort records by their keys. This is more efficient than calling `repartition` and then sorting within each partition because it can push the sorting down into the shuffle machinery.
+
Name | Description
SortByKey``2 | Sorts this RDD, which is assumed to consist of Tuple pairs.
SortByKey``3 | Sorts this RDD, which is assumed to consist of Tuples. If Item1 is type of string, case is sensitive.
repartitionAndSortWithinPartitions``2 | Repartition the RDD according to the given partitioner and, within each resulting partition, sort records by their keys. This is more efficient than calling `repartition` and then sorting within each partition because it can push the sorting down into the shuffle machinery.
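As a quick illustration of the retargeted extension methods documented above (a sketch, not part of the patch, assuming an existing `SparkContext` named `sc` and `using Microsoft.Spark.CSharp.Core`), `SortByKey` now operates on `RDD<Tuple<K, V>>` and orders by `Item1`:

```c#
// Sketch: sort Tuple pairs by their first item.
var pairs = sc.Parallelize(new[]
{
    new Tuple<string, int>("b", 2),
    new Tuple<string, int>("a", 1),
    new Tuple<string, int>("c", 3)
}, 2);

var ascending = pairs.SortByKey();                                        // ("a", 1), ("b", 2), ("c", 3)
var descending = pairs.SortByKey(false, null, k => k.ToUpperInvariant()); // overload with a key function

foreach (var kv in ascending.Collect())
    Console.WriteLine("{0}: {1}", kv.Item1, kv.Item2);
```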
--- @@ -327,7 +327,7 @@ ####Methods -
Name | Description
GetActiveSparkContext | Get existing SparkContext
GetConf | Return a copy of this JavaSparkContext's configuration. The configuration ''cannot'' be changed at runtime.
GetOrCreate | This function may be used to get or instantiate a SparkContext and register it as a singleton object. Because we can only have one active SparkContext per JVM, this is useful when applications may wish to share a SparkContext. Note: This function cannot be used to create multiple SparkContext instances even if multiple contexts are allowed.
TextFile | Read a text file from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI, and return it as an RDD of Strings.
Parallelize``1 | Distribute a local collection to form an RDD. sc.Parallelize(new int[] {0, 2, 3, 4, 6}, 5).Glom().Collect() [[0], [2], [3], [4], [6]]
EmptyRDD | Create an RDD that has no partitions or elements.
WholeTextFiles | Read a directory of text files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file, the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do {{{ RDD<KeyValuePair<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path") }}} then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} Small files are preferred, large file is also allowable, but may cause bad performance. minPartitions A suggestion value of the minimal splitting number for input data.
BinaryFiles | Read a directory of binary files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI as a byte array. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file, the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do RDD<KeyValuePair<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} @note Small files are preferred; very large files but may cause bad performance. @param minPartitions A suggestion value of the minimal splitting number for input data.
SequenceFile | Read a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is as follows: 1. A Java RDD is created from the SequenceFile or other InputFormat, and the key and value Writable classes 2. Serialization is attempted via Pyrolite pickling 3. If this fails, the fallback is to call 'toString' on each key and value 4. PickleSerializer is used to deserialize pickled objects on the Python side
NewAPIHadoopFile | Read a 'new API' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict. This will be converted into a Configuration in Java
NewAPIHadoopRDD | Read a 'new API' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. This will be converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile.
HadoopFile | Read an 'old' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict. This will be converted into a Configuration in Java.
HadoopRDD | Read an 'old' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. This will be converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile.
Union``1 | Build the union of a list of RDDs. This supports unions() of RDDs with different serialized formats, although this forces them to be reserialized using the default serializer: >>> path = os.path.join(tempdir, "union-text.txt") >>> with open(path, "w") as testFile: ... _ = testFile.write("Hello") >>> textFile = sc.textFile(path) >>> textFile.collect() [u'Hello'] >>> parallelized = sc.parallelize(["World!"]) >>> sorted(sc.union([textFile, parallelized]).collect()) [u'Hello', 'World!']
Broadcast``1 | Broadcast a read-only variable to the cluster, returning a Broadcast object for reading it in distributed functions. The variable will be sent to each cluster only once.
Accumulator``1 | Create an with the given initial value, using a given helper object to define how to add values of the data type if provided. Default AccumulatorParams are used for integers and floating-point numbers if you do not provide one. For other types, a custom AccumulatorParam can be used.
Stop | Shut down the SparkContext.
AddFile | Add a file to be downloaded with this Spark job on every node. The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, use `SparkFiles.get(fileName)` to find its download location.
SetCheckpointDir | Set the directory under which RDDs are going to be checkpointed. The directory must be a HDFS path if running on a cluster.
SetJobGroup | Assigns a group ID to all the jobs started by this thread until the group ID is set to a different value or cleared. Often, a unit of execution in an application consists of multiple Spark actions or jobs. Application programmers can use this method to group all those jobs together and give a group description. Once set, the Spark web UI will associate such jobs with this group. The application can also use [[org.apache.spark.api.java.JavaSparkContext.cancelJobGroup]] to cancel all running jobs in this group. For example, {{{ // In the main thread: sc.setJobGroup("some_job_to_cancel", "some job description"); rdd.map(...).count(); // In a separate thread: sc.cancelJobGroup("some_job_to_cancel"); }}} If interruptOnCancel is set to true for the job group, then job cancellation will result in Thread.interrupt() being called on the job's executor threads. This is useful to help ensure that the tasks are actually stopped in a timely manner, but is off by default due to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead.
SetLocalProperty | Set a local property that affects jobs submitted from this thread, such as the Spark fair scheduler pool.
GetLocalProperty | Get a local property set in this thread, or null if it is missing. See [[org.apache.spark.api.java.JavaSparkContext.setLocalProperty]].
SetLogLevel | Control our logLevel. This overrides any user-defined log settings. @param logLevel The desired log level as a string. Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN
RunJob``1 | Run a job on a given set of partitions of an RDD.
CancelJobGroup | Cancel active jobs for the specified group. See for more information.
CancelAllJobs | Cancel all jobs that have been scheduled or are running.
+
NameDescription
GetActiveSparkContextGet existing SparkContext
GetConfReturn a copy of this JavaSparkContext's configuration. The configuration ''cannot'' be changed at runtime.
GetOrCreateThis function may be used to get or instantiate a SparkContext and register it as a singleton object. Because we can only have one active SparkContext per JVM, this is useful when applications may wish to share a SparkContext. Note: This function cannot be used to create multiple SparkContext instances even if multiple contexts are allowed.
TextFileRead a text file from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI, and return it as an RDD of Strings.
Parallelize``1Distribute a local collection to form an RDD. sc.Parallelize(new int[] {0, 2, 3, 4, 6}, 5).Glom().Collect() [[0], [2], [3], [4], [6]]
EmptyRDDCreate an RDD that has no partitions or elements.
WholeTextFilesRead a directory of text files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file, the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do {{{ RDD<Tuple<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path") }}} then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} Small files are preferred, large file is also allowable, but may cause bad performance. minPartitions A suggestion value of the minimal splitting number for input data.
BinaryFilesRead a directory of binary files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI as a byte array. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file, the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do RDD<Tuple<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} @note Small files are preferred; very large files but may cause bad performance. @param minPartitions A suggestion value of the minimal splitting number for input data.
SequenceFileRead a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is as follows: 1. A Java RDD is created from the SequenceFile or other InputFormat, and the key and value Writable classes 2. Serialization is attempted via Pyrolite pickling 3. If this fails, the fallback is to call 'toString' on each key and value 4. PickleSerializer is used to deserialize pickled objects on the Python side
- **NewAPIHadoopFile**: Read a 'new API' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict. This will be converted into a Configuration in Java.
- **NewAPIHadoopRDD**: Read a 'new API' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. This will be converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile.
- **HadoopFile**: Read an 'old' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict. This will be converted into a Configuration in Java.
- **HadoopRDD**: Read an 'old' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. This will be converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile.
- **Union``1**: Build the union of a list of RDDs. This supports unions() of RDDs with different serialized formats, although this forces them to be reserialized using the default serializer: >>> path = os.path.join(tempdir, "union-text.txt") >>> with open(path, "w") as testFile: ... _ = testFile.write("Hello") >>> textFile = sc.textFile(path) >>> textFile.collect() [u'Hello'] >>> parallelized = sc.parallelize(["World!"]) >>> sorted(sc.union([textFile, parallelized]).collect()) [u'Hello', 'World!']
- **Broadcast``1**: Broadcast a read-only variable to the cluster, returning a Broadcast object for reading it in distributed functions. The variable will be sent to each cluster only once.
- **Accumulator``1**: Create an accumulator with the given initial value, using a given helper object to define how to add values of the data type if provided. Default AccumulatorParams are used for integers and floating-point numbers if you do not provide one. For other types, a custom AccumulatorParam can be used.
- **Stop**: Shut down the SparkContext.
- **AddFile**: Add a file to be downloaded with this Spark job on every node. The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, use `SparkFiles.get(fileName)` to find its download location.
- **SetCheckpointDir**: Set the directory under which RDDs are going to be checkpointed. The directory must be an HDFS path if running on a cluster.
- **SetJobGroup**: Assigns a group ID to all the jobs started by this thread until the group ID is set to a different value or cleared. Often, a unit of execution in an application consists of multiple Spark actions or jobs. Application programmers can use this method to group all those jobs together and give a group description. Once set, the Spark web UI will associate such jobs with this group. The application can also use [[org.apache.spark.api.java.JavaSparkContext.cancelJobGroup]] to cancel all running jobs in this group. For example, {{{ // In the main thread: sc.setJobGroup("some_job_to_cancel", "some job description"); rdd.map(...).count(); // In a separate thread: sc.cancelJobGroup("some_job_to_cancel"); }}} If interruptOnCancel is set to true for the job group, then job cancellation will result in Thread.interrupt() being called on the job's executor threads. This is useful to help ensure that the tasks are actually stopped in a timely manner, but is off by default due to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead. (A C# sketch of this job-group pattern follows this list.)
- **SetLocalProperty**: Set a local property that affects jobs submitted from this thread, such as the Spark fair scheduler pool.
- **GetLocalProperty**: Get a local property set in this thread, or null if it is missing. See [[org.apache.spark.api.java.JavaSparkContext.setLocalProperty]].
- **SetLogLevel**: Control the log level. This overrides any user-defined log settings. The desired log level is passed as a string; valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN.
- **RunJob``1**: Run a job on a given set of partitions of an RDD.
- **CancelJobGroup**: Cancel active jobs for the specified group. See SetJobGroup for more information.
- **CancelAllJobs**: Cancel all jobs that have been scheduled or are running.
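The SetJobGroup entry above embeds a Java-flavoured snippet; the equivalent pattern from C# looks roughly like the sketch below. This is illustrative only: it assumes the Mobius SparkContext exposes the methods exactly as named in the list (SetJobGroup, CancelJobGroup, Parallelize, Map, Count), with the group id and description as the first two SetJobGroup parameters; exact signatures may differ between releases.

```csharp
// Sketch: group jobs started from this thread and cancel them from another thread.
// Assumes the Mobius SparkContext API surface listed above; not verbatim repo code.
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Spark.CSharp.Core;

static class JobGroupSketch
{
    internal static void Run(SparkContext sc)
    {
        // From a separate thread: cancel the whole group after a short delay.
        var canceller = Task.Run(() =>
        {
            Thread.Sleep(5000);
            sc.CancelJobGroup("some_job_to_cancel");
        });

        // In the main thread: tag all jobs started by this thread, then run one.
        sc.SetJobGroup("some_job_to_cancel", "some job description");
        var doubledCount = sc.Parallelize(Enumerable.Range(0, 1000000), 10)
                             .Map(x => x * 2)
                             .Count();

        canceller.Wait();
    }
}
```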
--- From 3e3a209bf45555636c6906c6fb1f6eb0bbaf5ec5 Mon Sep 17 00:00:00 2001 From: dwnichols Date: Tue, 13 Dec 2016 22:44:07 -0500 Subject: [PATCH 13/20] Test fixes for Samples in PairRDDSamples and RDDSamples --- csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs | 2 +- csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs index 22de846f..9a56de93 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/PairRDDSamples.cs @@ -112,7 +112,7 @@ internal static void PairRDDCountByKeySample() new Tuple("a", 1) }, 2) .CountByKey() - .ToLookup(x => x.Item1); + .ToDictionary(k => k.Item1, v => v.Item2); foreach (var kv in countByKey) Console.WriteLine(kv); diff --git a/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs index 62477c3f..1112a823 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/RDDSamples.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.Linq; using Microsoft.Spark.CSharp.Core; @@ -221,7 +222,10 @@ internal static void RDDTreeAggregateSample() [Sample] internal static void RDDCountByValueSample() { - var countByValue = SparkCLRSamples.SparkContext.Parallelize(new int[] { 1, 2, 1, 2, 2 }, 2).CountByValue().ToArray(); + var countByValue = SparkCLRSamples.SparkContext.Parallelize(new int[] { 1, 2, 1, 2, 2 }, 2) + .CountByValue() + .ToDictionary(k => k.Item1, v => v.Item2); + foreach (var item in countByValue) Console.WriteLine(item); From 50d65dead523c16798a69b975bc931c93a30b213 Mon Sep 17 00:00:00 2001 From: dwnichols Date: Wed, 14 Dec 2016 00:22:00 -0500 Subject: [PATCH 14/20] Added a few tests against Adapter Builder and RDDs --- csharp/AdapterTest/BuilderTest.cs | 18 ++++++++++++++++++ csharp/AdapterTest/RDDTest.cs | 5 +++++ 2 files changed, 23 insertions(+) diff --git a/csharp/AdapterTest/BuilderTest.cs b/csharp/AdapterTest/BuilderTest.cs index aae3c626..d77d73ef 100644 --- a/csharp/AdapterTest/BuilderTest.cs +++ b/csharp/AdapterTest/BuilderTest.cs @@ -1,4 +1,6 @@ using System; +using System.Linq; +using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Sql; using NUnit.Framework; @@ -46,5 +48,21 @@ public void TestDoubleConfig() builder.Config("doublevalue", 3.5D); Assert.True(builder.options["doublevalue"].Equals("3.5", StringComparison.InvariantCultureIgnoreCase)); } + + [Test] + public void TestEnableHiveSupport() + { + var builder = new Builder(); + builder.EnableHiveSupport(); + Assert.True(builder.options["spark.sql.catalogImplementation"].Equals("hive", StringComparison.InvariantCultureIgnoreCase)); + } + + [Test] + public void TestGetOrCreate() + { + var builder = new Builder(); + builder.GetOrCreate(); + Assert.IsEmpty(builder.options); + } } } diff --git a/csharp/AdapterTest/RDDTest.cs b/csharp/AdapterTest/RDDTest.cs index ab406b32..add22429 100644 --- a/csharp/AdapterTest/RDDTest.cs +++ b/csharp/AdapterTest/RDDTest.cs @@ -20,6 +20,7 @@ namespace AdapterTest public class RDDTest { private static RDD words; + private static RDD empty; [OneTimeSetUp] public static void Initialize() @@ -27,6 +28,7 @@ public static void Initialize() var sparkContext = new SparkContext(null); var lines = sparkContext.TextFile(Path.GetTempFileName()); words = 
lines.FlatMap(l => l.Split(' ')); + empty = sparkContext.EmptyRDD(); } [Test] @@ -82,6 +84,7 @@ public void TestRddTreeReduce() public void TestRddTreeAggregate() { Assert.AreEqual(201, words.Map(w => 1).TreeAggregate(0, (x, y) => x + y, (x, y) => x + y)); + Assert.Throws(() => empty.TreeAggregate(0, (x, y) => 1, (x, y) => x + y, 0)); } [Test] @@ -135,6 +138,7 @@ public void TestRddGroupBy() public void TestRddIsEmpty() { Assert.IsFalse(words.IsEmpty()); + Assert.IsTrue(empty.IsEmpty()); Assert.IsTrue(words.Filter(w => w == null).IsEmpty()); } @@ -166,6 +170,7 @@ public void TestRddTakeSample() Assert.AreEqual(20, words.TakeSample(true, 20, 1).Length); Assert.Throws(() => words.TakeSample(true, -1, 1)); Assert.AreEqual(0, words.TakeSample(true, 0, 1).Length); + Assert.AreEqual(20, words.TakeSample(false, 20, 1).Length); } [Test] From 8eb8cd0b5456248ab8796487dc04fb046247b2f0 Mon Sep 17 00:00:00 2001 From: dwnichols Date: Wed, 14 Dec 2016 00:28:12 -0500 Subject: [PATCH 15/20] Remove test --- csharp/AdapterTest/BuilderTest.cs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/csharp/AdapterTest/BuilderTest.cs b/csharp/AdapterTest/BuilderTest.cs index d77d73ef..f4afc6a9 100644 --- a/csharp/AdapterTest/BuilderTest.cs +++ b/csharp/AdapterTest/BuilderTest.cs @@ -55,14 +55,6 @@ public void TestEnableHiveSupport() var builder = new Builder(); builder.EnableHiveSupport(); Assert.True(builder.options["spark.sql.catalogImplementation"].Equals("hive", StringComparison.InvariantCultureIgnoreCase)); - } - - [Test] - public void TestGetOrCreate() - { - var builder = new Builder(); - builder.GetOrCreate(); - Assert.IsEmpty(builder.options); - } + } } } From ed73082895dfea6448d604ae5c15794dbc1e7ec3 Mon Sep 17 00:00:00 2001 From: dwnichols Date: Wed, 14 Dec 2016 14:04:56 -0500 Subject: [PATCH 16/20] Test changes to improve test coverage --- .../Mocks/MockStreamingContextProxy.cs | 18 ++++++++++++++---- csharp/AdapterTest/StreamingContextTest.cs | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/csharp/AdapterTest/Mocks/MockStreamingContextProxy.cs b/csharp/AdapterTest/Mocks/MockStreamingContextProxy.cs index f4a7d546..c2161916 100644 --- a/csharp/AdapterTest/Mocks/MockStreamingContextProxy.cs +++ b/csharp/AdapterTest/Mocks/MockStreamingContextProxy.cs @@ -89,13 +89,23 @@ public IDStreamProxy CreateCSharpTransformed2DStream(IDStreamProxy jdstream, IDS public IDStreamProxy CreateCSharpReducedWindowedDStream(IDStreamProxy jdstream, byte[] func, byte[] invFunc, int windowSeconds, int slideSeconds, string serializationMode) { - Func, RDD, RDD> f = (Func, RDD, RDD>)formatter.Deserialize(new MemoryStream(func)); - RDD rdd = f(DateTime.UtcNow.Ticks, + Func, RDD, RDD> f = (Func, RDD, RDD>) formatter.Deserialize(new MemoryStream(func)); + + var ticks = DateTime.UtcNow.Ticks; + RDD rdd = f(ticks, new RDD((jdstream as MockDStreamProxy).rddProxy ?? new MockRddProxy(null), new SparkContext("", "")), new RDD((jdstream as MockDStreamProxy).rddProxy ?? new MockRddProxy(null), new SparkContext("", ""))); - return new MockDStreamProxy(rdd.RddProxy); - } + if (invFunc == null) return new MockDStreamProxy(rdd.RddProxy); + + Func, RDD, RDD> invf = (Func, RDD, RDD>) formatter.Deserialize(new MemoryStream(invFunc)); + RDD invRdd = invf(ticks, + new RDD((jdstream as MockDStreamProxy).rddProxy ?? new MockRddProxy(null), new SparkContext("", "")), + new RDD((jdstream as MockDStreamProxy).rddProxy ?? 
new MockRddProxy(null), new SparkContext("", ""))); + var difference = rdd.Subtract(invRdd); + + return new MockDStreamProxy(difference.RddProxy); + } public IDStreamProxy CreateCSharpStateDStream(IDStreamProxy jdstream, byte[] func, string className, string serializationMode, string serializationMode2) { diff --git a/csharp/AdapterTest/StreamingContextTest.cs b/csharp/AdapterTest/StreamingContextTest.cs index 78ea6deb..20d9325f 100644 --- a/csharp/AdapterTest/StreamingContextTest.cs +++ b/csharp/AdapterTest/StreamingContextTest.cs @@ -32,7 +32,7 @@ public void TestStreamingContext() var socketStream = ssc.SocketTextStream(IPAddress.Loopback.ToString(), 12345); Assert.IsNotNull(socketStream.DStreamProxy); - var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new [] { Tuple.Create("testTopic1", 1) }, new List>()); + var kafkaStream = KafkaUtils.CreateStream(ssc, IPAddress.Loopback + ":2181", "testGroupId", new [] { Tuple.Create("testTopic1", 1) }, null); Assert.IsNotNull(kafkaStream.DStreamProxy); var directKafkaStream = KafkaUtils.CreateDirectStream(ssc, new List { "testTopic2" }, new List>(), new List>()); From 46e09f6ab6ce80dd42fb02be20a8caf3976e0d91 Mon Sep 17 00:00:00 2001 From: dwnichols Date: Wed, 14 Dec 2016 14:49:41 -0500 Subject: [PATCH 17/20] Add PairRDD SortBy descending test --- csharp/AdapterTest/PairRDDTest.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/csharp/AdapterTest/PairRDDTest.cs b/csharp/AdapterTest/PairRDDTest.cs index e772d4bf..22df0279 100644 --- a/csharp/AdapterTest/PairRDDTest.cs +++ b/csharp/AdapterTest/PairRDDTest.cs @@ -211,6 +211,14 @@ public void TestPairRddSortByKey2() CollectionAssert.AreEqual(expectedSortedRdd, rddSortByKey); } + [Test] + public void TestPairRddSortByKey3() + { + var expectedSortedRdd = pairs.Collect().OrderByDescending(kv => kv.Item1, StringComparer.OrdinalIgnoreCase).ToArray(); + var rddSortByKey = pairs.SortByKey(false, 1, key => key.ToLowerInvariant()).Collect(); + CollectionAssert.AreEqual(expectedSortedRdd, rddSortByKey); + } + [Test] public void TestPairRddProxy() { From 313dbf308bcdd91d22843d91279448af8e7111d4 Mon Sep 17 00:00:00 2001 From: Jacek Laskowski Date: Thu, 5 Jan 2017 21:23:40 +0100 Subject: [PATCH 18/20] Update linux-instructions.md Add a note about `wget` (Im on macOS and had to install it using homebrew) + formatting --- notes/linux-instructions.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/notes/linux-instructions.md b/notes/linux-instructions.md index 4a6e1972..df55cace 100644 --- a/notes/linux-instructions.md +++ b/notes/linux-instructions.md @@ -6,7 +6,8 @@ * Maven 3.0.5 or above. * Mono 4.2 stable or above. The download and installation instructions for Mono are available in [http://www.mono-project.com/download/#download-lin](http://www.mono-project.com/download/#download-lin) (see [Debian, Ubuntu and derivatives](http://www.mono-project.com/docs/getting-started/install/linux/#debian-ubuntu-and-derivatives) or [CentOS, Fedora, similar Linux distributions or OS X](http://www.mono-project.com/docs/getting-started/install/linux/#centos-7-fedora-19-and-later-and-derivatives)) * F# for Mono. The download and installation instructions for the F# Mono extension are available in [http://fsharp.org/use/linux/](http://fsharp.org/use/linux/) -* NuGet. 
+* NuGet +* wget * XSLTPROC The following environment variables should be set properly: @@ -15,9 +16,10 @@ The following environment variables should be set properly: ## Instructions -Instructions to build Mobius in Linux are same as [instructions for Windows](./windows-instructions.md#instructions). The only change required is to use the following script files instead of .cmd files: -* build.sh -* clean.sh +Instructions to build Mobius on Linux are same as [instructions for Windows](./windows-instructions.md#instructions). The only change required is to use the following script files instead of `.cmd` files: + +* `build.sh` +* `clean.sh` # Running Unit Tests in Linux From a1b74ad8a7085388d4e4e117ba377431771f3efd Mon Sep 17 00:00:00 2001 From: skaarthik Date: Sat, 28 Jan 2017 01:03:17 -0800 Subject: [PATCH 19/20] added UDF support to SparkSession --- .../Microsoft.Spark.CSharp/Adapter.csproj | 1 + .../Proxy/ISparkSessionProxy.cs | 7 +- .../Proxy/Ipc/SparkSessionIpcProxy.cs | 27 +- .../Sql/SparkSession.cs | 5 + .../Sql/UdfRegistration.cs | 254 ++++++++++++++++++ .../Microsoft.Spark.CSharp.Adapter.Doc.XML | 165 ++++++++++++ csharp/AdapterTest/AdapterTest.csproj | 1 + .../Mocks/MockSparkSessionProxy.cs | 2 +- csharp/AdapterTest/UdfRegistrationTest.cs | 57 ++++ .../SparkSessionSamples.cs | 26 ++ 10 files changed, 534 insertions(+), 11 deletions(-) create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs create mode 100644 csharp/AdapterTest/UdfRegistrationTest.cs diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj b/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj index 4daf4aa5..d887daf8 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj @@ -157,6 +157,7 @@ + diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkSessionProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkSessionProxy.cs index 56f869cd..11a58cd0 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkSessionProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkSessionProxy.cs @@ -10,12 +10,15 @@ namespace Microsoft.Spark.CSharp.Proxy { - internal interface IUdfRegistration { } + internal interface IUdfRegistrationProxy + { + void RegisterFunction(string name, byte[] command, string returnType); + } interface ISparkSessionProxy { ISqlContextProxy SqlContextProxy { get; } - IUdfRegistration Udf { get; } + IUdfRegistrationProxy Udf { get; } ICatalogProxy GetCatalog(); IDataFrameReaderProxy Read(); ISparkSessionProxy NewSession(); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs index d134c086..febfd3b5 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs @@ -17,18 +17,13 @@ internal class SparkSessionIpcProxy : ISparkSessionProxy private readonly JvmObjectReference jvmSparkSessionReference; private readonly ISqlContextProxy sqlContextProxy; - private readonly IUdfRegistration udfRegistration; + private readonly IUdfRegistrationProxy udfRegistrationProxy; - public IUdfRegistration Udf + public IUdfRegistrationProxy Udf { get { - if (udfRegistration == null) - { - //TODO implementation needed - } - - return udfRegistration; + return udfRegistrationProxy; } } @@ -46,6 +41,7 @@ internal SparkSessionIpcProxy(JvmObjectReference jvmSparkSessionReference) { 
this.jvmSparkSessionReference = jvmSparkSessionReference; sqlContextProxy = new SqlContextIpcProxy(GetSqlContextReference()); + udfRegistrationProxy = new UdfRegistrationIpcProxy(sqlContextProxy); } private JvmObjectReference GetSqlContextReference() @@ -98,4 +94,19 @@ public void Stop() SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkSessionReference, "stop"); } } + + [ExcludeFromCodeCoverage] //IPC calls to JVM validated using validation-enabled samples - unit test coverage not reqiured + internal class UdfRegistrationIpcProxy : IUdfRegistrationProxy + { + private readonly ISqlContextProxy sqlContextProxy; + internal UdfRegistrationIpcProxy(ISqlContextProxy sqlContextProxy) + { + this.sqlContextProxy = sqlContextProxy; + } + + public void RegisterFunction(string name, byte[] command, string returnType) + { + sqlContextProxy.RegisterFunction(name, command, returnType); + } + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs index 3ff8a8ab..55b58462 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs @@ -48,6 +48,11 @@ internal SparkContext SparkContext get { return sparkContext; } } + public UdfRegistration Udf + { + get { return new UdfRegistration(sparkSessionProxy.Udf); } + } + /// /// Builder for SparkSession /// diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs new file mode 100644 index 00000000..b9c50083 --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/UdfRegistration.cs @@ -0,0 +1,254 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Services; + +namespace Microsoft.Spark.CSharp.Sql +{ + /// + /// Used for registering User Defined Functions. SparkSession.Udf is used to access instance of this type. 
+ /// + public class UdfRegistration + { + private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(UdfRegistration)); + + private IUdfRegistrationProxy udfRegistrationProxy; + + internal UdfRegistration(IUdfRegistrationProxy udfRegistrationProxy) + { + this.udfRegistrationProxy = udfRegistrationProxy; + } + + //TODO - the following section is a copy of the same functionality in SQLContext..refactoring needed + #region UDF Registration + /// + /// Register UDF with no input argument, e.g: + /// SqlContext.RegisterFunction<bool>("MyFilter", () => true); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter()"); + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 1 input argument, e.g: + /// SqlContext.RegisterFunction<bool, string>("MyFilter", (arg1) => arg1 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1)"); + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 2 input arguments, e.g: + /// SqlContext.RegisterFunction<bool, string, string>("MyFilter", (arg1, arg2) => arg1 != null && arg2 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2)"); + /// + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 3 input arguments, e.g: + /// SqlContext.RegisterFunction<bool, string, string, string>("MyFilter", (arg1, arg2, arg3) => arg1 != null && arg2 != null && arg3 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, columnName3)"); + /// + /// + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 4 input arguments, e.g: + /// SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg4) => arg1 != null && arg2 != null && ... 
&& arg3 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName4)"); + /// + /// + /// + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 5 input arguments, e.g: + /// SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg5) => arg1 != null && arg2 != null && ... && arg5 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName5)"); + /// + /// + /// + /// + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 6 input arguments, e.g: + /// SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg6) => arg1 != null && arg2 != null && ... && arg6 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName6)"); + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 7 input arguments, e.g: + /// SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg7) => arg1 != null && arg2 != null && ... && arg7 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName7)"); + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 8 input arguments, e.g: + /// SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg8) => arg1 != null && arg2 != null && ... 
&& arg8 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName8)"); + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 9 input arguments, e.g: + /// SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg9) => arg1 != null && arg2 != null && ... && arg9 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName9)"); + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + + /// + /// Register UDF with 10 input arguments, e.g: + /// SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg10) => arg1 != null && arg2 != null && ... && arg10 != null); + /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName10)"); + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public void RegisterFunction(string name, Func f) + { + logger.LogInfo("Name of the function to register {0}, method info", name, f.Method); + Func, IEnumerable> udfHelper = new UdfHelper(f).Execute; + udfRegistrationProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); + } + #endregion + } +} diff --git a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML index 0f4c49ce..0fde9067 100644 --- a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML +++ b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML @@ -7675,6 +7675,171 @@ The Json object used to construct a StructType A new StructType instance + + + Register UDF with no input argument, e.g: + SqlContext.RegisterFunction<bool>("MyFilter", () => true); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter()"); + + + + + + + + Register UDF with 1 input argument, e.g: + SqlContext.RegisterFunction<bool, string>("MyFilter", (arg1) => arg1 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1)"); + + + + + + + + + Register UDF with 2 input arguments, e.g: + SqlContext.RegisterFunction<bool, string, string>("MyFilter", (arg1, arg2) => arg1 != null && arg2 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2)"); + + + + + + + + + + Register UDF with 3 input arguments, e.g: + SqlContext.RegisterFunction<bool, string, string, string>("MyFilter", (arg1, arg2, arg3) => arg1 != null && arg2 != null && arg3 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, 
columnName3)"); + + + + + + + + + + + Register UDF with 4 input arguments, e.g: + SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg4) => arg1 != null && arg2 != null && ... && arg3 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName4)"); + + + + + + + + + + + + Register UDF with 5 input arguments, e.g: + SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg5) => arg1 != null && arg2 != null && ... && arg5 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName5)"); + + + + + + + + + + + + + Register UDF with 6 input arguments, e.g: + SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg6) => arg1 != null && arg2 != null && ... && arg6 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName6)"); + + + + + + + + + + + + + + Register UDF with 7 input arguments, e.g: + SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg7) => arg1 != null && arg2 != null && ... && arg7 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName7)"); + + + + + + + + + + + + + + + Register UDF with 8 input arguments, e.g: + SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg8) => arg1 != null && arg2 != null && ... && arg8 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName8)"); + + + + + + + + + + + + + + + + Register UDF with 9 input arguments, e.g: + SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg9) => arg1 != null && arg2 != null && ... && arg9 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName9)"); + + + + + + + + + + + + + + + + + Register UDF with 10 input arguments, e.g: + SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg10) => arg1 != null && arg2 != null && ... && arg10 != null); + sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName10)"); + + + + + + + + + + + + + + + An input stream that always returns the same RDD on each timestep. Useful for testing. 
diff --git a/csharp/AdapterTest/AdapterTest.csproj b/csharp/AdapterTest/AdapterTest.csproj index ca95b87c..c32ed7aa 100644 --- a/csharp/AdapterTest/AdapterTest.csproj +++ b/csharp/AdapterTest/AdapterTest.csproj @@ -119,6 +119,7 @@ + diff --git a/csharp/AdapterTest/Mocks/MockSparkSessionProxy.cs b/csharp/AdapterTest/Mocks/MockSparkSessionProxy.cs index da695c3f..b7cf2ca8 100644 --- a/csharp/AdapterTest/Mocks/MockSparkSessionProxy.cs +++ b/csharp/AdapterTest/Mocks/MockSparkSessionProxy.cs @@ -13,7 +13,7 @@ namespace AdapterTest.Mocks class MockSparkSessionProxy : ISparkSessionProxy { public ISqlContextProxy SqlContextProxy { get { return new MockSqlContextProxy(new MockSparkContextProxy(new MockSparkConfProxy()));} } - public IUdfRegistration Udf { get; } + public IUdfRegistrationProxy Udf { get; } public ICatalogProxy GetCatalog() { throw new NotImplementedException(); diff --git a/csharp/AdapterTest/UdfRegistrationTest.cs b/csharp/AdapterTest/UdfRegistrationTest.cs new file mode 100644 index 00000000..918fe9df --- /dev/null +++ b/csharp/AdapterTest/UdfRegistrationTest.cs @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Sql; +using Moq; +using NUnit.Framework; + +namespace AdapterTest +{ + [TestFixture] + public class UdfRegistrationTest + { + [Test] + public void TestRegisterFunction() + { + Mock mockUdfRegistrationProxy = new Mock(); + mockUdfRegistrationProxy.Setup(m => m.RegisterFunction(It.IsAny(), It.IsAny(), It.IsAny())); + + var udfRegistration = new UdfRegistration(mockUdfRegistrationProxy.Object); + + udfRegistration.RegisterFunction("Func0", () => "Func0"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func0", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func1", s => "Func1"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func1", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func2", (s1, s2) => "Func2"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func2", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func3", (s1, s2, s3) => "Func3"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func3", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func4", (s1, s2, s3, s4) => "Func4"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func4", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func5", (s1, s2, s3, s4, s5) => "Func5"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func5", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func6", (s1, s2, s3, s4, s5, s6) => "Func6"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func6", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func7", (s1, s2, s3, s4, s5, s6, s7) => "Func7"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func7", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func8", (s1, s2, s3, s4, s5, s6, s7, s8) => "Func8"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func8", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func9", (s1, s2, s3, s4, s5, s6, s7, s8, s9) => "Func9"); + mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func9", It.IsAny(), "string")); + + udfRegistration.RegisterFunction("Func10", (s1, s2, s3, s4, s5, s6, s7, s8, s9, s10) => "Func10"); + 
mockUdfRegistrationProxy.Verify(m => m.RegisterFunction("Func10", It.IsAny(), "string")); + } + } +} diff --git a/csharp/Samples/Microsoft.Spark.CSharp/SparkSessionSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/SparkSessionSamples.cs index f628e1c8..7fd1dd3f 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/SparkSessionSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/SparkSessionSamples.cs @@ -185,5 +185,31 @@ internal static void SSCreateDataFrameSample() Assert.AreEqual(schemaPeople.Json, dataFramePeople.Schema.Json); } } + + [Sample] + internal static void SparkSessionUdfSample() + { + GetSparkSession().Udf.RegisterFunction("FullAddress", (city, state) => city + " " + state); + GetSparkSession().Udf.RegisterFunction("PeopleFilter", (name, age) => name == "Bill" && age > 80); + + var peopleDataFrame = GetSparkSession().Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(DataFrameSamples.PeopleJson)); + var functionAppliedDF = peopleDataFrame.SelectExpr("name", "age * 2 as age", + "FullAddress(address.city, address.state) as address") + .Where("PeopleFilter(name, age)"); + + functionAppliedDF.ShowSchema(); + functionAppliedDF.Show(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + var collected = functionAppliedDF.Collect().ToArray(); + CollectionAssert.AreEquivalent(new[] { "name", "age", "address" }, + functionAppliedDF.Schema.Fields.Select(f => f.Name).ToArray()); + Assert.AreEqual(1, collected.Length); + Assert.AreEqual("Bill", collected[0].Get("name")); + Assert.AreEqual(86, collected[0].Get("age")); + Assert.AreEqual("Seattle Washington", collected[0].Get("address")); + } + } } } From b2be81c37fac8a9f0c7e0f8b73522340aa2f3156 Mon Sep 17 00:00:00 2001 From: skaarthik Date: Sat, 28 Jan 2017 01:21:35 -0800 Subject: [PATCH 20/20] disabling log messages that are not useful to Mobius app developers --- .../Interop/Ipc/WeakObjectManager.cs | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/WeakObjectManager.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/WeakObjectManager.cs index 62576db5..77be1776 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/WeakObjectManager.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Interop/Ipc/WeakObjectManager.cs @@ -69,8 +69,8 @@ public int AdjustCheckCount(int currentReferenceCount) int previousReferencesCountBenchmark = referencesCountBenchmark; checkCount *= 2; referencesCountBenchmark = referencesCountBenchmark + referencesCountBenchmark / 2; - logger.LogDebug("Adjust checkCount from {0} to {1}, referencesCountBenchmark from {2} to {3}", - previousCheckCount, checkCount, previousReferencesCountBenchmark, referencesCountBenchmark); + //logger.LogDebug("Adjust checkCount from {0} to {1}, referencesCountBenchmark from {2} to {3}", + // previousCheckCount, checkCount, previousReferencesCountBenchmark, referencesCountBenchmark); } return checkCount; } @@ -134,14 +134,14 @@ public int GetReferencesCount() private void RunReleaseObjectLoop() { - logger.LogDebug("Checking objects thread start ..."); + //logger.LogDebug("Checking objects thread start ..."); while (shouldKeepRunning) { ReleseGarbageCollectedObjects(); Thread.Sleep(CheckInterval); } - logger.LogDebug("Checking objects thread stopped."); + //logger.LogDebug("Checking objects thread stopped."); } ~WeakObjectManagerImpl() @@ -165,13 +165,13 @@ private void ReleseGarbageCollectedObjects() int referencesCount = weakReferences.Count; if (referencesCount == 0) { - 
logger.LogDebug("check begin : quit as weakReferences.Count = 0"); + //logger.LogDebug("check begin : quit as weakReferences.Count = 0"); return; } var beginTime = DateTime.Now; int checkCount = checkCountController.AdjustCheckCount(referencesCount); - logger.LogDebug("check begin : weakReferences.Count = {0}, checkCount: {1}", referencesCount, checkCount); + //logger.LogDebug("check begin : weakReferences.Count = {0}, checkCount: {1}", referencesCount, checkCount); int garbageCount; var aliveList = ReleseGarbageCollectedObjects(checkCount, out garbageCount); @@ -179,11 +179,11 @@ private void ReleseGarbageCollectedObjects() aliveList.ForEach(item => weakReferences.Enqueue(item)); var timeStoreAlive = DateTime.Now; - logger.LogDebug("check end : released {0} garbage, remain {1} alive, used {2} ms : release garbage used {3} ms, store alive used {4} ms", - garbageCount, weakReferences.Count, (DateTime.Now - beginTime).TotalMilliseconds, - (timeReleaseGarbage - beginTime).TotalMilliseconds, - (timeStoreAlive - timeReleaseGarbage).TotalMilliseconds - ); + //logger.LogDebug("check end : released {0} garbage, remain {1} alive, used {2} ms : release garbage used {3} ms, store alive used {4} ms", + // garbageCount, weakReferences.Count, (DateTime.Now - beginTime).TotalMilliseconds, + // (timeReleaseGarbage - beginTime).TotalMilliseconds, + // (timeStoreAlive - timeReleaseGarbage).TotalMilliseconds + // ); } private List ReleseGarbageCollectedObjects(int checkCount, out int garbageCount) @@ -208,7 +208,7 @@ private List ReleseGarbageCollectedObjects(int checkC i++; if (i >= checkCount) { - logger.LogDebug("Stop releasing as exceeded allowed checkCount: {0}", checkCount); + //logger.LogDebug("Stop releasing as exceeded allowed checkCount: {0}", checkCount); break; } } @@ -238,7 +238,7 @@ public int GetAliveCount() public virtual void Dispose() { - logger.LogInfo("Dispose {0}", this.GetType()); + //logger.LogInfo("Dispose {0}", this.GetType()); shouldKeepRunning = false; } }
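
For context on how the UDF registration introduced in PATCH 19 is meant to be used end to end, here is a condensed sketch based on the SparkSessionUdfSample added in that patch. The builder calls and the people.json path are illustrative assumptions rather than code taken from the patch; only the RegisterFunction and DataFrame calls mirror the sample itself.

```csharp
// Condensed usage sketch for the SparkSession.Udf API from PATCH 19.
// Builder calls and the input path are assumptions; RegisterFunction mirrors
// the two-argument overload added in UdfRegistration.cs.
using Microsoft.Spark.CSharp.Sql;

static class SparkSessionUdfSketch
{
    internal static void Run()
    {
        var spark = SparkSession.Builder().AppName("UdfSketch").GetOrCreate();

        // Type parameters: return type first, then the argument types.
        spark.Udf.RegisterFunction<string, string, string>(
            "FullAddress", (city, state) => city + " " + state);

        var people = spark.Read().Json("people.json"); // hypothetical input file
        people.SelectExpr("name", "FullAddress(address.city, address.state) as address")
              .Show();
    }
}
```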