Merge branch 'master' of github.com:apache/spark into dag-viz-streaming
Conflicts:
	streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
	streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala
Andrew Or committed May 15, 2015
Commit 25416dc (2 parents: 9113183 + 48fc38f)
Showing 20 changed files with 214 additions and 99 deletions.
@@ -98,7 +98,16 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) {
var x = d3.scale.linear().domain([minX, maxX]).range([0, width]);
var y = d3.scale.linear().domain([minY, maxY]).range([height, 0]);

-var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) { return timeFormat[d]; });
+var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) {
+  var formattedDate = timeFormat[d];
+  var dotIndex = formattedDate.indexOf('.');
+  if (dotIndex >= 0) {
+    // Remove milliseconds
+    return formattedDate.substring(0, dotIndex);
+  } else {
+    return formattedDate;
+  }
+});
var formatYValue = d3.format(",.2f");
var yAxis = d3.svg.axis().scale(y).orient("left").ticks(5).tickFormat(formatYValue);

@@ -252,28 +261,16 @@ function drawHistogram(id, values, minY, maxY, unitY, batchInterval) {
}

$(function() {
-function getParameterFromURL(param)
-{
-  var parameters = window.location.search.substring(1); // Remove "?"
-  var keyValues = parameters.split('&');
-  for (var i = 0; i < keyValues.length; i++)
-  {
-    var paramKeyValue = keyValues[i].split('=');
-    if (paramKeyValue[0] == param)
-    {
-      return paramKeyValue[1];
-    }
-  }
-}
-
-var status = getParameterFromURL("show-streams-detail") == "true";
+var status = window.localStorage && window.localStorage.getItem("show-streams-detail") == "true";

$("span.expand-input-rate").click(function() {
status = !status;
$("#inputs-table").toggle('collapsed');
// Toggle the class of the arrow between open and closed
$(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed');
-window.history.pushState('', document.title, window.location.pathname + '?show-streams-detail=' + status);
+if (window.localStorage) {
+  window.localStorage.setItem("show-streams-detail", "" + status);
+}
});

if (status) {
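Two changes land in this file: the tick formatter now trims each batch timestamp at the first '.', dropping the milliseconds, and the "show streams detail" toggle is persisted in window.localStorage rather than pushed into the URL query string. A minimal Python sketch of the trimming rule (trim_milliseconds is a hypothetical name for illustration; the real logic is the JavaScript above):

def trim_milliseconds(formatted_date):
    # Drop everything from the first '.' onward, mirroring the tick formatter
    dot_index = formatted_date.find('.')
    return formatted_date[:dot_index] if dot_index >= 0 else formatted_date

assert trim_milliseconds("15:04:32.500") == "15:04:32"
assert trim_milliseconds("15:04:32") == "15:04:32"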
@@ -30,6 +30,11 @@ private[spark] class MasterSource(val master: Master) extends Source {
override def getValue: Int = master.workers.size
})

+// Gauge for alive worker numbers in cluster
+metricRegistry.register(MetricRegistry.name("aliveWorkers"), new Gauge[Int]{
+  override def getValue: Int = master.workers.filter(_.state == WorkerState.ALIVE).size
+})

// Gauge for application numbers in cluster
metricRegistry.register(MetricRegistry.name("apps"), new Gauge[Int] {
override def getValue: Int = master.apps.size
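The new aliveWorkers gauge follows the usual metrics-gauge pattern: nothing is cached, and the count is recomputed from master.workers every time the metrics registry is polled. A rough Python sketch of that pattern (this Gauge class is a stand-in for illustration, not the Dropwizard Metrics API Spark actually uses):

class Gauge:
    # A gauge computes its value lazily, at read time
    def __init__(self, compute):
        self._compute = compute

    def get_value(self):
        return self._compute()

workers = [{"state": "ALIVE"}, {"state": "DEAD"}, {"state": "ALIVE"}]
alive_workers = Gauge(lambda: sum(1 for w in workers if w["state"] == "ALIVE"))
print(alive_workers.get_value())  # 2, and it tracks later changes to workers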
7 changes: 5 additions & 2 deletions core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
@@ -77,14 +77,17 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {

<div class="row-fluid">
<div class="span12">
-<h4> Data Distribution on {rddStorageInfo.dataDistribution.size} Executors </h4>
+<h4>
+  Data Distribution on {rddStorageInfo.dataDistribution.map(_.size).getOrElse(0)}
+  Executors
+</h4>
{workerTable}
</div>
</div>

<div class="row-fluid">
<div class="span12">
-<h4> {rddStorageInfo.partitions.size} Partitions </h4>
+<h4> {rddStorageInfo.partitions.map(_.size).getOrElse(0)} Partitions </h4>
{blockTable}
</div>
</div>;
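dataDistribution and partitions are Option-wrapped sequences, so the old code's .size appears to have counted the Option itself (always 0 or 1, via Scala's Option-to-Iterable conversion) rather than the wrapped collection; .map(_.size).getOrElse(0) unwraps first and falls back to 0 when the value is absent. The same guard, sketched in Python:

def size_or_zero(maybe_items):
    # Equivalent of Scala's opt.map(_.size).getOrElse(0)
    return len(maybe_items) if maybe_items is not None else 0

print(size_or_zero(["exec-1", "exec-2"]))  # 2
print(size_or_zero(None))                  # 0 rather than a miscount or an error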
3 changes: 2 additions & 1 deletion dev/github_jira_sync.py
@@ -33,6 +33,7 @@

# User facing configs
GITHUB_API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark")
+JIRA_PROJECT_NAME = os.environ.get("JIRA_PROJECT_NAME", "SPARK")
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "apachespark")
JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "XXX")
@@ -68,7 +69,7 @@ def get_jira_prs():
page_json = get_json(page)

for pull in page_json:
jiras = re.findall("SPARK-[0-9]{4,5}", pull['title'])
jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
for jira in jiras:
result = result + [(jira, pull)]

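With the project prefix now read from JIRA_PROJECT_NAME, forks of this sync script can point at a different JIRA project without editing the regex; note the pattern still only matches 4- or 5-digit issue numbers. A quick check of the constructed pattern (the PR title here is a made-up example):

import re

JIRA_PROJECT_NAME = "SPARK"  # the script reads this from the environment
pattern = JIRA_PROJECT_NAME + "-[0-9]{4,5}"

title = "[SPARK-7658] [STREAMING] [WEBUI] Update streaming page mouse behaviors"
print(re.findall(pattern, title))  # ['SPARK-7658']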
14 changes: 7 additions & 7 deletions python/docs/pyspark.ml.rst
@@ -1,16 +1,16 @@
pyspark.ml package
-=====================
+==================

ML Pipeline APIs
---------------
+----------------

.. automodule:: pyspark.ml
:members:
:undoc-members:
:inherited-members:

pyspark.ml.param module
--------------------------
+-----------------------

.. automodule:: pyspark.ml.param
:members:
@@ -34,31 +34,31 @@ pyspark.ml.classification module
:inherited-members:

pyspark.ml.recommendation module
--------------------------
+--------------------------------

.. automodule:: pyspark.ml.recommendation
:members:
:undoc-members:
:inherited-members:

pyspark.ml.regression module
--------------------------
+----------------------------

.. automodule:: pyspark.ml.regression
:members:
:undoc-members:
:inherited-members:

pyspark.ml.tuning module
---------------------------------
+------------------------

.. automodule:: pyspark.ml.tuning
:members:
:undoc-members:
:inherited-members:

pyspark.ml.evaluation module
---------------------------------
+----------------------------

.. automodule:: pyspark.ml.evaluation
:members:
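These reST edits only resize section underlines: docutils requires an underline to be at least as long as its title (warning "Title underline too short" otherwise), and this file's convention is to make the lengths match exactly. A tiny Python sanity check of that rule (underline_ok is a hypothetical helper):

def underline_ok(title, underline):
    # docutils wants the underline at least as long as the title
    return len(underline) >= len(title)

print(underline_ok("pyspark.ml package", "=" * 18))       # True: lengths match
print(underline_ok("pyspark.ml.param module", "-" * 22))  # False: one dash short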
57 changes: 37 additions & 20 deletions python/pyspark/ml/classification.py
@@ -43,6 +43,10 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
>>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF()
>>> model.transform(test0).head().prediction
0.0
+>>> model.weights
+DenseVector([5.5...])
+>>> model.intercept
+-2.68...
>>> test1 = sc.parallelize([Row(features=Vectors.sparse(1, [0], [1.0]))]).toDF()
>>> model.transform(test1).head().prediction
1.0
@@ -67,7 +71,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
threshold=0.5, probabilityCol="probability"):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
threshold=0.5, probabilityCol="probability")
"""
super(LogisticRegression, self).__init__()
@@ -92,8 +96,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
threshold=0.5, probabilityCol="probability"):
"""
-setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
threshold=0.5, probabilityCol="probability")
Sets params for logistic regression.
"""
@@ -148,6 +152,20 @@ class LogisticRegressionModel(JavaModel):
Model fitted by LogisticRegression.
"""

+@property
+def weights(self):
+    """
+    Model weights.
+    """
+    return self._call_java("weights")
+
+@property
+def intercept(self):
+    """
+    Model intercept.
+    """
+    return self._call_java("intercept")


class TreeClassifierParams(object):
"""
@@ -202,7 +220,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini"):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini")
"""
super(DecisionTreeClassifier, self).__init__()
@@ -224,9 +242,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
impurity="gini"):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
-impurity="gini")
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini")
Sets params for the DecisionTreeClassifier.
"""
kwargs = self.setParams._input_kwargs
@@ -302,9 +319,9 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
numTrees=20, featureSubsetStrategy="auto", seed=42):
"""
-__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
+__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
numTrees=20, featureSubsetStrategy="auto", seed=42)
"""
super(RandomForestClassifier, self).__init__()
@@ -337,9 +354,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
impurity="gini", numTrees=20, featureSubsetStrategy="auto"):
"""
-setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
+setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, \
impurity="gini", numTrees=20, featureSubsetStrategy="auto")
Sets params for linear classification.
"""
@@ -453,10 +470,10 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic",
maxIter=20, stepSize=0.1):
"""
-__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic",
-maxIter=20, stepSize=0.1)
+__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+lossType="logistic", maxIter=20, stepSize=0.1)
"""
super(GBTClassifier, self).__init__()
#: param for Loss function which GBT tries to minimize (case-insensitive).
@@ -484,9 +501,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
lossType="logistic", maxIter=20, stepSize=0.1):
"""
-setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
lossType="logistic", maxIter=20, stepSize=0.1)
Sets params for Gradient Boosted Tree Classification.
"""
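Two kinds of change in this file. First, the docstring signatures gain trailing backslashes: inside a Python string literal, a backslash immediately before a newline elides the newline, so the multi-line signature at the top of each docstring renders as one logical line. Second, LogisticRegressionModel now exposes weights and intercept by forwarding to the wrapped JVM model through _call_java, which the new doctest exercises; feature.py and recommendation.py below get the same backslash treatment. A small, self-contained demonstration of the backslash behavior:

def fit(a=1, b=2):
    """
    fit(a=1, \
        b=2)
    The first docstring line is the signature, joined by the backslash.
    """

# The backslash-newline pair is elided, so the signature reads as one line:
print(fit.__doc__.splitlines()[1])  # prints: fit(a=1,         b=2)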
8 changes: 4 additions & 4 deletions python/pyspark/ml/feature.py
@@ -481,7 +481,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
inputCol=None, outputCol=None):
"""
-__init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
+__init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \
inputCol=None, outputCol=None)
"""
super(RegexTokenizer, self).__init__()
@@ -496,7 +496,7 @@ def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+"
def setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
inputCol=None, outputCol=None):
"""
-setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
+setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \
inputCol="input", outputCol="output")
Sets params for this RegexTokenizer.
"""
@@ -869,7 +869,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=42, inputCol=None, outputCol=None):
"""
-__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
+__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
seed=42, inputCol=None, outputCol=None)
"""
super(Word2Vec, self).__init__()
@@ -889,7 +889,7 @@ def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025,
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=42, inputCol=None, outputCol=None):
"""
-setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42,
+setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, \
inputCol=None, outputCol=None)
Sets params for this Word2Vec.
"""
8 changes: 4 additions & 4 deletions python/pyspark/ml/recommendation.py
@@ -92,8 +92,8 @@ def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemB
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
ratingCol="rating", nonnegative=False, checkpointInterval=10):
"""
-__init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
-implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0,
+__init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
+implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0, \
ratingCol="rating", nonnegative=false, checkpointInterval=10)
"""
super(ALS, self).__init__()
@@ -118,8 +118,8 @@ def setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItem
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
ratingCol="rating", nonnegative=False, checkpointInterval=10):
"""
-setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
-implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
+setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
+implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, \
ratingCol="rating", nonnegative=False, checkpointInterval=10)
Sets params for ALS.
"""
(The remaining 12 of the 20 changed files are not shown.)
