Merge branch 'master' of github.com:apache/spark into dag-viz-streaming
Conflicts:
	streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
	streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala
Andrew Or committed May 15, 2015
Commit 25416dc (2 parents: 9113183 + 48fc38f)
Showing 20 changed files with 214 additions and 99 deletions.
@@ -98,7 +98,16 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) {
var x = d3.scale.linear().domain([minX, maxX]).range([0, width]);
var y = d3.scale.linear().domain([minY, maxY]).range([height, 0]);

-var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) { return timeFormat[d]; });
+var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) {
+  var formattedDate = timeFormat[d];
+  var dotIndex = formattedDate.indexOf('.');
+  if (dotIndex >= 0) {
+    // Remove milliseconds
+    return formattedDate.substring(0, dotIndex);
+  } else {
+    return formattedDate;
+  }
+});
var formatYValue = d3.format(",.2f");
var yAxis = d3.svg.axis().scale(y).orient("left").ticks(5).tickFormat(formatYValue);

@@ -252,28 +261,16 @@ function drawHistogram(id, values, minY, maxY, unitY, batchInterval) {
}

$(function() {
-function getParameterFromURL(param)
-{
-  var parameters = window.location.search.substring(1); // Remove "?"
-  var keyValues = parameters.split('&');
-  for (var i = 0; i < keyValues.length; i++)
-  {
-    var paramKeyValue = keyValues[i].split('=');
-    if (paramKeyValue[0] == param)
-    {
-      return paramKeyValue[1];
-    }
-  }
-}
-
-var status = getParameterFromURL("show-streams-detail") == "true";
+var status = window.localStorage && window.localStorage.getItem("show-streams-detail") == "true";

$("span.expand-input-rate").click(function() {
status = !status;
$("#inputs-table").toggle('collapsed');
// Toggle the class of the arrow between open and closed
$(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed');
-window.history.pushState('', document.title, window.location.pathname + '?show-streams-detail=' + status);
+if (window.localStorage) {
+  window.localStorage.setItem("show-streams-detail", "" + status);
+}
});

if (status) {
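Two changes land in this file: the tick formatter now trims each batch timestamp at the first '.', dropping the milliseconds, and the "show streams detail" toggle is persisted in window.localStorage rather than pushed into the URL query string. A minimal Python sketch of the trimming rule (trim_milliseconds is a hypothetical name for illustration; the real logic is the JavaScript above):

def trim_milliseconds(formatted_date):
    # Drop everything from the first '.' onward, mirroring the tick formatter
    dot_index = formatted_date.find('.')
    return formatted_date[:dot_index] if dot_index >= 0 else formatted_date

assert trim_milliseconds("15:04:32.500") == "15:04:32"
assert trim_milliseconds("15:04:32") == "15:04:32"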
@@ -30,6 +30,11 @@ private[spark] class MasterSource(val master: Master) extends Source {
override def getValue: Int = master.workers.size
})

+// Gauge for alive worker numbers in cluster
+metricRegistry.register(MetricRegistry.name("aliveWorkers"), new Gauge[Int]{
+  override def getValue: Int = master.workers.filter(_.state == WorkerState.ALIVE).size
+})

// Gauge for application numbers in cluster
metricRegistry.register(MetricRegistry.name("apps"), new Gauge[Int] {
override def getValue: Int = master.apps.size
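The new aliveWorkers gauge follows the usual metrics-gauge pattern: nothing is cached, and the count is recomputed from master.workers every time the metrics registry is polled. A rough Python sketch of that pattern (this Gauge class is a stand-in for illustration, not the Dropwizard Metrics API Spark actually uses):

class Gauge:
    # A gauge computes its value lazily, at read time
    def __init__(self, compute):
        self._compute = compute

    def get_value(self):
        return self._compute()

workers = [{"state": "ALIVE"}, {"state": "DEAD"}, {"state": "ALIVE"}]
alive_workers = Gauge(lambda: sum(1 for w in workers if w["state"] == "ALIVE"))
print(alive_workers.get_value())  # 2, and it tracks later changes to workers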
7 changes: 5 additions & 2 deletions core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
@@ -77,14 +77,17 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {

<div class="row-fluid">
<div class="span12">
-<h4> Data Distribution on {rddStorageInfo.dataDistribution.size} Executors </h4>
+<h4>
+  Data Distribution on {rddStorageInfo.dataDistribution.map(_.size).getOrElse(0)}
+  Executors
+</h4>
{workerTable}
</div>
</div>

<div class="row-fluid">
<div class="span12">
-<h4> {rddStorageInfo.partitions.size} Partitions </h4>
+<h4> {rddStorageInfo.partitions.map(_.size).getOrElse(0)} Partitions </h4>
{blockTable}
</div>
</div>;
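dataDistribution and partitions are Option-wrapped sequences, so the old code's .size appears to have counted the Option itself (always 0 or 1, via Scala's Option-to-Iterable conversion) rather than the wrapped collection; .map(_.size).getOrElse(0) unwraps first and falls back to 0 when the value is absent. The same guard, sketched in Python:

def size_or_zero(maybe_items):
    # Equivalent of Scala's opt.map(_.size).getOrElse(0)
    return len(maybe_items) if maybe_items is not None else 0

print(size_or_zero(["exec-1", "exec-2"]))  # 2
print(size_or_zero(None))                  # 0 rather than a miscount or an error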
3 changes: 2 additions & 1 deletion dev/github_jira_sync.py
@@ -33,6 +33,7 @@

# User facing configs
GITHUB_API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark")
+JIRA_PROJECT_NAME = os.environ.get("JIRA_PROJECT_NAME", "SPARK")
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "apachespark")
JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "XXX")
@@ -68,7 +69,7 @@ def get_jira_prs():
page_json = get_json(page)

for pull in page_json:
jiras = re.findall("SPARK-[0-9]{4,5}", pull['title'])
jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
for jira in jiras:
result = result + [(jira, pull)]

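With the project prefix now read from JIRA_PROJECT_NAME, forks of this sync script can point at a different JIRA project without editing the regex; note the pattern still only matches 4- or 5-digit issue numbers. A quick check of the constructed pattern (the PR title here is a made-up example):

import re

JIRA_PROJECT_NAME = "SPARK"  # the script reads this from the environment
pattern = JIRA_PROJECT_NAME + "-[0-9]{4,5}"

title = "[SPARK-7658] [STREAMING] [WEBUI] Update streaming page mouse behaviors"
print(re.findall(pattern, title))  # ['SPARK-7658']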
14 changes: 7 additions & 7 deletions python/docs/pyspark.ml.rst
@@ -1,16 +1,16 @@
pyspark.ml package
-=====================
+==================

ML Pipeline APIs
---------------
+----------------

.. automodule:: pyspark.ml
:members:
:undoc-members:
:inherited-members:

pyspark.ml.param module
--------------------------
+-----------------------

.. automodule:: pyspark.ml.param
:members:
@@ -34,31 +34,31 @@ pyspark.ml.classification module
:inherited-members:

pyspark.ml.recommendation module
--------------------------
+--------------------------------

.. automodule:: pyspark.ml.recommendation
:members:
:undoc-members:
:inherited-members:

pyspark.ml.regression module
--------------------------
+----------------------------

.. automodule:: pyspark.ml.regression
:members:
:undoc-members:
:inherited-members:

pyspark.ml.tuning module
---------------------------------
+------------------------

.. automodule:: pyspark.ml.tuning
:members:
:undoc-members:
:inherited-members:

pyspark.ml.evaluation module
---------------------------------
+----------------------------

.. automodule:: pyspark.ml.evaluation
:members:
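These reST edits only resize section underlines: docutils requires an underline to be at least as long as its title (warning "Title underline too short" otherwise), and this file's convention is to make the lengths match exactly. A tiny Python sanity check of that rule (underline_ok is a hypothetical helper):

def underline_ok(title, underline):
    # docutils wants the underline at least as long as the title
    return len(underline) >= len(title)

print(underline_ok("pyspark.ml package", "=" * 18))       # True: lengths match
print(underline_ok("pyspark.ml.param module", "-" * 22))  # False: one dash short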
57 changes: 37 additions & 20 deletions python/pyspark/ml/classification.py
@@ -43,6 +43,10 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
>>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF()
>>> model.transform(test0).head().prediction
0.0
+>>> model.weights
+DenseVector([5.5...])
+>>> model.intercept
+-2.68...
>>> test1 = sc.parallelize([Row(features=Vectors.sparse(1, [0], [1.0]))]).toDF()
>>> model.transform(test1).head().prediction
1.0
@@ -67,7 +71,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
threshold=0.5, probabilityCol="probability"):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
threshold=0.5, probabilityCol="probability")
"""
super(LogisticRegression, self).__init__()
@@ -92,8 +96,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
threshold=0.5, probabilityCol="probability"):
"""
-setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
threshold=0.5, probabilityCol="probability")
Sets params for logistic regression.
"""
@@ -148,6 +152,20 @@ class LogisticRegressionModel(JavaModel):
Model fitted by LogisticRegression.
"""

+@property
+def weights(self):
+    """
+    Model weights.
+    """
+    return self._call_java("weights")
+
+@property
+def intercept(self):
+    """
+    Model intercept.
+    """
+    return self._call_java("intercept")


class TreeClassifierParams(object):
"""
@@ -202,7 +220,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini"):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini")
"""
super(DecisionTreeClassifier, self).__init__()
@@ -224,9 +242,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
impurity="gini"):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
-impurity="gini")
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini")
Sets params for the DecisionTreeClassifier.
"""
kwargs = self.setParams._input_kwargs
@@ -302,9 +319,9 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
numTrees=20, featureSubsetStrategy="auto", seed=42):
"""
-__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
+__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
numTrees=20, featureSubsetStrategy="auto", seed=42)
"""
super(RandomForestClassifier, self).__init__()
@@ -337,9 +354,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
impurity="gini", numTrees=20, featureSubsetStrategy="auto"):
"""
-setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
+setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, \
impurity="gini", numTrees=20, featureSubsetStrategy="auto")
Sets params for linear classification.
"""
@@ -453,10 +470,10 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic",
maxIter=20, stepSize=0.1):
"""
-__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic",
-maxIter=20, stepSize=0.1)
+__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+lossType="logistic", maxIter=20, stepSize=0.1)
"""
super(GBTClassifier, self).__init__()
#: param for Loss function which GBT tries to minimize (case-insensitive).
@@ -484,9 +501,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
lossType="logistic", maxIter=20, stepSize=0.1):
"""
-setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
-maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
lossType="logistic", maxIter=20, stepSize=0.1)
Sets params for Gradient Boosted Tree Classification.
"""
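Two kinds of change in this file. First, the docstring signatures gain trailing backslashes: inside a Python string literal, a backslash immediately before a newline elides the newline, so the multi-line signature at the top of each docstring renders as one logical line. Second, LogisticRegressionModel now exposes weights and intercept by forwarding to the wrapped JVM model through _call_java, which the new doctest exercises; feature.py and recommendation.py below get the same backslash treatment. A small, self-contained demonstration of the backslash behavior:

def fit(a=1, b=2):
    """
    fit(a=1, \
        b=2)
    The first docstring line is the signature, joined by the backslash.
    """

# The backslash-newline pair is elided, so the signature reads as one line:
print(fit.__doc__.splitlines()[1])  # prints: fit(a=1,         b=2)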
8 changes: 4 additions & 4 deletions python/pyspark/ml/feature.py
@@ -481,7 +481,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
inputCol=None, outputCol=None):
"""
-__init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
+__init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \
inputCol=None, outputCol=None)
"""
super(RegexTokenizer, self).__init__()
@@ -496,7 +496,7 @@ def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+"
def setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
inputCol=None, outputCol=None):
"""
-setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
+setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \
inputCol="input", outputCol="output")
Sets params for this RegexTokenizer.
"""
@@ -869,7 +869,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=42, inputCol=None, outputCol=None):
"""
-__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
+__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
seed=42, inputCol=None, outputCol=None)
"""
super(Word2Vec, self).__init__()
@@ -889,7 +889,7 @@ def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025,
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=42, inputCol=None, outputCol=None):
"""
-setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42,
+setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, \
inputCol=None, outputCol=None)
Sets params for this Word2Vec.
"""
8 changes: 4 additions & 4 deletions python/pyspark/ml/recommendation.py
@@ -92,8 +92,8 @@ def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemB
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
ratingCol="rating", nonnegative=False, checkpointInterval=10):
"""
-__init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
-implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0,
+__init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
+implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0, \
ratingCol="rating", nonnegative=false, checkpointInterval=10)
"""
super(ALS, self).__init__()
@@ -118,8 +118,8 @@ def setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItem
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
ratingCol="rating", nonnegative=False, checkpointInterval=10):
"""
-setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
-implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
+setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
+implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, \
ratingCol="rating", nonnegative=False, checkpointInterval=10)
Sets params for ALS.
"""
(The remaining 12 of the 20 changed files are not shown.)
