From 6d97fe6151db7189ffa67fcac68813459a168f85 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Mon, 10 Nov 2014 23:19:16 -0800 Subject: [PATCH] add AlphaComponent annotation --- mllib/src/main/scala/org/apache/spark/ml/Estimator.scala | 3 +++ mllib/src/main/scala/org/apache/spark/ml/Evaluator.scala | 3 +++ .../src/main/scala/org/apache/spark/ml/Identifiable.scala | 4 ++++ mllib/src/main/scala/org/apache/spark/ml/Model.scala | 3 +++ mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala | 7 +++++++ .../src/main/scala/org/apache/spark/ml/Transformer.scala | 5 +++++ .../spark/ml/classification/LogisticRegression.scala | 5 +++++ .../ml/evaluation/BinaryClassificationEvaluator.scala | 3 +++ .../scala/org/apache/spark/ml/feature/HashingTF.scala | 3 +++ .../org/apache/spark/ml/feature/StandardScaler.scala | 5 +++++ .../src/main/scala/org/apache/spark/ml/param/params.scala | 8 ++++++++ .../scala/org/apache/spark/ml/tuning/CrossValidator.scala | 5 +++++ .../org/apache/spark/ml/tuning/ParamGridBuilder.scala | 3 +++ 13 files changed, 57 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala b/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala index 3edb7d7f61e14..fdbee743e8177 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala @@ -20,13 +20,16 @@ package org.apache.spark.ml import scala.annotation.varargs import scala.collection.JavaConverters._ +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param.{ParamMap, ParamPair, Params} import org.apache.spark.sql.SchemaRDD import org.apache.spark.sql.api.java.JavaSchemaRDD /** + * :: AlphaComponent :: * Abstract class for estimators that fit models to data. 
*/ +@AlphaComponent abstract class Estimator[M <: Model[M]] extends PipelineStage with Params { /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/Evaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/Evaluator.scala index 3155602ba0154..db563dd550e56 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Evaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Evaluator.scala @@ -17,12 +17,15 @@ package org.apache.spark.ml +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param.ParamMap import org.apache.spark.sql.SchemaRDD /** + * :: AlphaComponent :: * Abstract class for evaluators that compute metrics from predictions. */ +@AlphaComponent abstract class Evaluator extends Identifiable { /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala b/mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala index 2f4fb5420818f..70899483992c1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala @@ -19,9 +19,13 @@ package org.apache.spark.ml import java.util.UUID +import org.apache.spark.annotation.AlphaComponent + /** + * :: AlphaComponent :: * Object with a unique id. */ +@AlphaComponent trait Identifiable extends Serializable { /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/Model.scala b/mllib/src/main/scala/org/apache/spark/ml/Model.scala index 43e117192da52..cae5082b51196 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Model.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Model.scala @@ -17,13 +17,16 @@ package org.apache.spark.ml +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param.ParamMap /** + * :: AlphaComponent :: * A fitted model, i.e., a [[Transformer]] produced by an [[Estimator]]. * * @tparam M model type */ +@AlphaComponent abstract class Model[M <: Model[M]] extends Transformer { /** * The parent estimator that produced this model. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 8748457bf3f28..7a860fd445f21 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -20,12 +20,15 @@ package org.apache.spark.ml import scala.collection.mutable.ListBuffer import org.apache.spark.Logging +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param.{Param, ParamMap} import org.apache.spark.sql.{SchemaRDD, StructType} /** + * :: AlphaComponent :: * A stage in a pipeline, either an Estimator or an Transformer. */ +@AlphaComponent abstract class PipelineStage extends Serializable with Logging { /** @@ -49,6 +52,7 @@ abstract class PipelineStage extends Serializable with Logging { } /** + * :: AlphaComponent :: * A simple pipeline, which acts as an estimator. A Pipeline consists of a sequence of stages, each * of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline.fit]] is called, the * stages are executed in order. If a stage is an [[Estimator]], its [[Estimator.fit]] method will @@ -59,6 +63,7 @@ abstract class PipelineStage extends Serializable with Logging { * transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as * an identity transformer. */ +@AlphaComponent class Pipeline extends Estimator[PipelineModel] { /** param for pipeline stages */ @@ -125,8 +130,10 @@ class Pipeline extends Estimator[PipelineModel] { } /** + * :: AlphaComponent :: * Represents a compiled pipeline. 
*/ +@AlphaComponent class PipelineModel( override val parent: Pipeline, override val fittingParamMap: ParamMap, diff --git a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala index 0835cc7cd65b5..7086b534e64a8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala @@ -21,6 +21,7 @@ import scala.annotation.varargs import scala.reflect.runtime.universe.TypeTag import org.apache.spark.Logging +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param._ import org.apache.spark.sql.SchemaRDD import org.apache.spark.sql.api.java.JavaSchemaRDD @@ -30,8 +31,10 @@ import org.apache.spark.sql.catalyst.dsl._ import org.apache.spark.sql.catalyst.types._ /** + * :: AlphaComponent :: * Abstract class for transformers that transform one dataset into another. */ +@AlphaComponent abstract class Transformer extends PipelineStage with Params { /** @@ -80,9 +83,11 @@ abstract class Transformer extends PipelineStage with Params { } /** + * :: AlphaComponent :: * Abstract class for transformers that take one input column, apply transformation, and output the * result as a new column. 
*/ +@AlphaComponent abstract class UnaryTransformer[IN, OUT: TypeTag, T <: UnaryTransformer[IN, OUT, T]] extends Transformer with HasInputCol with HasOutputCol with Logging { diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index c1d1c373af97b..d1c61272f5d8a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -17,6 +17,7 @@ package org.apache.spark.ml.classification +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS @@ -28,8 +29,10 @@ import org.apache.spark.sql.catalyst.dsl._ import org.apache.spark.storage.StorageLevel /** + * :: AlphaComponent :: * Params for logistic regression. */ +@AlphaComponent private[classification] trait LogisticRegressionParams extends Params with HasRegParam with HasMaxIter with HasLabelCol with HasThreshold with HasFeaturesCol with HasScoreCol with HasPredictionCol { @@ -108,8 +111,10 @@ class LogisticRegression extends Estimator[LogisticRegressionModel] with Logisti } /** + * :: AlphaComponent :: * Model produced by [[LogisticRegression]]. 
*/ +@AlphaComponent class LogisticRegressionModel private[ml] ( override val parent: LogisticRegression, override val fittingParamMap: ParamMap, diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala index eec450beb70b1..0b0504e036ec9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala @@ -17,14 +17,17 @@ package org.apache.spark.ml.evaluation +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics import org.apache.spark.sql.{DoubleType, Row, SchemaRDD} /** + * :: AlphaComponent :: * Evaluator for binary classification, which expects two input columns: score and label. */ +@AlphaComponent class BinaryClassificationEvaluator extends Evaluator with Params with HasScoreCol with HasLabelCol { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index b464f444fb3b7..b98b1755a3584 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -17,14 +17,17 @@ package org.apache.spark.ml.feature +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.{IntParam, ParamMap} import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.Vector /** + * :: AlphaComponent :: * Maps a sequence of terms to their term frequencies using the hashing trick. 
*/ +@AlphaComponent class HashingTF extends UnaryTransformer[Iterable[_], Vector, HashingTF] { /** number of features */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 9300a4f5e7003..d3e663d490479 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -17,6 +17,7 @@ package org.apache.spark.ml.feature +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.mllib.feature @@ -31,9 +32,11 @@ import org.apache.spark.sql.catalyst.dsl._ private[feature] trait StandardScalerParams extends Params with HasInputCol with HasOutputCol /** + * :: AlphaComponent :: * Standardizes features by removing the mean and scaling to unit variance using column summary * statistics on the samples in the training set. */ +@AlphaComponent class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerParams { def setInputCol(value: String): this.type = set(inputCol, value) @@ -66,8 +69,10 @@ class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerP } /** + * :: AlphaComponent :: * Model fitted by [[StandardScaler]]. 
*/ +@AlphaComponent class StandardScalerModel private[ml] ( override val parent: StandardScaler, override val fittingParamMap: ParamMap, diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index a642af9588697..c08362837184c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -19,12 +19,15 @@ package org.apache.spark.ml.param import java.lang.reflect.Modifier +import org.apache.spark.annotation.AlphaComponent + import scala.annotation.varargs import scala.collection.mutable import org.apache.spark.ml.Identifiable /** + * :: AlphaComponent :: * A param with self-contained documentation and optionally default value. Primitive-typed param * should use the specialized versions, which are more friendly to Java users. * @@ -33,6 +36,7 @@ import org.apache.spark.ml.Identifiable * @param doc documentation * @tparam T param value type */ +@AlphaComponent class Param[T] ( val parent: Params, val name: String, @@ -102,9 +106,11 @@ class BooleanParam(parent: Params, name: String, doc: String, defaultValue: Opti case class ParamPair[T](param: Param[T], value: T) /** + * :: AlphaComponent :: * Trait for components that take parameters. This also provides an internal param map to store * parameter values attached to the instance. */ +@AlphaComponent trait Params extends Identifiable with Serializable { /** Returns all params. */ @@ -198,8 +204,10 @@ private[ml] object Params { } /** + * :: AlphaComponent :: * A param to value map. 
*/ +@AlphaComponent class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) extends Serializable { /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index cc70bba696140..7d0862f79c9e3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.tuning import com.github.fommil.netlib.F2jBLAS import org.apache.spark.Logging +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml._ import org.apache.spark.ml.param.{IntParam, Param, ParamMap, Params} import org.apache.spark.mllib.util.MLUtils @@ -49,8 +50,10 @@ private[ml] trait CrossValidatorParams extends Params { } /** + * :: AlphaComponent :: * K-fold cross validation. */ +@AlphaComponent class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorParams with Logging { private val f2jBLAS = new F2jBLAS @@ -103,8 +106,10 @@ class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorP } /** + * :: AlphaComponent :: * Model from k-fold cross validation. 
*/ +@AlphaComponent class CrossValidatorModel private[ml] ( override val parent: CrossValidator, override val fittingParamMap: ParamMap, diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala index dd7c868654928..7bafd7111b20f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala @@ -20,11 +20,14 @@ package org.apache.spark.ml.tuning import scala.annotation.varargs import scala.collection.mutable +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param._ /** + * :: AlphaComponent :: * Builder for a param grid used in grid search-based model selection. */ +@AlphaComponent class ParamGridBuilder { private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]