Skip to content

Commit

Permalink
[SPARK-1938] [SQL] ApproxCountDistinctMergeFunction should return Int value.
Browse files Browse the repository at this point in the history

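`ApproxCountDistinctMergeFunction` should return an `Int` value because the `dataType` of `ApproxCountDistinct` is `IntegerType`. (Note: as the squashed commits and the diff below show, the change that was finally merged instead makes the `dataType` of `CountDistinct`, `ApproxCountDistinctMerge` and `ApproxCountDistinct` `LongType`, and makes `CountDistinctFunction.eval` return a `Long`.)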

Author: Takuya UESHIN <[email protected]>

Closes apache#893 from ueshin/issues/SPARK-1938 and squashes the following commits:

3970e88 [Takuya UESHIN] Remove a superfluous line.
5ad7ec1 [Takuya UESHIN] Make dataType for each of CountDistinct, ApproxCountDistinctMerge and ApproxCountDistinct LongType.
cbe7c71 [Takuya UESHIN] Revert a change.
fc3ac0f [Takuya UESHIN] Fix evaluated value type of ApproxCountDistinctMergeFunction to Int.
  • Loading branch information
ueshin authored and rxin committed May 28, 2014
1 parent 0682567 commit 9df8683
Showing 1 changed file with 4 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ abstract class AggregateFunction
override def dataType = base.dataType

def update(input: Row): Unit
override def eval(input: Row): Any

// Do we really need this?
override def newInstance() = makeCopy(productIterator.map { case a: AnyRef => a }.toArray)
Expand Down Expand Up @@ -166,7 +165,7 @@ case class CountDistinct(expressions: Seq[Expression]) extends AggregateExpressi
override def children = expressions
override def references = expressions.flatMap(_.references).toSet
override def nullable = false
override def dataType = IntegerType
override def dataType = LongType
override def toString = s"COUNT(DISTINCT ${expressions.mkString(",")})"
override def newInstance() = new CountDistinctFunction(expressions, this)
}
Expand All @@ -184,7 +183,7 @@ case class ApproxCountDistinctMerge(child: Expression, relativeSD: Double)
extends AggregateExpression with trees.UnaryNode[Expression] {
override def references = child.references
override def nullable = false
override def dataType = IntegerType
override def dataType = LongType
override def toString = s"APPROXIMATE COUNT(DISTINCT $child)"
override def newInstance() = new ApproxCountDistinctMergeFunction(child, this, relativeSD)
}
Expand All @@ -193,7 +192,7 @@ case class ApproxCountDistinct(child: Expression, relativeSD: Double = 0.05)
extends PartialAggregate with trees.UnaryNode[Expression] {
override def references = child.references
override def nullable = false
override def dataType = IntegerType
override def dataType = LongType
override def toString = s"APPROXIMATE COUNT(DISTINCT $child)"

override def asPartial: SplitEvaluation = {
Expand Down Expand Up @@ -394,7 +393,7 @@ case class CountDistinctFunction(expr: Seq[Expression], base: AggregateExpressio
}
}

override def eval(input: Row): Any = seen.size
override def eval(input: Row): Any = seen.size.toLong
}

case class FirstFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction {
Expand Down

0 comments on commit 9df8683

Please sign in to comment.