-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-27915][SQL][WIP] Update logical Filter's output nullability based on IsNotNull conditions #24765
[SPARK-27915][SQL][WIP] Update logical Filter's output nullability based on IsNotNull conditions #24765
Changes from 6 commits
b0182ac
b950474
33b579c
acc98f8
fa89706
05e4bcf
1ad4d49
a10632f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,7 +51,14 @@ case class Subquery(child: LogicalPlan) extends OrderPreservingUnaryNode { | |
|
||
case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) | ||
extends OrderPreservingUnaryNode { | ||
override def output: Seq[Attribute] = projectList.map(_.toAttribute) | ||
override def output: Seq[Attribute] = { | ||
// The child operator may have inferred more precise nullability information | ||
// for the project expression, so leverage that information if it's availble: | ||
val childOutputNullability = child.output.map(a => a.exprId -> a.nullable).toMap | ||
projectList | ||
.map(_.toAttribute) | ||
.map{ a => childOutputNullability.get(a.exprId).map(a.withNullability).getOrElse(a) } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to fix this part? It seems |
||
} | ||
override def maxRows: Option[Long] = child.maxRows | ||
|
||
override lazy val resolved: Boolean = { | ||
|
@@ -129,7 +136,22 @@ case class Generate( | |
|
||
case class Filter(condition: Expression, child: LogicalPlan) | ||
extends OrderPreservingUnaryNode with PredicateHelper { | ||
override def output: Seq[Attribute] = child.output | ||
|
||
override def output: Seq[Attribute] = { | ||
val impliedNotNullExprIds: Set[ExprId] = { | ||
splitConjunctivePredicates(condition) | ||
.collect { case isNotNull: IsNotNull => isNotNull } | ||
.map(getImpliedNotNullExprIds) | ||
.foldLeft(Set.empty[ExprId])(_ ++ _) | ||
} | ||
child.output.map { a => | ||
if (a.nullable && impliedNotNullExprIds.contains(a.exprId)) { | ||
a.withNullability(false) | ||
} else { | ||
a | ||
} | ||
} | ||
} | ||
|
||
override def maxRows: Option[Long] = child.maxRows | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -87,28 +87,25 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) | |
case class FilterExec(condition: Expression, child: SparkPlan) | ||
extends UnaryExecNode with CodegenSupport with PredicateHelper { | ||
|
||
// Mark this as empty. We'll evaluate the input during doConsume(). We don't want to evaluate | ||
// all the variables at the beginning to take advantage of short circuiting. | ||
override def usedInputs: AttributeSet = AttributeSet.empty | ||
|
||
// Split out all the IsNotNulls from condition. | ||
private val (notNullPreds, otherPreds) = splitConjunctivePredicates(condition).partition { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found the old code here to be slightly confusing because it seemed to be using
There might be some subtleties related in (1) related to non-deterministic expressions, but I think that's accounted for further down at the place where we're actually generating the checks. In the old code, the In this PR, I've separated these two usages: the "update nullability for downstream operators" now uses the more precise condition implemented in |
||
case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(child.outputSet) | ||
case IsNotNull(_) => true | ||
case _ => false | ||
} | ||
|
||
// If one expression and its children are null intolerant, it is null intolerant. | ||
private def isNullIntolerant(expr: Expression): Boolean = expr match { | ||
case e: NullIntolerant => e.children.forall(isNullIntolerant) | ||
case _ => false | ||
private val impliedNotNullExprIds: Set[ExprId] = { | ||
notNullPreds | ||
.map { case n: IsNotNull => getImpliedNotNullExprIds(n) } | ||
.foldLeft(Set.empty[ExprId])(_ ++ _) | ||
} | ||
|
||
// The columns that will filtered out by `IsNotNull` could be considered as not nullable. | ||
private val notNullAttributes = notNullPreds.flatMap(_.references).distinct.map(_.exprId) | ||
|
||
// Mark this as empty. We'll evaluate the input during doConsume(). We don't want to evaluate | ||
// all the variables at the beginning to take advantage of short circuiting. | ||
override def usedInputs: AttributeSet = AttributeSet.empty | ||
|
||
override def output: Seq[Attribute] = { | ||
child.output.map { a => | ||
if (a.nullable && notNullAttributes.contains(a.exprId)) { | ||
if (a.nullable && impliedNotNullExprIds.contains(a.exprId)) { | ||
a.withNullability(false) | ||
} else { | ||
a | ||
|
@@ -193,7 +190,7 @@ case class FilterExec(condition: Expression, child: SparkPlan) | |
// Reset the isNull to false for the not-null columns, then the followed operators could | ||
// generate better code (remove dead branches). | ||
val resultVars = input.zipWithIndex.map { case (ev, i) => | ||
if (notNullAttributes.contains(child.output(i).exprId)) { | ||
if (impliedNotNullExprIds.contains(child.output(i).exprId)) { | ||
ev.isNull = FalseLiteral | ||
} | ||
ev | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe this should be
AttributeReference
? I couldn't remember offhand how to getExprIds
from arbitrary expressions, hence this hack.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use
AttributeSet
?