From 878dad782f759e9790014c8f923a8d496aaba05e Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Tue, 5 Nov 2019 15:44:48 -0800 Subject: [PATCH 1/2] colexec: fix handling of filters by joiners Previously, in order to handle ON expression of the joiners, we would modify the expression itself (i.e. we would remap the IndexedVars inside of the expression). However, this approach is error-prone and doesn't work in all cases (consider an example when we have a filter like "@1 = 'abc@2def'" - @2 is not an ordinal, but it would get treated as an IndexedVar with index 1). Now we have enhanced the IndexedVarHelper to handle the remapping internally when the IndexedVar is being bound to a container. This way no modifications to the actual expressions are needed, and such approach should work in all cases. Release note: None --- pkg/sql/colexec/execplan.go | 101 ++++++------------ pkg/sql/colexec/expr.go | 34 +++--- pkg/sql/execinfra/expr.go | 11 +- .../testdata/logic_test/exec_hash_join | 31 ++++++ pkg/sql/sem/tree/indexed_vars.go | 49 +++++++-- 5 files changed, 133 insertions(+), 93 deletions(-) diff --git a/pkg/sql/colexec/execplan.go b/pkg/sql/colexec/execplan.go index 90465919fd66..cfa2d92f5662 100644 --- a/pkg/sql/colexec/execplan.go +++ b/pkg/sql/colexec/execplan.go @@ -15,7 +15,6 @@ import ( "fmt" "math" "reflect" - "strings" "github.com/cockroachdb/cockroach/pkg/col/coltypes" "github.com/cockroachdb/cockroach/pkg/sql/colexec/execerror" @@ -196,9 +195,12 @@ func createJoiner( if !post.Filter.Empty() { planningState.postFilterPlanning = makeFilterPlanningState(len(leftTypes), len(rightTypes)) - leftOutCols, rightOutCols = planningState.postFilterPlanning.renderAllNeededCols( + leftOutCols, rightOutCols, err = planningState.postFilterPlanning.renderAllNeededCols( post.Filter, leftOutCols, rightOutCols, ) + if err != nil { + return err + } } var ( @@ -215,8 +217,7 @@ func createJoiner( result.setProjectedByJoinerColumnTypes(spec, leftOutCols, rightOutCols) if onExpr != nil && joinType == sqlbase.JoinType_INNER { - remappedOnExpr := onExprPlanning.remapIVars(*onExpr) - err = result.planFilterExpr(flowCtx.NewEvalCtx(), remappedOnExpr) + err = result.planFilterExpr(flowCtx.NewEvalCtx(), *onExpr, onExprPlanning.indexVarMap) onExprPlanning.projectOutExtraCols(result) } return err @@ -434,9 +435,12 @@ func NewColOperator( } onExpr = &core.HashJoiner.OnExpr onExprPlanning = makeFilterPlanningState(len(leftTypes), len(rightTypes)) - leftOutCols, rightOutCols = onExprPlanning.renderAllNeededCols( + leftOutCols, rightOutCols, err = onExprPlanning.renderAllNeededCols( *onExpr, leftOutCols, rightOutCols, ) + if err != nil { + return onExpr, onExprPlanning, leftOutCols, rightOutCols, err + } } result.Op, err = NewEqHashJoinerOp( @@ -492,26 +496,29 @@ func NewColOperator( onExprPlanning = makeFilterPlanningState(len(leftTypes), len(rightTypes)) switch core.MergeJoiner.Type { case sqlbase.JoinType_INNER: - leftOutCols, rightOutCols = onExprPlanning.renderAllNeededCols( + leftOutCols, rightOutCols, err = onExprPlanning.renderAllNeededCols( *onExpr, leftOutCols, rightOutCols, ) case sqlbase.JoinType_LEFT_SEMI, sqlbase.JoinType_LEFT_ANTI: - filterOnlyOnLeft = onExprPlanning.isFilterOnlyOnLeft(*onExpr) + filterOnlyOnLeft, err = onExprPlanning.isFilterOnlyOnLeft(*onExpr) filterConstructor = func(op Operator) (Operator, error) { r := NewColOperatorResult{ Op: op, ColumnTypes: append(spec.Input[0].ColumnTypes, spec.Input[1].ColumnTypes...), } - // We don't need to remap the indexed vars in onExpr because the - // filter will be run alongside the merge joiner, and it will have - // access to all of the columns from both sides. - err := r.planFilterExpr(flowCtx.NewEvalCtx(), *onExpr) + // We don't need to specify indexVarMap because the filter will be + // run alongside the merge joiner, and it will have access to all + // of the columns from both sides. + err := r.planFilterExpr(flowCtx.NewEvalCtx(), *onExpr, nil /* indexVarMap */) return r.Op, err } default: return onExpr, onExprPlanning, leftOutCols, rightOutCols, errors.Errorf("can only plan INNER, LEFT SEMI, and LEFT ANTI merge joins with ON expressions") } } + if err != nil { + return onExpr, onExprPlanning, leftOutCols, rightOutCols, err + } result.Op, err = NewMergeJoinOp( core.MergeJoiner.Type, @@ -692,8 +699,7 @@ func NewColOperator( // legal for empty rows to be passed between processors). if !post.Filter.Empty() { - filterExpr := planningState.postFilterPlanning.remapIVars(post.Filter) - if err = result.planFilterExpr(flowCtx.NewEvalCtx(), filterExpr); err != nil { + if err = result.planFilterExpr(flowCtx.NewEvalCtx(), post.Filter, planningState.postFilterPlanning.indexVarMap); err != nil { return result, err } planningState.postFilterPlanning.projectOutExtraCols(&result) @@ -773,12 +779,15 @@ func makeFilterPlanningState(numLeftInputCols, numRightInputCols int) filterPlan // NOTE: projectOutExtraCols must be called after the filter has been run. func (p *filterPlanningState) renderAllNeededCols( filter execinfrapb.Expression, leftOutCols []uint32, rightOutCols []uint32, -) ([]uint32, []uint32) { - neededColumnsForFilter := findIVarsInRange( +) ([]uint32, []uint32, error) { + neededColumnsForFilter, err := findIVarsInRange( filter, 0, /* start */ p.numLeftInputCols+p.numRightInputCols, ) + if err != nil { + return nil, nil, errors.Errorf("error parsing filter expression %q: %s", filter, err) + } if len(neededColumnsForFilter) > 0 { // Store the original out columns to be restored later. p.originalLeftOutCols = leftOutCols @@ -842,66 +851,20 @@ func (p *filterPlanningState) renderAllNeededCols( } } } - return leftOutCols, rightOutCols + return leftOutCols, rightOutCols, nil } // isFilterOnlyOnLeft returns whether the filter expression doesn't use columns // from the right side. -func (p *filterPlanningState) isFilterOnlyOnLeft(filter execinfrapb.Expression) bool { +func (p *filterPlanningState) isFilterOnlyOnLeft(filter execinfrapb.Expression) (bool, error) { // Find all needed columns for filter only from the right side. - neededColumnsForFilter := findIVarsInRange( + neededColumnsForFilter, err := findIVarsInRange( filter, p.numLeftInputCols, p.numLeftInputCols+p.numRightInputCols, ) - return len(neededColumnsForFilter) == 0 -} - -// remapIVars remaps tree.IndexedVars in expr using p.indexVarMap. Note that if -// the remapping is needed, then a new remapped expression is returned, but if -// the remapping is not needed (which is the case when all needed by the filter -// columns were part of the projection), then the same expression is returned. -func (p *filterPlanningState) remapIVars(expr execinfrapb.Expression) execinfrapb.Expression { - if p.indexVarMap == nil { - // If p.indexVarMap is nil, then there is no remapping to do. - return expr - } - ret := execinfrapb.Expression{} - if expr.LocalExpr != nil { - ret.LocalExpr = sqlbase.RemapIVarsInTypedExpr(expr.LocalExpr, p.indexVarMap) - } else { - ret.Expr = expr.Expr - // We iterate in the reverse order so that the multiple digit numbers are - // handled correctly (consider an expression like @1 AND @11). - // - // In order not to confuse the newly replaced ordinals with the original - // ones we first remap all ordinals using "custom" ordinal symbol first - // (namely, instead of using `@1` we will use `@#1`). Consider an example - // `@2 = @4` with p.idxVarMap = {-1, 0, -1, 1}. If we didn't do this custom - // ordinal remapping, then we would get into a situation of `@2 = @2` in - // which the first @2 is original and needs to be replaced whereas the - // second one should not be touched. After the first loop, we will have - // `@#1 = @#2`. - for idx := len(p.indexVarMap) - 1; idx >= 0; idx-- { - if p.indexVarMap[idx] != -1 { - // We need +1 below because the ordinals are counting from 1. - ret.Expr = strings.ReplaceAll( - ret.Expr, - fmt.Sprintf("@%d", idx+1), - fmt.Sprintf("@#%d", p.indexVarMap[idx]+1), - ) - } - } - // Now we simply need to convert the "custom" ordinal symbol by removing - // the pound sign (in the example above, after this loop we will have - // `@1 = @2`). - for idx := len(p.indexVarMap); idx > 0; idx-- { - ret.Expr = strings.ReplaceAll( - ret.Expr, - fmt.Sprintf("@#%d", idx), - fmt.Sprintf("@%d", idx), - ) - } + if err != nil { + return false, errors.Errorf("error parsing filter expression %q: %s", filter, err) } - return ret + return len(neededColumnsForFilter) == 0, nil } // projectOutExtraCols, possibly, adds a projection to remove all the extra @@ -947,13 +910,13 @@ func (r *NewColOperatorResult) setProjectedByJoinerColumnTypes( } func (r *NewColOperatorResult) planFilterExpr( - evalCtx *tree.EvalContext, filter execinfrapb.Expression, + evalCtx *tree.EvalContext, filter execinfrapb.Expression, indexVarMap []int, ) error { var ( helper execinfra.ExprHelper selectionMem int ) - err := helper.Init(filter, r.ColumnTypes, evalCtx) + err := helper.InitWithRemapping(filter, r.ColumnTypes, evalCtx, indexVarMap) if err != nil { return err } diff --git a/pkg/sql/colexec/expr.go b/pkg/sql/colexec/expr.go index dfebb0fd0334..c05cf9477753 100644 --- a/pkg/sql/colexec/expr.go +++ b/pkg/sql/colexec/expr.go @@ -11,36 +11,36 @@ package colexec import ( - "fmt" - "strings" - "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" + "github.com/cockroachdb/cockroach/pkg/sql/parser" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" ) // findIVarsInRange searches Expr for presence of tree.IndexedVars with indices // in range [start, end). It returns a slice containing all such indices. -func findIVarsInRange(expr execinfrapb.Expression, start int, end int) []uint32 { +func findIVarsInRange(expr execinfrapb.Expression, start int, end int) ([]uint32, error) { res := make([]uint32, 0) if start >= end { - return res + return res, nil } + var exprToWalk tree.Expr if expr.LocalExpr != nil { - visitor := ivarExpressionVisitor{ivarSeen: make([]bool, end)} - _, _ = tree.WalkExpr(visitor, expr.LocalExpr) - for i := start; i < end; i++ { - if visitor.ivarSeen[i] { - res = append(res, uint32(i)) - } - } + exprToWalk = expr.LocalExpr } else { - for i := start; i < end; i++ { - if strings.Contains(expr.Expr, fmt.Sprintf("@%d", i+1)) { - res = append(res, uint32(i)) - } + e, err := parser.ParseExpr(expr.Expr) + if err != nil { + return nil, err + } + exprToWalk = e + } + visitor := ivarExpressionVisitor{ivarSeen: make([]bool, end)} + _, _ = tree.WalkExpr(visitor, exprToWalk) + for i := start; i < end; i++ { + if visitor.ivarSeen[i] { + res = append(res, uint32(i)) } } - return res + return res, nil } type ivarExpressionVisitor struct { diff --git a/pkg/sql/execinfra/expr.go b/pkg/sql/execinfra/expr.go index e3b0446ff145..e2a1a44a83da 100644 --- a/pkg/sql/execinfra/expr.go +++ b/pkg/sql/execinfra/expr.go @@ -142,13 +142,22 @@ func (eh *ExprHelper) IndexedVarNodeFormatter(idx int) tree.NodeFormatter { // Init initializes the ExprHelper. func (eh *ExprHelper) Init( expr execinfrapb.Expression, types []types.T, evalCtx *tree.EvalContext, +) error { + return eh.InitWithRemapping(expr, types, evalCtx, nil /* indexVarMap */) +} + +// InitWithRemapping initializes the ExprHelper. +// indexVarMap specifies an optional (i.e. it can be left nil) map that will be +// used to remap the indices of IndexedVars before binding them to a container. +func (eh *ExprHelper) InitWithRemapping( + expr execinfrapb.Expression, types []types.T, evalCtx *tree.EvalContext, indexVarMap []int, ) error { if expr.Empty() { return nil } eh.evalCtx = evalCtx eh.Types = types - eh.Vars = tree.MakeIndexedVarHelper(eh, len(types)) + eh.Vars = tree.MakeIndexedVarHelperWithRemapping(eh, len(types), indexVarMap) if expr.LocalExpr != nil { eh.Expr = expr.LocalExpr diff --git a/pkg/sql/logictest/testdata/logic_test/exec_hash_join b/pkg/sql/logictest/testdata/logic_test/exec_hash_join index 7ab96bfffacd..e1932b751286 100644 --- a/pkg/sql/logictest/testdata/logic_test/exec_hash_join +++ b/pkg/sql/logictest/testdata/logic_test/exec_hash_join @@ -124,3 +124,34 @@ x y NULL NULL NULL 42 NULL 44 + +# Regression test for #41407. +statement ok +CREATE TABLE t41407 AS + SELECT + g AS _float8, + g % 0 = 0 AS _bool, + g AS _decimal, + g AS _string, + g AS _bytes + FROM + generate_series(NULL, NULL) AS g; + +query TRTTRRBR +SELECT + tab_1688._bytes, + tab_1688._float8, + tab_1689._string, + tab_1689._string, + tab_1688._float8, + tab_1688._float8, + tab_1689._bool, + tab_1690._decimal +FROM + t41407 AS tab_1688 + JOIN t41407 AS tab_1689 + JOIN t41407 AS tab_1690 ON + tab_1689._bool = tab_1690._bool ON + tab_1688._float8 = tab_1690._float8 + AND tab_1688._bool = tab_1689._bool; +---- diff --git a/pkg/sql/sem/tree/indexed_vars.go b/pkg/sql/sem/tree/indexed_vars.go index b73f154a5229..b739e86afca5 100644 --- a/pkg/sql/sem/tree/indexed_vars.go +++ b/pkg/sql/sem/tree/indexed_vars.go @@ -119,6 +119,12 @@ func NewTypedOrdinalReference(r int, typ *types.T) *IndexedVar { type IndexedVarHelper struct { vars []IndexedVar container IndexedVarContainer + + // indexVarMap is an optional mapping for indices of IndexedVars. If it is + // not nil, it will be used to determine the "actual index" of an IndexedVar + // before binding - instead of using ivar.Idx, a new IndexedVar will have + // indexVarMap[ivar.Idx] as its index. + indexVarMap []int } // Container returns the container associated with the helper. @@ -126,6 +132,22 @@ func (h *IndexedVarHelper) Container() IndexedVarContainer { return h.container } +// getIndex is a helper function that returns an "actual index" of the +// IndexedVar. +func (h *IndexedVarHelper) getIndex(ivar *IndexedVar) int { + if ivar.Used { + // ivar has already been bound, so the remapping step (if it was needed) + // has already occurred. + return ivar.Idx + } + if h.indexVarMap != nil { + // indexVarMap is non-nil, so we need to remap the index. + return h.indexVarMap[ivar.Idx] + } + // indexVarMap is nil, so we return the index as is. + return ivar.Idx +} + // BindIfUnbound ensures the IndexedVar is attached to this helper's container. // - for freshly created IndexedVars (with a nil container) this will bind in-place. // - for already bound IndexedVar, bound to this container, this will return the same ivar unchanged. @@ -135,9 +157,10 @@ func (h *IndexedVarHelper) BindIfUnbound(ivar *IndexedVar) (*IndexedVar, error) // We perform the range check always, even if the ivar is already // bound, as a form of safety assertion against misreuse of ivars // across containers. - if ivar.Idx < 0 || ivar.Idx >= len(h.vars) { + ivarIdx := h.getIndex(ivar) + if ivarIdx < 0 || ivarIdx >= len(h.vars) { return ivar, pgerror.Newf( - pgcode.UndefinedColumn, "invalid column ordinal: @%d", ivar.Idx+1) + pgcode.UndefinedColumn, "invalid column ordinal: @%d", ivarIdx+1) } if !ivar.Used { @@ -145,17 +168,30 @@ func (h *IndexedVarHelper) BindIfUnbound(ivar *IndexedVar) (*IndexedVar, error) // This container must also remember it has "seen" the variable // so that IndexedVarUsed() below returns the right results. // The IndexedVar() method ensures this. - *ivar = *h.IndexedVar(ivar.Idx) + *ivar = *h.IndexedVar(ivarIdx) return ivar, nil } - return h.IndexedVar(ivar.Idx), nil + return h.IndexedVar(ivarIdx), nil } return ivar, nil } // MakeIndexedVarHelper initializes an IndexedVarHelper structure. func MakeIndexedVarHelper(container IndexedVarContainer, numVars int) IndexedVarHelper { - return IndexedVarHelper{vars: make([]IndexedVar, numVars), container: container} + return MakeIndexedVarHelperWithRemapping(container, numVars, nil /* indexVarMap */) +} + +// MakeIndexedVarHelperWithRemapping initializes an IndexedVarHelper structure. +// An optional (it can be left nil) indexVarMap argument determines a mapping +// for indices of IndexedVars (see comment above for more details). +func MakeIndexedVarHelperWithRemapping( + container IndexedVarContainer, numVars int, indexVarMap []int, +) IndexedVarHelper { + return IndexedVarHelper{ + vars: make([]IndexedVar, numVars), + container: container, + indexVarMap: indexVarMap, + } } // AppendSlot expands the capacity of this IndexedVarHelper by one and returns @@ -252,7 +288,8 @@ var _ Visitor = &IndexedVarHelper{} // VisitPre implements the Visitor interface. func (h *IndexedVarHelper) VisitPre(expr Expr) (recurse bool, newExpr Expr) { if iv, ok := expr.(*IndexedVar); ok { - return false, h.IndexedVar(iv.Idx) + ivarIdx := h.getIndex(iv) + return false, h.IndexedVar(ivarIdx) } return true, expr } From baa64b7e8875aa3268b27e719d6d267d5d350ca8 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Tue, 5 Nov 2019 16:00:07 -0800 Subject: [PATCH 2/2] sem/tree: remove unused field from IndexedVar This commit removes 'bindInPlace' field from IndexedVar struct since it is not being used anywhere. Release note: None --- pkg/sql/sem/tree/indexed_vars.go | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pkg/sql/sem/tree/indexed_vars.go b/pkg/sql/sem/tree/indexed_vars.go index b739e86afca5..825b87b49ed8 100644 --- a/pkg/sql/sem/tree/indexed_vars.go +++ b/pkg/sql/sem/tree/indexed_vars.go @@ -34,9 +34,8 @@ type IndexedVarContainer interface { // represents a dynamic value. It defers calls to TypeCheck, Eval, String to an // IndexedVarContainer. type IndexedVar struct { - Idx int - Used bool - bindInPlace bool + Idx int + Used bool col NodeFormatter @@ -164,13 +163,6 @@ func (h *IndexedVarHelper) BindIfUnbound(ivar *IndexedVar) (*IndexedVar, error) } if !ivar.Used { - if ivar.bindInPlace { - // This container must also remember it has "seen" the variable - // so that IndexedVarUsed() below returns the right results. - // The IndexedVar() method ensures this. - *ivar = *h.IndexedVar(ivarIdx) - return ivar, nil - } return h.IndexedVar(ivarIdx), nil } return ivar, nil