From b4df74ea166c6ffe75c19d19e2ea750637737afc Mon Sep 17 00:00:00 2001 From: Arenatlx <314806019@qq.com> Date: Mon, 9 May 2022 19:06:33 +0800 Subject: [PATCH] cherry pick #34147 to release-5.3 Signed-off-by: ti-srebot --- planner/core/logical_plans.go | 13 + planner/funcdep/doc.go | 180 +++ planner/funcdep/fd_graph.go | 1215 ++++++++++++++++++++ planner/funcdep/only_full_group_by_test.go | 267 +++++ 4 files changed, 1675 insertions(+) create mode 100644 planner/funcdep/doc.go create mode 100644 planner/funcdep/fd_graph.go create mode 100644 planner/funcdep/only_full_group_by_test.go diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 78a41ee2bf826..94b6899c4487b 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -452,8 +452,21 @@ type LogicalSelection struct { // split into a list of AND conditions. Conditions []expression.Expression +<<<<<<< HEAD // having selection can't be pushed down, because it must above the aggregation. buildByHaving bool +======= + // apply operator's characteristic's FD setting. + fds.MakeNotNull(notnullColsUniqueIDs) + fds.AddConstants(constUniqueIDs) + for _, equiv := range equivUniqueIDs { + fds.AddEquivalence(equiv[0], equiv[1]) + } + fds.ProjectCols(outputColsUniqueIDs) + // just trace it down in every operator for test checking. + p.fdSet = fds + return fds +>>>>>>> 2f86eac3c... planner: introduce Cond-FD to maintain null-constraint FD (#34147) } // ExtractCorrelatedCols implements LogicalPlan interface. diff --git a/planner/funcdep/doc.go b/planner/funcdep/doc.go new file mode 100644 index 0000000000000..599439c025af5 --- /dev/null +++ b/planner/funcdep/doc.go @@ -0,0 +1,180 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package funcdep
+
+// Theory to Practice
+//
+// For a more rigorous examination of functional dependencies and their
+// interaction with various SQL operators, see the following Master's Thesis:
+//
+// Norman Paulley, Glenn. (2000).
+// Exploiting Functional Dependence in Query Optimization.
+// https://cs.uwaterloo.ca/research/tr/2000/11/CS-2000-11.thesis.pdf

+// TODO: Add the RFC design.

+// NOTE 1.
+// When handling a Lax FD, we don't care about the null values in the dependency, which means
+// that not-null coverage of the determinant alone is enough to make a Lax FD a strict one.

+// The definition of "lax" used in the paper differs from the definition used by this
+// library. For a lax dependency A~~>B, the paper allows this set of rows:
+//
+// a b
+// -------
+// 1 1
+// 1 NULL
+//
+// This alternate definition is briefly covered in section 2.5.3.2 of the paper (see definition
+// 2.19). The reason for this change is to allow a lax dependency to be upgraded to a strict
+// dependency more readily, needing only the determinant columns to be not-null rather than
+// both determinant and dependant columns.
+//
+// This is on the condition that, for definite values of the determinant of a Lax FD, there won't
+// be two identical definite determinant values with different definite dependant values. That's
+// true, because there is no way to derive this kind of FD.
+//
+// Even in our implementation of outer join, the only way to produce duplicate definite
+// determinants is the join predicate. But for now, we only maintain the equivalence and
+// some strict FD of it. 
+//
+// t(a,b) left join t1(c,d,e) on t.a = t1.c and b=1
+// a b | c d e
+// ------+----------------
+// 1 1 | 1 NULL 1
+// 1 2 | NULL NULL NULL
+// 2 1 | NULL NULL NULL
+//
+// Actually it's possible, the lax FD {a} -> {c} can be derived but is not that useful. We only
+// maintain the {c} ~> {a} for existence after the outer join. Besides, there are two Cond-FDs that
+// should be preserved, waiting to become visible again once a null-reject takes effect on the
+// null-constraint columns. (see below)
+//
+// NOTE 2.
+// When handling an outer join, it won't produce lax FDs with duplicate definite determinant values and
+// different dependency values.
+//
+// In implementation, we come across some lax FDs dependent on null-reject of some other cols. For
+// example.
+// t(a,b) left join t1(c,d,e) on t.a = t1.c and b=1
+// a b | c d e
+// ------+----------------
+// 1 1 | 1 NULL 1
+// 1 2 | NULL NULL NULL
+// 2 1 | NULL NULL NULL
+//
+// here the constant FD {} -> {b} won't exist after the outer join is done. Notice null-constraint
+// {c,d,e} -| {c,d,e}, this FD should be preserved and will be visible again when some null-reject
+// predicate takes effect on the null-constraint cols.
+//
+// It's the same for strict equivalence {t.a} = {t1.c}. Notice there is no lax equivalence here, because
+// the left side couldn't be guaranteed to be definite or null, like a=2 here. Let's collect all of these
+// on-condition FDs down, corresponding to a null-constraint column set, and name them Cond-FDs.
+//
+// Lax equivalences are theoretically possible, but they won't be constructed from an outer join unless
+// t already has a constant FD in column `a` here before the outer join runs. So the lax equivalence
+// has some pre-conditions as you see, and it couldn't cover the case shown above. Let us handle it like a
+// Cond-FD does.
+//
+// The FDs constructed from the join predicate should be considered as Cond-FDs, e.g. the equivalence of
+// {a} == {c} and constant FD {b} = 1 (if the join condition is e=1, it's here too). We can say that for
+// every matched row, these FDs are valid, while for the other rows, the inner side is supplied with null
+// rows. So these FDs are stored as ncEdges with an nc condition of all inner table cols.
+//
+// We introduce invisible FDs with null-constraint columns, named Cond-FDs, to solve the problem above.
+// For multiple embedded left joins, we take the following case as an example.
+// a,b c,d,e
+// -----------+-----------
+// 1 2 | 1 1 1
+// 2 2 |
+// -----------+-----------
+//
+// left join on (a=c) res:
+// a b c d e
+// -------------------------
+// 1 2 1 1 1
+// 2 2 +- null null null -+
+// | |
+// +-------------------+
+// \
+// \
+// the Cond-FD is < a=c with {c,d,e} >, the latter being the null-constraint cols
+//
+// e,f
+// -----------------------
+// 1 2
+// 2 2
+// 3 3
+// -----------------------
+//
+// left join on (e=a) res:
+// e f a b c d e
+// -----------------------------------
+// 1 2 1 2 1 1 1
+// 2 2 2 2 +- null null null --+---------------> Cond-FD still exists.
+// 3 3 +-null null | null null null |---+
+// | +-------------------+ |
+// +-----------------------------------+-----------> New Cond-FD occurs.
+//
+//
+// the old Cond-FD with null-constraint column set {c,d,e} is preserved because the newly appended cols are all null too.
+// the new Cond-FD with null-constraint column set {a,b,c,d,e} is also meaningful; even if the null-reject column
+// is one of {c,d,e}, which may reduce one of the matched rows out of the result, the equivalence {a}={e} still exists. 
+//
+// Provided that the result of the first left join is like:
+// left join on (a=c) res:
+// a b c d e
+// ---------------------------
+// 1 2 1 1 1
+// null 2 null null null
+//
+// THEN: left join on (e=a) res:
+// e f a b c d e
+// ---------------------------------
+// 1 2 1 2 1 1 1
+// 2 2 null null null null null
+// 3 3 null null null null null
+//
+// Even so, the cases of the old Cond-FD and the new Cond-FD still exist. It seems the null-constraint column set of
+// the old Cond-FD {c,d,e} can be expanded as {a,b,c,d,e} visually, but we couldn't derive the inference of the join predicate
+// (e=a). The null-reject of column `a` couldn't bring visibility back to the old Cond-FD theoretically; it just happened
+// to refuse that row with a null value in column a.
+//
+// Think about adding one more row in the first left join result.
+//
+// left join on (a=c) res:
+// a b c d e
+// ---------------------------
+// 1 2 1 1 1
+// null 2 null null null
+// 3 3 null null null
+//
+// THEN: left join on (e=a) res:
+// e f a b c d e
+// ---------------------------------
+// 1 2 1 2 1 1 1
+// 2 2 null null null null null
+// 3 3 3 3 null null null
+//
+// Conclusion:
+// As you can see, it's right that we couldn't derive the inference of the join predicate (e=a) to expand the old Cond-FD's nc
+// {c,d,e} as {a,b,c,d,e}. So the rule for Cond-FD is quite simple: just keep the old ncEdges from the right side, appending
+// the new ncEdges in the current left join.
+//
+// If the first left join result is on the outer side of the second left join, just keep the ncEdges from the left side as well,
+// appending the new ncEdges in the current left join.
+//
+// For an inner join, neither side of the join result will be appended with null-supplied rows, so we can simply collect
+// the ncEdges from both join sides together. 
diff --git a/planner/funcdep/fd_graph.go b/planner/funcdep/fd_graph.go new file mode 100644 index 0000000000000..4dee30d437a52 --- /dev/null +++ b/planner/funcdep/fd_graph.go @@ -0,0 +1,1215 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package funcdep + +import ( + "fmt" + "strings" + + "github.com/pingcap/tidb/util/logutil" +) + +type fdEdge struct { + // functional dependency = determinants -> dependencies + // determinants = from + // dependencies = to + from FastIntSet + to FastIntSet + // The value of the strict and eq bool forms the four kind of edges: + // functional dependency, lax functional dependency, strict equivalence constraint, lax equivalence constraint. + // And if there's a functional dependency `const` -> `column` exists. We would let the from side be empty. + strict bool + equiv bool + + // FD with non-nil conditionNC is hidden in FDSet, it will be visible again when at least one null-reject column in conditionNC. + // conditionNC should be satisfied before some FD make vision again, it's quite like lax FD to be strengthened as strict + // one. But the constraints should take effect on specified columns from conditionNC rather than just determinant columns. + conditionNC *FastIntSet +} + +// FDSet is the main portal of functional dependency, it stores the relationship between (extended table / physical table)'s +// columns. 
For more theory about this design, ref the head comments in the funcdep/doc.go. +type FDSet struct { + fdEdges []*fdEdge + // after left join, according to rule 3.3.3, it may create a lax FD from inner equivalence + // cols pointing to outer equivalence cols. eg: t left join t1 on t.a = t1.b, leading a + // lax FD from t1.b ~> t.a, this lax attribute is coming from supplied null value to all + // left rows, once there is a null-refusing predicate on the inner side on upper layer, this + // can be equivalence again. (the outer rows left are all coming from equal matching) + ncEdges []*fdEdge + // NotNullCols is used to record the columns with not-null attributes applied. + // eg: {1} ~~> {2,3}, when {2,3} not null is applied, it actually does nothing. + // but we should record {2,3} as not-null down for the convenience of transferring + // Lax FD: {1} ~~> {2,3} to strict FD: {1} --> {2,3} with {1} as not-null next time. + NotNullCols FastIntSet + // HashCodeToUniqueID map the expression's hashcode to a statement allocated unique + // ID quite like the unique ID bounded with column. It's mainly used to add the expr + // to the fdSet as an extended column. + HashCodeToUniqueID map[string]int + // GroupByCols is used to record columns / expressions that under the group by phrase. + GroupByCols FastIntSet + HasAggBuilt bool + + // todo: when multi join and across select block, this may need to be maintained more precisely. + +} + +// ClosureOfStrict is exported for outer usage. +func (s *FDSet) ClosureOfStrict(colSet FastIntSet) FastIntSet { + return s.closureOfStrict(colSet) +} + +// closureOfStrict is to find strict fd closure of X with respect to F. +// A -> B = colSet -> { resultIntSet } +// eg: considering closure F: {A-> CD, B -> E}, and input is {AB} +// res: AB -> {CDE} (AB is included in trivial FD) +// The time complexity is O(n^2). +func (s *FDSet) closureOfStrict(colSet FastIntSet) FastIntSet { + resultSet := NewFastIntSet() + // self included. 
+ resultSet.UnionWith(colSet) + for i := 0; i < len(s.fdEdges); i++ { + fd := s.fdEdges[i] + if fd.strict && fd.from.SubsetOf(resultSet) && !fd.to.SubsetOf(resultSet) { + resultSet.UnionWith(fd.to) + // If the closure is updated, we redo from the beginning. + i = -1 + } + // why this? it is caused by our definition of equivalence as {superset} == {superset}, + // which is also seen as {superset} --> {superset}. but when we compute the transitive + // closure, `fd.from.SubsetOf(resultSet)` is not transitive here. Actually, we can also + // see the equivalence as {single element} == {superset} / {single element} --> {superset}. + if fd.equiv && fd.from.Intersects(resultSet) && !fd.to.SubsetOf(resultSet) { + resultSet.UnionWith(fd.to) + i = -1 + } + } + return resultSet +} + +// ClosureOfLax is exported for outer usage. +func (s *FDSet) ClosureOfLax(colSet FastIntSet) FastIntSet { + return s.closureOfLax(colSet) +} + +// ClosureOfLax is used to find lax fd closure of X with respect to F. +func (s *FDSet) closureOfLax(colSet FastIntSet) FastIntSet { + // Lax dependencies are not transitive (see figure 2.1 in the paper for + // properties that hold for lax dependencies), so only include them if they + // are reachable in a single lax dependency step from the input set. + laxOneStepReached := NewFastIntSet() + // self included. + laxOneStepReached.UnionWith(colSet) + for i := 0; i < len(s.fdEdges); i++ { + fd := s.fdEdges[i] + // A ~~> B && A == C && C ~~> D: given A, BD can be included via Lax FDs (plus self A and equiv C). + // A ~~> B && B == C: given A, BC can be included via Lax FDs (plus self A). + // which means both dependency and determinant can extend Lax exploration via equivalence. + // + // Besides, strict is a kind of strong lax FDs, result computed above should be unionised with strict FDs closure. + if fd.equiv && fd.from.Intersects(laxOneStepReached) && !fd.to.SubsetOf(laxOneStepReached) { + // equiv can extend the lax-set's access paths. 
+ laxOneStepReached.UnionWith(fd.to) + i = -1 + } + if !fd.strict && !fd.equiv && fd.from.SubsetOf(laxOneStepReached) && !fd.to.SubsetOf(laxOneStepReached) { + // lax FDs. + laxOneStepReached.UnionWith(fd.to) + } + } + // Unionised strict FDs + laxOneStepReached.UnionWith(s.closureOfStrict(colSet)) + return laxOneStepReached +} + +// closureOfEquivalence is to find strict equivalence closure of X with respect to F. +func (s *FDSet) closureOfEquivalence(colSet FastIntSet) FastIntSet { + resultSet := NewFastIntSet() + // self included. + resultSet.UnionWith(colSet) + for i := 0; i < len(s.fdEdges); i++ { + // equivalence is maintained as {superset} == {superset}, we don't need to do transitive computation. + // but they may multi equivalence closure, eg: {a,b}=={a,b}, {c,d} =={c,d}, when adding b=c, we need traverse them all. + fd := s.fdEdges[i] + if fd.equiv { + if fd.from.Intersects(resultSet) && !fd.to.SubsetOf(resultSet) { + resultSet.UnionWith(fd.to) + } + } + } + return resultSet +} + +// InClosure is used to judge whether fd: setA -> setB can be inferred from closure s. +// It's a short-circuit version of the `closureOf`. +func (s *FDSet) InClosure(setA, setB FastIntSet) bool { + if setB.SubsetOf(setA) { + return true + } + currentClosure := NewFastIntSet() + // self included. + currentClosure.UnionWith(setA) + for i := 0; i < len(s.fdEdges); i++ { + fd := s.fdEdges[i] + if fd.strict && fd.from.SubsetOf(currentClosure) && !fd.to.SubsetOf(currentClosure) { + // once fd.from is subset of setA, which means fd is part of our closure; + // when fd.to is not subset of setA itself, it means inference result is necessary to add; (closure extending) + currentClosure.UnionWith(fd.to) + if setB.SubsetOf(currentClosure) { + return true + } + // If the closure is updated, we redo from the beginning. + i = -1 + } + // why this? it is caused by our definition of equivalence as {superset} == {superset}, + // which is also seen as {superset} --> {superset}. 
but when we compute the transitive + // closure, `fd.from.SubsetOf(resultSet)` is not transitive here. Actually, we can also + // see the equivalence as {single element} == {superset} / {single element} --> {superset}. + if fd.equiv && fd.from.Intersects(currentClosure) && !fd.to.SubsetOf(currentClosure) { + currentClosure.UnionWith(fd.to) + if setB.SubsetOf(currentClosure) { + return true + } + i = -1 + } + } + return false +} + +// ReduceCols is used to minimize the determinants in one fd input. +// function dependency = determinants -> dependencies +// given: AB -> XY, once B can be inferred from current closure when inserting, take A -> XY instead. +func (s *FDSet) ReduceCols(colSet FastIntSet) FastIntSet { + // Suppose the colSet is A and B, we have A --> B. Then we only need A since B' value is always determined by A. + var removed, result = NewFastIntSet(), NewFastIntSet() + result.CopyFrom(colSet) + for k, ok := colSet.Next(0); ok; k, ok = colSet.Next(k + 1) { + removed.Insert(k) + result.Remove(k) + // If the removed one is not dependent with the result. We add the bit back. + if !s.InClosure(result, removed) { + removed.Remove(k) + result.Insert(k) + } + } + return result +} + +// AddStrictFunctionalDependency is to add `STRICT` functional dependency to the fdGraph. +func (s *FDSet) AddStrictFunctionalDependency(from, to FastIntSet) { + s.addFunctionalDependency(from, to, true, false) +} + +// AddLaxFunctionalDependency is to add `LAX` functional dependency to the fdGraph. +func (s *FDSet) AddLaxFunctionalDependency(from, to FastIntSet) { + s.addFunctionalDependency(from, to, false, false) +} + +// AddNCFunctionalDependency is to add conditional functional dependency to the fdGraph. 
+func (s *FDSet) AddNCFunctionalDependency(from, to, nc FastIntSet, strict, equiv bool) { + // Since nc edge is invisible by now, just collecting them together simply, once the + // null-reject on nc cols is satisfied, let's pick them out and insert into the fdEdge + // normally. + s.ncEdges = append(s.ncEdges, &fdEdge{ + from: from, + to: to, + strict: strict, + equiv: equiv, + conditionNC: &nc, + }) +} + +// addFunctionalDependency will add strict/lax functional dependency to the fdGraph. +// eg: +// CREATE TABLE t (a int key, b int, c int, d int, e int, UNIQUE (b,c)) +// strict FD: {a} --> {a,b,c,d,e} && lax FD: {b,c} ~~> {a,b,c,d,e} will be added. +// stored FD: {a} --> {b,c,d,e} && lax FD: {b,c} ~~> {a,d,e} is determinant eliminated. +// +// To reduce the edge number, we limit the functional dependency when we insert into the +// set. The key code of insert is like the following codes. +func (s *FDSet) addFunctionalDependency(from, to FastIntSet, strict, equiv bool) { + // trivial FD, refused. + if to.SubsetOf(from) { + return + } + + // exclude the intersection part. + if to.Intersects(from) { + to.DifferenceWith(from) + } + + // reduce the determinants. + from = s.ReduceCols(from) + + newFD := &fdEdge{ + from: from, + to: to, + strict: strict, + equiv: equiv, + } + + swapPointer := 0 + added := false + // the newFD couldn't be superSet of existed one A and be subset of the other existed one B at same time. + // Because the subset relationship between A and B will be replaced previously. + for i := range s.fdEdges { + fd := s.fdEdges[i] + // If the new one is stronger than the old one. Just replace it. + if newFD.implies(fd) { + if added { + continue + } + fd.from = from + fd.to = to + fd.strict = strict + fd.equiv = equiv + added = true + } else if !added { + // There's a strong one. No need to add. + if fd.implies(newFD) { + added = true + } else if fd.strict && !fd.equiv && fd.from.Equals(from) { + // We can use the new FD to extend the current one. 
+ // eg: A -> BC, A -> CE, they couldn't be the subset of each other, union them. + // res: A -> BCE + fd.to.UnionWith(to) + added = true + } + } + // If the current one is not eliminated, add it to the result. + s.fdEdges[swapPointer] = s.fdEdges[i] + swapPointer++ + } + s.fdEdges = s.fdEdges[:swapPointer] + + // If it's still not added. + if !added { + s.fdEdges = append(s.fdEdges, newFD) + } +} + +// implies is used to shrink the edge size, keeping the minimum of the functional dependency set size. +func (e *fdEdge) implies(otherEdge *fdEdge) bool { + // The given one's from should be larger than the current one and the current one's to should be larger than the given one. + // STRICT FD: + // A --> C is stronger than AB --> C. --- YES + // A --> BC is stronger than A --> C. --- YES + // + // LAX FD: + // 1: A ~~> C is stronger than AB ~~> C. --- YES + // 2: A ~~> BC is stronger than A ~~> C. --- NO + // The precondition for 2 to be strict FD is much easier to satisfied than 1, only to + // need {a,c} is not null. So we couldn't merge this two to be one lax FD. + // but for strict/equiv FD implies lax FD, 1 & 2 is implied both reasonably. + lhsIsLax := !e.equiv && !e.strict + rhsIsLax := !otherEdge.equiv && !otherEdge.strict + if lhsIsLax && rhsIsLax { + if e.from.SubsetOf(otherEdge.from) && e.to.Equals(otherEdge.to) { + return true + } + return false + } + if e.from.SubsetOf(otherEdge.from) && otherEdge.to.SubsetOf(e.to) { + // The given one should be weaker than the current one. + // So the given one should not be more strict than the current one. + // The given one should not be equivalence constraint if the given one is not equivalence constraint. + if (e.strict || !otherEdge.strict) && (e.equiv || !otherEdge.equiv) { + return true + } + // 1: e.strict + e.equiv => e > o + // 2: e.strict + !o.equiv => e >= o + // 3: !o.strict + e.equiv => e > o + // 4: !o.strict + !o.equiv => o.lax + } + return false +} + +// addEquivalence add an equivalence set to fdSet. 
+// when adding an equivalence between column a and b: +// 1: they may be integrated into the origin equivalence superset if this two enclosure have some id in common. +// 2: they can be used to extend the existed constant closure, consequently leading some reduce work: see addConstant. +// 3: they self can be used to eliminate existed strict/lax FDs, see comments below. +func (s *FDSet) addEquivalence(eqs FastIntSet) { + var addConst bool + // get equivalence closure. + eqClosure := s.closureOfEquivalence(eqs) + s.fdEdges = append(s.fdEdges, &fdEdge{from: eqClosure.Copy(), to: eqClosure.Copy(), strict: true, equiv: true}) + + for i := 0; i < len(s.fdEdges)-1; i++ { + fd := s.fdEdges[i] + + if fd.isConstant() { + // If any equivalent column is a constant, then all are constants. + if fd.to.Intersects(eqClosure) && !eqClosure.SubsetOf(fd.to) { + // new non-constant-subset columns need to be merged to the constant closure. + // Since adding constant wil induce extra FD reshapes, we call addConstant directly. + addConst = true + } + } else if fd.from.SubsetOf(eqClosure) { + if fd.equiv { + // this equivalence is enclosed in the super closure appended above, remove it. + s.fdEdges = append(s.fdEdges[:i], s.fdEdges[i+1:]...) + i-- + } else { + // Since from side are all in equivalence closure, we can eliminate some + // columns of dependencies in equivalence closure. because equivalence is + // a stronger relationship than a strict or lax dependency. + // eg: {A,B} --> {C,D}, {A,B,D} in equivalence, FD can be shortly as {A,B} --> {C} + if fd.removeColumnsToSide(eqClosure) { + // Once the to side is empty, remove the FD. + s.fdEdges = append(s.fdEdges[:i], s.fdEdges[i+1:]...) + i-- + } + } + } + } + // addConstant logic won't induce recomputing of the logic above recursively, so do it here. 
+ if addConst { + // {} --> {a} + {a,b,c} --> {a,b,c} leading to extension {} --> {a,b,c} + s.AddConstants(eqClosure) + } +} + +// AddEquivalence take two column id as parameters, establish a strict equivalence between +// this two column which may be enclosed a superset of equivalence closure in the fdSet. +// +// For the equivalence storage, for simplicity, we only keep the superset relationship of +// equivalence since equivalence has the reflexivity. +// +// eg: a==b, b==c, a==e +// +// in our fdSet, the equivalence will be stored like: {a, b, c, e} == {a, b, c,e} +// According and characteristic and reflexivity, each element in the equivalence enclosure +// can derive whatever in the same enclosure. +func (s *FDSet) AddEquivalence(from, to FastIntSet) { + // trivial equivalence, refused. + if to.SubsetOf(from) { + return + } + s.addEquivalence(from.Union(to)) +} + +// AddConstants adds a strict FD to the source which indicates that each of the given column +// have the same constant value for all rows, or same null value for all rows if it's nullable. +// +// {} --> {a} +// +// there are several usage for this kind of FD, for example, constant FD column can exist in the +// select fields not matter whatever the group by column is; distinct predicate can be eliminated +// for these columns as well. +// +// when adding columns, be cautious that +// 1: constant can be propagated in the equivalence/strict closure, turning strict FD as a constant one. +// 2: constant can simplify the strict FD both in determinants side and dependencies side. +// 3: constant can simplify the lax FD in the dependencies side. +func (s *FDSet) AddConstants(cons FastIntSet) { + if cons.IsEmpty() { + return + } + // 1: {} --> {a}, {} --> {b}, when counting the closure, it will collect all constant FD if it has. + // 2: {m,n} --> {x, y}, once the m,n is subset of constant closure, x,y must be constant as well. 
+ // 3: {a,b,c} == {a,b,c}, equiv dependency is also strict FD included int the closure computation here. + cols := s.closureOfStrict(cons) + s.fdEdges = append(s.fdEdges, &fdEdge{to: cols.Copy(), strict: true}) + + // skip the last, newly append one. + for i := 0; i < len(s.fdEdges)-1; i++ { + shouldRemoved := false + fd := s.fdEdges[i] + + if !fd.equiv { + if fd.strict { + // Constant columns can be removed from the determinant of a strict + // FD. If all determinant columns are constant, then the entire FD + // can be removed, since this means that the dependant columns must + // also be constant (and were part of constant closure added to the + // constant FD above). + if fd.removeColumnsFromSide(cols) { + // transfer to constant FD which is enclosed in cols above. + shouldRemoved = true + } + } + // pre-condition NOTE 1 in doc.go, it won't occur duplicate definite determinant of Lax FD. + // for strict or lax FDs, both can reduce the dependencies side columns with constant closure. + if fd.removeColumnsToSide(cols) { + shouldRemoved = true + } + } + if shouldRemoved { + s.fdEdges = append(s.fdEdges[:i], s.fdEdges[i+1:]...) + i-- + } + } +} + +// removeColumnsFromSide remove the columns from determinant side of FDs in source. +// +// eg: {A B} --> {C} +// +// once B is a constant, whether values of c in difference rows have the same values or are all +// null is only determined by A, which can be used to simplify the strict FDs. +// +// Attention: why this reduction can not be applied to lax FDs? +// +// According to the lax definition, once determinant side have the null value, whatever dependencies +// would be. So let B be null value, once two row like: <1, null> and <1, null> (false interpreted), +// their dependencies may would be <3>, <4>, once we eliminate B here, FDs looks like: <1>,<1> lax +// determinate <3>,<4>. 
+func (e *fdEdge) removeColumnsFromSide(cons FastIntSet) bool { + if e.from.Intersects(cons) { + e.from = e.from.Difference(cons) + } + return e.isConstant() +} + +// isConstant returns whether this FD indicates a constant FD which means {} --> {...} +func (e *fdEdge) isConstant() bool { + return e.from.IsEmpty() +} + +// isEquivalence returns whether this FD indicates an equivalence FD which means {xyz...} == {xyz...} +func (e *fdEdge) isEquivalence() bool { + return e.equiv && e.from.Equals(e.to) +} + +// removeColumnsToSide remove the columns from dependencies side of FDs in source. +// +// eg: {A} --> {B, C} +// +// once B is a constant, only the C's value can be determined by A, this kind of irrelevant coefficient +// can be removed in the to side both for strict and lax FDs. +func (e *fdEdge) removeColumnsToSide(cons FastIntSet) bool { + if e.to.Intersects(cons) { + e.to = e.to.Difference(cons) + } + return e.to.IsEmpty() +} + +// ConstantCols returns the set of columns that will always have the same value for all rows in table. +func (s *FDSet) ConstantCols() FastIntSet { + for i := 0; i < len(s.fdEdges); i++ { + if s.fdEdges[i].isConstant() { + return s.fdEdges[i].to + } + } + return FastIntSet{} +} + +// EquivalenceCols returns the set of columns that are constrained to equal to each other. +func (s *FDSet) EquivalenceCols() (eqs []*FastIntSet) { + for i := 0; i < len(s.fdEdges); i++ { + if s.fdEdges[i].isEquivalence() { + // return either side is the same. + eqs = append(eqs, &s.fdEdges[i].from) + } + } + return eqs +} + +// MakeNotNull modify the FD set based the listed column with NOT NULL flags. +// Most of the case is used in the derived process after predicate evaluation, +// which can upgrade lax FDs to strict ones. +func (s *FDSet) MakeNotNull(notNullCols FastIntSet) { + notNullCols.UnionWith(s.NotNullCols) + notNullColsSet := s.closureOfEquivalence(notNullCols) + // make nc FD visible. 
+ for i := 0; i < len(s.ncEdges); i++ { + fd := s.ncEdges[i] + if fd.conditionNC.Intersects(notNullColsSet) { + // condition satisfied. + s.ncEdges = append(s.ncEdges[:i], s.ncEdges[i+1:]...) + i-- + if fd.isConstant() { + s.AddConstants(fd.to) + } else if fd.equiv { + s.AddEquivalence(fd.from, fd.to) + newNotNullColsSet := s.closureOfEquivalence(notNullColsSet) + if !newNotNullColsSet.Difference(notNullColsSet).IsEmpty() { + notNullColsSet = newNotNullColsSet + // expand not-null set. + i = -1 + } + } else { + s.addFunctionalDependency(fd.from, fd.to, fd.strict, fd.equiv) + } + } + } + // make origin FD strengthened. + for i := 0; i < len(s.fdEdges); i++ { + fd := s.fdEdges[i] + if fd.strict { + continue + } + // unique lax can be made strict only if determinant are not null. + if fd.from.SubsetOf(notNullColsSet) { + // we don't need to clean the old lax FD because when adding the corresponding strict one, the lax + // one will be implied by that and itself is removed. + s.AddStrictFunctionalDependency(fd.from, fd.to) + // add strict FDs will cause reconstruction of FDSet, re-traverse it. + i = -1 + } + } + s.NotNullCols = notNullColsSet +} + +// MakeNullable make the fd's NotNullCols to be cleaned, after the both side fds are handled it can be nullable. +func (s *FDSet) MakeNullable(nullableCols FastIntSet) { + s.NotNullCols.DifferenceWith(nullableCols) +} + +// MakeCartesianProduct records fdSet after the impact of Cartesian Product of (T1 x T2) is made. +// 1: left FD is reserved. +// 2: right FD is reserved. +// Actually, for two independent table, FDs won't affect (implies or something) each other, appending +// them together is adequate. But for constant FDs, according to our definition, we should merge them +// as a larger superset pointing themselves. +func (s *FDSet) MakeCartesianProduct(rhs *FDSet) { + for i := 0; i < len(rhs.fdEdges); i++ { + fd := rhs.fdEdges[i] + if fd.isConstant() { + // both from or to side is ok since {superset} --> {superset}. 
+ s.AddConstants(fd.to) + } else { + s.fdEdges = append(s.fdEdges, fd) + } + } + // just simple merge the ncEdge from both side together. + s.ncEdges = append(s.ncEdges, rhs.ncEdges...) + // todo: add strict FD: (left key + right key) -> all cols. + // maintain a key? +} + +// MakeOuterJoin generates the records the fdSet of the outer join. +// +// We always take the left side as the row-supplying side and the right side as the null-supplying side. (swap it if not) +// As we know, the outer join would generate null extended rows compared with the inner join. +// So we cannot do the same thing directly with the inner join. This function deals with the special cases of the outer join. +// +// Knowledge: +// 1: the filter condition related to the lhs column won't filter predicate-allowed rows and refuse null rows (left rows always remain) +// 2: the filter condition related to the rhs column won't filter NULL rows, although the filter has the not-null attribute. (null-appending happened after that) +// +// Notification: +// 1: the origin FD from the left side (rows-supplying) over the result of outer join filtered are preserved because +// it may be duplicated by multi-matching, but actually, they are the same left rows (don't violate FD definition). +// +// 2: the origin FD from the right side (nulls-supplying) over the result of outer join filtered may not be valid anymore. +// +// <1> strict FD may be wakened as a lax one. But if at least one non-NULL column is part of the determinant, the +// strict FD can be preserved. +// a b | c d e +// ------+---------------- +// 1 1 | 1 NULL 1 +// 1 2 | NULL NULL NULL +// 2 1 | NULL NULL NULL +// left join with (a,b) * (c,d,e) on (a=c and b=1), if there is a strict FD {d} -> {e} on the rhs. After supplied +// with null values, {d} -> {e} are degraded to a lax one {d} ~~> {e} as you see. the origin and supplied null value +// for d column determine different dependency. 
NULL -> 1 and NULL -> NULL which breaks strict FD definition.
+//
+// If the determinant contains at least one not-null column, for example c here, FD like {c,d} -> {e} can survive
+// after the left join. Because you cannot find two identical keys, one from the origin rows and the other one from the
+// supplied rows.
+//
+// For lax FD, the supplied rows of null values don't affect lax FD itself. So we can keep it.
+//
+// <2> The FDSet should remove constant FD since null values may be substituted for some unmatched left rows. NULL is not a
+// constant anymore.
+//
+// <3> equivalence FD should be removed since one substituted null value is not equal to the other substituted null value.
+//
+// 3: the newly added FD from filters should take some consideration as below:
+//
+// <1> strict/lax FD: join key filter conditions can not produce new strict/lax FD yet (knowledge: 1&2).
+//
+// <2> constant FD from the join conditions is only used for checking other FD. We cannot keep it.
+// a b | c d
+// -------+---------
+// 1 1 | 1 1
+// 1 2 | NULL NULL
+// left join with (a,b) * (c,d) on (a=c and d=1), some rhs rows will be substituted with null values, and FD on rhs
+// {d=1} are lost.
+//
+// a b | c d
+// -------+---------
+// 1 1 | 1 1
+// 1 2 | NULL NULL
+// left join with (a,b) * (c,d) on (a=c and b=1), it only gives the pass to the first matching; other lhs rows are still
+// kept and appended with null values. So the FD on rhs {b=1} is not applicable to lhs rows.
+//
+// above all: constant FDs are lost
+//
+// <3.1> equivalence FD: when the left join conditions only contain equivalence FD (EFD for short below) across left and right
+// cols and no other `LEFT` condition on the (left-side cols except the cols in EFD's from) to filter the left join results, we can maintain the strict
+// FD from EFD's `from` side to EFD's `to` side over the left join result. 
+// a b | c d e
+// ------+----------------
+// 1 1 | 1 NULL 1
+// 1 2 | NULL NULL NULL
+// 2 1 | NULL NULL NULL
+// Neg eg: left join with (a,b) * (c,d,e) on (a=c and b=1), the other condition b=1 will filter the result, causing the left row (1, 2)
+// mismatched with right row (1, NULL, 1) by a=c, consequently the left row is appended as (1,2,null,null,null), which
+// will break the FD: {a} -> {c} for key a=1 with different c=1/null.
+// a b | c d e
+// ------+----------------
+// 1 1 | NULL NULL NULL
+// 2 1 | NULL NULL NULL
+// Pos eg: if the filter is on EFD's `from` cols, it's ok. Let's say: (a,b) * (c,d,e) on (a=c and a=2), a=2 alone won't lead to the
+// same key a having both a matched c and a mismatched NULL; the neg case result is changed as above, so strict FD {a} -> {c} can exist.
+// a b | c d e
+// ------+----------------
+// 1 1 | 1 NULL 1
+// 1 2 | NULL NULL NULL
+// 2 1 | NULL NULL NULL
+// Neg eg: left join with (a,b) * (c,d,e) on (a=c and b=c), two EFDs here, where b=c can also refuse some rows joined by a=c,
+// consequently applying it with NULL as (1 2 | NULL NULL NULL), leading to the same key a having different values 1/NULL. But
+// macroscopically, we can combine {a,b} together as the strict FD's from side, so the new FD {a,b} -> {c} is secured. For the case
+// of (a=c and b=e), the FD is {a, b} -> {c, e}
+//
+// conclusion: without this kind of limited left conditions to judge the join match, we can say: FD {a} -> {c} exists.
+//
+// <3.2> equivalence FD: when the determinant and dependencies from an equivalence FD of the join condition each cover a strict
+// FD of the left / right side, after joining, we can extend the left side strict FD's dependencies to all cols.
+// a b | c d e
+// ------+----------------
+// 1 1 | 1 NULL 1
+// 2 2 | NULL NULL NULL
+// 3 1 | NULL NULL NULL
+// left join with (a,b) * (c,d,e) on (a=c and b=1). Supposing that the left `a` is a strict Key and the right `c` is a strict Key too.
+// Key means the strict FD can determine all cols from that table. 
+// case 1: left join matched
+// one left row matches one / multiple rows from the right side; since `c` strictly determines all cols from the right table, so
+// {a} == {c} --> {all cols in right}, according to the transitive rule of strict FD, we get {a} --> {all cols in right}
+// case 2: left join mismatch
+// mismatched rows from the left side are unique originally; even appended with NULL values from the right side, they still
+// strictly determine themselves and even all the rows after the left join.
+// conclusion combined:
+// If there is an equivalence covering both strict Keys from the right and left, we can create a new strict FD: {columns of the left side of the join in the equivalence} -> {all columns after join}.
+//
+// <3.3> equivalence FD: let's see equivalence FD as double-directed strict FD from join equal conditions, and we only keep the
+// rhs ~~> lhs.
+// a b | c d e
+// ------+----------------
+// 1 1 | 1 NULL 1
+// 1 2 | NULL NULL NULL
+// 2 1 | NULL NULL NULL
+// left join with (a,b) * (c,d,e) on (a=c and b=1). From the join equivalence condition we can derive a new FD {ac} == {ac}.
+// But since there are some supplied null values in the c column, we don't guarantee {ac} == {ac} yet, and so for {a} -> {c},
+// because two identical determinant keys {1} can point to different dependencies {1} & {NULL}. But in return, FDs like {c} -> {a}
+// are degraded to the corresponding lax one.
+//
+// 4: the newly formed FD {left primary key, right primary key} -> {all columns} are preserved in spite of the null-supplied rows.
+// 5: There's no join key and no filters from the outer side. The join case is a cartesian product. In this case,
+// the strict equivalence classes still exist.
+// - If the right side has no row, we will supply null-extended rows, then the value of any column is NULL, and the equivalence class exists.
+// - If the right side has rows, no row is filtered out after the filters since no row of the outer side is filtered out. Hence, the equivalence class remains. 
+// +func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols FastIntSet, opt *ArgOpts) { + // copy down the left PK and right PK before the s has changed for later usage. + leftPK, ok1 := s.FindPrimaryKey() + rightPK, ok2 := innerFDs.FindPrimaryKey() + copyLeftFDSet := &FDSet{} + copyLeftFDSet.AddFrom(s) + copyRightFDSet := &FDSet{} + copyRightFDSet.AddFrom(innerFDs) + + for _, edge := range innerFDs.fdEdges { + // Rule #2.2, constant FD are removed from right side of left join. + if edge.isConstant() { + continue + } + // Rule #2.3, equivalence FD are removed from right side of left join. + if edge.equiv { + continue + } + // Rule #2.1, lax FD can be kept after the left join. + if !edge.strict { + s.addFunctionalDependency(edge.from, edge.to, edge.strict, edge.equiv) + continue + } + // Rule #2.1, strict FD can be kept when determinant contains not null column, otherwise, downgraded to the lax one. + // + // If the one of the column from the inner child's functional dependency's left side is not null, this FD can be remained. + // This is because that the outer join would generate null-extended rows. So if at least one row from the left side + // is not null. We can guarantee that the there's no same part between the original rows and the generated rows. + // So the null extended rows would not break the original functional dependency. + if edge.from.Intersects(innerFDs.NotNullCols) { + // One of determinant are not null column, strict FD are kept. + // According knowledge #2, we can't take use of right filter's not null attribute. + s.addFunctionalDependency(edge.from, edge.to, edge.strict, edge.equiv) + } else { + // Otherwise, the strict FD are downgraded to a lax one. + s.addFunctionalDependency(edge.from, edge.to, false, edge.equiv) + } + } + s.ncEdges = append(s.ncEdges, innerFDs.ncEdges...) 
+ leftCombinedFDFrom := NewFastIntSet() + leftCombinedFDTo := NewFastIntSet() + for _, edge := range filterFDs.fdEdges { + // Rule #3.2, constant FD are removed from right side of left join. + if edge.isConstant() { + s.AddNCFunctionalDependency(edge.from, edge.to, innerCols, edge.strict, edge.equiv) + continue + } + // Rule #3.3, we only keep the lax FD from right side pointing the left side. + if edge.equiv { + equivColsRight := edge.from.Intersection(innerCols) + equivColsLeft := edge.from.Intersection(outerCols) + // equivalence: {superset} --> {superset}, either `from` or `to` side is ok here. + // Rule 3.3.1 + if !opt.SkipFDRule331 { + if equivColsLeft.Len() > 0 && equivColsRight.Len() > 0 { + leftCombinedFDFrom.UnionWith(equivColsLeft) + leftCombinedFDTo.UnionWith(equivColsRight) + } + } + + // Rule 3.3.2 + rightAllCols := copyRightFDSet.AllCols() + leftAllCols := copyLeftFDSet.AllCols() + coveringStrictKeyRight := rightAllCols.SubsetOf(copyRightFDSet.ClosureOfStrict(equivColsRight)) + coveringStrictKeyLeft := leftAllCols.SubsetOf(copyLeftFDSet.closureOfStrict(equivColsLeft)) + if coveringStrictKeyLeft && coveringStrictKeyRight { + // find the minimum strict Key set, and add + s.addFunctionalDependency(copyLeftFDSet.ReduceCols(equivColsLeft), rightAllCols.Union(leftAllCols), true, false) + } + + // Rule 3.3.3 + // need to break down the superset of equivalence, adding each lax FD of them. + laxFDFrom := equivColsRight + laxFDTo := equivColsLeft + for i, ok := laxFDFrom.Next(0); ok; i, ok = laxFDFrom.Next(i + 1) { + for j, ok := laxFDTo.Next(0); ok; j, ok = laxFDTo.Next(j + 1) { + s.addFunctionalDependency(NewFastIntSet(i), NewFastIntSet(j), false, false) + } + } + s.AddNCFunctionalDependency(equivColsLeft, equivColsRight, innerCols, true, true) + } + // Rule #3.1, filters won't produce any strict/lax FDs. 
+ } + // Rule #3.3.1 combinedFD case + if !opt.SkipFDRule331 { + s.addFunctionalDependency(leftCombinedFDFrom, leftCombinedFDTo, true, false) + } + + // Rule #4, add new FD {left key + right key} -> {all columns} if it could. + if ok1 && ok2 { + s.addFunctionalDependency(leftPK.Union(*rightPK), outerCols.Union(innerCols), true, false) + } + + // Rule #5, adding the strict equiv edges if there's no join key and no filters from outside. + if opt.OnlyInnerFilter { + if opt.InnerIsFalse { + s.AddConstants(innerCols) + } else { + for _, edge := range filterFDs.fdEdges { + // keep filterFD's constant and equivalence. + if edge.strict && (edge.equiv || edge.from.IsEmpty()) { + s.addFunctionalDependency(edge.from, edge.to, edge.strict, edge.equiv) + } + } + // keep all FDs from inner side. + for _, edge := range innerFDs.fdEdges { + s.addFunctionalDependency(edge.from, edge.to, edge.strict, edge.equiv) + } + } + } + + // merge the not-null-cols/registered-map from both side together. + s.NotNullCols.UnionWith(filterFDs.NotNullCols) + // inner cols can be nullable since then. + s.NotNullCols.DifferenceWith(innerCols) + if s.HashCodeToUniqueID == nil { + s.HashCodeToUniqueID = innerFDs.HashCodeToUniqueID + } else { + for k, v := range innerFDs.HashCodeToUniqueID { + if _, ok := s.HashCodeToUniqueID[k]; ok { + logutil.BgLogger().Warn("Error occurred when building the functional dependency") + } + s.HashCodeToUniqueID[k] = v + } + } + for i, ok := innerFDs.GroupByCols.Next(0); ok; i, ok = innerFDs.GroupByCols.Next(i + 1) { + s.GroupByCols.Insert(i) + } + s.HasAggBuilt = s.HasAggBuilt || innerFDs.HasAggBuilt +} + +// ArgOpts contains some arg used for FD maintenance. +type ArgOpts struct { + SkipFDRule331 bool + OnlyInnerFilter bool + InnerIsFalse bool +} + +// FindPrimaryKey checks whether there's a key in the current set which implies key -> all cols. 
+func (s FDSet) FindPrimaryKey() (*FastIntSet, bool) { + allCols := s.AllCols() + for i := 0; i < len(s.fdEdges); i++ { + fd := s.fdEdges[i] + // Since we haven't maintained the key column, let's traverse every strict FD to judge with. + if fd.strict && !fd.equiv { + closure := s.closureOfStrict(fd.from) + if allCols.SubsetOf(closure) { + pk := NewFastIntSet() + pk.CopyFrom(fd.from) + return &pk, true + } + } + } + return nil, false +} + +// AllCols returns all columns in the current set. +func (s FDSet) AllCols() FastIntSet { + allCols := NewFastIntSet() + for i := 0; i < len(s.fdEdges); i++ { + allCols.UnionWith(s.fdEdges[i].from) + if !s.fdEdges[i].equiv { + allCols.UnionWith(s.fdEdges[i].to) + } + } + return allCols +} + +// AddFrom merges two FD sets by adding each FD from the given set to this set. +// Since two different tables may have some column ID overlap, we better use +// column unique ID to build the FDSet instead. +func (s *FDSet) AddFrom(fds *FDSet) { + for i := range fds.fdEdges { + fd := fds.fdEdges[i] + if fd.equiv { + s.addEquivalence(fd.from) + } else if fd.isConstant() { + s.AddConstants(fd.to) + } else if fd.strict { + s.AddStrictFunctionalDependency(fd.from, fd.to) + } else { + s.AddLaxFunctionalDependency(fd.from, fd.to) + } + } + for i := range fds.ncEdges { + fd := fds.ncEdges[i] + s.ncEdges = append(s.ncEdges, fd) + } + s.NotNullCols.UnionWith(fds.NotNullCols) + if s.HashCodeToUniqueID == nil { + s.HashCodeToUniqueID = fds.HashCodeToUniqueID + } else { + for k, v := range fds.HashCodeToUniqueID { + if _, ok := s.HashCodeToUniqueID[k]; ok { + logutil.BgLogger().Warn("Error occurred when building the functional dependency") + continue + } + s.HashCodeToUniqueID[k] = v + } + } + for i, ok := fds.GroupByCols.Next(0); ok; i, ok = fds.GroupByCols.Next(i + 1) { + s.GroupByCols.Insert(i) + } + s.HasAggBuilt = fds.HasAggBuilt +} + +// MaxOneRow will regard every column in the fdSet as a constant. 
Since constant is stronger that strict FD, it will +// take over all existed strict/lax FD, only keeping the equivalence. Because equivalence is stronger than constant. +// +// f: {a}--> {b,c}, {abc} == {abc} +// cols: {a,c} +// result: {} --> {a,c}, {a,c} == {a,c} +func (s *FDSet) MaxOneRow(cols FastIntSet) { + cnt := 0 + for i := 0; i < len(s.fdEdges); i++ { + fd := s.fdEdges[i] + // non-equivalence FD, skip it. + if !fd.equiv { + continue + } + // equivalence: {superset} --> {superset} + if cols.Intersects(fd.from) { + s.fdEdges[cnt] = &fdEdge{ + from: fd.from.Intersection(cols), + to: fd.to.Intersection(cols), + strict: true, + equiv: true, + } + cnt++ + } + } + s.fdEdges = s.fdEdges[:cnt] + // At last, add the constant FD, {} --> {cols} + if !cols.IsEmpty() { + s.fdEdges = append(s.fdEdges, &fdEdge{ + to: cols, + strict: true, + }) + } +} + +// ProjectCols projects FDSet to the target columns +// Formula: +// Strict decomposition FD4A: If X −→ Y Z then X −→ Y and X −→ Z. +// Lax decomposition FD4B: If X ~→ Y Z and I(R) is Y-definite then X ~→ Z. +func (s *FDSet) ProjectCols(cols FastIntSet) { + // **************************************** START LOOP 1 ******************************************** + // Ensure the transitive relationship between remaining columns won't be lost. + // 1: record all the constant columns + // 2: if an FD's to side contain un-projected column, substitute it with its closure. + // fd1: {a} --> {b,c} + // fd2: {b} --> {d} + // when b is un-projected, the fd1 should be {a} --> {b,c's closure} which is {a} --> {b,c,d} + // 3: track all columns that have equivalent alternates that are part of the projection. + // fd1: {a} --> {c} + // fd2: {a,b} == {a,b} + // if only a is un-projected, the fd1 can actually be kept as {b} --> {c}. 
+ var constCols, detCols, equivCols FastIntSet + for i := 0; i < len(s.fdEdges); i++ { + fd := s.fdEdges[i] + + if fd.isConstant() { + constCols = fd.to + } + + if !fd.to.SubsetOf(cols) { + // equivalence FD has been the closure as {superset} == {superset}. + if !fd.equiv && fd.strict { + // extended the `to` as it's complete closure, in case of missing some transitive FDs. + fd.to = s.closureOfStrict(fd.to.Union(fd.from)) + fd.to.DifferenceWith(fd.from) + } + } + + // {a,b} --> {c}, when b is un-projected, this FD should be handled latter, recording `b` here. + if !fd.equiv && !fd.from.SubsetOf(cols) { + detCols.UnionWith(fd.from.Difference(cols)) + } + + // equivalence {superset} == {superset} + if fd.equiv && fd.from.Intersects(cols) { + equivCols.UnionWith(fd.from) + } + } + // ****************************************** END LOOP 1 ******************************************** + + // find deleted columns with equivalence. + detCols.IntersectionWith(equivCols) + equivMap := s.makeEquivMap(detCols, cols) + + // it's actually maintained already. + if !constCols.IsEmpty() { + s.AddConstants(constCols) + } + + // **************************************** START LOOP 2 ******************************************** + // leverage the information collected in the loop1 above and try to do some FD substitution. + var ( + cnt int + newFDs []*fdEdge + ) + for i := range s.fdEdges { + fd := s.fdEdges[i] + + // step1: clear the `to` side + // subtract out un-projected columns from dependants. + // subtract out strict constant columns from dependants. + if !fd.to.SubsetOf(cols) { + // since loop 1 has computed the complete transitive closure for strict FD, now as: + // 1: equivalence FD: {superset} == {superset} + // 2: strict FD: {xxx} --> {complete closure} + // 3: lax FD: {xxx} ~~> {yyy} + if fd.equiv { + // As formula FD4A above, delete from un-projected column from `to` side directly. + fd.to = fd.to.Intersection(cols) + // Since from are the same, delete it too here. 
+ fd.from = fd.from.Intersection(cols) + } else if fd.strict { + // As formula FD4A above, delete from un-projected column from `to` side directly. + fd.to = fd.to.Intersection(cols) + } else { + // As formula FD4B above, only if the deleted columns are definite, then we can keep it. + deletedCols := fd.to.Difference(cols) + if deletedCols.SubsetOf(constCols) { + fd.to = fd.to.Intersection(cols) + } else if deletedCols.SubsetOf(s.NotNullCols) { + fd.to = fd.to.Intersection(cols) + } else { + continue + } + } + + if !fd.isConstant() { + // clear the constant columns in the dependency of FD. + if fd.removeColumnsToSide(constCols) { + continue + } + } + // from and to side of equiv are same, don't do trivial elimination. + if !fd.isEquivalence() { + if fd.removeColumnsToSide(fd.from) { + // fd.to side is empty, remove this FD. + continue + } + } + } + + // step2: clear the `from` side + // substitute the equivalence columns for removed determinant columns. + if !fd.from.SubsetOf(cols) { + // equivalence and constant FD couldn't be here. + deletedCols := fd.from.Difference(cols) + substitutedCols := NewFastIntSet() + foundAll := true + for c, ok := deletedCols.Next(0); ok; c, ok = deletedCols.Next(c + 1) { + // For every un-projected column, try to found their substituted column in projection list. + var id int + if id, foundAll = equivMap[c]; !foundAll { + break + } + substitutedCols.Insert(id) + } + if foundAll { + // deleted columns can be remapped using equivalencies. 
+ from := fd.from.Union(substitutedCols) + from.DifferenceWith(deletedCols) + newFDs = append(newFDs, &fdEdge{ + from: from, + to: fd.to, + strict: fd.strict, + equiv: fd.equiv, + }) + } + continue + } + + if cnt != i { + s.fdEdges[cnt] = s.fdEdges[i] + } + cnt++ + } + s.fdEdges = s.fdEdges[:cnt] + // ****************************************** END LOOP 2 ******************************************** + + for i := range newFDs { + fd := newFDs[i] + if fd.equiv { + s.addEquivalence(fd.from) + } else if fd.isConstant() { + s.AddConstants(fd.to) + } else if fd.strict { + s.AddStrictFunctionalDependency(fd.from, fd.to) + } else { + s.AddLaxFunctionalDependency(fd.from, fd.to) + } + } + // ncEdge should also be projected. + for i := 0; i < len(s.ncEdges); i++ { + nc := s.ncEdges[i] + if !nc.conditionNC.Intersects(cols) { + // edge is projected out, the nc edge's condition won't be satisfied anymore. + continue + } + if nc.isConstant() { + nc.to.IntersectionWith(cols) + if nc.to.IsEmpty() { + // edge is projected out. + s.ncEdges = append(s.ncEdges[:i], s.ncEdges[i+1:]...) + i-- + } + continue + } + if nc.equiv { + nc.from.IntersectionWith(cols) + nc.to.IntersectionWith(cols) + if nc.from.IsEmpty() { + // edge is projected out. + s.ncEdges = append(s.ncEdges[:i], s.ncEdges[i+1:]...) + i-- + } + } + } +} + +// makeEquivMap try to find the equivalence column of every deleted column in the project list. +func (s *FDSet) makeEquivMap(detCols, projectedCols FastIntSet) map[int]int { + var equivMap map[int]int + for i, ok := detCols.Next(0); ok; i, ok = detCols.Next(i + 1) { + var oneCol FastIntSet + oneCol.Insert(i) + closure := s.closureOfEquivalence(oneCol) + closure.IntersectionWith(projectedCols) + // the column to be deleted has an equivalence column exactly in the project list. + if !closure.IsEmpty() { + if equivMap == nil { + equivMap = make(map[int]int) + } + id, _ := closure.Next(0) // We can record more equiv columns. 
+ equivMap[i] = id + } + } + return equivMap +} + +// String returns format string of this FDSet. +func (s *FDSet) String() string { + var builder strings.Builder + + for i := range s.fdEdges { + if i != 0 { + builder.WriteString(", ") + } + builder.WriteString(s.fdEdges[i].String()) + } + return builder.String() +} + +// String returns format string of this FD. +func (e *fdEdge) String() string { + var b strings.Builder + if e.equiv { + if !e.strict { + logutil.BgLogger().Warn("Error occurred when building the functional dependency. We don't support lax equivalent columns") + return "Wrong functional dependency" + } + _, _ = fmt.Fprintf(&b, "%s==%s", e.from, e.to) + } else { + if e.strict { + _, _ = fmt.Fprintf(&b, "%s-->%s", e.from, e.to) + } else { + _, _ = fmt.Fprintf(&b, "%s~~>%s", e.from, e.to) + } + } + return b.String() +} + +// RegisterUniqueID is used to record the map relationship between expr and allocated uniqueID. +func (s *FDSet) RegisterUniqueID(hashCode string, uniqueID int) { + if len(hashCode) == 0 { + // shouldn't be here. + logutil.BgLogger().Warn("Error occurred when building the functional dependency") + return + } + if _, ok := s.HashCodeToUniqueID[hashCode]; ok { + // shouldn't be here. + logutil.BgLogger().Warn("Error occurred when building the functional dependency") + return + } + s.HashCodeToUniqueID[hashCode] = uniqueID +} + +// IsHashCodeRegistered checks whether the given hashcode has been registered in the current set. +func (s *FDSet) IsHashCodeRegistered(hashCode string) (int, bool) { + if uniqueID, ok := s.HashCodeToUniqueID[hashCode]; ok { + return uniqueID, true + } + return -1, false +} diff --git a/planner/funcdep/only_full_group_by_test.go b/planner/funcdep/only_full_group_by_test.go new file mode 100644 index 0000000000000..aecad5c5182ae --- /dev/null +++ b/planner/funcdep/only_full_group_by_test.go @@ -0,0 +1,267 @@ +// Copyright 2022 PingCAP, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package funcdep_test + +import ( + "testing" + + "github.com/pingcap/tidb/testkit" + "github.com/stretchr/testify/require" +) + +func TestOnlyFullGroupByOldCases(t *testing.T) { + store, clean := testkit.CreateMockStore(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + + tk.MustExec("set @@session.tidb_enable_new_only_full_group_by_check = 'on';") + + // test case 1 + tk.MustExec("use test") + tk.MustExec("drop table if exists t1") + tk.MustExec("drop table if exists t2") + tk.MustExec("drop view if exists v1") + tk.MustExec("CREATE TABLE t1 ( c1 INT, c2 INT, c4 DATE, c5 VARCHAR(1));") + tk.MustExec("CREATE TABLE t2 ( c1 INT, c2 INT, c3 INT, c5 VARCHAR(1));") + tk.MustExec("CREATE VIEW v1 AS SELECT alias1.c4 AS field1 FROM t1 AS alias1 INNER JOIN t1 AS alias2 ON 1 GROUP BY field1 ORDER BY alias1.c5;") + _, err := tk.Exec("SELECT * FROM v1;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #2 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.alias1.c5' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + + // test case 2 + tk.MustExec("drop table if exists t1") + tk.MustExec("drop table if exists t2") + tk.MustExec("drop view if exists v1") + tk.MustExec("CREATE TABLE t1 (c1 INT, c2 INT, c4 DATE, c5 VARCHAR(1));") + tk.MustExec("CREATE TABLE t2 (c1 INT, c2 INT, 
c3 INT, c5 VARCHAR(1));") + tk.MustExec("CREATE definer='root'@'localhost' VIEW v1 AS SELECT alias1.c4 AS field1, alias1.c4 AS field2 FROM t1 AS alias1 INNER JOIN t1 AS alias2 ON (alias2.c1 = alias1.c2) WHERE ( NOT EXISTS ( SELECT SQ1_alias1.c5 AS SQ1_field1 FROM t2 AS SQ1_alias1 WHERE SQ1_alias1.c3 < alias1.c1 )) AND (alias1.c5 = alias1.c5 AND alias1.c5 = 'd' ) GROUP BY field1, field2 ORDER BY alias1.c5, field1, field2") + tk.MustQuery("SELECT * FROM v1;") + + // test case 3 + // need to resolve the name resolver problem first (view and base table's column can not refer each other) + // see some cases in 21 + + // test case 4 + tk.MustExec("drop table if exists t1") + tk.MustExec("drop table if exists t2") + tk.MustExec("drop view if exists v2") + tk.MustExec("CREATE TABLE t1 ( col_varchar_10_utf8 VARCHAR(10) CHARACTER SET utf8, col_int_key INT, pk INT PRIMARY KEY);") + tk.MustExec("CREATE TABLE t2 ( col_varchar_10_utf8 VARCHAR(10) CHARACTER SET utf8 DEFAULT NULL, col_int_key INT DEFAULT NULL, pk INT PRIMARY KEY);") + tk.MustExec("CREATE ALGORITHM=MERGE definer='root'@'localhost' VIEW v2 AS SELECT t2.pk, COALESCE(t2.pk, 3) AS coa FROM t1 LEFT JOIN t2 ON 0;") + tk.MustQuery("SELECT v2.pk, v2.coa FROM t1 LEFT JOIN v2 AS v2 ON 0 GROUP BY v2.pk;") + + // test case 5 + tk.MustExec("drop table if exists t") + tk.MustExec("CREATE TABLE t ( a INT, c INT GENERATED ALWAYS AS (a+2), d INT GENERATED ALWAYS AS (c+2) );") + tk.MustQuery("SELECT c FROM t GROUP BY a;") + tk.MustQuery("SELECT d FROM t GROUP BY c;") + tk.MustQuery("SELECT d FROM t GROUP BY a;") + tk.MustQuery("SELECT 1+c FROM t GROUP BY a;") + tk.MustQuery("SELECT 1+d FROM t GROUP BY c;") + tk.MustQuery("SELECT 1+d FROM t GROUP BY a;") + tk.MustQuery("SELECT t1.d FROM t as t1, t as t2 WHERE t2.d=t1.c GROUP BY t2.a;") + _, err = tk.Exec("SELECT t1.d FROM t as t1, t as t2 WHERE t2.d>t1.c GROUP BY t2.a;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in 
GROUP BY clause and contains nonaggregated column 'test.t1.d' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + + // test case 6 + tk.MustExec("drop table if exists t") + tk.MustExec("CREATE TABLE t ( a INT, c INT GENERATED ALWAYS AS (a+2), d INT GENERATED ALWAYS AS (c+2) );") + _, err = tk.Exec("SELECT t1.d FROM t as t1, t as t2 WHERE t2.d>t1.c GROUP BY t2.a;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t1.d' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + _, err = tk.Exec("SELECT (SELECT t1.c FROM t as t1 GROUP BY -3) FROM t as t2;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t1.c' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + _, err = tk.Exec("SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t.d' which is not in SELECT list; this is incompatible with DISTINCT") + _, err = tk.Exec("SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t.d' which is not in SELECT list; this is incompatible with DISTINCT") + _, err = tk.Exec("SELECT (SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1) FROM t as t2;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t.d' which is not in SELECT 
list; this is incompatible with DISTINCT") + + // test case 7 + tk.MustExec("drop table if exists t") + tk.MustExec("CREATE TABLE t(a INT NULL, b INT NOT NULL, c INT, UNIQUE(a,b));") + tk.MustQuery("SELECT a,b,c FROM t WHERE a IS NOT NULL GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE NOT (a IS NULL) GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE a > 3 GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE a = 3 GROUP BY b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE a BETWEEN 3 AND 6 GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE a <> 3 GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE a IN (3,4) GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE a IN (SELECT b FROM t) GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE a IS TRUE GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE (a <> 3) IS TRUE GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE a IS FALSE GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE (a <> 3) IS FALSE GROUP BY a,b;") + tk.MustQuery("SELECT a,b,c FROM t WHERE a LIKE \"%abc%\" GROUP BY a,b;") + // todo: eval not-null refuse NOT wrapper. + // tk.MustQuery("SELECT a,b,c FROM t WHERE NOT(a IN (3,4)) GROUP BY a,b;") + // tk.MustQuery("SELECT a,b,c FROM t WHERE a NOT IN (3,4) GROUP BY a,b;") + // tk.MustQuery("SELECT a,b,c FROM t WHERE a NOT LIKE \"%abc%\" GROUP BY a,b;") + _, err = tk.Exec("SELECT a,b,c FROM t WHERE a<=>NULL GROUP BY b;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #3 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t.c' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + // is-not-true will let the null value pass, so evaluating it won't derive to a with not-null attribute. 
+ _, err = tk.Exec("SELECT a,b,c FROM t WHERE a IS NOT TRUE GROUP BY a,b;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #3 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t.c' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + + // test case 8 + tk.MustExec("drop table if exists t1") + tk.MustExec("drop table if exists t2") + tk.MustExec("drop table if exists t3") + tk.MustExec("CREATE TABLE t1 (a INT, b INT);") + tk.MustExec("CREATE TABLE t2 (b INT);") + tk.MustExec("CREATE TABLE t3 (b INT NULL, c INT NULL, d INT NULL, e INT NULL, UNIQUE KEY (b,d,e));") + tk.MustQuery("SELECT * FROM t1, t2, t3 WHERE t2.b = t1.b AND t2.b = t3.b AND t3.d = 1 AND t3.e = 1 AND t3.d IS NOT NULL AND t1.a = 2 GROUP BY t1.b;") + + // test case 9 + tk.MustExec("drop table if exists t1") + tk.MustExec("create table t1(a int, b int not null, c int not null, d int, unique key(b,c), unique key(b,d));") + _, err = tk.Exec("select (select sin(a)) as z from t1 group by d,b;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'z' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + + // test case 10 & 11 + tk.MustExec("drop table if exists t1") + tk.MustExec("create table t1(a int, b int not null, c int not null, d int, unique key(b,c), unique key(b,d));") + tk.MustExec("select t3.a from t1, t1 as t2, t1 as t3 where t3.b=t2.b and t3.c=t1.d and t2.b=t1.b and t2.c=t1.c group by t1.b,t1.c") + tk.MustExec("drop table if exists t1") + tk.MustExec("drop table if exists t3") + tk.MustExec("create table t1(a int, b int not null, c int not null, d int, unique key(b,c), unique key(b,d));") + tk.MustExec("create table t3(pk int primary key, b int);") + tk.MustQuery("select t3.b from 
t1,t1 as t2,t3 where t3.pk=t2.d and t2.b=t1.b and t2.c=t1.a group by t1.b,t1.c;") + + // test case 12 + tk.MustExec("drop table if exists t1") + tk.MustExec("drop table if exists t2") + tk.MustExec("create table t1(a int,b int not null,c int not null,d int, unique key(b,c), unique key(b,d));") + tk.MustExec("create table t2 like t1") + tk.MustQuery("select t1.a,t2.c from t1 left join t2 on t1.a=t2.c and cos(t2.c+t2.b)>0.5 and sin(t1.a+t2.d)<0.9 group by t1.a;") + tk.MustQuery("select t1.a,t2.d from t1 left join t2 on t1.a=t2.c and t1.d=t2.b and cos(t2.c+t2.b)>0.5 and sin(t1.a+t2.d)<0.9 group by t1.a,t1.d;") + + // test case 17 + tk.MustExec("drop table if exists customer1") + tk.MustExec("drop table if exists customer2") + tk.MustExec("drop view if exists customer") + tk.MustExec("create table customer1(pk int primary key, a int);") + tk.MustExec("create table customer2(pk int primary key, b int);") + tk.MustExec("CREATE algorithm=merge definer='root'@'localhost' VIEW customer as SELECT pk,a,b FROM customer1 JOIN customer2 USING (pk);") + tk.MustQuery("select customer.pk, customer.b from customer group by customer.pk;") + // classic cases + tk.MustQuery("select customer1.a, count(*) from customer1 left join customer2 on customer1.a=customer2.b where customer2.pk in (7,9) group by customer2.b;") + tk.MustQuery("select customer1.a, count(*) from customer1 left join customer2 on customer1.a=1 where customer2.pk in (7,9) group by customer2.b;") + // c2.pk reject the null from both inner side of the left join. 
+ tk.MustQuery("select c1.a, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;") + tk.MustQuery("select c3.b, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=1) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;") + tk.MustQuery("select c1.a, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=1) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;") + tk.MustQuery("select c1.a, c3.b, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=1) on c3.b=1 where c2.pk in (7,9) group by c2.b;") + // inner join nested with outer join. + tk.MustQuery("select c1.a, c3.b, count(*) from customer2 c3 join (customer1 c1 left join customer2 c2 on c1.a=1) on c3.b=1 where c2.pk in (7,9) group by c2.b;") + tk.MustQuery("select c1.a, c3.b, count(*) from customer2 c3 join (customer1 c1 left join customer2 c2 on c1.a=1) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;") + tk.MustQuery("select c1.a, c3.b, count(*) from customer2 c3 join (customer1 c1 left join customer2 c2 on c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;") + // outer join nested with inner join. + // TODO: inner side's strict FD and equiv FD can be saved. + //tk.MustQuery("select c1.a, c3.b, count(*) from customer2 c3 left join (customer1 c1 inner join customer2 c2 on c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;") + //tk.MustQuery("select c1.a, c3.b, count(*) from customer2 c3 left join (customer1 c1 inner join customer2 c2 on c1.a=1) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;") + //tk.MustQuery("select c1.a, c3.b, count(*) from customer2 c3 left join (customer1 c1 inner join customer2 c2 on c1.a=1 and c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;") + + tk.MustExec("drop view if exists customer") + // this left join can extend left pk to all cols. 
+ tk.MustExec("CREATE algorithm=merge definer='root'@'localhost' VIEW customer as SELECT pk,a,b FROM customer1 LEFT JOIN customer2 USING (pk);") + tk.MustQuery("select customer.pk, customer.b from customer group by customer.pk;") + + // test case 18 + tk.MustExec("drop table if exists t1") + tk.MustExec("drop table if exists t2") + tk.MustExec("create table t1(pk int primary key, a int);") + tk.MustExec("create table t2(pk int primary key, b int);") + tk.MustQuery("select t1.pk, t2.b from t1 join t2 on t1.pk=t2.pk group by t1.pk;") + tk.MustQuery("select t1.pk, t2.b from t1 join t2 using(pk) group by t1.pk;") + tk.MustQuery("select t1.pk, t2.b from t1 natural join t2 group by t1.pk;") + tk.MustQuery("select t1.pk, t2.b from t1 left join t2 using(pk) group by t1.pk;") + tk.MustQuery("select t1.pk, t2.b from t1 natural left join t2 group by t1.pk;") + tk.MustQuery("select t1.pk, t2.b from t2 right join t1 using(pk) group by t1.pk;") + tk.MustQuery("select t1.pk, t2.b from t2 natural right join t1 group by t1.pk;") + + // test case 20 + tk.MustExec("drop table t1") + tk.MustExec("create table t1(pk int primary key, a int);") + tk.MustQuery("select t3.a from t1 left join (t1 as t2 left join t1 as t3 on 1) on 1 group by t3.pk;") + tk.MustQuery("select (select t1.a from t1 as t2 limit 1) from t1 group by pk;") + + // test case 21 + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1(a int, b int);") + // TODO: to be fixed. 
+ //tk.MustExec("drop view if exists v1;") + //tk.MustExec("create view v1 as select a as a, 2*a as b, coalesce(a,3) as c from t1;") + //err = tk.ExecToErr("select v1.b from t1 left join v1 on 1 group by v1.a") + //require.NotNil(t, err) + //require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'z' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + tk.MustExec("create table t2(c int, d int);") + err = tk.ExecToErr("select t4.d from t1 left join (t2 as t3 join t2 as t4 on t4.d=3) on t1.a=10 group by \"\";") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t4.d' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + tk.MustExec("select t4.d from t1 join (t2 as t3 left join t2 as t4 on t4.d=3) on t1.a=10 group by \"\";") + err = tk.ExecToErr("select t4.d from t1 join (t2 as t3 left join t2 as t4 on t4.d=3 and t4.c+t3.c=2) on t1.a=10 group by \"\";") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t4.d' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + //tk.MustExec("drop table t1") + //tk.MustExec("drop view v1") + //tk.MustExec("create table t1(a int not null, b int)") + //tk.MustExec("create view v1 as select a as a, 2*a as b, coalesce(a,3) as c from t1") + //tk.MustExec("select v1.b from t1 left join v1 on 1 group by v1.a") + + // test issue #25196 + tk.MustExec("drop table if exists t1") + tk.MustExec("create table t1 (i1 integer, c1 integer);") + tk.MustExec("insert into t1 values (2, 41), (1, 42), (3, 43), (0, null);") + 
tk.MustExec("drop table if exists t2") + tk.MustExec("create table t2 (i2 integer, c2 integer, f2 float);") + tk.MustExec("insert into t2 values (0, 43, null), (1, null, 0.1), (3, 42, 0.01), (2, 73, 0.12), (null, 41, -0.1), (null, null, null);") + err = tk.ExecToErr("SELECT * FROM t2 AS _tmp_1 JOIN (SELECT max(_tmp_3.f2) AS _tmp_4,min(_tmp_3.i2) AS _tmp_5 FROM t2 AS _tmp_3 WHERE _tmp_3.f2>=_tmp_3.c2 GROUP BY _tmp_3.c2 ORDER BY _tmp_3.i2) AS _tmp_2 WHERE _tmp_2._tmp_5=100;") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #3 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test._tmp_3.i2' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + + // test issue #22301 and #33056 + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1 (a int);") + tk.MustExec("create table t2 (a int, b int);") + tk.MustQuery("select t1.a from t1 join t2 on t2.a=t1.a group by t2.a having min(t2.b) > 0;") + tk.MustQuery("select t2.a, count(t2.b) from t1 join t2 using (a) where t1.a = 1;") + tk.MustQuery("select count(t2.b) from t1 join t2 using (a) order by t2.a;") + + // test issue #30024 + tk.MustExec("drop table if exists t1,t2;") + tk.MustExec("CREATE TABLE t1 (a INT, b INT, c INT DEFAULT 0);") + tk.MustExec("INSERT INTO t1 (a, b) VALUES (3,3), (2,2), (3,3), (2,2), (3,3), (4,4);") + tk.MustExec("CREATE TABLE t2 (a INT, b INT, c INT DEFAULT 0);") + tk.MustExec("INSERT INTO t2 (a, b) VALUES (3,3), (2,2), (3,3), (2,2), (3,3), (4,4);") + tk.MustQuery("SELECT t1.a FROM t1 GROUP BY t1.a HAVING t1.a IN (SELECT t2.a FROM t2 ORDER BY SUM(t1.b));") + + // a normal case + tk.MustExec("drop table if exists t1") + tk.MustExec("create table t1(a int not null, b int not null, index(a))") + err = tk.ExecToErr("select b from t1 group by a") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in 
GROUP BY clause and contains nonaggregated column 'test.t1.b' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") +}