From 8550dbb21655012826f78177616cc02deadfa847 Mon Sep 17 00:00:00 2001
From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com>
Date: Mon, 29 Nov 2021 17:01:52 +0800
Subject: [PATCH] *: add cardinality estimation trace for `Selectivity`
 (#29883)

---
 executor/executor.go                     |   1 +
 sessionctx/stmtctx/stmtctx.go            |   4 +
 statistics/main_test.go                  |   7 +-
 statistics/selectivity.go                | 123 ++++++++++++++-
 statistics/testdata/trace_suite_in.json  |  11 ++
 statistics/testdata/trace_suite_out.json | 181 +++++++++++++++++++++++
 statistics/trace_test.go                 |  86 +++++++++++
 util/tracing/opt_trace.go                |   9 ++
 8 files changed, 418 insertions(+), 4 deletions(-)
 create mode 100644 statistics/testdata/trace_suite_in.json
 create mode 100644 statistics/testdata/trace_suite_out.json
 create mode 100644 statistics/trace_test.go

diff --git a/executor/executor.go b/executor/executor.go
index 381af32a608d0..f1c82484ebcf9 100644
--- a/executor/executor.go
+++ b/executor/executor.go
@@ -1690,6 +1690,7 @@ func ResetContextOfStmt(ctx sessionctx.Context, s ast.StmtNode) (err error) {
 	sc.IsStaleness = false
 	sc.LockTableIDs = make(map[int64]struct{})
 	sc.LogicalOptimizeTrace = nil
+	sc.OptimizerCETrace = nil
 
 	sc.InitMemTracker(memory.LabelForSQLText, vars.MemQuotaQuery)
 	sc.InitDiskTracker(memory.LabelForSQLText, -1)
diff --git a/sessionctx/stmtctx/stmtctx.go b/sessionctx/stmtctx/stmtctx.go
index 100dbb79bee3f..8a7f6602b6a74 100644
--- a/sessionctx/stmtctx/stmtctx.go
+++ b/sessionctx/stmtctx/stmtctx.go
@@ -196,6 +196,10 @@ type StatementContext struct {
 
 	// LogicalOptimizeTrace indicates the trace for optimize
 	LogicalOptimizeTrace *tracing.LogicalOptimizeTracer
+	// EnableOptimizerCETrace indicates whether the internal process of cardinality estimation needs to be traced.
+	// CE Trace is currently a submodule of the optimizer trace and is controlled by a separate option.
+	EnableOptimizerCETrace bool
+	OptimizerCETrace       []*tracing.CETraceRecord
 }
 
 // StmtHints are SessionVars related sql hints.
diff --git a/statistics/main_test.go b/statistics/main_test.go
index 3d2bf6e45abbc..7e40d650fe393 100644
--- a/statistics/main_test.go
+++ b/statistics/main_test.go
@@ -29,7 +29,7 @@ import (
 	"go.uber.org/goleak"
 )
 
-var testDataMap = make(testdata.BookKeeper, 2)
+var testDataMap = make(testdata.BookKeeper, 3)
 
 func TestMain(m *testing.M) {
 	testbridge.WorkaroundGoCheckFlags()
@@ -45,6 +45,7 @@ func TestMain(m *testing.M) {
 
 	testDataMap.LoadTestSuiteData("testdata", "integration_suite")
 	testDataMap.LoadTestSuiteData("testdata", "stats_suite")
+	testDataMap.LoadTestSuiteData("testdata", "trace_suite")
 
 	opts := []goleak.Option{
 		goleak.IgnoreTopFunction("go.etcd.io/etcd/pkg/logutil.(*MergeLogger).outputLoop"),
@@ -66,6 +67,10 @@ func GetStatsSuiteData() testdata.TestData {
 	return testDataMap["stats_suite"]
 }
 
+func GetTraceSuiteData() testdata.TestData {
+	return testDataMap["trace_suite"]
+}
+
 // TestStatistics batches tests sharing a test suite to reduce the setups
 // overheads.
 func TestStatistics(t *testing.T) {
diff --git a/statistics/selectivity.go b/statistics/selectivity.go
index 7a85404c1e3e8..86321d561e954 100644
--- a/statistics/selectivity.go
+++ b/statistics/selectivity.go
@@ -15,6 +15,7 @@
 package statistics
 
 import (
+	"bytes"
 	"math"
 	"math/bits"
 	"sort"
@@ -22,12 +23,17 @@ import (
 	"github.com/pingcap/errors"
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/parser/ast"
+	"github.com/pingcap/tidb/parser/format"
 	"github.com/pingcap/tidb/parser/mysql"
 	planutil "github.com/pingcap/tidb/planner/util"
 	"github.com/pingcap/tidb/sessionctx"
+	"github.com/pingcap/tidb/sessionctx/stmtctx"
 	"github.com/pingcap/tidb/types"
+	driver "github.com/pingcap/tidb/types/parser_driver"
+	"github.com/pingcap/tidb/util/chunk"
 	"github.com/pingcap/tidb/util/logutil"
 	"github.com/pingcap/tidb/util/ranger"
+	"github.com/pingcap/tidb/util/tracing"
 	"go.uber.org/zap"
 )
 
@@ -179,14 +185,20 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
 	if coll.Count == 0 || len(exprs) == 0 {
 		return 1, nil, nil
 	}
+	ret := 1.0
+	sc := ctx.GetSessionVars().StmtCtx
+	tableID := coll.PhysicalID
 	// TODO: If len(exprs) is bigger than 63, we could use bitset structure to replace the int64.
 	// This will simplify some code and speed up if we use this rather than a boolean slice.
 	if len(exprs) > 63 || (len(coll.Columns) == 0 && len(coll.Indices) == 0) {
-		return pseudoSelectivity(coll, exprs), nil, nil
+		ret = pseudoSelectivity(coll, exprs)
+		if sc.EnableOptimizerCETrace {
+			CETraceExpr(sc, tableID, "Table Stats-Pseudo-Expression", expression.ComposeCNFCondition(ctx, exprs...), ret*float64(coll.Count))
+		}
+		return ret, nil, nil
 	}
-	ret := 1.0
+
 	var nodes []*StatsNode
-	sc := ctx.GetSessionVars().StmtCtx
 
 	remainedExprs := make([]expression.Expression, 0, len(exprs))
 
@@ -281,6 +293,9 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
 	usedSets := GetUsableSetsByGreedy(nodes)
 	// Initialize the mask with the full set.
 	mask := (int64(1) << uint(len(remainedExprs))) - 1
+	// curExpr records the expressions covered so far. It is only used for cardinality estimation tracing.
+	var curExpr []expression.Expression
+
 	for _, set := range usedSets {
 		mask &^= set.mask
 		ret *= set.Selectivity
@@ -291,6 +306,16 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
 		if set.partCover {
 			ret *= selectionFactor
 		}
+		if sc.EnableOptimizerCETrace {
+			// Tracing for the expression estimation results after applying this StatsNode.
+			for i := range remainedExprs {
+				if set.mask&(1<<uint64(i)) > 0 {
+					curExpr = append(curExpr, remainedExprs[i])
+				}
+			}
+			expr := expression.ComposeCNFCondition(ctx, curExpr...)
+			CETraceExpr(sc, tableID, "Table Stats-Expression-CNF", expr, ret*float64(coll.Count))
+		}
 	}
 
 	// Now we try to cover those still not covered DNF conditions using independence assumption,
@@ -345,12 +370,22 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
 				}
 
 				selectivity = selectivity + curSelectivity - selectivity*curSelectivity
+				if sc.EnableOptimizerCETrace {
+					// Tracing for the expression estimation results of this DNF.
+					CETraceExpr(sc, tableID, "Table Stats-Expression-DNF", scalarCond, selectivity*float64(coll.Count))
+				}
 			}
 
 			if selectivity != 0 {
 				ret *= selectivity
 				mask &^= 1 << uint64(i)
 			}
+			if sc.EnableOptimizerCETrace {
+				// Tracing for the expression estimation results after applying the DNF estimation result.
+				curExpr = append(curExpr, remainedExprs[i])
+				expr := expression.ComposeCNFCondition(ctx, curExpr...)
+				CETraceExpr(sc, tableID, "Table Stats-Expression-CNF", expr, ret*float64(coll.Count))
+			}
 		}
 	}
 
@@ -358,6 +393,11 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
 	if mask > 0 {
 		ret *= selectionFactor
 	}
+	if sc.EnableOptimizerCETrace {
+		// Tracing for the expression estimation results after applying the default selectivity.
+		totalExpr := expression.ComposeCNFCondition(ctx, remainedExprs...)
+		CETraceExpr(sc, tableID, "Table Stats-Expression-CNF", totalExpr, ret*float64(coll.Count))
+	}
 	return ret, nodes, nil
 }
 
@@ -478,3 +518,80 @@ func FindPrefixOfIndexByCol(cols []*expression.Column, idxColIDs []int64, cached
 	}
 	return expression.FindPrefixOfIndex(cols, idxColIDs)
 }
+
+// CETraceExpr appends expr and its estimated row count to the CE trace in sc; a failed conversion is only logged.
+func CETraceExpr(sc *stmtctx.StatementContext, tableID int64, tp string, expr expression.Expression, rowCount float64) {
+	exprStr, err := ExprToString(expr)
+	if err != nil {
+		logutil.BgLogger().Debug("[OptimizerTrace] Failed to trace CE of an expression",
+			zap.Any("expression", expr), zap.Error(err))
+		return
+	}
+	rec := tracing.CETraceRecord{
+		TableID:  tableID,
+		Type:     tp,
+		Expr:     exprStr,
+		RowCount: uint64(rowCount), // the fractional part of the estimate is dropped
+	}
+	sc.OptimizerCETrace = append(sc.OptimizerCETrace, &rec)
+}
+
+// ExprToString renders an Expression as a string that can appear in a SQL statement.
+//
+// This is somewhat of a hack: it relies on TiDB accepting internal function names in SQL.
+// For example, you can write `eq`(a, 1), which is the same as a = 1.
+// Ideally we would first convert the expression into an AST and then call astNode.Restore(),
+//   as the Constant case here does. For convenience, we use this trick for now.
+//
+// It may be more appropriate to put this in the expression package. But currently it is only used for CE trace,
+//   and it may not be general enough to handle all possible expressions, so it lives here for now.
+func ExprToString(e expression.Expression) (string, error) {
+	switch expr := e.(type) {
+	case *expression.ScalarFunction:
+		var buffer bytes.Buffer
+		buffer.WriteString("`" + expr.FuncName.L + "`(")
+		switch expr.FuncName.L {
+		case ast.Cast:
+			for _, arg := range expr.GetArgs() {
+				argStr, err := ExprToString(arg)
+				if err != nil {
+					return "", err
+				}
+				buffer.WriteString(argStr)
+				buffer.WriteString(", ")
+				buffer.WriteString(expr.RetType.String())
+			}
+		default:
+			for i, arg := range expr.GetArgs() {
+				argStr, err := ExprToString(arg)
+				if err != nil {
+					return "", err
+				}
+				buffer.WriteString(argStr)
+				if i+1 != len(expr.GetArgs()) {
+					buffer.WriteString(", ")
+				}
+			}
+		}
+		buffer.WriteString(")")
+		return buffer.String(), nil
+	case *expression.Column:
+		return expr.String(), nil
+	case *expression.CorrelatedColumn:
+		return "", errors.New("tracing for correlated columns not supported now")
+	case *expression.Constant:
+		value, err := expr.Eval(chunk.Row{})
+		if err != nil {
+			return "", err
+		}
+		valueExpr := driver.ValueExpr{Datum: value}
+		var buffer bytes.Buffer
+		restoreCtx := format.NewRestoreCtx(format.DefaultRestoreFlags, &buffer)
+		err = valueExpr.Restore(restoreCtx)
+		if err != nil {
+			return "", err
+		}
+		return buffer.String(), nil
+	}
+	return "", errors.New("unexpected type of Expression")
+}
diff --git a/statistics/testdata/trace_suite_in.json b/statistics/testdata/trace_suite_in.json
new file mode 100644
index 0000000000000..62ecf9e378432
--- /dev/null
+++ b/statistics/testdata/trace_suite_in.json
@@ -0,0 +1,11 @@
+[
+  {
+    "name": "TestTraceCE",
+    "cases": [
+      "a > 0 and a < 2",
+      "a >= 1 and a < 10",
+      "a < 3 or b < 4",
+      "a = 1 and b = 2"
+    ]
+  }
+]
diff --git a/statistics/testdata/trace_suite_out.json b/statistics/testdata/trace_suite_out.json
new file mode 100644
index 0000000000000..d45173d34d24c
--- /dev/null
+++ b/statistics/testdata/trace_suite_out.json
@@ -0,0 +1,181 @@
+[
+  {
+    "Name": "TestTraceCE",
+    "Cases": [
+      {
+        "Expr": "a > 0 and a < 2",
+        "Trace": [
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`and`(`gt`(test.t.a, 0), `lt`(test.t.a, 2))",
+            "RowCount": 4
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`and`(`gt`(test.t.a, 0), `lt`(test.t.a, 2))",
+            "RowCount": 4
+          }
+        ]
+      },
+      {
+        "Expr": "a >= 1 and a < 10",
+        "Trace": [
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`and`(`ge`(test.t.a, 1), `lt`(test.t.a, 10))",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`and`(`ge`(test.t.a, 1), `lt`(test.t.a, 10))",
+            "RowCount": 6
+          }
+        ]
+      },
+      {
+        "Expr": "a < 3 or b < 4",
+        "Trace": [
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`lt`(test.t.a, 3)",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`lt`(test.t.a, 3)",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-DNF",
+            "Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`lt`(test.t.b, 4)",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`lt`(test.t.b, 4)",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-DNF",
+            "Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`lt`(test.t.a, 3)",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`lt`(test.t.a, 3)",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-DNF",
+            "Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`lt`(test.t.b, 4)",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`lt`(test.t.b, 4)",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-DNF",
+            "Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
+            "RowCount": 6
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`or`(`lt`(test.t.a, 3), `lt`(test.t.b, 4))",
+            "RowCount": 6
+          }
+        ]
+      },
+      {
+        "Expr": "a = 1 and b = 2",
+        "Trace": [
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`and`(`eq`(test.t.a, 1), `eq`(test.t.b, 2))",
+            "RowCount": 2
+          },
+          {
+            "TableID": 57,
+            "TableName": "",
+            "Type": "Table Stats-Expression-CNF",
+            "Expr": "`and`(`eq`(test.t.a, 1), `eq`(test.t.b, 2))",
+            "RowCount": 2
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/statistics/trace_test.go b/statistics/trace_test.go
new file mode 100644
index 0000000000000..2b1c624cbaf6d
--- /dev/null
+++ b/statistics/trace_test.go
@@ -0,0 +1,86 @@
+// Copyright 2021 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statistics_test
+
+import (
+	"context"
+	"testing"
+
+	"github.com/pingcap/tidb/domain"
+	"github.com/pingcap/tidb/infoschema"
+	"github.com/pingcap/tidb/parser"
+	plannercore "github.com/pingcap/tidb/planner/core"
+	"github.com/pingcap/tidb/sessionctx"
+	"github.com/pingcap/tidb/statistics"
+	"github.com/pingcap/tidb/testkit"
+	"github.com/pingcap/tidb/testkit/testdata"
+	"github.com/pingcap/tidb/util/tracing"
+	"github.com/stretchr/testify/require"
+)
+
+func TestTraceCE(t *testing.T) {
+	domain.RunAutoAnalyze = false
+	store, dom, clean := testkit.CreateMockStoreAndDomain(t)
+	defer clean()
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t")
+	tk.MustExec("create table t(a int, b int, d varchar(10), index idx(a, b))")
+	tk.MustExec(`insert into t values(1, 1, "aaa"),
+		(1, 1, "bbb"),
+		(1, 2, "ccc"),
+		(1, 2, "ddd"),
+		(2, 2, "aaa"),
+		(2, 3, "bbb")`)
+	tk.MustExec("analyze table t")
+	var (
+		in  []string
+		out []struct {
+			Expr  string
+			Trace []*tracing.CETraceRecord
+		}
+	)
+	traceSuiteData := statistics.GetTraceSuiteData()
+	traceSuiteData.GetTestCases(t, &in, &out)
+
+	// Load needed statistics.
+	for _, tt := range in {
+		sql := "explain select * from t where " + tt
+		tk.MustExec(sql)
+	}
+	statsHandle := dom.StatsHandle()
+	err := statsHandle.LoadNeededHistograms()
+	require.NoError(t, err)
+
+	sctx := tk.Session().(sessionctx.Context)
+	stmtCtx := sctx.GetSessionVars().StmtCtx
+	is := sctx.GetInfoSchema().(infoschema.InfoSchema)
+	p := parser.New()
+	for i, expr := range in {
+		sql := "explain select * from t where " + expr
+		stmtCtx.EnableOptimizerCETrace = true
+		stmtCtx.OptimizerCETrace = nil
+		stmt, err := p.ParseOneStmt(sql, "", "")
+		require.NoError(t, err)
+		_, _, err = plannercore.OptimizeAstNode(context.Background(), sctx, stmt, is)
+		require.NoError(t, err)
+
+		testdata.OnRecord(func() {
+			out[i].Expr = expr
+			out[i].Trace = sctx.GetSessionVars().StmtCtx.OptimizerCETrace
+		})
+		require.Equal(t, sctx.GetSessionVars().StmtCtx.OptimizerCETrace, out[i].Trace)
+	}
+}
diff --git a/util/tracing/opt_trace.go b/util/tracing/opt_trace.go
index d4de9a66f4dd4..1e401d59b57f1 100644
--- a/util/tracing/opt_trace.go
+++ b/util/tracing/opt_trace.go
@@ -81,3 +81,12 @@ type LogicalRuleOptimizeTraceStep struct {
 	ID     int    `json:"id"`
 	TP     string `json:"type"`
 }
+
+// CETraceRecord records an expression and related cardinality estimation result.
+type CETraceRecord struct {
+	TableID   int64
+	TableName string
+	Type      string
+	Expr      string
+	RowCount  uint64
+}