pingcap · ti-chi-bot · Oct 29, 2024
diff --git a/pkg/planner/cardinality/row_size.go b/pkg/planner/cardinality/row_size.go
@@ -54,6 +54,8 @@ func GetTableAvgRowSize(ctx sessionctx.Context, coll *statistics.HistColl, cols
 			size += 8 /* row_id length */
 		}
 	}
+	// Avoid errors related to size less than zero
+	size = max(0, size)
 	return
 }
 
@@ -80,6 +82,8 @@ func GetAvgRowSize(ctx sessionctx.Context, coll *statistics.HistColl, cols []*ex
 			}
 		}
 	}
+	// Avoid errors related to size less than zero
+	size = max(0, size)
 	if sessionVars.EnableChunkRPC && !isForScan {
 		// Add 1/8 byte for each column's nullBitMap byte.
 		return size + float64(len(cols))/8
@@ -106,8 +110,13 @@ func GetAvgRowSizeListInDisk(coll *statistics.HistColl, cols []*expression.Colum
 			size += AvgColSizeListInDisk(colHist, coll.RealtimeCount)
 		}
 	}
+<<<<<<< HEAD
 	// Add 8 byte for each column's size record. See `ListInDisk` for details.
 	return size + float64(8*len(cols))
+=======
+	// Add 8 byte for each column's size record. See `DataInDiskByRows` for details.
+	return max(0, size+float64(8*len(cols)))
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
 }
 
 // AvgColSize is the average column size of the histogram. These sizes are derived from function `encode`
@@ -126,7 +135,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 {
 	histCount := c.TotalRowCount()
 	notNullRatio := 1.0
 	if histCount > 0 {
-		notNullRatio = 1.0 - float64(c.NullCount)/histCount
+		notNullRatio = max(0, 1.0-float64(c.NullCount)/histCount)
 	}
 	switch c.Histogram.Tp.GetType() {
 	case mysql.TypeFloat, mysql.TypeDouble, mysql.TypeDuration, mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
@@ -137,7 +146,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 {
 		}
 	}
 	// Keep two decimal place.
-	return math.Round(float64(c.TotColSize)/float64(count)*100) / 100
+	return max(0, math.Round(float64(c.TotColSize)/float64(count)*100)/100)
 }
 
 // AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode`
@@ -147,17 +156,17 @@ func AvgColSizeChunkFormat(c *statistics.Column, count int64) float64 {
 		return 0
 	}
 	fixedLen := chunk.GetFixedLen(c.Histogram.Tp)
-	if fixedLen != -1 {
+	if fixedLen >= 0 {
 		return float64(fixedLen)
 	}
 	// Keep two decimal place.
 	// Add 8 bytes for unfixed-len type's offsets.
 	// Minus Log2(avgSize) for unfixed-len type LEN.
 	avgSize := float64(c.TotColSize) / float64(count)
 	if avgSize < 1 {
-		return math.Round(avgSize*100)/100 + 8
+		return max(0, math.Round(avgSize*100)/100) + 8
 	}
-	return math.Round((avgSize-math.Log2(avgSize))*100)/100 + 8
+	return max(0, math.Round((avgSize-math.Log2(avgSize))*100)/100) + 8
 }
 
 // AvgColSizeListInDisk is the average column size of the histogram. These sizes are derived
@@ -172,14 +181,14 @@ func AvgColSizeListInDisk(c *statistics.Column, count int64) float64 {
 		notNullRatio = 1.0 - float64(c.NullCount)/histCount
 	}
 	size := chunk.GetFixedLen(c.Histogram.Tp)
-	if size != -1 {
+	if size >= 0 {
 		return float64(size) * notNullRatio
 	}
 	// Keep two decimal place.
 	// Minus Log2(avgSize) for unfixed-len type LEN.
 	avgSize := float64(c.TotColSize) / float64(count)
 	if avgSize < 1 {
-		return math.Round((avgSize)*100) / 100
+		return max(0, math.Round((avgSize)*100)/100)
 	}
 	return math.Round((avgSize-math.Log2(avgSize))*100) / 100
 }
diff --git a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json
@@ -470,12 +470,12 @@
         "Plan": [
           "Projection 0.00 root  test_partition.t1.id, test_partition.t1.a, test_partition.t1.b, test_partition.t2.id, test_partition.t2.a, test_partition.t2.b",
           "└─HashJoin 0.00 root  CARTESIAN inner join",
-          "  ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
-          "  │ └─Selection 0.00 cop[tikv]  eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)",
-          "  │   └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
-          "  └─TableReader(Probe) 0.01 root partition:p0 data:Selection",
-          "    └─Selection 0.01 cop[tikv]  eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))",
-          "      └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
+          "  ├─TableReader(Build) 0.01 root partition:p0 data:Selection",
+          "  │ └─Selection 0.01 cop[tikv]  eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))",
+          "  │   └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
+          "  └─TableReader(Probe) 0.00 root partition:p1 data:Selection",
+          "    └─Selection 0.00 cop[tikv]  eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)",
+          "      └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
         ],
         "IndexPlan": [
           "HashJoin 0.03 root  CARTESIAN inner join",

diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json
@@ -130,6 +130,7 @@
         "Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
         "Result": [
           "HashJoin 0.33 root  inner join, equal:[eq(test.tp.c, test.t2.a)]",
+<<<<<<< HEAD
           "├─IndexJoin(Build) 0.33 root  inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
           "│ ├─TableReader(Build) 0.33 root  data:Selection",
           "│ │ └─Selection 0.33 cop[tikv]  gt(test.t.b, 10), not(isnull(test.t.b))",
@@ -140,6 +141,18 @@
           "│   └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[ic:allEvicted, c:allEvicted]",
           "└─TableReader(Probe) 1.00 root  data:TableRangeScan",
           "  └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
+=======
+          "├─TableReader(Build) 1.00 root  data:TableRangeScan",
+          "│ └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]",
+          "└─IndexJoin(Probe) 0.33 root  inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
+          "  ├─TableReader(Build) 0.33 root  data:Selection",
+          "  │ └─Selection 0.33 cop[tikv]  gt(test.t.b, 10), not(isnull(test.t.b))",
+          "  │   └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
+          "  └─IndexLookUp(Probe) 0.33 root partition:p0 ",
+          "    ├─Selection(Build) 0.33 cop[tikv]  gt(test.tp.c, 10), not(isnull(test.tp.c))",
+          "    │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
+          "    └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]"
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
         ]
       }
     ]

diff --git a/pkg/planner/core/casetest/testdata/integration_suite_out.json b/pkg/planner/core/casetest/testdata/integration_suite_out.json
@@ -356,6 +356,7 @@
       {
         "SQL": "explain format = 'verbose' select (2) in (select /*+ read_from_storage(tiflash[t1]) */ count(*) from t1) from (select t.b < (select /*+ read_from_storage(tiflash[t2]) */ t.b from t2 limit 1 )  from t3 t) t; -- we do generate the agg pushed-down plan of mpp, but cost-cmp failed",
         "Plan": [
+<<<<<<< HEAD
           "HashJoin_19 3.00 65286.81 root  CARTESIAN left outer semi join",
           "├─Selection_35(Build) 0.80 31149.25 root  eq(2, Column#18)",
           "│ └─StreamAgg_42 1.00 31099.35 root  funcs:count(1)->Column#18",
@@ -371,6 +372,16 @@
           "  │         └─TableFullScan_31 1.00 464046.40 mpp[tiflash] table:t2 keep order:false",
           "  └─IndexReader_25(Probe) 3.00 53.37 root  index:IndexFullScan_24",
           "    └─IndexFullScan_24 3.00 610.50 cop[tikv] table:t, index:c(b) keep order:false"
+=======
+          "HashJoin_17 3.00 32781.07 root  CARTESIAN left outer semi join",
+          "├─Selection_22(Build) 0.80 31149.25 root  eq(2, Column#18)",
+          "│ └─StreamAgg_29 1.00 31099.35 root  funcs:count(1)->Column#18",
+          "│   └─TableReader_41 3.00 30949.65 root  MppVersion: 2, data:ExchangeSender_40",
+          "│     └─ExchangeSender_40 3.00 464139.20 mpp[tiflash]  ExchangeType: PassThrough",
+          "│       └─TableFullScan_39 3.00 464139.20 mpp[tiflash] table:t1 keep order:false",
+          "└─IndexReader_21(Probe) 3.00 53.37 root  index:IndexFullScan_20",
+          "  └─IndexFullScan_20 3.00 610.50 cop[tikv] table:t, index:c(b) keep order:false"
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
         ]
       },
       {

diff --git a/pkg/planner/core/plan_cost_ver1.go b/pkg/planner/core/plan_cost_ver1.go
@@ -1276,10 +1276,10 @@ func getCardinality(operator PhysicalPlan, costFlag uint64) float64 {
 		if actualProbeCnt == 0 {
 			return 0
 		}
-		return getOperatorActRows(operator) / float64(actualProbeCnt)
+		return max(0, getOperatorActRows(operator)/float64(actualProbeCnt))
 	}
 	rows := operator.StatsCount()
-	if rows == 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 {
+	if rows <= 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 {
 		// 0 est-row can lead to 0 operator cost which makes plan choice unstable.
 		rows = 1
 	}

diff --git a/pkg/planner/core/plan_cost_ver2.go b/pkg/planner/core/plan_cost_ver2.go
@@ -121,6 +121,7 @@ func (p *PhysicalProjection) getPlanCostVer2(taskType property.TaskType, option
 	return p.planCostVer2, nil
 }
 
+<<<<<<< HEAD
 // getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
 // plan-cost = rows * log2(row-size) * scan-factor
 // log2(row-size) is from experiments.
@@ -150,17 +151,101 @@ func (p *PhysicalTableScan) getPlanCostVer2(taskType property.TaskType, option *
 	var rowSize float64
 	if p.StoreType == kv.TiKV {
 		rowSize = getAvgRowSize(p.StatsInfo(), p.tblCols) // consider all columns if TiKV
+=======
+const (
+	// MinNumRows provides a minimum to avoid underestimation. As selectivity estimation approaches
+	// zero, all plan choices result in a low cost - making it difficult to differentiate plan choices.
+	// A low value of 1.0 is used here for most (non-probe) accesses to reduce this risk.
+	MinNumRows = 1.0
+	// MinRowSize provides a minimum row size to ensure that any adjustment or calculation
+	// in costing does not go below this value. 2.0 is used as a reasonable lowest row size.
+	MinRowSize = 2.0
+	// TiFlashStartupRowPenalty is the row count charged as a startup penalty for TiFlash scans to encourage TiKV usage for small scans.
+	TiFlashStartupRowPenalty = 10000
+	// MaxPenaltyRowCount caps the extra row count charged as a penalty for high-risk full scans.
+	MaxPenaltyRowCount = 1000
+)
+
+// GetPlanCostVer2 returns the plan-cost of this index scan and caches it on the plan. The formula is:
+// plan-cost = rows * log2(row-size) * scan-factor
+// log2(row-size) is from experiments.
+func (p *PhysicalIndexScan) GetPlanCostVer2(taskType property.TaskType, option *optimizetrace.PlanCostOption) (costusage.CostVer2, error) {
+	if p.PlanCostInit && !hasCostFlag(option.CostFlag, costusage.CostFlagRecalculate) {
+		return p.PlanCostVer2, nil // reuse the cached cost unless recalculation is requested
+	}
+
+	rows := getCardinality(p, option.CostFlag)
+	rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns) // consider all index columns
+	scanFactor := getTaskScanFactorVer2(p, kv.TiKV, taskType) // the store type passed here is always TiKV
+
+	p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
+	p.PlanCostInit = true // mark the cached cost as valid for later calls
+	return p.PlanCostVer2, nil
+}
+
+// GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
+// plan-cost = rows * log2(row-size) * scan-factor
+// log2(row-size) is from experiments.
+func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *optimizetrace.PlanCostOption) (costusage.CostVer2, error) {
+	if p.PlanCostInit && !hasCostFlag(option.CostFlag, costusage.CostFlagRecalculate) {
+		return p.PlanCostVer2, nil
+	}
+
+	var columns []*expression.Column
+	if p.StoreType == kv.TiKV { // Assume all columns for TiKV
+		columns = p.tblCols
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
 	} else { // TiFlash
 		rowSize = getAvgRowSize(p.StatsInfo(), p.schema.Columns)
 	}
+<<<<<<< HEAD
 	rowSize = math.Max(rowSize, 2.0)
+=======
+	rows := getCardinality(p, option.CostFlag)
+	rowSize := getAvgRowSize(p.StatsInfo(), columns)
+	// Ensure rows and rowSize have a reasonable minimum value to avoid underestimation
+	if !p.isChildOfIndexLookUp {
+		rows = max(MinNumRows, rows)
+		rowSize = max(rowSize, MinRowSize)
+	}
+
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
 	scanFactor := getTaskScanFactorVer2(p, p.StoreType, taskType)
 
 	p.planCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
 
 	// give TiFlash a start-up cost to let the optimizer prefers to use TiKV to process small table scans.
 	if p.StoreType == kv.TiFlash {
+<<<<<<< HEAD
 		p.planCostVer2 = sumCostVer2(p.planCostVer2, scanCostVer2(option, 10000, rowSize, scanFactor))
+=======
+		p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, TiFlashStartupRowPenalty, rowSize, scanFactor))
+	} else {
+		// Apply cost penalty for full scans that carry high risk of underestimation
+		sessionVars := p.SCtx().GetSessionVars()
+		allowPreferRangeScan := sessionVars.GetAllowPreferRangeScan()
+		tblColHists := p.tblColHists
+
+		// preferRangeScan check here is same as in skylinePruning
+		preferRangeScanCondition := allowPreferRangeScan && (tblColHists.Pseudo || tblColHists.RealtimeCount < 1)
+		// hasHighModifyCount tracks the high risk of a tablescan where auto-analyze had not yet updated the table row count
+		hasHighModifyCount := tblColHists.ModifyCount > tblColHists.RealtimeCount
+		// hasLowEstimate is a check to capture a unique customer case where modifyCount is used for tablescan estimate (but it is not adequately understood why)
+		hasLowEstimate := rows > 1 && int64(rows) < tblColHists.RealtimeCount && int64(rows) <= tblColHists.ModifyCount
+		var unsignedIntHandle bool
+		if p.Table.PKIsHandle {
+			if pkColInfo := p.Table.GetPkColInfo(); pkColInfo != nil {
+				unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
+			}
+		}
+		hasFullRangeScan := !p.isChildOfIndexLookUp && ranger.HasFullRange(p.Ranges, unsignedIntHandle)
+
+		shouldApplyPenalty := hasFullRangeScan && (preferRangeScanCondition || hasHighModifyCount || hasLowEstimate)
+		if shouldApplyPenalty {
+			newRowCount := math.Min(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
+			p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, newRowCount, rowSize, scanFactor))
+		}
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
 	}
 
 	p.planCostInit = true
@@ -201,7 +286,7 @@ func (p *PhysicalTableReader) getPlanCostVer2(taskType property.TaskType, option
 	}
 
 	rows := getCardinality(p.tablePlan, option.CostFlag)
-	rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns)
+	rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.schema.Columns))
 	netFactor := getTaskNetFactorVer2(p, taskType)
 	concurrency := float64(p.SCtx().GetSessionVars().DistSQLScanConcurrency())
 	childType := property.CopSingleReadTaskType
@@ -361,8 +446,13 @@ func (p *PhysicalSort) getPlanCostVer2(taskType property.TaskType, option *PlanC
 		return p.planCostVer2, nil
 	}
 
+<<<<<<< HEAD
 	rows := math.Max(getCardinality(p.children[0], option.CostFlag), 1)
 	rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
+=======
+	rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+	rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 	memFactor := getTaskMemFactorVer2(p, taskType)
 	diskFactor := defaultVer2Factors.TiDBDisk
@@ -409,14 +499,18 @@ func (p *PhysicalTopN) getPlanCostVer2(taskType property.TaskType, option *PlanC
 		return p.planCostVer2, nil
 	}
 
+<<<<<<< HEAD
 	rows := getCardinality(p.children[0], option.CostFlag)
+=======
+	rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
 	n := max(1, float64(p.Count+p.Offset))
 	if n > 10000 {
 		// It's only used to prevent some extreme cases, e.g. `select * from t order by a limit 18446744073709551615`.
 		// For normal cases, considering that `rows` may be under-estimated, better to keep `n` unchanged.
 		n = min(n, rows)
 	}
-	rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
+	rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 	memFactor := getTaskMemFactorVer2(p, taskType)
 
@@ -465,9 +559,15 @@ func (p *PhysicalHashAgg) getPlanCostVer2(taskType property.TaskType, option *Pl
 		return p.planCostVer2, nil
 	}
 
+<<<<<<< HEAD
 	inputRows := getCardinality(p.children[0], option.CostFlag)
 	outputRows := getCardinality(p, option.CostFlag)
 	outputRowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
+=======
+	inputRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+	outputRows := max(MinNumRows, getCardinality(p, option.CostFlag))
+	outputRowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 	memFactor := getTaskMemFactorVer2(p, taskType)
 	concurrency := float64(p.SCtx().GetSessionVars().HashAggFinalConcurrency())
@@ -497,8 +597,13 @@ func (p *PhysicalMergeJoin) getPlanCostVer2(taskType property.TaskType, option *
 		return p.planCostVer2, nil
 	}
 
+<<<<<<< HEAD
 	leftRows := getCardinality(p.children[0], option.CostFlag)
 	rightRows := getCardinality(p.children[1], option.CostFlag)
+=======
+	leftRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+	rightRows := max(MinNumRows, getCardinality(p.Children()[1], option.CostFlag))
+>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 
 	filterCost := sumCostVer2(filterCostVer2(option, leftRows, p.LeftConditions, cpuFactor),
@@ -536,9 +641,9 @@ func (p *PhysicalHashJoin) getPlanCostVer2(taskType property.TaskType, option *P
 		build, probe = probe, build
 		buildFilters, probeFilters = probeFilters, buildFilters
 	}
-	buildRows := getCardinality(build, option.CostFlag)
+	buildRows := max(MinNumRows, getCardinality(build, option.CostFlag))
 	probeRows := getCardinality(probe, option.CostFlag)
-	buildRowSize := getAvgRowSize(build.StatsInfo(), build.Schema().Columns)
+	buildRowSize := max(MinRowSize, getAvgRowSize(build.StatsInfo(), build.Schema().Columns))
 	tidbConcurrency := float64(p.Concurrency)
 	mppConcurrency := float64(3) // TODO: remove this empirical value
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)