*: restrict column range mem usage (#37714)
ref #37176
xuyifangreeneyes authored Sep 9, 2022
1 parent 663c06a commit 001abf4
Showing 15 changed files with 307 additions and 52 deletions.
10 changes: 7 additions & 3 deletions planner/core/exhaust_physical_plans.go
@@ -1247,7 +1247,9 @@ func (cwc *ColWithCmpFuncManager) BuildRangesByRow(ctx sessionctx.Context, row c
}
exprs = append(exprs, newExpr) // nozero
}
ranges, err := ranger.BuildColumnRange(exprs, ctx, cwc.TargetCol.RetType, cwc.colLength)
// TODO: We already limit range mem usage when building template ranges for the inner table of IndexJoin in the
// optimizer phase, so we don't need and shouldn't limit range mem usage when we refill inner ranges during the
// execution phase.
ranges, _, _, err := ranger.BuildColumnRange(exprs, ctx, cwc.TargetCol.RetType, cwc.colLength, 0)
if err != nil {
return nil, err
}
@@ -1495,7 +1497,8 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *util.AccessPath
var ranges, nextColRange []*ranger.Range
var err error
if len(colAccesses) > 0 {
nextColRange, err = ranger.BuildColumnRange(colAccesses, ijHelper.join.ctx, lastPossibleCol.RetType, path.IdxColLens[lastColPos])
// TODO: restrict the mem usage of column ranges
nextColRange, _, _, err = ranger.BuildColumnRange(colAccesses, ijHelper.join.ctx, lastPossibleCol.RetType, path.IdxColLens[lastColPos], 0)
if err != nil {
return false, err
}
@@ -1607,7 +1610,8 @@ func (ijHelper *indexJoinBuildHelper) buildTemplateRange(matchedKeyCnt int, eqAn
continue
}
exprs := []expression.Expression{eqAndInFuncs[j]}
oneColumnRan, err := ranger.BuildColumnRange(exprs, ijHelper.join.ctx, ijHelper.curNotUsedIndexCols[j].RetType, ijHelper.curNotUsedColLens[j])
// TODO: restrict the mem usage of column ranges
oneColumnRan, _, _, err := ranger.BuildColumnRange(exprs, ijHelper.join.ctx, ijHelper.curNotUsedIndexCols[j].RetType, ijHelper.curNotUsedColLens[j], 0)
if err != nil {
return nil, false, err
}
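The new calling convention for ranger.BuildColumnRange (and BuildTableRange below) is only visible here through its call sites: it now takes a rangeMaxSize budget as its last argument, with 0 apparently meaning no limit, and returns the ranges together with the access conditions the ranges still cover and the conditions left over after a fallback. The standalone sketch below only models that inferred contract — the names buildColumnRange, memUsage, and the flat per-range cost are placeholders, not TiDB APIs.

package main

import "fmt"

// Range is a stand-in for ranger.Range, used only to illustrate the assumed budget/fallback contract.
type Range struct{ Low, High int }

func (r Range) memUsage() int64 { return 16 } // assumed flat per-range cost

// buildColumnRange models the inferred contract: build one point range per value; once the accumulated
// memory would exceed a non-zero rangeMaxSize, fall back to the full range and hand every condition
// back as "remained" so the caller can re-apply it as a filter.
func buildColumnRange(points []int, rangeMaxSize int64) (ranges []Range, accessConds, remainedConds []int) {
	var mem int64
	for _, p := range points {
		mem += Range{p, p}.memUsage()
		if rangeMaxSize > 0 && mem > rangeMaxSize {
			return []Range{{Low: -1 << 31, High: 1<<31 - 1}}, nil, points
		}
		ranges = append(ranges, Range{p, p})
		accessConds = append(accessConds, p)
	}
	return ranges, accessConds, nil
}

func main() {
	// rangeMaxSize = 0, as the execution-phase callers in this file pass: no limit, exact ranges.
	exact, _, _ := buildColumnRange([]int{10, 20, 30}, 0)
	fmt.Println("exact:", exact)

	// A tight budget triggers the fallback to the full range.
	full, access, remained := buildColumnRange([]int{10, 20, 30}, 32)
	fmt.Println("fallback:", full, "access:", access, "remained:", remained)
}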
4 changes: 2 additions & 2 deletions planner/core/find_best_task.go
@@ -1737,8 +1737,8 @@ func (ds *DataSource) crossEstimateRowCount(path *util.AccessPath, conds []expre
if len(accessConds) == 0 {
return 0, false, corr
}
ranges, err := ranger.BuildColumnRange(accessConds, ds.ctx, col.RetType, types.UnspecifiedLength)
if len(ranges) == 0 || err != nil {
ranges, accessConds, _, err := ranger.BuildColumnRange(accessConds, ds.ctx, col.RetType, types.UnspecifiedLength, ds.ctx.GetSessionVars().RangeMaxSize)
if len(ranges) == 0 || len(accessConds) == 0 || err != nil {
return 0, err == nil, corr
}
idxID, idxExists := ds.stats.HistColl.ColID2IdxID[colID]
53 changes: 53 additions & 0 deletions planner/core/integration_test.go
@@ -7244,3 +7244,56 @@ func TestEnableTiFlashReadForWriteStmt(t *testing.T) {
rs = tk.MustQuery("explain update t set a=a+1 where b in (select a from t2 where t.a > t2.a)").Rows()
checkMpp(rs)
}

func TestTableRangeFallback(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)

tk.MustExec("use test")
tk.MustExec("drop table if exists t1, t2")
tk.MustExec("create table t1 (a int primary key, b int)")
tk.MustExec("create table t2 (c int)")
tk.MustQuery("explain format='brief' select * from t1 where a in (10, 20, 30, 40, 50) and b > 1").Check(testkit.Rows(
"Selection 1.67 root gt(test.t1.b, 1)",
"└─Batch_Point_Get 5.00 root table:t1 handle:[10 20 30 40 50], keep order:false, desc:false"))
tk.MustQuery("explain format='brief' select * from t1 join t2 on t1.b = t2.c where t1.a in (10, 20, 30, 40, 50)").Check(testkit.Rows(
"HashJoin 6.24 root inner join, equal:[eq(test.t1.b, test.t2.c)]",
"├─Selection(Build) 5.00 root not(isnull(test.t1.b))",
"│ └─Batch_Point_Get 5.00 root table:t1 handle:[10 20 30 40 50], keep order:false, desc:false",
"└─TableReader(Probe) 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t2.c))",
" └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"))
tk.MustExec("set @@tidb_opt_range_max_size=10")
tk.MustQuery("explain format='brief' select * from t1 where a in (10, 20, 30, 40, 50) and b > 1").Check(testkit.Rows(
"TableReader 8000.00 root data:Selection",
"└─Selection 8000.00 cop[tikv] gt(test.t1.b, 1), in(test.t1.a, 10, 20, 30, 40, 50)",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"))
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 10 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
tk.MustQuery("explain format='brief' select * from t1 join t2 on t1.b = t2.c where t1.a in (10, 20, 30, 40, 50)").Check(testkit.Rows(
"HashJoin 10000.00 root inner join, equal:[eq(test.t1.b, test.t2.c)]",
"├─TableReader(Build) 8000.00 root data:Selection",
"│ └─Selection 8000.00 cop[tikv] not(isnull(test.t2.c))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 8000.00 root data:Selection",
" └─Selection 8000.00 cop[tikv] in(test.t1.a, 10, 20, 30, 40, 50), not(isnull(test.t1.b))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"))
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 10 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
}

func TestPlanCacheForTableRangeFallback(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)

tk.MustExec("set @@tidb_enable_prepared_plan_cache=1")
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a int primary key, b int)")
tk.MustExec("set @@tidb_opt_range_max_size=10")
tk.MustExec("prepare stmt from 'select * from t where a in (?, ?, ?, ?, ?) and b > 1'")
tk.MustExec("set @a=10, @b=20, @c=30, @d=40, @e=50")
tk.MustExec("execute stmt using @a, @b, @c, @d, @e")
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 10 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
tk.MustExec("execute stmt using @a, @b, @c, @d, @e")
// The plan with range fallback is not cached.
tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("0"))
}
4 changes: 3 additions & 1 deletion planner/core/logical_plans.go
@@ -1459,7 +1459,9 @@ func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expres
path.CountAfterAccess = 1
return nil
}
path.Ranges, err = ranger.BuildTableRange(path.AccessConds, ds.ctx, pkCol.RetType)
var remainedConds []expression.Expression
path.Ranges, path.AccessConds, remainedConds, err = ranger.BuildTableRange(path.AccessConds, ds.ctx, pkCol.RetType, ds.ctx.GetSessionVars().RangeMaxSize)
path.TableFilters = append(path.TableFilters, remainedConds...)
if err != nil {
return err
}
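The change to deriveTablePathStats above shows where the fallback keeps its correctness: whatever conditions the (possibly widened) ranges no longer enforce are appended to path.TableFilters and evaluated later. A tiny standalone illustration of that re-application, with ints standing in for expressions:

package main

import "fmt"

func main() {
	rows := []int{5, 10, 20, 30, 40, 99}
	inList := map[int]bool{10: true, 20: true, 30: true}

	// Exact point ranges would fetch only 10, 20 and 30. After a fallback the access "range" is the whole
	// table, so the IN condition must survive as a table filter or the query would return extra rows.
	var result []int
	for _, r := range rows { // full-range scan after fallback
		if inList[r] { // remained condition re-applied as a TableFilter
			result = append(result, r)
		}
	}
	fmt.Println(result) // [10 20 30]: same answer, more rows scanned
}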
6 changes: 4 additions & 2 deletions planner/core/physical_plans.go
@@ -740,7 +740,9 @@ func (ts *PhysicalTableScan) IsPartition() (bool, int64) {
return ts.isPartition, ts.physicalTableID
}

// ResolveCorrelatedColumns resolves the correlated columns in range access
// ResolveCorrelatedColumns resolves the correlated columns in range access.
// We already limit range mem usage when building ranges in optimizer phase, so we don't need and shouldn't limit range
// mem usage when rebuilding ranges during the execution phase.
func (ts *PhysicalTableScan) ResolveCorrelatedColumns() ([]*ranger.Range, error) {
access := ts.AccessCondition
if ts.Table.IsCommonHandle {
@@ -761,7 +763,7 @@ func (ts *PhysicalTableScan) ResolveCorrelatedColumns() ([]*ranger.Range, error)
} else {
var err error
pkTP := ts.Table.GetPkColInfo().FieldType
ts.Ranges, err = ranger.BuildTableRange(access, ts.SCtx(), &pkTP)
ts.Ranges, _, _, err = ranger.BuildTableRange(access, ts.SCtx(), &pkTP, 0)
if err != nil {
return nil, err
}
15 changes: 12 additions & 3 deletions planner/core/plan_cache.go
@@ -305,6 +305,15 @@ func RebuildPlan4CachedPlan(p Plan) error {
return rebuildRange(p)
}

// rebuildRange doesn't set a mem limit when building ranges. There are two reasons why we don't restrict range mem usage here.
// 1. The cached plan must be able to build complete ranges under the mem limit when it is generated. Hence we can just build
// ranges from x.AccessConditions. The only difference between the last ranges and the new ranges is the change of parameter
// values, which doesn't cause much change in the mem usage of complete ranges.
// 2. Different parameter values can change the mem usage of complete ranges. If we set a range mem limit here, range fallback
// may happen and cause correctness problems. For example, a in (?, ?, ?) is the access condition. When the plan is first
// generated, its complete ranges are ['a','a'], ['b','b'], ['c','c'], whose mem usage is under the range mem limit of 100B.
// When the cached plan is hit, the complete ranges may become ['aaa','aaa'], ['bbb','bbb'], ['ccc','ccc'], whose mem
// usage exceeds the range mem limit of 100B, so range fallback happens and TiDB may fetch more rows than users expect.
func rebuildRange(p Plan) error {
sctx := p.SCtx()
sc := p.SCtx().GetSessionVars().StmtCtx
@@ -371,7 +380,7 @@ func rebuildRange(p Plan) error {
}
}
if pkCol != nil {
ranges, err := ranger.BuildTableRange(x.AccessConditions, x.ctx, pkCol.RetType)
ranges, _, _, err := ranger.BuildTableRange(x.AccessConditions, x.ctx, pkCol.RetType, 0)
if err != nil {
return err
}
@@ -432,7 +441,7 @@ func rebuildRange(p Plan) error {
}
}
if pkCol != nil {
ranges, err := ranger.BuildTableRange(x.AccessConditions, x.ctx, pkCol.RetType)
ranges, _, _, err := ranger.BuildTableRange(x.AccessConditions, x.ctx, pkCol.RetType, 0)
if err != nil {
return err
}
@@ -562,7 +571,7 @@ func buildRangeForTableScan(sctx sessionctx.Context, ts *PhysicalTableScan) (err
}
}
if pkCol != nil {
ts.Ranges, err = ranger.BuildTableRange(ts.AccessCondition, sctx, pkCol.RetType)
ts.Ranges, _, _, err = ranger.BuildTableRange(ts.AccessCondition, sctx, pkCol.RetType, 0)
if err != nil {
return err
}
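The rebuildRange comment above argues that rebuilding ranges for a cached plan under a memory limit could flip a previously exact plan into a fallback just because the new parameters are longer. A rough standalone illustration of that hazard, with a made-up per-value cost that only approximates the real accounting in util/ranger:

package main

import (
	"fmt"
	"strings"
)

// pointRangeMem is an assumed estimate: each point range stores the value in both its low and high bound.
// The real accounting lives in util/ranger; these numbers are illustrative only.
func pointRangeMem(vals []string) (total int64) {
	for _, v := range vals {
		total += 2 * int64(len(v))
	}
	return total
}

func main() {
	const limit = 100 // stand-in for tidb_opt_range_max_size = 100 bytes

	first := []string{"a", "b", "c"} // parameters seen when the plan was generated and cached
	later := []string{strings.Repeat("a", 30), strings.Repeat("b", 30), strings.Repeat("c", 30)}

	fmt.Println(pointRangeMem(first) <= limit) // true: complete ranges fit, the plan gets cached
	fmt.Println(pointRangeMem(later) <= limit) // false: enforcing the limit here would silently widen the
	// ranges for the cached plan, which is why rebuildRange passes 0 (no limit) instead.
}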
3 changes: 2 additions & 1 deletion planner/core/stats.go
@@ -509,7 +509,8 @@ func (ts *LogicalTableScan) DeriveStats(_ []*property.StatsInfo, _ *expression.S
// ts.Handle could be nil if PK is Handle, and PK column has been pruned.
// TODO: support clustered index.
if ts.HandleCols != nil {
ts.Ranges, err = ranger.BuildTableRange(ts.AccessConds, ts.ctx, ts.HandleCols.GetCol(0).RetType)
// TODO: restrict mem usage of table ranges.
ts.Ranges, _, _, err = ranger.BuildTableRange(ts.AccessConds, ts.ctx, ts.HandleCols.GetCol(0).RetType, 0)
} else {
isUnsigned := false
if ts.Source.tableInfo.PKIsHandle {
14 changes: 14 additions & 0 deletions sessionctx/stmtctx/stmtctx.go
@@ -321,6 +321,9 @@ type StatementContext struct {
UseDynamicPruneMode bool
// ColRefFromPlan mark the column ref used by assignment in update statement.
ColRefFromUpdatePlan []int64

// RangeFallback indicates that building complete ranges exceeds the memory limit so it falls back to less accurate ranges such as full range.
RangeFallback bool
}

// StmtHints are SessionVars related sql hints.
@@ -986,6 +989,17 @@ func (sc *StatementContext) GetLockWaitStartTime() time.Time {
return time.Unix(0, startTime)
}

// RecordRangeFallback records range fallback.
func (sc *StatementContext) RecordRangeFallback(rangeMaxSize int64) {
// If range fallback happens, it means either the query is unreasonable (for example, several long IN lists) or
// tidb_opt_range_max_size is too small, and the generated plan is probably suboptimal. In that case we don't put it
// into the plan cache.
sc.SkipPlanCache = true
if !sc.RangeFallback {
sc.AppendWarning(errors.Errorf("Memory capacity of %v bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen", rangeMaxSize))
sc.RangeFallback = true
}
}

// UseDynamicPartitionPrune indicates whether dynamic partition is used during the query
func (sc *StatementContext) UseDynamicPartitionPrune() bool {
return sc.UseDynamicPruneMode
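RecordRangeFallback is the hook the range builder is expected to call once the budget is exceeded; the builder side isn't part of this diff, so the snippet below only mirrors the dedup-and-skip-cache behaviour on a cut-down struct to show what a caller gets: one warning per statement, and the plan is marked as not cacheable.

package main

import "fmt"

// stmtCtx mirrors only the fields RecordRangeFallback touches; the real type is sessionctx/stmtctx.StatementContext.
type stmtCtx struct {
	SkipPlanCache bool
	RangeFallback bool
	warnings      []string
}

func (sc *stmtCtx) RecordRangeFallback(rangeMaxSize int64) {
	sc.SkipPlanCache = true
	if !sc.RangeFallback {
		sc.warnings = append(sc.warnings, fmt.Sprintf("Memory capacity of %v bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen", rangeMaxSize))
		sc.RangeFallback = true
	}
}

func main() {
	sc := &stmtCtx{}
	// A statement may fall back on several columns or indexes, but the user sees a single warning
	// and the resulting plan is never cached.
	sc.RecordRangeFallback(10)
	sc.RecordRangeFallback(10)
	fmt.Println(len(sc.warnings), sc.SkipPlanCache) // 1 true
}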
2 changes: 1 addition & 1 deletion statistics/selectivity.go
@@ -499,7 +499,7 @@ func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, ran
switch rangeType {
case ranger.ColumnRangeType:
accessConds = ranger.ExtractAccessConditionsForColumn(exprs, cols[0])
ranges, err = ranger.BuildColumnRange(accessConds, ctx, cols[0].RetType, types.UnspecifiedLength)
ranges, accessConds, _, err = ranger.BuildColumnRange(accessConds, ctx, cols[0].RetType, types.UnspecifiedLength, ctx.GetSessionVars().RangeMaxSize)
case ranger.IndexRangeType:
if cachedPath != nil {
ranges, accessConds, remainedConds, isDNF = cachedPath.Ranges, cachedPath.AccessConds, cachedPath.TableFilters, cachedPath.IsDNFCond
3 changes: 2 additions & 1 deletion util/ranger/detacher.go
@@ -688,7 +688,8 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
firstColumnChecker.shouldReserve = d.lengths[0] != types.UnspecifiedLength
}
points := rb.build(item, collate.GetCollator(newTpSlice[0].GetCollate()))
ranges, err := points2Ranges(d.sctx, points, newTpSlice[0])
// TODO: restrict the mem usage of ranges
ranges, _, err := points2Ranges(d.sctx, points, newTpSlice[0], 0)
if err != nil {
return nil, nil, nil, false, errors.Trace(err)
}
18 changes: 9 additions & 9 deletions util/ranger/points.go
@@ -157,26 +157,26 @@ func getNotNullFullRange() []*point {

// FullIntRange is used for table ranges. Since a table range cannot accept MaxValueDatum as the max value,
// we need to set it to MaxInt64.
func FullIntRange(isUnsigned bool) []*Range {
func FullIntRange(isUnsigned bool) Ranges {
if isUnsigned {
return []*Range{{LowVal: []types.Datum{types.NewUintDatum(0)}, HighVal: []types.Datum{types.NewUintDatum(math.MaxUint64)}, Collators: collate.GetBinaryCollatorSlice(1)}}
return Ranges{{LowVal: []types.Datum{types.NewUintDatum(0)}, HighVal: []types.Datum{types.NewUintDatum(math.MaxUint64)}, Collators: collate.GetBinaryCollatorSlice(1)}}
}
return []*Range{{LowVal: []types.Datum{types.NewIntDatum(math.MinInt64)}, HighVal: []types.Datum{types.NewIntDatum(math.MaxInt64)}, Collators: collate.GetBinaryCollatorSlice(1)}}
return Ranges{{LowVal: []types.Datum{types.NewIntDatum(math.MinInt64)}, HighVal: []types.Datum{types.NewIntDatum(math.MaxInt64)}, Collators: collate.GetBinaryCollatorSlice(1)}}
}

// FullRange is [null, +∞) for Range.
func FullRange() []*Range {
return []*Range{{LowVal: []types.Datum{{}}, HighVal: []types.Datum{types.MaxValueDatum()}, Collators: collate.GetBinaryCollatorSlice(1)}}
func FullRange() Ranges {
return Ranges{{LowVal: []types.Datum{{}}, HighVal: []types.Datum{types.MaxValueDatum()}, Collators: collate.GetBinaryCollatorSlice(1)}}
}

// FullNotNullRange is (-∞, +∞) for Range.
func FullNotNullRange() []*Range {
return []*Range{{LowVal: []types.Datum{types.MinNotNullDatum()}, HighVal: []types.Datum{types.MaxValueDatum()}}}
func FullNotNullRange() Ranges {
return Ranges{{LowVal: []types.Datum{types.MinNotNullDatum()}, HighVal: []types.Datum{types.MaxValueDatum()}}}
}

// NullRange is [null, null] for Range.
func NullRange() []*Range {
return []*Range{{LowVal: []types.Datum{{}}, HighVal: []types.Datum{{}}, Collators: collate.GetBinaryCollatorSlice(1)}}
func NullRange() Ranges {
return Ranges{{LowVal: []types.Datum{{}}, HighVal: []types.Datum{{}}, Collators: collate.GetBinaryCollatorSlice(1)}}
}

// builder is the range builder struct.
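The only change in points.go is switching the full-range constructors from []*Range to the named slice type Ranges. A named slice lets the ranger package attach methods to a whole list of ranges; given the commit's goal, a memory-usage accessor is the obvious candidate, but that method set isn't shown in this diff, so the sketch below is an assumption about the motivation rather than the real API.

package main

import "fmt"

// Range is a pared-down stand-in for ranger.Range.
type Range struct {
	LowVal, HighVal []string
}

// Ranges mirrors the named slice type used above. MemUsage is an assumed motivation for the alias;
// the real method set is not part of this diff.
type Ranges []*Range

func (rs Ranges) MemUsage() (sum int64) {
	for _, r := range rs {
		for _, v := range r.LowVal {
			sum += int64(len(v))
		}
		for _, v := range r.HighVal {
			sum += int64(len(v))
		}
	}
	return sum
}

func main() {
	rs := Ranges{{LowVal: []string{"abc"}, HighVal: []string{"abc"}}}
	fmt.Println(rs.MemUsage()) // 6
}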