-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
*: provide a variable to ignore the real-time stats in the planner #43988
Changes from all commits
dfa13f3
f64da67
deaad60
c7bb788
0bd929b
5eecce8
a67f82a
3e4219c
32ed12e
1f76745
f6c9fe0
62fb635
58fb333
824a815
01bd74b
0d60e44
06ad390
5cbb64f
009ba15
9193a5e
c9a0747
bb918a1
099507e
d8aadaa
d9aabd6
25fb2de
5cd26fb
e7a2d6c
39ceefd
7cbd3e2
ddd96b4
5363b99
9a0290c
b3cd610
959baeb
67ece7d
48a41af
e4d66a2
a5866d3
4a7b4f3
769aa9b
5a28685
ad15c60
56352a9
823c149
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4695,6 +4695,7 @@ | |
// 1. tidb-server started and statistics handle has not been initialized. | ||
// 2. table row count from statistics is zero. | ||
// 3. statistics is outdated. | ||
// Note: please also update getLatestVersionFromStatsTable() when logic in this function changes. | ||
func getStatsTable(ctx sessionctx.Context, tblInfo *model.TableInfo, pid int64) *statistics.Table { | ||
statsHandle := domain.GetDomain(ctx).StatsHandle() | ||
var usePartitionStats, countIs0, pseudoStatsForUninitialized, pseudoStatsForOutdated bool | ||
|
@@ -4717,7 +4718,7 @@ | |
} | ||
// 1. tidb-server started and statistics handle has not been initialized. | ||
if statsHandle == nil { | ||
return statistics.PseudoTable(tblInfo) | ||
return statistics.PseudoTable(tblInfo, false) | ||
} | ||
|
||
if pid == tblInfo.ID || ctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune() { | ||
|
@@ -4727,11 +4728,35 @@ | |
statsTbl = statsHandle.GetPartitionStats(tblInfo, pid, cache.WithTableStatsByQuery()) | ||
} | ||
|
||
allowPseudoTblTriggerLoading := false | ||
// In OptObjectiveDeterminate mode, we need to ignore the real-time stats. | ||
// To achieve this, we copy the statsTbl and reset the real-time stats fields (set ModifyCount to 0 and set | ||
// RealtimeCount to the row count from the ANALYZE, which is fetched from loaded stats in GetAnalyzeRowCount()). | ||
if ctx.GetSessionVars().GetOptObjective() == variable.OptObjectiveDeterminate { | ||
analyzeCount := max(int64(statsTbl.GetAnalyzeRowCount()), 0) | ||
// If the two fields are already the values we want, we don't need to modify it, and also we don't need to copy. | ||
if statsTbl.RealtimeCount != analyzeCount || statsTbl.ModifyCount != 0 { | ||
// Here is a case that needs special care: | ||
// The original stats table from the stats cache is not a pseudo table, but the analyze row count is 0 (probably | ||
// because no col/idx stats are loaded), which will make it a pseudo table according to rule 2 below. | ||
// Normally, a pseudo table won't trigger stats loading since we assume it means "no stats available", but | ||
// in this case, we need it to be able to trigger stats loading. | ||
// That's why we use the special allowPseudoTblTriggerLoading flag here. | ||
if !statsTbl.Pseudo && statsTbl.RealtimeCount > 0 && analyzeCount == 0 { | ||
allowPseudoTblTriggerLoading = true | ||
} | ||
// Copy it so we can modify the ModifyCount and the RealtimeCount safely. | ||
statsTbl = statsTbl.ShallowCopy() | ||
statsTbl.RealtimeCount = analyzeCount | ||
statsTbl.ModifyCount = 0 | ||
} | ||
} | ||
|
||
// 2. table row count from statistics is zero. | ||
if statsTbl.RealtimeCount == 0 { | ||
countIs0 = true | ||
core_metrics.PseudoEstimationNotAvailable.Inc() | ||
return statistics.PseudoTable(tblInfo) | ||
return statistics.PseudoTable(tblInfo, allowPseudoTblTriggerLoading) | ||
} | ||
|
||
// 3. statistics is uninitialized or outdated. | ||
|
@@ -4751,6 +4776,44 @@ | |
return statsTbl | ||
} | ||
|
||
// getLatestVersionFromStatsTable gets statistics information for the table specified by tblInfo and pid, and returns the max | ||
// LastUpdateVersion among all Columns and Indices in it. | ||
// Its overall logic is quite similar to getStatsTable(). During plan cache matching, only the latest version is needed. | ||
// In such case, compared to getStatsTable(), this function can save some copies, memory allocations and unnecessary | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it possible to merge these two funcs in the future? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I also wanted to merge these two functions in this PR, but I couldn't come up with a graceful way to merge them. |
||
// checks. Also, this function won't trigger metrics changes. | ||
func getLatestVersionFromStatsTable(ctx sessionctx.Context, tblInfo *model.TableInfo, pid int64) (version uint64) { | ||
statsHandle := domain.GetDomain(ctx).StatsHandle() | ||
// 1. tidb-server started and statistics handle has not been initialized. Pseudo stats table. | ||
if statsHandle == nil { | ||
return 0 | ||
} | ||
|
||
var statsTbl *statistics.Table | ||
if pid == tblInfo.ID || ctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune() { | ||
statsTbl = statsHandle.GetTableStats(tblInfo, cache.WithTableStatsByQuery()) | ||
} else { | ||
statsTbl = statsHandle.GetPartitionStats(tblInfo, pid, cache.WithTableStatsByQuery()) | ||
} | ||
|
||
// 2. Table row count from statistics is zero. Pseudo stats table. | ||
realtimeRowCount := statsTbl.RealtimeCount | ||
if ctx.GetSessionVars().GetOptObjective() == variable.OptObjectiveDeterminate { | ||
realtimeRowCount = max(int64(statsTbl.GetAnalyzeRowCount()), 0) | ||
} | ||
if realtimeRowCount == 0 { | ||
return 0 | ||
} | ||
|
||
// 3. Not pseudo stats table. Return the max LastUpdateVersion among all Columns and Indices | ||
for _, col := range statsTbl.Columns { | ||
version = max(version, col.LastUpdateVersion) | ||
} | ||
for _, idx := range statsTbl.Indices { | ||
version = max(version, idx.LastUpdateVersion) | ||
} | ||
return version | ||
} | ||
|
||
func (b *PlanBuilder) tryBuildCTE(ctx context.Context, tn *ast.TableName, asName *model.CIStr) (LogicalPlan, error) { | ||
for i := len(b.outerCTEs) - 1; i >= 0; i-- { | ||
cte := b.outerCTEs[i] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We will have many kinds of copy for
statistics.Table
. Sometimes we use a deep copy; sometimes we need a deep copy except for some statistics of columns or indexes. So we need a name that is easier to understand.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have any suggestions?
For this case, I think
ShallowCopy
indicates its nature and differentiates it from Copy
. I also added comments.