executor: support save partition stats in concurrency #38239
config/config.go

```diff
@@ -643,14 +643,16 @@ type Performance struct {
 	ProjectionPushDown bool   `toml:"projection-push-down" json:"projection-push-down"`
 	MaxTxnTTL          uint64 `toml:"max-txn-ttl" json:"max-txn-ttl"`
 	// Deprecated
-	MemProfileInterval       string `toml:"-" json:"-"`
-	IndexUsageSyncLease      string `toml:"index-usage-sync-lease" json:"index-usage-sync-lease"`
-	PlanReplayerGCLease      string `toml:"plan-replayer-gc-lease" json:"plan-replayer-gc-lease"`
-	GOGC                     int    `toml:"gogc" json:"gogc"`
-	EnforceMPP               bool   `toml:"enforce-mpp" json:"enforce-mpp"`
-	StatsLoadConcurrency     uint   `toml:"stats-load-concurrency" json:"stats-load-concurrency"`
-	StatsLoadQueueSize       uint   `toml:"stats-load-queue-size" json:"stats-load-queue-size"`
-	EnableStatsCacheMemQuota bool   `toml:"enable-stats-cache-mem-quota" json:"enable-stats-cache-mem-quota"`
+	MemProfileInterval               string `toml:"-" json:"-"`
+	IndexUsageSyncLease              string `toml:"index-usage-sync-lease" json:"index-usage-sync-lease"`
+	PlanReplayerGCLease              string `toml:"plan-replayer-gc-lease" json:"plan-replayer-gc-lease"`
+	GOGC                             int    `toml:"gogc" json:"gogc"`
+	EnforceMPP                       bool   `toml:"enforce-mpp" json:"enforce-mpp"`
+	StatsLoadConcurrency             uint   `toml:"stats-load-concurrency" json:"stats-load-concurrency"`
+	StatsLoadQueueSize               uint   `toml:"stats-load-queue-size" json:"stats-load-queue-size"`
+	AnalyzePartitionConcurrencyQuota uint   `toml:"analyze-partition-concurrency-quota" json:"analyze-partition-concurrency-quota"`
+	EnableStatsCacheMemQuota         bool   `toml:"enable-stats-cache-mem-quota" json:"enable-stats-cache-mem-quota"`
 	// The following items are deprecated. We need to keep them here temporarily
 	// to support the upgrade process. They can be removed in future.
```

Review comment on `AnalyzePartitionConcurrencyQuota`: How about using an SQL variable instead of a config item?

Reply: We use a config item because `AnalyzePartitionConcurrencyQuota` is used to pre-create the session pool in the domain when tidb-server starts.
```diff
@@ -905,15 +907,16 @@ var defaultConf = Config{
 		CommitterConcurrency: defTiKVCfg.CommitterConcurrency,
 		MaxTxnTTL:            defTiKVCfg.MaxTxnTTL, // 1hour
 		// TODO: set indexUsageSyncLease to 60s.
-		IndexUsageSyncLease:      "0s",
-		GOGC:                     100,
-		EnforceMPP:               false,
-		PlanReplayerGCLease:      "10m",
-		StatsLoadConcurrency:     5,
-		StatsLoadQueueSize:       1000,
-		EnableStatsCacheMemQuota: false,
-		RunAutoAnalyze:           true,
-		EnableLoadFMSketch:       false,
+		IndexUsageSyncLease:              "0s",
+		GOGC:                             100,
+		EnforceMPP:                       false,
+		PlanReplayerGCLease:              "10m",
+		StatsLoadConcurrency:             5,
+		StatsLoadQueueSize:               1000,
+		AnalyzePartitionConcurrencyQuota: 16,
+		EnableStatsCacheMemQuota:         false,
+		RunAutoAnalyze:                   true,
+		EnableLoadFMSketch:               false,
 	},
 	ProxyProtocol: ProxyProtocol{
 		Networks: "",
```
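As a usage sketch: assuming the `Performance` struct maps to the `[performance]` section of the TiDB server config file, as its sibling items do, the new quota would be configured like this (16 is the default added above):

```toml
# config.toml sketch: cap the number of sessions pre-created at server start
# for saving partition analyze results concurrently.
[performance]
analyze-partition-concurrency-quota = 16
```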
executor/analyze.go
```diff
@@ -188,8 +188,8 @@ func (e *AnalyzeExec) saveV2AnalyzeOpts() error {
 	return nil
 }
 
-func (e *AnalyzeExec) recordHistoricalStats(tableID int64) error {
-	statsHandle := domain.GetDomain(e.ctx).StatsHandle()
+func recordHistoricalStats(sctx sessionctx.Context, tableID int64) error {
+	statsHandle := domain.GetDomain(sctx).StatsHandle()
 	historicalStatsEnabled, err := statsHandle.CheckHistoricalStatsEnable()
 	if err != nil {
 		return errors.Errorf("check tidb_enable_historical_stats failed: %v", err)
@@ -198,7 +198,7 @@ func (e *AnalyzeExec) recordHistoricalStats(tableID int64) error {
 		return nil
 	}
 
-	is := domain.GetDomain(e.ctx).InfoSchema()
+	is := domain.GetDomain(sctx).InfoSchema()
 	tbl, existed := is.TableByID(tableID)
 	if !existed {
 		return errors.Errorf("cannot get table by id %d", tableID)
```
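Making `recordHistoricalStats` a plain function that takes an explicit `sessionctx.Context`, instead of a method bound to the executor's `e.ctx`, is what lets the new save-stats workers (added below) record historical stats through their own pooled sessions.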
```diff
@@ -217,6 +217,23 @@ func (e *AnalyzeExec) recordHistoricalStats(tableID int64) error {
 // handleResultsError will handle the error fetch from resultsCh and record it in log
 func (e *AnalyzeExec) handleResultsError(ctx context.Context, concurrency int, needGlobalStats bool,
 	globalStatsMap globalStatsMap, resultsCh <-chan *statistics.AnalyzeResults) error {
+	partitionStatsConcurrency := e.ctx.GetSessionVars().AnalyzePartitionConcurrency
+	// If 'partitionStatsConcurrency' > 1, we try to demand extra sessions from the Domain to save analyze results concurrently.
+	// If there is no extra session we can use, we save the analyze results in a single thread.
+	if partitionStatsConcurrency > 1 {
+		dom := domain.GetDomain(e.ctx)
+		subSctxs := dom.FetchAnalyzeExec(partitionStatsConcurrency)
+		if len(subSctxs) > 0 {
+			defer func() {
+				dom.ReleaseAnalyzeExec(subSctxs)
+			}()
+			internalCtx := kv.WithInternalSourceType(ctx, kv.InternalTxnStats)
+			err := e.handleResultsErrorWithConcurrency(internalCtx, concurrency, needGlobalStats, subSctxs, globalStatsMap, resultsCh)
+			return err
+		}
+	}
+
+	// Save analyze results in a single thread.
 	statsHandle := domain.GetDomain(e.ctx).StatsHandle()
 	panicCnt := 0
 	var err error
```
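`FetchAnalyzeExec` and `ReleaseAnalyzeExec` are not part of this diff, so the Domain side can only be pictured. A minimal sketch under that caveat: a mutex-guarded pool of sessions pre-created from `analyze-partition-concurrency-quota`, where a fetch never blocks and may return fewer sessions than requested, including none, which triggers the single-threaded fallback above. Every name below is illustrative, not the real TiDB API:

```go
package main

import (
	"fmt"
	"sync"
)

// analyzePool is a hypothetical stand-in for the Domain's pre-created
// session pool; sessions are modeled as ints for brevity.
type analyzePool struct {
	mu   sync.Mutex
	idle []int
}

// newAnalyzePool pre-creates `quota` sessions, mirroring how the quota
// config item is consumed once at tidb-server start.
func newAnalyzePool(quota int) *analyzePool {
	p := &analyzePool{}
	for i := 0; i < quota; i++ {
		p.idle = append(p.idle, i)
	}
	return p
}

// fetch hands out up to `need` sessions without blocking; an empty result
// means the caller should save analyze results in a single thread.
func (p *analyzePool) fetch(need int) []int {
	p.mu.Lock()
	defer p.mu.Unlock()
	if need > len(p.idle) {
		need = len(p.idle)
	}
	got := append([]int(nil), p.idle[:need]...)
	p.idle = p.idle[need:]
	return got
}

// release returns sessions so later ANALYZE statements can reuse them.
func (p *analyzePool) release(sessions []int) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.idle = append(p.idle, sessions...)
}

func main() {
	pool := newAnalyzePool(16)
	subSessions := pool.fetch(4)
	defer pool.release(subSessions)
	fmt.Printf("fetched %d of 16 pooled sessions\n", len(subSessions))
}
```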
```diff
@@ -235,36 +252,16 @@ func (e *AnalyzeExec) handleResultsError(ctx context.Context, concurrency int, n
 			finishJobWithLog(e.ctx, results.Job, err)
 			continue
 		}
-		if results.TableID.IsPartitionTable() && needGlobalStats {
-			for _, result := range results.Ars {
-				if result.IsIndex == 0 {
-					// If it does not belong to the statistics of index, we need to set it to -1 to distinguish.
-					globalStatsID := globalStatsKey{tableID: results.TableID.TableID, indexID: int64(-1)}
-					histIDs := make([]int64, 0, len(result.Hist))
-					for _, hg := range result.Hist {
-						// It's a normal virtual column, skip it.
-						if hg == nil {
-							continue
-						}
-						histIDs = append(histIDs, hg.ID)
-					}
-					globalStatsMap[globalStatsID] = globalStatsInfo{isIndex: result.IsIndex, histIDs: histIDs, statsVersion: results.StatsVer}
-				} else {
-					for _, hg := range result.Hist {
-						globalStatsID := globalStatsKey{tableID: results.TableID.TableID, indexID: hg.ID}
-						globalStatsMap[globalStatsID] = globalStatsInfo{isIndex: result.IsIndex, histIDs: []int64{hg.ID}, statsVersion: results.StatsVer}
-					}
-				}
-			}
-		}
-		if err1 := statsHandle.SaveTableStatsToStorage(results, results.TableID.IsPartitionTable(), e.ctx.GetSessionVars().EnableAnalyzeSnapshot); err1 != nil {
+		handleGlobalStats(needGlobalStats, globalStatsMap, results)
+
+		if err1 := statsHandle.SaveTableStatsToStorage(results, e.ctx.GetSessionVars().EnableAnalyzeSnapshot); err1 != nil {
 			err = err1
 			logutil.Logger(ctx).Error("save table stats to storage failed", zap.Error(err))
 			finishJobWithLog(e.ctx, results.Job, err)
 		} else {
 			finishJobWithLog(e.ctx, results.Job, nil)
 			// Dump stats to historical storage.
-			if err := e.recordHistoricalStats(results.TableID.TableID); err != nil {
+			if err := recordHistoricalStats(e.ctx, results.TableID.TableID); err != nil {
 				logutil.BgLogger().Error("record historical stats failed", zap.Error(err))
 			}
 		}
```
```diff
@@ -273,6 +270,54 @@ func (e *AnalyzeExec) handleResultsError(ctx context.Context, concurrency int, n
 	return err
 }
 
+func (e *AnalyzeExec) handleResultsErrorWithConcurrency(ctx context.Context, statsConcurrency int, needGlobalStats bool,
+	subSctxs []sessionctx.Context,
+	globalStatsMap globalStatsMap, resultsCh <-chan *statistics.AnalyzeResults) error {
+	partitionStatsConcurrency := len(subSctxs)
+
+	var wg util.WaitGroupWrapper
+	saveResultsCh := make(chan *statistics.AnalyzeResults, partitionStatsConcurrency)
+	errCh := make(chan error, partitionStatsConcurrency)
+	for i := 0; i < partitionStatsConcurrency; i++ {
+		worker := newAnalyzeSaveStatsWorker(saveResultsCh, subSctxs[i], errCh)
+		ctx1 := kv.WithInternalSourceType(context.Background(), kv.InternalTxnStats)
+		wg.Run(func() {
+			worker.run(ctx1, e.ctx.GetSessionVars().EnableAnalyzeSnapshot)
+		})
+	}
+	panicCnt := 0
+	var err error
+	for panicCnt < statsConcurrency {
+		results, ok := <-resultsCh
+		if !ok {
+			break
+		}
+		if results.Err != nil {
+			err = results.Err
+			if isAnalyzeWorkerPanic(err) {
+				panicCnt++
+			} else {
+				logutil.Logger(ctx).Error("analyze failed", zap.Error(err))
+			}
+			finishJobWithLog(e.ctx, results.Job, err)
+			continue
+		}
+		handleGlobalStats(needGlobalStats, globalStatsMap, results)
+		saveResultsCh <- results
+	}
+	close(saveResultsCh)
+	wg.Wait()
+	close(errCh)
+	if len(errCh) > 0 {
+		errMsg := make([]string, 0)
+		for err1 := range errCh {
+			errMsg = append(errMsg, err1.Error())
+		}
+		err = errors.New(strings.Join(errMsg, ","))
+	}
+	return err
+}
+
 func (e *AnalyzeExec) analyzeWorker(taskCh <-chan *analyzeTask, resultsCh chan<- *statistics.AnalyzeResults) {
 	var task *analyzeTask
 	defer func() {
```

Review comment on `partitionStatsConcurrency := len(subSctxs)`: What's the relation between …
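The function above is a classic fan-out/fan-in: a bounded set of workers drains a buffered work channel, failures go to a second buffered error channel, and the coordinator closes the work channel before waiting. A minimal self-contained sketch of the same shape; the names and the simulated failure rule are illustrative, not TiDB's:

```go
package main

import (
	"errors"
	"fmt"
	"strings"
	"sync"
)

// saveResult stands in for *statistics.AnalyzeResults.
type saveResult struct{ id int }

// worker drains work until the channel is closed, reporting failures on errCh.
// errCh has one buffered slot per worker, so sends never block.
func worker(work <-chan saveResult, errCh chan<- error, wg *sync.WaitGroup) {
	defer wg.Done()
	for r := range work {
		if r.id%5 == 0 { // pretend every fifth save fails
			errCh <- fmt.Errorf("save stats for partition %d failed", r.id)
			return // stop this worker on its first error, like the PR's worker
		}
	}
}

func main() {
	const concurrency = 3
	work := make(chan saveResult, concurrency)
	errCh := make(chan error, concurrency)

	var wg sync.WaitGroup
	for i := 0; i < concurrency; i++ {
		wg.Add(1)
		go worker(work, errCh, &wg)
	}

	for i := 1; i <= 10; i++ {
		work <- saveResult{id: i}
	}
	close(work)  // signal workers that no more results are coming
	wg.Wait()    // fan-in: wait for every worker to finish
	close(errCh) // safe: no sender remains after wg.Wait

	// Join the collected errors, as handleResultsErrorWithConcurrency does.
	var msgs []string
	for err := range errCh {
		msgs = append(msgs, err.Error())
	}
	if len(msgs) > 0 {
		fmt.Println(errors.New(strings.Join(msgs, ",")))
	}
}
```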
```diff
@@ -434,3 +479,28 @@ func finishJobWithLog(sctx sessionctx.Context, job *statistics.AnalyzeJob, analy
 			zap.String("cost", job.EndTime.Sub(job.StartTime).String()))
 	}
 }
+
+func handleGlobalStats(needGlobalStats bool, globalStatsMap globalStatsMap, results *statistics.AnalyzeResults) {
+	if results.TableID.IsPartitionTable() && needGlobalStats {
+		for _, result := range results.Ars {
+			if result.IsIndex == 0 {
+				// If it does not belong to the statistics of index, we need to set it to -1 to distinguish.
+				globalStatsID := globalStatsKey{tableID: results.TableID.TableID, indexID: int64(-1)}
+				histIDs := make([]int64, 0, len(result.Hist))
+				for _, hg := range result.Hist {
+					// It's a normal virtual column, skip it.
+					if hg == nil {
+						continue
+					}
+					histIDs = append(histIDs, hg.ID)
+				}
+				globalStatsMap[globalStatsID] = globalStatsInfo{isIndex: result.IsIndex, histIDs: histIDs, statsVersion: results.StatsVer}
+			} else {
+				for _, hg := range result.Hist {
+					globalStatsID := globalStatsKey{tableID: results.TableID.TableID, indexID: hg.ID}
+					globalStatsMap[globalStatsID] = globalStatsInfo{isIndex: result.IsIndex, histIDs: []int64{hg.ID}, statsVersion: results.StatsVer}
+				}
+			}
+		}
+	}
+}
```
executor/analyze_worker.go (new file)

```go
// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package executor

import (
	"context"

	"github.com/pingcap/tidb/sessionctx"
	"github.com/pingcap/tidb/statistics"
	"github.com/pingcap/tidb/statistics/handle"
	"github.com/pingcap/tidb/util/logutil"
	"go.uber.org/zap"
)

type analyzeSaveStatsWorker struct {
	resultsCh <-chan *statistics.AnalyzeResults
	sctx      sessionctx.Context
	errCh     chan<- error
}

func newAnalyzeSaveStatsWorker(
	resultsCh <-chan *statistics.AnalyzeResults,
	sctx sessionctx.Context,
	errCh chan<- error) *analyzeSaveStatsWorker {
	worker := &analyzeSaveStatsWorker{
		resultsCh: resultsCh,
		sctx:      sctx,
		errCh:     errCh,
	}
	return worker
}

func (worker *analyzeSaveStatsWorker) run(ctx context.Context, analyzeSnapshot bool) {
	defer func() {
		if r := recover(); r != nil {
			logutil.BgLogger().Error("analyze save stats worker panicked", zap.Any("recover", r), zap.Stack("stack"))
			worker.errCh <- getAnalyzePanicErr(r)
		}
	}()
	for results := range worker.resultsCh {
		err := handle.SaveTableStatsToStorage(worker.sctx, results, analyzeSnapshot)
		if err != nil {
			logutil.Logger(ctx).Error("save table stats to storage failed", zap.Error(err))
			finishJobWithLog(worker.sctx, results.Job, err)
			worker.errCh <- err
		} else {
			finishJobWithLog(worker.sctx, results.Job, nil)
			// Dump stats to historical storage.
			if err := recordHistoricalStats(worker.sctx, results.TableID.TableID); err != nil {
				logutil.BgLogger().Error("record historical stats failed", zap.Error(err))
			}
		}
		invalidInfoSchemaStatCache(results.TableID.GetStatisticsID())
		if err != nil {
			return
		}
	}
}
```
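Design note: a worker stops after its first failed save (the trailing `if err != nil { return }`), and the `recover` handler converts a panic into an error on `errCh`. Because `errCh` is buffered with one slot per worker and is closed only after `wg.Wait()` in `handleResultsErrorWithConcurrency`, these sends do not block in the normal flow and cannot hit a closed channel.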
Review comment on the `// Deprecated` marker in `Performance`: It seems the deprecated variables are defined below here. Besides, why do we use the config rather than the system variables?

Reply: I think it only indicates that `MemProfileInterval` is deprecated. We use a config item because `AnalyzePartitionConcurrencyQuota` is used to pre-create the session pool in the domain when tidb-server starts.