From 74a6dddfd05b6970db0e99c46f6e919284bc35ae Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Wed, 5 Sep 2018 16:37:53 +0800 Subject: [PATCH] stats: auto analyze on certain period of a day (#7570) --- sessionctx/variable/sysvar.go | 2 + sessionctx/variable/tidb_vars.go | 6 ++ sessionctx/variable/varsutil.go | 26 ++++++++ statistics/update.go | 78 ++++++++++++++++++----- statistics/update_test.go | 106 +++++++++++++++++++++++++++++++ 5 files changed, 202 insertions(+), 16 deletions(-) diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index d62b2bea3ebe8..3e0d6278474bd 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -623,6 +623,8 @@ var defaultSysVars = []*SysVar{ {ScopeSession, TiDBOptAggPushDown, boolToIntStr(DefOptAggPushDown)}, {ScopeGlobal | ScopeSession, TiDBBuildStatsConcurrency, strconv.Itoa(DefBuildStatsConcurrency)}, {ScopeGlobal, TiDBAutoAnalyzeRatio, strconv.FormatFloat(DefAutoAnalyzeRatio, 'f', -1, 64)}, + {ScopeGlobal, TiDBAutoAnalyzeStartTime, DefAutoAnalyzeStartTime}, + {ScopeGlobal, TiDBAutoAnalyzeEndTime, DefAutoAnalyzeEndTime}, {ScopeSession, TiDBChecksumTableConcurrency, strconv.Itoa(DefChecksumTableConcurrency)}, {ScopeGlobal | ScopeSession, TiDBDistSQLScanConcurrency, strconv.Itoa(DefDistSQLScanConcurrency)}, {ScopeGlobal | ScopeSession, TiDBOptInSubqUnFolding, boolToIntStr(DefOptInSubqUnfolding)}, diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 7909cf85c1d1d..e05149e13e052 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -38,6 +38,10 @@ const ( // Auto analyze will run if (table modify count)/(table row count) is greater than this value. TiDBAutoAnalyzeRatio = "tidb_auto_analyze_ratio" + // Auto analyze will run if current time is within start time and end time. + TiDBAutoAnalyzeStartTime = "tidb_auto_analyze_start_time" + TiDBAutoAnalyzeEndTime = "tidb_auto_analyze_end_time" + // tidb_checksum_table_concurrency is used to speed up the ADMIN CHECKSUM TABLE // statement, when a table has multiple indices, those indices can be // scanned concurrently, with the cost of higher system performance impact. @@ -189,6 +193,8 @@ const ( DefDistSQLScanConcurrency = 15 DefBuildStatsConcurrency = 4 DefAutoAnalyzeRatio = 0.5 + DefAutoAnalyzeStartTime = "00:00 +0000" + DefAutoAnalyzeEndTime = "23:59 +0000" DefChecksumTableConcurrency = 4 DefSkipUTF8Check = false DefOptAggPushDown = false diff --git a/sessionctx/variable/varsutil.go b/sessionctx/variable/varsutil.go index c11f8501db71a..3722e9a5ad523 100644 --- a/sessionctx/variable/varsutil.go +++ b/sessionctx/variable/varsutil.go @@ -317,6 +317,12 @@ func ValidateSetSystemVar(vars *SessionVars, name string, value string) (string, return value, ErrWrongValueForVar.GenByArgs(name) } return value, nil + case TiDBAutoAnalyzeStartTime, TiDBAutoAnalyzeEndTime: + v, err := setAnalyzeTime(vars, value) + if err != nil { + return "", errors.Trace(err) + } + return v, nil } return value, nil } @@ -395,3 +401,23 @@ func GoTimeToTS(t time.Time) uint64 { ts := (t.UnixNano() / int64(time.Millisecond)) << epochShiftBits return uint64(ts) } + +const ( + analyzeLocalTimeFormat = "15:04" + // AnalyzeFullTimeFormat is the full format of analyze start time and end time. + AnalyzeFullTimeFormat = "15:04 -0700" +) + +func setAnalyzeTime(s *SessionVars, val string) (string, error) { + var t time.Time + var err error + if len(val) <= len(analyzeLocalTimeFormat) { + t, err = time.ParseInLocation(analyzeLocalTimeFormat, val, s.TimeZone) + } else { + t, err = time.ParseInLocation(AnalyzeFullTimeFormat, val, s.TimeZone) + } + if err != nil { + return "", errors.Trace(err) + } + return t.Format(AnalyzeFullTimeFormat), nil +} diff --git a/statistics/update.go b/statistics/update.go index 87bbb0db22128..e84165d7e1534 100644 --- a/statistics/update.go +++ b/statistics/update.go @@ -601,12 +601,28 @@ func TableAnalyzed(tbl *Table) bool { return false } -// needAnalyzeTable checks if we need to analyze the table: +// withinTimePeriod tests whether `now` is between `start` and `end`. +func withinTimePeriod(start, end, now time.Time) bool { + // Converts to UTC and only keeps the hour and minute info. + start, end, now = start.UTC(), end.UTC(), now.UTC() + start = time.Date(0, 0, 0, start.Hour(), start.Minute(), 0, 0, time.UTC) + end = time.Date(0, 0, 0, end.Hour(), end.Minute(), 0, 0, time.UTC) + now = time.Date(0, 0, 0, now.Hour(), now.Minute(), 0, 0, time.UTC) + // for cases like from 00:00 to 06:00 + if end.Sub(start) >= 0 { + return now.Sub(start) >= 0 && now.Sub(end) <= 0 + } + // for cases like from 22:00 to 06:00 + return now.Sub(end) <= 0 || now.Sub(start) >= 0 +} + +// NeedAnalyzeTable checks if we need to analyze the table: // 1. If the table has never been analyzed, we need to analyze it when it has -// not been modified for a time. +// not been modified for a while. // 2. If the table had been analyzed before, we need to analyze it when // "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio". -func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) bool { +// 3. The current time is between `start` and `end`. +func NeedAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64, start, end, now time.Time) bool { analyzed := TableAnalyzed(tbl) if !analyzed { t := time.Unix(0, oracle.ExtractPhysical(tbl.Version)*int64(time.Millisecond)) @@ -616,23 +632,36 @@ func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) if autoAnalyzeRatio == 0 { return false } - return float64(tbl.ModifyCount)/float64(tbl.Count) > autoAnalyzeRatio + // No need to analyze it. + if float64(tbl.ModifyCount)/float64(tbl.Count) <= autoAnalyzeRatio { + return false + } + // Tests if current time is within the time period. + return withinTimePeriod(start, end, now) } -const minAutoAnalyzeRatio = 0.3 +const ( + minAutoAnalyzeRatio = 0.3 +) -func (h *Handle) getAutoAnalyzeRatio() float64 { - sql := fmt.Sprintf("select variable_value from mysql.global_variables where variable_name = '%s'", variable.TiDBAutoAnalyzeRatio) +func (h *Handle) getAutoAnalyzeParameters() map[string]string { + sql := fmt.Sprintf("select variable_name, variable_value from mysql.global_variables where variable_name in ('%s', '%s', '%s')", + variable.TiDBAutoAnalyzeRatio, variable.TiDBAutoAnalyzeStartTime, variable.TiDBAutoAnalyzeEndTime) rows, _, err := h.restrictedExec.ExecRestrictedSQL(nil, sql) if err != nil { - return variable.DefAutoAnalyzeRatio + return map[string]string{} } - autoAnalyzeRatio := variable.DefAutoAnalyzeRatio - if len(rows) > 0 { - autoAnalyzeRatio, err = strconv.ParseFloat(rows[0].GetString(0), 64) - if err != nil { - return variable.DefAutoAnalyzeRatio - } + parameters := make(map[string]string) + for _, row := range rows { + parameters[row.GetString(0)] = row.GetString(1) + } + return parameters +} + +func parseAutoAnalyzeRatio(ratio string) float64 { + autoAnalyzeRatio, err := strconv.ParseFloat(ratio, 64) + if err != nil { + return variable.DefAutoAnalyzeRatio } if autoAnalyzeRatio > 0 { autoAnalyzeRatio = math.Max(autoAnalyzeRatio, minAutoAnalyzeRatio) @@ -640,10 +669,27 @@ func (h *Handle) getAutoAnalyzeRatio() float64 { return autoAnalyzeRatio } +func parseAnalyzePeriod(start, end string) (time.Time, time.Time, error) { + s, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, start, time.UTC) + if err != nil { + return s, s, errors.Trace(err) + } + e, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, end, time.UTC) + if err != nil { + return s, e, errors.Trace(err) + } + return s, e, nil +} + // HandleAutoAnalyze analyzes the newly created table or index. func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error { dbs := is.AllSchemaNames() - autoAnalyzeRatio := h.getAutoAnalyzeRatio() + parameters := h.getAutoAnalyzeParameters() + autoAnalyzeRatio := parseAutoAnalyzeRatio(parameters[variable.TiDBAutoAnalyzeRatio]) + start, end, err := parseAnalyzePeriod(parameters[variable.TiDBAutoAnalyzeStartTime], parameters[variable.TiDBAutoAnalyzeEndTime]) + if err != nil { + return errors.Trace(err) + } for _, db := range dbs { tbls := is.SchemaTables(model.NewCIStr(db)) for _, tbl := range tbls { @@ -653,7 +699,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error { continue } tblName := "`" + db + "`.`" + tblInfo.Name.O + "`" - if needAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio) { + if NeedAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio, start, end, time.Now()) { sql := fmt.Sprintf("analyze table %s", tblName) log.Infof("[stats] auto analyze table %s now", tblName) return errors.Trace(h.execAutoAnalyze(sql)) diff --git a/statistics/update_test.go b/statistics/update_test.go index 306c213bdf90f..6e9bce3747dc3 100644 --- a/statistics/update_test.go +++ b/statistics/update_test.go @@ -25,7 +25,9 @@ import ( "github.com/pingcap/tidb/model" "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/sessionctx/stmtctx" + "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/statistics" + "github.com/pingcap/tidb/store/tikv/oracle" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/ranger" @@ -925,3 +927,107 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) { c.Assert(s.hook.results, Equals, t.result) } } + +func (s *testStatsUpdateSuite) TestNeedAnalyzeTable(c *C) { + columns := map[int64]*statistics.Column{} + columns[1] = &statistics.Column{Count: 1} + tests := []struct { + tbl *statistics.Table + ratio float64 + limit time.Duration + start string + end string + now string + result bool + }{ + // table was never analyzed and has reach the limit + { + tbl: &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))}, + limit: 0, + ratio: 0, + start: "00:00 +0800", + end: "00:01 +0800", + now: "00:00 +0800", + result: true, + }, + // table was never analyzed but has not reach the limit + { + tbl: &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))}, + limit: time.Hour, + ratio: 0, + start: "00:00 +0800", + end: "00:01 +0800", + now: "00:00 +0800", + result: false, + }, + // table was already analyzed but auto analyze is disabled + { + tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}}, + limit: 0, + ratio: 0, + start: "00:00 +0800", + end: "00:01 +0800", + now: "00:00 +0800", + result: false, + }, + // table was already analyzed and but modify count is small + { + tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 0, Count: 1}}, + limit: 0, + ratio: 0.3, + start: "00:00 +0800", + end: "00:01 +0800", + now: "00:00 +0800", + result: false, + }, + // table was already analyzed and but not within time period + { + tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}}, + limit: 0, + ratio: 0.3, + start: "00:00 +0800", + end: "00:01 +0800", + now: "00:02 +0800", + result: false, + }, + // table was already analyzed and but not within time period + { + tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}}, + limit: 0, + ratio: 0.3, + start: "22:00 +0800", + end: "06:00 +0800", + now: "10:00 +0800", + result: false, + }, + // table was already analyzed and within time period + { + tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}}, + limit: 0, + ratio: 0.3, + start: "00:00 +0800", + end: "00:01 +0800", + now: "00:00 +0800", + result: true, + }, + // table was already analyzed and within time period + { + tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}}, + limit: 0, + ratio: 0.3, + start: "22:00 +0800", + end: "06:00 +0800", + now: "23:00 +0800", + result: true, + }, + } + for _, test := range tests { + start, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, test.start, time.UTC) + c.Assert(err, IsNil) + end, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, test.end, time.UTC) + c.Assert(err, IsNil) + now, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, test.now, time.UTC) + c.Assert(err, IsNil) + c.Assert(statistics.NeedAnalyzeTable(test.tbl, test.limit, test.ratio, start, end, now), Equals, test.result) + } +}