Skip to content

Commit

Permalink
stats: auto analyze on certain period of a day (#7570)
Browse files Browse the repository at this point in the history
  • Loading branch information
alivxxx committed Sep 5, 2018
1 parent b6072de commit 74a6ddd
Show file tree
Hide file tree
Showing 5 changed files with 202 additions and 16 deletions.
2 changes: 2 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,8 @@ var defaultSysVars = []*SysVar{
{ScopeSession, TiDBOptAggPushDown, boolToIntStr(DefOptAggPushDown)},
{ScopeGlobal | ScopeSession, TiDBBuildStatsConcurrency, strconv.Itoa(DefBuildStatsConcurrency)},
{ScopeGlobal, TiDBAutoAnalyzeRatio, strconv.FormatFloat(DefAutoAnalyzeRatio, 'f', -1, 64)},
{ScopeGlobal, TiDBAutoAnalyzeStartTime, DefAutoAnalyzeStartTime},
{ScopeGlobal, TiDBAutoAnalyzeEndTime, DefAutoAnalyzeEndTime},
{ScopeSession, TiDBChecksumTableConcurrency, strconv.Itoa(DefChecksumTableConcurrency)},
{ScopeGlobal | ScopeSession, TiDBDistSQLScanConcurrency, strconv.Itoa(DefDistSQLScanConcurrency)},
{ScopeGlobal | ScopeSession, TiDBOptInSubqUnFolding, boolToIntStr(DefOptInSubqUnfolding)},
Expand Down
6 changes: 6 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ const (
// Auto analyze will run if (table modify count)/(table row count) is greater than this value.
TiDBAutoAnalyzeRatio = "tidb_auto_analyze_ratio"

// Auto analyze will run if current time is within start time and end time.
TiDBAutoAnalyzeStartTime = "tidb_auto_analyze_start_time"
TiDBAutoAnalyzeEndTime = "tidb_auto_analyze_end_time"

// tidb_checksum_table_concurrency is used to speed up the ADMIN CHECKSUM TABLE
// statement, when a table has multiple indices, those indices can be
// scanned concurrently, with the cost of higher system performance impact.
Expand Down Expand Up @@ -189,6 +193,8 @@ const (
DefDistSQLScanConcurrency = 15
DefBuildStatsConcurrency = 4
DefAutoAnalyzeRatio = 0.5
DefAutoAnalyzeStartTime = "00:00 +0000"
DefAutoAnalyzeEndTime = "23:59 +0000"
DefChecksumTableConcurrency = 4
DefSkipUTF8Check = false
DefOptAggPushDown = false
Expand Down
26 changes: 26 additions & 0 deletions sessionctx/variable/varsutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,12 @@ func ValidateSetSystemVar(vars *SessionVars, name string, value string) (string,
return value, ErrWrongValueForVar.GenByArgs(name)
}
return value, nil
case TiDBAutoAnalyzeStartTime, TiDBAutoAnalyzeEndTime:
v, err := setAnalyzeTime(vars, value)
if err != nil {
return "", errors.Trace(err)
}
return v, nil
}
return value, nil
}
Expand Down Expand Up @@ -395,3 +401,23 @@ func GoTimeToTS(t time.Time) uint64 {
ts := (t.UnixNano() / int64(time.Millisecond)) << epochShiftBits
return uint64(ts)
}

// Layouts accepted for the auto analyze start/end time variables.
const (
// analyzeLocalTimeFormat is the short hour:minute layout. setAnalyzeTime uses
// it for values no longer than the layout itself and interprets them in the
// session time zone.
analyzeLocalTimeFormat = "15:04"
// AnalyzeFullTimeFormat is the full format of analyze start time and end time.
// It is hour:minute followed by a numeric zone offset, and it is the form
// setAnalyzeTime returns after normalizing a value.
AnalyzeFullTimeFormat = "15:04 -0700"
)

// setAnalyzeTime validates val as an auto analyze start/end time and returns
// it normalized to AnalyzeFullTimeFormat.
//
// A value no longer than analyzeLocalTimeFormat ("15:04") carries no zone
// offset and is interpreted in the session time zone; any longer value must
// match AnalyzeFullTimeFormat ("15:04 -0700"). A parse failure is returned as
// a traced error together with an empty string.
//
// NOTE(review): the parsed time has no date component, so the offset printed
// for an offset-less value is whatever the location reports for year 0; for
// named zones this may differ from the present-day offset — confirm.
func setAnalyzeTime(s *SessionVars, val string) (string, error) {
	// time.ParseInLocation builds the result with time.Date when the value has
	// no explicit offset, and time.Date panics on a nil location. Fall back to
	// the process-local zone instead of crashing.
	loc := s.TimeZone
	if loc == nil {
		loc = time.Local
	}

	layout := AnalyzeFullTimeFormat
	if len(val) <= len(analyzeLocalTimeFormat) {
		layout = analyzeLocalTimeFormat
	}

	t, err := time.ParseInLocation(layout, val, loc)
	if err != nil {
		return "", errors.Trace(err)
	}
	return t.Format(AnalyzeFullTimeFormat), nil
}
78 changes: 62 additions & 16 deletions statistics/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -601,12 +601,28 @@ func TableAnalyzed(tbl *Table) bool {
return false
}

// needAnalyzeTable checks if we need to analyze the table:
// withinTimePeriod reports whether the time of day of `now` falls inside the
// daily window that runs from `start` to `end`, both endpoints included.
//
// Only the hour and minute of each argument, taken in UTC, take part in the
// comparison; dates, seconds and sub-second parts are ignored. When `end` is
// earlier in the day than `start`, the window is treated as wrapping around
// midnight.
func withinTimePeriod(start, end, now time.Time) bool {
	// minuteOfDay reduces a timestamp to minutes elapsed since 00:00 UTC.
	minuteOfDay := func(t time.Time) int {
		u := t.UTC()
		return u.Hour()*60 + u.Minute()
	}
	begin, finish, cur := minuteOfDay(start), minuteOfDay(end), minuteOfDay(now)

	if begin <= finish {
		// Plain window such as 00:00 to 06:00.
		return begin <= cur && cur <= finish
	}
	// Window crossing midnight such as 22:00 to 06:00.
	return cur <= finish || cur >= begin
}

// NeedAnalyzeTable checks if we need to analyze the table:
// 1. If the table has never been analyzed, we need to analyze it when it has
// not been modified for a time.
// not been modified for a while.
// 2. If the table had been analyzed before, we need to analyze it when
// "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio".
func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) bool {
// 3. The current time is between `start` and `end`.
func NeedAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64, start, end, now time.Time) bool {
analyzed := TableAnalyzed(tbl)
if !analyzed {
t := time.Unix(0, oracle.ExtractPhysical(tbl.Version)*int64(time.Millisecond))
Expand All @@ -616,34 +632,64 @@ func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64)
if autoAnalyzeRatio == 0 {
return false
}
return float64(tbl.ModifyCount)/float64(tbl.Count) > autoAnalyzeRatio
// No need to analyze it.
if float64(tbl.ModifyCount)/float64(tbl.Count) <= autoAnalyzeRatio {
return false
}
// Tests if current time is within the time period.
return withinTimePeriod(start, end, now)
}

const minAutoAnalyzeRatio = 0.3
const (
// minAutoAnalyzeRatio is the floor that parseAutoAnalyzeRatio applies to any
// positive ratio; a smaller positive value is raised to this one.
minAutoAnalyzeRatio = 0.3
)

func (h *Handle) getAutoAnalyzeRatio() float64 {
sql := fmt.Sprintf("select variable_value from mysql.global_variables where variable_name = '%s'", variable.TiDBAutoAnalyzeRatio)
func (h *Handle) getAutoAnalyzeParameters() map[string]string {
sql := fmt.Sprintf("select variable_name, variable_value from mysql.global_variables where variable_name in ('%s', '%s', '%s')",
variable.TiDBAutoAnalyzeRatio, variable.TiDBAutoAnalyzeStartTime, variable.TiDBAutoAnalyzeEndTime)
rows, _, err := h.restrictedExec.ExecRestrictedSQL(nil, sql)
if err != nil {
return variable.DefAutoAnalyzeRatio
return map[string]string{}
}
autoAnalyzeRatio := variable.DefAutoAnalyzeRatio
if len(rows) > 0 {
autoAnalyzeRatio, err = strconv.ParseFloat(rows[0].GetString(0), 64)
if err != nil {
return variable.DefAutoAnalyzeRatio
}
parameters := make(map[string]string)
for _, row := range rows {
parameters[row.GetString(0)] = row.GetString(1)
}
return parameters
}

// parseAutoAnalyzeRatio converts the textual auto analyze ratio into a float64.
//
// A string that cannot be parsed yields variable.DefAutoAnalyzeRatio. A
// positive value is raised to at least minAutoAnalyzeRatio. Any other parsed
// value (zero included) is returned unchanged.
func parseAutoAnalyzeRatio(ratio string) float64 {
	parsed, err := strconv.ParseFloat(ratio, 64)
	switch {
	case err != nil:
		return variable.DefAutoAnalyzeRatio
	case parsed > 0:
		return math.Max(parsed, minAutoAnalyzeRatio)
	default:
		return parsed
	}
}

// parseAnalyzePeriod parses the auto analyze window bounds, which are expected
// in variable.AnalyzeFullTimeFormat ("15:04 -0700").
//
// An empty bound is replaced by its default (variable.DefAutoAnalyzeStartTime
// or variable.DefAutoAnalyzeEndTime). The parameter lookup yields empty
// strings when the query fails or a variable has no stored row, and without
// this fallback such a lookup would turn into a parse error that stops auto
// analyze, even though the ratio falls back to its default in the same case.
//
// It returns the parsed start and end times, or a traced error if either
// non-empty bound does not match the layout.
func parseAnalyzePeriod(start, end string) (time.Time, time.Time, error) {
	if start == "" {
		start = variable.DefAutoAnalyzeStartTime
	}
	if end == "" {
		end = variable.DefAutoAnalyzeEndTime
	}
	// Both layouts carry an explicit offset, so time.UTC only serves as the
	// required location argument.
	s, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, start, time.UTC)
	if err != nil {
		return s, s, errors.Trace(err)
	}
	e, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, end, time.UTC)
	if err != nil {
		return s, e, errors.Trace(err)
	}
	return s, e, nil
}

// HandleAutoAnalyze analyzes the newly created table or index.
func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
dbs := is.AllSchemaNames()
autoAnalyzeRatio := h.getAutoAnalyzeRatio()
parameters := h.getAutoAnalyzeParameters()
autoAnalyzeRatio := parseAutoAnalyzeRatio(parameters[variable.TiDBAutoAnalyzeRatio])
start, end, err := parseAnalyzePeriod(parameters[variable.TiDBAutoAnalyzeStartTime], parameters[variable.TiDBAutoAnalyzeEndTime])
if err != nil {
return errors.Trace(err)
}
for _, db := range dbs {
tbls := is.SchemaTables(model.NewCIStr(db))
for _, tbl := range tbls {
Expand All @@ -653,7 +699,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
continue
}
tblName := "`" + db + "`.`" + tblInfo.Name.O + "`"
if needAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio) {
if NeedAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio, start, end, time.Now()) {
sql := fmt.Sprintf("analyze table %s", tblName)
log.Infof("[stats] auto analyze table %s now", tblName)
return errors.Trace(h.execAutoAnalyze(sql))
Expand Down
106 changes: 106 additions & 0 deletions statistics/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ import (
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/store/tikv/oracle"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/ranger"
Expand Down Expand Up @@ -925,3 +927,107 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) {
c.Assert(s.hook.results, Equals, t.result)
}
}

// TestNeedAnalyzeTable is a table-driven check of statistics.NeedAnalyzeTable.
// Each case supplies a stats table, an auto analyze ratio, a limit, and a
// start/end/now triple written in variable.AnalyzeFullTimeFormat, together
// with the expected decision.
func (s *testStatsUpdateSuite) TestNeedAnalyzeTable(c *C) {
// A single column entry, shared by the "already analyzed" cases below.
columns := map[int64]*statistics.Column{}
columns[1] = &statistics.Column{Count: 1}
tests := []struct {
tbl *statistics.Table
ratio float64
limit time.Duration
start string
end string
now string
result bool
}{
// table was never analyzed and has reached the limit
{
tbl: &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))},
limit: 0,
ratio: 0,
start: "00:00 +0800",
end: "00:01 +0800",
now: "00:00 +0800",
result: true,
},
// table was never analyzed but has not reached the limit
{
tbl: &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))},
limit: time.Hour,
ratio: 0,
start: "00:00 +0800",
end: "00:01 +0800",
now: "00:00 +0800",
result: false,
},
// table was already analyzed but auto analyze is disabled (ratio is 0)
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0,
start: "00:00 +0800",
end: "00:01 +0800",
now: "00:00 +0800",
result: false,
},
// table was already analyzed but the modify count is small
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 0, Count: 1}},
limit: 0,
ratio: 0.3,
start: "00:00 +0800",
end: "00:01 +0800",
now: "00:00 +0800",
result: false,
},
// table was already analyzed but now is not within the time period
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0.3,
start: "00:00 +0800",
end: "00:01 +0800",
now: "00:02 +0800",
result: false,
},
// table was already analyzed but now is not within a time period that
// crosses midnight
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0.3,
start: "22:00 +0800",
end: "06:00 +0800",
now: "10:00 +0800",
result: false,
},
// table was already analyzed and now is within the time period
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0.3,
start: "00:00 +0800",
end: "00:01 +0800",
now: "00:00 +0800",
result: true,
},
// table was already analyzed and now is within a time period that
// crosses midnight
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0.3,
start: "22:00 +0800",
end: "06:00 +0800",
now: "23:00 +0800",
result: true,
},
}
for _, test := range tests {
// Parse the three textual times with the same layout before calling
// NeedAnalyzeTable.
start, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, test.start, time.UTC)
c.Assert(err, IsNil)
end, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, test.end, time.UTC)
c.Assert(err, IsNil)
now, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, test.now, time.UTC)
c.Assert(err, IsNil)
c.Assert(statistics.NeedAnalyzeTable(test.tbl, test.limit, test.ratio, start, end, now), Equals, test.result)
}
}

0 comments on commit 74a6ddd

Please sign in to comment.