Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: support auto analyze partition table #7789

Merged
merged 5 commits into from
Oct 12, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ast/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ var (
type AnalyzeTableStmt struct {
stmtNode

TableNames []*TableName
IndexNames []model.CIStr
MaxNumBuckets uint64
TableNames []*TableName
PartitionNames []model.CIStr
IndexNames []model.CIStr
MaxNumBuckets uint64

// IndexFlag is true when we only analyze indices for a table.
IndexFlag bool
Expand Down
23 changes: 23 additions & 0 deletions executor/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,29 @@ PARTITION BY RANGE ( a ) (
c.Assert(idx.Len(), Greater, 0)
}
}

tk.MustExec("drop table t")
tk.MustExec(createTable)
for i := 1; i < 21; i++ {
tk.MustExec(fmt.Sprintf(`insert into t values (%d, %d, "hello")`, i, i))
}
tk.MustExec("alter table t analyze partition p0")
is = executor.GetInfoSchema(tk.Se.(sessionctx.Context))
table, err = is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
pi = table.Meta().GetPartitionInfo()
c.Assert(pi, NotNil)

for i, def := range pi.Definitions {
statsTbl := handle.GetPartitionStats(table.Meta(), def.ID)
if i == 0 {
c.Assert(statsTbl.Pseudo, IsFalse)
c.Assert(len(statsTbl.Columns), Equals, 2)
c.Assert(len(statsTbl.Indices), Equals, 1)
} else {
c.Assert(statsTbl.Pseudo, IsTrue)
}
}
}

func (s *testSuite) TestAnalyzeParameters(c *C) {
Expand Down
25 changes: 25 additions & 0 deletions parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,7 @@ import (
PartitionDefinitionList "Partition definition list"
PartitionDefinitionListOpt "Partition definition list option"
PartitionOpt "Partition option"
PartitionNameList "Partition name list"
PartitionNumOpt "PARTITION NUM option"
PartDefValuesOpt "VALUES {LESS THAN {(expr | value_list) | MAXVALUE} | IN {value_list}"
PartDefOptionsOpt "PartDefOptionList option"
Expand Down Expand Up @@ -923,6 +924,20 @@ AlterTableStmt:
Specs: $5.([]*ast.AlterTableSpec),
}
}
| "ALTER" IgnoreOptional "TABLE" TableName "ANALYZE" "PARTITION" PartitionNameList MaxNumBuckets
{
$$ = &ast.AnalyzeTableStmt{TableNames: []*ast.TableName{$4.(*ast.TableName)}, PartitionNames: $7.([]model.CIStr), MaxNumBuckets: $8.(uint64),}
}
| "ALTER" IgnoreOptional "TABLE" TableName "ANALYZE" "PARTITION" PartitionNameList "INDEX" IndexNameList MaxNumBuckets
{
$$ = &ast.AnalyzeTableStmt{
TableNames: []*ast.TableName{$4.(*ast.TableName)},
PartitionNames: $7.([]model.CIStr),
IndexNames: $9.([]model.CIStr),
IndexFlag: true,
MaxNumBuckets: $10.(uint64),
}
}

AlterTableSpec:
AlterTableOptionListOpt
Expand Down Expand Up @@ -1170,6 +1185,16 @@ AlterTableSpecList:
$$ = append($1.([]*ast.AlterTableSpec), $3.(*ast.AlterTableSpec))
}

/*
 * PartitionNameList is a comma-separated list of one or more identifiers.
 * Its value is a []model.CIStr holding the names in the order they were written:
 * the first alternative starts the slice, the second appends to it.
 */
PartitionNameList:
Identifier
{
$$ = []model.CIStr{model.NewCIStr($1)}
}
| PartitionNameList ',' Identifier
{
$$ = append($1.([]model.CIStr), model.NewCIStr($3))
}

ConstraintKeywordOpt:
{
$$ = nil
Expand Down
5 changes: 5 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1701,6 +1701,11 @@ func (s *testParserSuite) TestDDL(c *C) {
{"ALTER TABLE t RENAME KEY a TO b;", true},
{"ALTER TABLE t RENAME INDEX a TO b;", true},

{"alter table t analyze partition a", true},
{"alter table t analyze partition a with 4 buckets", true},
{"alter table t analyze partition a index b", true},
{"alter table t analyze partition a index b with 4 buckets", true},

// For create index statement
{"CREATE INDEX idx ON t (a)", true},
{"CREATE INDEX idx ON t (a) USING HASH", true},
Expand Down
57 changes: 44 additions & 13 deletions planner/core/planbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

"github.com/cznic/mathutil"
"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/ddl"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/infoschema"
"github.com/pingcap/tidb/model"
Expand Down Expand Up @@ -622,22 +623,46 @@ func getColsInfo(tn *ast.TableName) (indicesInfo []*model.IndexInfo, colsInfo []
return
}

func getPhysicalIDs(tblInfo *model.TableInfo) []int64 {
if pi := tblInfo.GetPartitionInfo(); pi != nil {
func getPhysicalIDs(tblInfo *model.TableInfo, partitionNames []model.CIStr) ([]int64, error) {
pi := tblInfo.GetPartitionInfo()
if pi == nil {
if len(partitionNames) != 0 {
return nil, errors.Trace(ddl.ErrPartitionMgmtOnNonpartitioned)
}
return []int64{tblInfo.ID}, nil
}
if len(partitionNames) == 0 {
ids := make([]int64, 0, len(pi.Definitions))
for _, def := range pi.Definitions {
ids = append(ids, def.ID)
}
return ids
return ids, nil
}
return []int64{tblInfo.ID}
ids := make([]int64, 0, len(partitionNames))
for _, name := range partitionNames {
found := false
for _, def := range pi.Definitions {
if def.Name.L == name.L {
found = true
ids = append(ids, def.ID)
break
}
}
if !found {
return nil, errors.New(fmt.Sprintf("Error in list of partitions to %s", tblInfo.Name.O))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about: "cannot find the specified partition name xxx in the table definition"?

}
}
return ids, nil
}

func (b *planBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) Plan {
func (b *planBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) (Plan, error) {
p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
for _, tbl := range as.TableNames {
idxInfo, colInfo, pkInfo := getColsInfo(tbl)
physicalIDs := getPhysicalIDs(tbl.TableInfo)
physicalIDs, err := getPhysicalIDs(tbl.TableInfo, as.PartitionNames)
if err != nil {
return nil, err
}
for _, idx := range idxInfo {
for _, id := range physicalIDs {
p.IdxTasks = append(p.IdxTasks, AnalyzeIndexTask{PhysicalTableID: id, IndexInfo: idx})
Expand All @@ -649,13 +674,16 @@ func (b *planBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) Plan {
}
}
}
return p
return p, nil
}

func (b *planBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) {
p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
tblInfo := as.TableNames[0].TableInfo
physicalIDs := getPhysicalIDs(tblInfo)
physicalIDs, err := getPhysicalIDs(tblInfo, as.PartitionNames)
if err != nil {
return nil, err
}
for _, idxName := range as.IndexNames {
idx := findIndexByName(tblInfo.Indices, idxName)
if idx == nil || idx.State != model.StatePublic {
Expand All @@ -668,18 +696,21 @@ func (b *planBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error)
return p, nil
}

func (b *planBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) Plan {
func (b *planBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) (Plan, error) {
p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
tblInfo := as.TableNames[0].TableInfo
physicalIDs := getPhysicalIDs(tblInfo)
physicalIDs, err := getPhysicalIDs(tblInfo, as.PartitionNames)
if err != nil {
return nil, err
}
for _, idx := range tblInfo.Indices {
if idx.State == model.StatePublic {
for _, id := range physicalIDs {
p.IdxTasks = append(p.IdxTasks, AnalyzeIndexTask{PhysicalTableID: id, IndexInfo: idx})
}
}
}
return p
return p, nil
}

const (
Expand All @@ -695,11 +726,11 @@ func (b *planBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) {
}
if as.IndexFlag {
if len(as.IndexNames) == 0 {
return b.buildAnalyzeAllIndex(as), nil
return b.buildAnalyzeAllIndex(as)
}
return b.buildAnalyzeIndex(as)
}
return b.buildAnalyzeTable(as), nil
return b.buildAnalyzeTable(as)
}

func buildShowDDLFields() *expression.Schema {
Expand Down
51 changes: 36 additions & 15 deletions statistics/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -707,31 +707,52 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
tbls := is.SchemaTables(model.NewCIStr(db))
for _, tbl := range tbls {
tblInfo := tbl.Meta()
statsTbl := h.GetTableStats(tblInfo)
if statsTbl.Pseudo || statsTbl.Count < AutoAnalyzeMinCnt {
continue
}
pi := tblInfo.GetPartitionInfo()
tblName := "`" + db + "`.`" + tblInfo.Name.O + "`"
if NeedAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio, start, end, time.Now()) {
if pi == nil {
statsTbl := h.GetTableStats(tblInfo)
sql := fmt.Sprintf("analyze table %s", tblName)
log.Infof("[stats] auto analyze table %s now", tblName)
return errors.Trace(h.execAutoAnalyze(sql))
}
for _, idx := range tblInfo.Indices {
if idx.State != model.StatePublic {
continue
analyzed, err := h.autoAnalyzeTable(tblInfo, statsTbl, start, end, autoAnalyzeRatio, sql)
if analyzed {
return err
}
if _, ok := statsTbl.Indices[idx.ID]; !ok {
sql := fmt.Sprintf("analyze table %s index `%s`", tblName, idx.Name.O)
log.Infof("[stats] auto analyze index `%s` for table %s now", idx.Name.O, tblName)
return errors.Trace(h.execAutoAnalyze(sql))
continue
}
for _, def := range pi.Definitions {
sql := fmt.Sprintf("alter table %s analyze partition `%s`", tblName, def.Name.O)
Copy link
Member

@zz-jason zz-jason Oct 10, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's weird that MySQL uses the alter table statement to analyze the table partitions... https://dev.mysql.com/doc/refman/5.7/en/partitioning-maintenance.html

Despite the compatibility issue, can we also support another syntax to analyze table partitions?

Copy link
Contributor Author

@alivxxx alivxxx Oct 10, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we can also support syntax like analyze table t partition p.

statsTbl := h.GetPartitionStats(tblInfo, def.ID)
analyzed, err := h.autoAnalyzeTable(tblInfo, statsTbl, start, end, autoAnalyzeRatio, sql)
if analyzed {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we analyze all partitions at once instead of only one partition?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think only one partition is better because the partition stats are independent.

return err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If one of the partitions is analyzed, the rest of the partitions never get a chance to be analyzed. I think we should continue to analyze the other partitions instead of just returning and terminating the analyze command.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason we only trigger one analyze at a time is so that we can get the most up-to-date auto analyze parameters. The rest of the partitions can wait for the next round, which is just 3s later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can get the most updated auto analyze parameters.

What does this mean?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In https://github.com/pingcap/tidb/blob/master/statistics/update.go#L654, we get parameters such as the analyze time period, so if we continue to analyze other tables, we may not be using the latest parameters.

}
continue
}
}
}
return nil
}

// autoAnalyzeTable runs at most one analyze statement for the table or
// partition whose statistics are statsTbl.
//
// sql is the base analyze statement for that table or partition. It is
// executed unchanged when NeedAnalyzeTable returns true. Otherwise, for the
// first public index of tblInfo that has no entry in statsTbl.Indices, it is
// executed with " index `<name>`" appended.
//
// Nothing is executed when statsTbl is pseudo or statsTbl.Count is below
// AutoAnalyzeMinCnt.
//
// The boolean result reports whether a statement was executed; the error is
// whatever execAutoAnalyze returned for that statement.
func (h *Handle) autoAnalyzeTable(tblInfo *model.TableInfo, statsTbl *Table, start, end time.Time, ratio float64, sql string) (bool, error) {
	// run logs and executes a single statement and reports that an analyze
	// was attempted.
	run := func(stmt string) (bool, error) {
		log.Infof("[stats] auto %s now", stmt)
		return true, h.execAutoAnalyze(stmt)
	}

	switch {
	case statsTbl.Pseudo, statsTbl.Count < AutoAnalyzeMinCnt:
		return false, nil
	case NeedAnalyzeTable(statsTbl, 20*h.Lease, ratio, start, end, time.Now()):
		return run(sql)
	}

	// The table itself does not need analyzing; look for a public index that
	// has no statistics yet and analyze only that one.
	for _, index := range tblInfo.Indices {
		if index.State != model.StatePublic {
			continue
		}
		if _, hasStats := statsTbl.Indices[index.ID]; hasStats {
			continue
		}
		return run(fmt.Sprintf("%s index `%s`", sql, index.Name.O))
	}
	return false, nil
}

func (h *Handle) execAutoAnalyze(sql string) error {
startTime := time.Now()
_, _, err := h.restrictedExec.ExecRestrictedSQL(nil, sql)
Expand Down
38 changes: 38 additions & 0 deletions statistics/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,44 @@ func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
c.Assert(hg.Len(), Equals, 3)
}

// TestAutoUpdatePartition checks that HandleAutoAnalyze refreshes the
// statistics of a partition: after one row is inserted into a freshly
// analyzed, empty partitioned table, the partition's Count must become 1 and
// its ModifyCount must be 0.
func (s *testStatsUpdateSuite) TestAutoUpdatePartition(c *C) {
	defer cleanEnv(c, s.store, s.do)

	tk := testkit.NewTestKit(c, s.store)
	// Create a single-partition range table and analyze it while it is empty.
	for _, stmt := range []string{
		"set @@session.tidb_enable_table_partition=1",
		"use test",
		"drop table if exists t",
		"create table t (a int) PARTITION BY RANGE (a) (PARTITION p0 VALUES LESS THAN (6))",
		"analyze table t",
	} {
		tk.MustExec(stmt)
	}

	// Lower the row-count threshold and set the auto analyze ratio for this
	// test; both are reset to fixed values when the test returns.
	statistics.AutoAnalyzeMinCnt = 0
	tk.MustExec("set global tidb_auto_analyze_ratio = 0.6")
	defer func() {
		statistics.AutoAnalyzeMinCnt = 1000
		tk.MustExec("set global tidb_auto_analyze_ratio = 0.0")
	}()

	dom := s.do
	schema := dom.InfoSchema()
	partTbl, err := schema.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	c.Assert(err, IsNil)
	tblMeta := partTbl.Meta()
	partInfo := tblMeta.GetPartitionInfo()
	statsHandle := dom.StatsHandle()

	// Before any insert the partition is expected to hold no rows.
	statsHandle.Update(schema)
	partStats := statsHandle.GetPartitionStats(tblMeta, partInfo.Definitions[0].ID)
	c.Assert(partStats.Count, Equals, int64(0))

	// Insert one row, dump the delta, reload stats, then run auto analyze.
	tk.MustExec("insert into t values (1)")
	statsHandle.DumpStatsDeltaToKV(statistics.DumpAll)
	statsHandle.Update(schema)
	err = statsHandle.HandleAutoAnalyze(schema)
	c.Assert(err, IsNil)

	partStats = statsHandle.GetPartitionStats(tblMeta, partInfo.Definitions[0].ID)
	c.Assert(partStats.Count, Equals, int64(1))
	c.Assert(partStats.ModifyCount, Equals, int64(0))
}

func (s *testStatsUpdateSuite) TestTableAnalyzed(c *C) {
defer cleanEnv(c, s.store, s.do)
testKit := testkit.NewTestKit(c, s.store)
Expand Down