Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics, planner: make the content in HistColl.ColID2IdxIDs stable #38458

Merged
merged 11 commits into from
Oct 18, 2022
4 changes: 2 additions & 2 deletions cmd/explaintest/r/imdbload.result
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ IndexLookUp_7 1005030.94 root
└─TableRowIDScan_6(Probe) 1005030.94 cop[tikv] table:char_name keep order:false
trace plan target = 'estimation' select * from char_name where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'));
CE_trace
[{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'I'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'L'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`or`(`and`(`eq`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.surname_pcode, 'E436')), `and`(`eq`(imdbload.char_name.imdb_index, 'L'), `lt`(imdbload.char_name.surname_pcode, 'E436')))","row_count":804024}]
[{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'I'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'L'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Column Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`or`(`and`(`eq`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.surname_pcode, 'E436')), `and`(`eq`(imdbload.char_name.imdb_index, 'L'), `lt`(imdbload.char_name.surname_pcode, 'E436')))","row_count":804024}]

explain select * from char_name where ((imdb_index = 'V') and (surname_pcode < 'L3416'));
id estRows task access object operator info
Expand Down Expand Up @@ -356,7 +356,7 @@ IndexLookUp_11 901.00 root
└─TableRowIDScan_9 901.00 cop[tikv] table:keyword keep order:false
trace plan target = 'estimation' select * from keyword where ((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'));
CE_trace
[{"table_name":"keyword","type":"Column Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":236627},{"table_name":"keyword","type":"Column Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44075},{"table_name":"keyword","type":"Index Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword >= 'ecg-m' and keyword <= 'kille'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":901},{"table_name":"keyword","type":"Table Stats-Expression-CNF","expr":"`and`(`eq`(imdbload.keyword.phonetic_code, 'R1652'), `and`(`gt`(imdbload.keyword.keyword, 'ecg-monitor'), `lt`(imdbload.keyword.keyword, 'killers')))","row_count":901}]
[{"table_name":"keyword","type":"Column Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":236627},{"table_name":"keyword","type":"Column Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44075},{"table_name":"keyword","type":"Index Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword >= 'ecg-m' and keyword <= 'kille'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":901},{"table_name":"keyword","type":"Table Stats-Expression-CNF","expr":"`and`(`eq`(imdbload.keyword.phonetic_code, 'R1652'), `and`(`gt`(imdbload.keyword.keyword, 'ecg-monitor'), `lt`(imdbload.keyword.keyword, 'killers')))","row_count":901}]

explain select * from cast_info where (nr_order is null) and (person_role_id = 2) and (note >= '(key set pa: Florida');
id estRows task access object operator info
Expand Down
7 changes: 4 additions & 3 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1711,9 +1711,10 @@ func (ds *DataSource) crossEstimateRowCount(path *util.AccessPath, conds []expre
if len(ranges) == 0 || len(accessConds) == 0 || err != nil {
return 0, err == nil, corr
}
idxID, idxExists := ds.stats.HistColl.ColID2IdxID[colID]
if !idxExists {
idxID = -1
idxID := int64(-1)
idxIDs, idxExists := ds.stats.HistColl.ColID2IdxIDs[colID]
if idxExists && len(idxIDs) > 0 {
idxID = idxIDs[0]
}
rangeCounts, ok := getColumnRangeCounts(ds.ctx, colID, ranges, ds.tableStats.HistColl, idxID)
if !ok {
Expand Down
2 changes: 1 addition & 1 deletion statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ func (coll *HistColl) NewHistCollBySelectivity(sctx sessionctx.Context, statsNod
Columns: make(map[int64]*Column),
Indices: make(map[int64]*Index),
Idx2ColumnIDs: coll.Idx2ColumnIDs,
ColID2IdxID: coll.ColID2IdxID,
ColID2IdxIDs: coll.ColID2IdxIDs,
Count: coll.Count,
}
for _, node := range statsNodes {
Expand Down
28 changes: 22 additions & 6 deletions statistics/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,14 +346,30 @@ func (idx *Index) expBackoffEstimation(sctx sessionctx.Context, coll *HistColl,
}
colID := colsIDs[i]
var (
count float64
err error
count float64
err error
foundStats bool
)
if anotherIdxID, ok := coll.ColID2IdxID[colID]; ok && anotherIdxID != idx.Histogram.ID {
count, err = coll.GetRowCountByIndexRanges(sctx, anotherIdxID, tmpRan)
} else if col, ok := coll.Columns[colID]; ok && !col.IsInvalid(sctx, coll.Pseudo) {
if col, ok := coll.Columns[colID]; ok && !col.IsInvalid(sctx, coll.Pseudo) {
foundStats = true
count, err = coll.GetRowCountByColumnRanges(sctx, colID, tmpRan)
} else {
}
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
// Note the `len(indexRange.LowVal) > 1` condition here: we only recursively call
// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This
// check avoids infinite recursion.
for _, idxID := range idxIDs {
if idxID == idx.Histogram.ID {
continue
}
foundStats = true
count, err = coll.GetRowCountByIndexRanges(sctx, idxID, tmpRan)
if err == nil {
break
}
}
}
if !foundStats {
continue
}
if err != nil {
Expand Down
24 changes: 14 additions & 10 deletions statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ type HistColl struct {
Indices map[int64]*Index
// Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner.
Idx2ColumnIDs map[int64][]int64
// ColID2IdxID maps the column id to index id whose first column is it. It's used to calculate the selectivity in planner.
ColID2IdxID map[int64]int64
Count int64
ModifyCount int64 // Total modify count in a table.
// ColID2IdxIDs maps a column id to the list of ids of the indexes whose first column is that column. It's used to calculate the selectivity in planner.
ColID2IdxIDs map[int64][]int64
Count int64
ModifyCount int64 // Total modify count in a table.

// HavePhysicalID is true means this HistColl is from single table and have its ID's information.
// The physical id is used when try to load column stats from storage.
Expand Down Expand Up @@ -846,7 +846,7 @@ func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl {
return newColl
}

// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxID and IdxID2ColIDs is built from the given parameter.
// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxIDs and Idx2ColumnIDs are built from the given parameters.
func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl {
newColHistMap := make(map[int64]*Column)
colInfoID2UniqueID := make(map[int64]int64, len(columns))
Expand All @@ -869,7 +869,7 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo,
}
newIdxHistMap := make(map[int64]*Index)
idx2Columns := make(map[int64][]int64)
colID2IdxID := make(map[int64]int64)
colID2IdxIDs := make(map[int64][]int64)
for _, idxHist := range coll.Indices {
ids := make([]int64, 0, len(idxHist.Info.Columns))
for _, idxCol := range idxHist.Info.Columns {
Expand All @@ -883,10 +883,13 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo,
if len(ids) == 0 {
continue
}
colID2IdxID[ids[0]] = idxHist.ID
colID2IdxIDs[ids[0]] = append(colID2IdxIDs[ids[0]], idxHist.ID)
newIdxHistMap[idxHist.ID] = idxHist
idx2Columns[idxHist.ID] = ids
}
for _, idxIDs := range colID2IdxIDs {
slices.Sort(idxIDs)
}
newColl := &HistColl{
PhysicalID: coll.PhysicalID,
HavePhysicalID: coll.HavePhysicalID,
Expand All @@ -895,7 +898,7 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo,
ModifyCount: coll.ModifyCount,
Columns: newColHistMap,
Indices: newIdxHistMap,
ColID2IdxID: colID2IdxID,
ColID2IdxIDs: colID2IdxIDs,
Idx2ColumnIDs: idx2Columns,
}
return newColl
Expand Down Expand Up @@ -1084,8 +1087,9 @@ func (coll *HistColl) getIndexRowCount(sctx sessionctx.Context, idxID int64, ind
colID = colIDs[rangePosition]
}
// prefer index stats over column stats
if idx, ok := coll.ColID2IdxID[colID]; ok {
count, err = coll.GetRowCountByIndexRanges(sctx, idx, []*ranger.Range{&rang})
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 {
idxID := idxIDs[0]
count, err = coll.GetRowCountByIndexRanges(sctx, idxID, []*ranger.Range{&rang})
} else {
count, err = coll.GetRowCountByColumnRanges(sctx, colID, []*ranger.Range{&rang})
}
Expand Down