Skip to content

Commit

Permalink
statistics, planner: make the content in HistColl.ColID2IdxIDs stable (#38458)
Browse files Browse the repository at this point in the history

close #38457
  • Loading branch information
time-and-fate authored Oct 18, 2022
1 parent bd014d6 commit 22b85b9
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 22 deletions.
4 changes: 2 additions & 2 deletions cmd/explaintest/r/imdbload.result
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ IndexLookUp_7 1005030.94 root
└─TableRowIDScan_6(Probe) 1005030.94 cop[tikv] table:char_name keep order:false
trace plan target = 'estimation' select * from char_name where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'));
CE_trace
[{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'I'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'L'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`or`(`and`(`eq`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.surname_pcode, 'E436')), `and`(`eq`(imdbload.char_name.imdb_index, 'L'), `lt`(imdbload.char_name.surname_pcode, 'E436')))","row_count":804024}]
[{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'I'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'L'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Column Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`or`(`and`(`eq`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.surname_pcode, 'E436')), `and`(`eq`(imdbload.char_name.imdb_index, 'L'), `lt`(imdbload.char_name.surname_pcode, 'E436')))","row_count":804024}]

explain select * from char_name where ((imdb_index = 'V') and (surname_pcode < 'L3416'));
id estRows task access object operator info
Expand Down Expand Up @@ -356,7 +356,7 @@ IndexLookUp_11 901.00 root
└─TableRowIDScan_9 901.00 cop[tikv] table:keyword keep order:false
trace plan target = 'estimation' select * from keyword where ((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'));
CE_trace
[{"table_name":"keyword","type":"Column Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":236627},{"table_name":"keyword","type":"Column Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44075},{"table_name":"keyword","type":"Index Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword >= 'ecg-m' and keyword <= 'kille'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":901},{"table_name":"keyword","type":"Table Stats-Expression-CNF","expr":"`and`(`eq`(imdbload.keyword.phonetic_code, 'R1652'), `and`(`gt`(imdbload.keyword.keyword, 'ecg-monitor'), `lt`(imdbload.keyword.keyword, 'killers')))","row_count":901}]
[{"table_name":"keyword","type":"Column Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":236627},{"table_name":"keyword","type":"Column Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44075},{"table_name":"keyword","type":"Index Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword >= 'ecg-m' and keyword <= 'kille'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":901},{"table_name":"keyword","type":"Table Stats-Expression-CNF","expr":"`and`(`eq`(imdbload.keyword.phonetic_code, 'R1652'), `and`(`gt`(imdbload.keyword.keyword, 'ecg-monitor'), `lt`(imdbload.keyword.keyword, 'killers')))","row_count":901}]

explain select * from cast_info where (nr_order is null) and (person_role_id = 2) and (note >= '(key set pa: Florida');
id estRows task access object operator info
Expand Down
7 changes: 4 additions & 3 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1711,9 +1711,10 @@ func (ds *DataSource) crossEstimateRowCount(path *util.AccessPath, conds []expre
if len(ranges) == 0 || len(accessConds) == 0 || err != nil {
return 0, err == nil, corr
}
idxID, idxExists := ds.stats.HistColl.ColID2IdxID[colID]
if !idxExists {
idxID = -1
idxID := int64(-1)
idxIDs, idxExists := ds.stats.HistColl.ColID2IdxIDs[colID]
if idxExists && len(idxIDs) > 0 {
idxID = idxIDs[0]
}
rangeCounts, ok := getColumnRangeCounts(ds.ctx, colID, ranges, ds.tableStats.HistColl, idxID)
if !ok {
Expand Down
2 changes: 1 addition & 1 deletion statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ func (coll *HistColl) NewHistCollBySelectivity(sctx sessionctx.Context, statsNod
Columns: make(map[int64]*Column),
Indices: make(map[int64]*Index),
Idx2ColumnIDs: coll.Idx2ColumnIDs,
ColID2IdxID: coll.ColID2IdxID,
ColID2IdxIDs: coll.ColID2IdxIDs,
Count: coll.Count,
}
for _, node := range statsNodes {
Expand Down
28 changes: 22 additions & 6 deletions statistics/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,14 +346,30 @@ func (idx *Index) expBackoffEstimation(sctx sessionctx.Context, coll *HistColl,
}
colID := colsIDs[i]
var (
count float64
err error
count float64
err error
foundStats bool
)
if anotherIdxID, ok := coll.ColID2IdxID[colID]; ok && anotherIdxID != idx.Histogram.ID {
count, err = coll.GetRowCountByIndexRanges(sctx, anotherIdxID, tmpRan)
} else if col, ok := coll.Columns[colID]; ok && !col.IsInvalid(sctx, coll.Pseudo) {
if col, ok := coll.Columns[colID]; ok && !col.IsInvalid(sctx, coll.Pseudo) {
foundStats = true
count, err = coll.GetRowCountByColumnRanges(sctx, colID, tmpRan)
} else {
}
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
// Note the `len(indexRange.LowVal) > 1` condition here: we only recursively call
// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This
// check avoids infinite recursion.
for _, idxID := range idxIDs {
if idxID == idx.Histogram.ID {
continue
}
foundStats = true
count, err = coll.GetRowCountByIndexRanges(sctx, idxID, tmpRan)
if err == nil {
break
}
}
}
if !foundStats {
continue
}
if err != nil {
Expand Down
24 changes: 14 additions & 10 deletions statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ type HistColl struct {
Indices map[int64]*Index
// Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner.
Idx2ColumnIDs map[int64][]int64
// ColID2IdxID maps the column id to index id whose first column is it. It's used to calculate the selectivity in planner.
ColID2IdxID map[int64]int64
Count int64
ModifyCount int64 // Total modify count in a table.
// ColID2IdxIDs maps the column id to a list of index ids whose first column is it. It's used to calculate the selectivity in planner.
ColID2IdxIDs map[int64][]int64
Count int64
ModifyCount int64 // Total modify count in a table.

// HavePhysicalID is true means this HistColl is from single table and have its ID's information.
// The physical id is used when try to load column stats from storage.
Expand Down Expand Up @@ -846,7 +846,7 @@ func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl {
return newColl
}

// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxID and IdxID2ColIDs is built from the given parameter.
// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxIDs and Idx2ColumnIDs are built from the given parameters.
func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl {
newColHistMap := make(map[int64]*Column)
colInfoID2UniqueID := make(map[int64]int64, len(columns))
Expand All @@ -869,7 +869,7 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo,
}
newIdxHistMap := make(map[int64]*Index)
idx2Columns := make(map[int64][]int64)
colID2IdxID := make(map[int64]int64)
colID2IdxIDs := make(map[int64][]int64)
for _, idxHist := range coll.Indices {
ids := make([]int64, 0, len(idxHist.Info.Columns))
for _, idxCol := range idxHist.Info.Columns {
Expand All @@ -883,10 +883,13 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo,
if len(ids) == 0 {
continue
}
colID2IdxID[ids[0]] = idxHist.ID
colID2IdxIDs[ids[0]] = append(colID2IdxIDs[ids[0]], idxHist.ID)
newIdxHistMap[idxHist.ID] = idxHist
idx2Columns[idxHist.ID] = ids
}
for _, idxIDs := range colID2IdxIDs {
slices.Sort(idxIDs)
}
newColl := &HistColl{
PhysicalID: coll.PhysicalID,
HavePhysicalID: coll.HavePhysicalID,
Expand All @@ -895,7 +898,7 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo,
ModifyCount: coll.ModifyCount,
Columns: newColHistMap,
Indices: newIdxHistMap,
ColID2IdxID: colID2IdxID,
ColID2IdxIDs: colID2IdxIDs,
Idx2ColumnIDs: idx2Columns,
}
return newColl
Expand Down Expand Up @@ -1084,8 +1087,9 @@ func (coll *HistColl) getIndexRowCount(sctx sessionctx.Context, idxID int64, ind
colID = colIDs[rangePosition]
}
// prefer index stats over column stats
if idx, ok := coll.ColID2IdxID[colID]; ok {
count, err = coll.GetRowCountByIndexRanges(sctx, idx, []*ranger.Range{&rang})
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 {
idxID := idxIDs[0]
count, err = coll.GetRowCountByIndexRanges(sctx, idxID, []*ranger.Range{&rang})
} else {
count, err = coll.GetRowCountByColumnRanges(sctx, colID, []*ranger.Range{&rang})
}
Expand Down

0 comments on commit 22b85b9

Please sign in to comment.