-
Notifications
You must be signed in to change notification settings - Fork 5.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
stats: fix histogram boundaries overflow error #7883
Changes from all commits
4cc2454
89ee4b2
647b2c5
795b25c
2283f87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -243,20 +243,79 @@ type BucketFeedback struct { | |
upper *types.Datum // The upper bound of the new bucket. | ||
} | ||
|
||
// outOfRange checks if the `val` is between `min` and `max`. | ||
func outOfRange(sc *stmtctx.StatementContext, min, max, val *types.Datum) (int, error) { | ||
result, err := val.CompareDatum(sc, min) | ||
if err != nil { | ||
return 0, err | ||
} | ||
if result < 0 { | ||
return result, nil | ||
} | ||
result, err = val.CompareDatum(sc, max) | ||
if err != nil { | ||
return 0, err | ||
} | ||
if result > 0 { | ||
return result, nil | ||
} | ||
return 0, nil | ||
} | ||
|
||
// adjustFeedbackBoundaries adjust the feedback boundaries according to the `min` and `max`. | ||
// If the feedback has no intersection with `min` and `max`, we could just skip this feedback. | ||
func (f *feedback) adjustFeedbackBoundaries(sc *stmtctx.StatementContext, min, max *types.Datum) (bool, error) { | ||
result, err := outOfRange(sc, min, max, f.lower) | ||
if err != nil { | ||
return false, err | ||
} | ||
if result > 0 { | ||
return true, nil | ||
} | ||
if result < 0 { | ||
f.lower = min | ||
} | ||
result, err = outOfRange(sc, min, max, f.upper) | ||
if err != nil { | ||
return false, err | ||
} | ||
if result < 0 { | ||
return true, nil | ||
} | ||
if result > 0 { | ||
f.upper = max | ||
} | ||
return false, nil | ||
} | ||
|
||
// buildBucketFeedback builds the feedback for each bucket from the histogram feedback. | ||
func buildBucketFeedback(h *Histogram, feedback *QueryFeedback) (map[int]*BucketFeedback, int) { | ||
bktID2FB := make(map[int]*BucketFeedback) | ||
if len(feedback.feedback) == 0 { | ||
return bktID2FB, 0 | ||
} | ||
total := 0 | ||
for _, ran := range feedback.feedback { | ||
idx, _ := h.Bounds.LowerBound(0, ran.lower) | ||
sc := &stmtctx.StatementContext{TimeZone: time.UTC} | ||
kind := feedback.feedback[0].lower.Kind() | ||
min, max := getMinValue(kind, h.tp), getMaxValue(kind, h.tp) | ||
for _, fb := range feedback.feedback { | ||
skip, err := fb.adjustFeedbackBoundaries(sc, &min, &max) | ||
if err != nil { | ||
log.Debugf("adjust feedback boundaries failed, err: %v", errors.ErrorStack(err)) | ||
continue | ||
} | ||
if skip { | ||
continue | ||
} | ||
idx, _ := h.Bounds.LowerBound(0, fb.lower) | ||
bktIdx := 0 | ||
// The last bucket also stores the feedback that falls outside the upper bound. | ||
if idx >= h.Bounds.NumRows()-2 { | ||
bktIdx = h.Len() - 1 | ||
} else { | ||
bktIdx = idx / 2 | ||
// Make sure that this feedback lies within the bucket. | ||
if chunk.Compare(h.Bounds.GetRow(2*bktIdx+1), 0, ran.upper) < 0 { | ||
if chunk.Compare(h.Bounds.GetRow(2*bktIdx+1), 0, fb.upper) < 0 { | ||
continue | ||
} | ||
} | ||
|
@@ -266,23 +325,23 @@ func buildBucketFeedback(h *Histogram, feedback *QueryFeedback) (map[int]*Bucket | |
bkt = &BucketFeedback{lower: h.GetLower(bktIdx), upper: h.GetUpper(bktIdx)} | ||
bktID2FB[bktIdx] = bkt | ||
} | ||
bkt.feedback = append(bkt.feedback, ran) | ||
bkt.feedback = append(bkt.feedback, fb) | ||
// Update the bound if necessary. | ||
res, err := bkt.lower.CompareDatum(nil, ran.lower) | ||
res, err := bkt.lower.CompareDatum(nil, fb.lower) | ||
if err != nil { | ||
log.Debugf("compare datum %v with %v failed, err: %v", bkt.lower, ran.lower, errors.ErrorStack(err)) | ||
log.Debugf("compare datum %v with %v failed, err: %v", bkt.lower, fb.lower, errors.ErrorStack(err)) | ||
continue | ||
} | ||
if res > 0 { | ||
bkt.lower = ran.lower | ||
bkt.lower = fb.lower | ||
} | ||
res, err = bkt.upper.CompareDatum(nil, ran.upper) | ||
res, err = bkt.upper.CompareDatum(nil, fb.upper) | ||
if err != nil { | ||
log.Debugf("compare datum %v with %v failed, err: %v", bkt.upper, ran.upper, errors.ErrorStack(err)) | ||
log.Debugf("compare datum %v with %v failed, err: %v", bkt.upper, fb.upper, errors.ErrorStack(err)) | ||
continue | ||
} | ||
if res < 0 { | ||
bkt.upper = ran.upper | ||
bkt.upper = fb.upper | ||
} | ||
} | ||
return bktID2FB, total | ||
|
@@ -528,7 +587,12 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6 | |
func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram { | ||
buckets, isNewBuckets, totalCount := splitBuckets(h, feedback) | ||
buckets = mergeBuckets(buckets, isNewBuckets, float64(totalCount)) | ||
return buildNewHistogram(h, buckets) | ||
hist := buildNewHistogram(h, buckets) | ||
// Update the NDV of primary key column. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why was the NDV update moved here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
if feedback.tp == pkType { | ||
hist.NDV = int64(hist.totalRowCount()) | ||
} | ||
return hist | ||
} | ||
|
||
// UpdateCMSketch updates the CMSketch by feedback. | ||
|
@@ -1077,13 +1141,13 @@ func supportColumnType(k byte) bool { | |
func getMaxValue(k byte, ft *types.FieldType) (max types.Datum) { | ||
switch k { | ||
case types.KindInt64: | ||
max.SetInt64(math.MaxInt64) | ||
max.SetInt64(types.SignedUpperBound[ft.Tp]) | ||
case types.KindUint64: | ||
max.SetUint64(math.MaxUint64) | ||
max.SetUint64(types.UnsignedUpperBound[ft.Tp]) | ||
case types.KindFloat32: | ||
max.SetFloat32(math.MaxFloat32) | ||
max.SetFloat32(float32(types.GetMaxFloat(ft.Flen, ft.Decimal))) | ||
case types.KindFloat64: | ||
max.SetFloat64(math.MaxFloat64) | ||
max.SetFloat64(types.GetMaxFloat(ft.Flen, ft.Decimal)) | ||
case types.KindString, types.KindBytes: | ||
val := types.MaxValueDatum() | ||
bytes, err := codec.EncodeKey(nil, nil, val) | ||
|
@@ -1093,7 +1157,7 @@ func getMaxValue(k byte, ft *types.FieldType) (max types.Datum) { | |
} | ||
max.SetBytes(bytes) | ||
case types.KindMysqlDecimal: | ||
max.SetMysqlDecimal(types.NewMaxOrMinDec(false, mysql.MaxDecimalWidth, 0)) | ||
max.SetMysqlDecimal(types.NewMaxOrMinDec(false, ft.Flen, ft.Decimal)) | ||
case types.KindMysqlDuration: | ||
max.SetMysqlDuration(types.Duration{Duration: math.MaxInt64}) | ||
case types.KindMysqlTime: | ||
|
@@ -1109,13 +1173,13 @@ func getMaxValue(k byte, ft *types.FieldType) (max types.Datum) { | |
func getMinValue(k byte, ft *types.FieldType) (min types.Datum) { | ||
switch k { | ||
case types.KindInt64: | ||
min.SetInt64(math.MinInt64) | ||
min.SetInt64(types.SignedLowerBound[ft.Tp]) | ||
case types.KindUint64: | ||
min.SetUint64(0) | ||
case types.KindFloat32: | ||
min.SetFloat32(-math.MaxFloat32) | ||
min.SetFloat32(float32(-types.GetMaxFloat(ft.Flen, ft.Decimal))) | ||
case types.KindFloat64: | ||
min.SetFloat64(-math.MaxFloat64) | ||
min.SetFloat64(-types.GetMaxFloat(ft.Flen, ft.Decimal)) | ||
case types.KindString, types.KindBytes: | ||
val := types.MinNotNullDatum() | ||
bytes, err := codec.EncodeKey(nil, nil, val) | ||
|
@@ -1125,7 +1189,7 @@ func getMinValue(k byte, ft *types.FieldType) (min types.Datum) { | |
} | ||
min.SetBytes(bytes) | ||
case types.KindMysqlDecimal: | ||
min.SetMysqlDecimal(types.NewMaxOrMinDec(true, mysql.MaxDecimalWidth, 0)) | ||
min.SetMysqlDecimal(types.NewMaxOrMinDec(true, ft.Flen, ft.Decimal)) | ||
case types.KindMysqlDuration: | ||
min.SetMysqlDuration(types.Duration{Duration: math.MinInt64}) | ||
case types.KindMysqlTime: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the TimeZone there for functions like `compareMysqlTime`? The hard-coded value looks confusing...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The time zone does not matter here, so we simply set it to UTC rather than leaving it nil.