Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util/ranger: fix incorrect behavior about index who has prefix column #8851

Merged
merged 7 commits into from
Dec 29, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 35 additions & 7 deletions util/ranger/ranger.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,12 @@ func buildCNFIndexRange(sc *stmtctx.StatementContext, cols []*expression.Column,

// Take prefix index into consideration.
if hasPrefix(lengths) {
fixPrefixColRange(ranges, lengths, newTp)
if fixPrefixColRange(ranges, lengths, newTp) {
ranges, err = unionRanges(sc, ranges)
if err != nil {
return nil, errors.Trace(err)
}
}
}

if len(ranges) > 0 && len(ranges[0].LowVal) < len(cols) {
Expand Down Expand Up @@ -413,20 +418,40 @@ func hasPrefix(lengths []int) bool {
return false
}

func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) {
// fixPrefixColRange checks whether the range of one column exceeds the length and needs to be cut.
// It specially handles the last column of each range point. If the last one need to be cut, it will
// change the exclude status of that point and return `true` to tell
// that we need do a range merging since that interval may have intersection.
// e.g. if the interval is (-inf -inf, a xxxxx), (a xxxxx, +inf +inf) and the length of the last column is 3,
// then we'll change it to (-inf -inf, a xxx], [a xxx, +inf +inf). You can see that this two interval intersect,
// so we need a merge operation.
func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) bool {
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
hasCut := false
for _, ran := range ranges {
for i := 0; i < len(ran.LowVal); i++ {
lowTail := len(ran.LowVal) - 1
for i := 0; i < lowTail; i++ {
fixRangeDatum(&ran.LowVal[i], lengths[i], tp[i])
}
ran.LowExclude = false
for i := 0; i < len(ran.HighVal); i++ {
lowCut := false
lowCut = fixRangeDatum(&ran.LowVal[lowTail], lengths[lowTail], tp[lowTail])
if lowCut {
ran.LowExclude = false
}
highTail := len(ran.HighVal) - 1
for i := 0; i < highTail; i++ {
fixRangeDatum(&ran.HighVal[i], lengths[i], tp[i])
}
ran.HighExclude = false
highCut := false
highCut = fixRangeDatum(&ran.HighVal[highTail], lengths[highTail], tp[highTail])
if highCut {
ran.HighExclude = false
}
hasCut = lowCut || highCut
}
return hasCut
}

func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) {
func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) bool {
// If this column is a prefix column and the value is longer than the prefix length, cut it.
// In case of UTF8, prefix should be cut by characters rather than bytes
if v.Kind() == types.KindString || v.Kind() == types.KindBytes {
Expand All @@ -439,12 +464,15 @@ func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) {
truncateStr := string(rs[:length])
// truncate value and limit its length
v.SetString(truncateStr)
return true
}
} else if length != types.UnspecifiedLength && len(colValue) > length {
// truncate value and limit its length
v.SetBytes(colValue[:length])
return true
}
}
return false
}

// We cannot use the FieldType of column directly. e.g. the column a is int32 and we have a > 1111111111111111111.
Expand Down
28 changes: 28 additions & 0 deletions util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,34 @@ func (s *testRangerSuite) TestIndexRange(c *C) {
filterConds: "[eq(test.t.e, 你好啊)]",
resultStr: "[[\"[228 189]\",\"[228 189]\"]]",
},
{
indexPos: 2,
exprStr: `d in ("你好啊")`,
accessConds: "[in(test.t.d, 你好啊)]",
filterConds: "[in(test.t.d, 你好啊)]",
resultStr: "[[\"你好\",\"你好\"]]",
},
{
indexPos: 2,
exprStr: `d not in ("你好啊")`,
accessConds: "[not(in(test.t.d, 你好啊))]",
filterConds: "[not(in(test.t.d, 你好啊))]",
resultStr: "[(NULL,+inf]]",
},
{
indexPos: 2,
exprStr: `d < "你好" || d > "你好"`,
accessConds: "[or(lt(test.t.d, 你好), gt(test.t.d, 你好))]",
filterConds: "[or(lt(test.t.d, 你好), gt(test.t.d, 你好))]",
resultStr: "[[-inf,\"你好\") (\"你好\",+inf]]",
},
{
indexPos: 2,
exprStr: `not(d < "你好" || d > "你好")`,
accessConds: "[and(ge(test.t.d, 你好), le(test.t.d, 你好))]",
filterConds: "[and(ge(test.t.d, 你好), le(test.t.d, 你好))]",
resultStr: "[[\"你好\",\"你好\"]]",
},
}

for _, tt := range tests {
Expand Down