util/ranger: fix incorrect behavior about index who has prefix column (…

…pingcap#8851)
winoros · Dec 29, 2018 · 01ccf32 · 01ccf32
1 parent cceab82
commit 01ccf32
Show file tree

Hide file tree

Showing 2 changed files with 69 additions and 7 deletions.
diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go
@@ -328,7 +328,12 @@ func buildCNFIndexRange(sc *stmtctx.StatementContext, cols []*expression.Column,
 
 	// Take prefix index into consideration.
 	if hasPrefix(lengths) {
-		fixPrefixColRange(ranges, lengths, newTp)
+		if fixPrefixColRange(ranges, lengths, newTp) {
+			ranges, err = unionNewRanges(sc, ranges)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+		}
 	}
 
 	if len(ranges) > 0 && len(ranges[0].LowVal) < len(cols) {
@@ -411,20 +416,46 @@ func hasPrefix(lengths []int) bool {
 	return false
 }
 
-func fixPrefixColRange(ranges []*NewRange, lengths []int, tp []*types.FieldType) {
+// fixPrefixColRange truncates every range value that is longer than the prefix length of its index column.
+// Only the last column of each endpoint decides the exclude flag: if that value is cut, the endpoint
+// becomes inclusive. It returns true if the last column of any endpoint in any range was cut, which tells
+// the caller to merge the ranges because the adjusted intervals may now intersect.
+// e.g. if the intervals are (-inf -inf, a xxxxx), (a xxxxx, +inf +inf) and the length of the last column is 3,
+//      they are changed to (-inf -inf, a xxx], [a xxx, +inf +inf). These two intervals intersect,
+//      so a merge operation is needed.
+// Q: Is checking only the last column enough to decide whether the endpoint's exclude status must be reset?
+// A: Yes. Suppose the interval is (-inf -inf, a xxxxx b) and only the second column needs to be cut.
+//    The result is (-inf -inf, a xxx b) if its length is 3. We only care about the rows whose first two
+//    keys are `a` and `xxx`. It reads all rows whose index value begins with `a`, `xxx` and whose third value
+//    is less than `b`, which covers every row that begins with `a`, `xxxxx` and whose third value is less than `b`.
+//    So the exclude status needs no reset in this case. The right endpoint case can be proved in the same way.
+func fixPrefixColRange(ranges []*NewRange, lengths []int, tp []*types.FieldType) bool {
+	hasCut := false
 	for _, ran := range ranges {
-		for i := 0; i < len(ran.LowVal); i++ {
+		lowTail := len(ran.LowVal) - 1
+		for i := 0; i < lowTail; i++ {
 			fixRangeDatum(&ran.LowVal[i], lengths[i], tp[i])
 		}
-		ran.LowExclude = false
-		for i := 0; i < len(ran.HighVal); i++ {
+		// A truncated tail value no longer equals the original bound, so the endpoint must become inclusive.
+		lowCut := fixRangeDatum(&ran.LowVal[lowTail], lengths[lowTail], tp[lowTail])
+		if lowCut {
+			ran.LowExclude = false
+		}
+		highTail := len(ran.HighVal) - 1
+		for i := 0; i < highTail; i++ {
 			fixRangeDatum(&ran.HighVal[i], lengths[i], tp[i])
 		}
-		ran.HighExclude = false
+		// The same rule applies to the upper endpoint.
+		highCut := fixRangeDatum(&ran.HighVal[highTail], lengths[highTail], tp[highTail])
+		if highCut {
+			ran.HighExclude = false
+		}
+		hasCut = hasCut || lowCut || highCut
 	}
+	return hasCut
 }
 
-func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) {
+func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) bool {
 	// If this column is prefix and the prefix length is smaller than the range, cut it.
 	// In case of UTF8, prefix should be cut by characters rather than bytes
 	if v.Kind() == types.KindString || v.Kind() == types.KindBytes {
@@ -437,12 +468,15 @@ func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) {
 				truncateStr := string(rs[:length])
 				// truncate value and limit its length
 				v.SetString(truncateStr)
+				return true
 			}
 		} else if length != types.UnspecifiedLength && len(colValue) > length {
 			// truncate value and limit its length
 			v.SetBytes(colValue[:length])
+			return true
 		}
 	}
+	return false
 }
 
 // We cannot use the FieldType of column directly. e.g. the column a is int32 and we have a > 1111111111111111111.

diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go
@@ -524,6 +524,34 @@ func (s *testRangerSuite) TestIndexRange(c *C) {
 			filterConds: "[eq(test.t.e, 你好啊)]",
 			resultStr:   "[[[228 189],[228 189]]]",
 		},
+		{
+			indexPos:    2,
+			exprStr:     `d in ("你好啊")`,
+			accessConds: "[in(test.t.d, 你好啊)]",
+			filterConds: "[in(test.t.d, 你好啊)]",
+			resultStr:   "[[你好,你好]]",
+		},
+		{
+			indexPos:    2,
+			exprStr:     `d not in ("你好啊")`,
+			accessConds: "[not(in(test.t.d, 你好啊))]",
+			filterConds: "[not(in(test.t.d, 你好啊))]",
+			resultStr:   "[(<nil>,+inf]]",
+		},
+		{
+			indexPos:    2,
+			exprStr:     `d < "你好" || d > "你好"`,
+			accessConds: "[or(lt(test.t.d, 你好), gt(test.t.d, 你好))]",
+			filterConds: "[or(lt(test.t.d, 你好), gt(test.t.d, 你好))]",
+			resultStr:   "[[-inf,你好) (你好,+inf]]",
+		},
+		{
+			indexPos:    2,
+			exprStr:     `not(d < "你好" || d > "你好")`,
+			accessConds: "[and(ge(test.t.d, 你好), le(test.t.d, 你好))]",
+			filterConds: "[and(ge(test.t.d, 你好), le(test.t.d, 你好))]",
+			resultStr:   "[[你好,你好]]",
+		},
 	}
 
 	for _, tt := range tests {