Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: Columns in string pruning (#32626) (#32721) #32926

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 192 additions & 0 deletions planner/core/partition_pruner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,200 @@ func (s *testPartitionPruneSuit) TestHashPartitionPruner(c *C) {
}
}

<<<<<<< HEAD
func (s *testPartitionPruneSuit) TestListPartitionPruner(c *C) {
tk := testkit.NewTestKit(c, s.store)
=======
func TestRangeColumnPartitionPruningForIn(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
tk := testkit.NewTestKit(t, store)
tk.MustExec("drop database if exists test_range_col_in")
tk.MustExec("create database test_range_col_in")
tk.MustExec("use test_range_col_in")
tk.MustExec(`set @@session.tidb_enable_list_partition = 1`)
tk.MustExec("set @@session.tidb_partition_prune_mode='static'")

// case in issue-26739
tk.MustExec(`CREATE TABLE t1 (
id bigint(20) NOT NULL AUTO_INCREMENT,
dt date,
PRIMARY KEY (id,dt))
PARTITION BY RANGE COLUMNS(dt) (
PARTITION p20201125 VALUES LESS THAN ("20201126"),
PARTITION p20201126 VALUES LESS THAN ("20201127"),
PARTITION p20201127 VALUES LESS THAN ("20201128"),
PARTITION p20201128 VALUES LESS THAN ("20201129"),
PARTITION p20201129 VALUES LESS THAN ("20201130"))`)
tk.MustQuery(`explain format='brief' select /*+ HASH_AGG() */ count(1) from t1 where dt in ('2020-11-27','2020-11-28')`).Check(
testkit.Rows("HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─PartitionUnion 2.00 root ",
" ├─HashAgg 1.00 root funcs:count(Column#7)->Column#5",
" │ └─IndexReader 1.00 root index:HashAgg",
" │ └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#7",
" │ └─Selection 20.00 cop[tikv] in(test_range_col_in.t1.dt, 2020-11-27 00:00:00.000000, 2020-11-28 00:00:00.000000)",
" │ └─IndexFullScan 10000.00 cop[tikv] table:t1, partition:p20201127, index:PRIMARY(id, dt) keep order:false, stats:pseudo",
" └─HashAgg 1.00 root funcs:count(Column#10)->Column#5",
" └─IndexReader 1.00 root index:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#10",
" └─Selection 20.00 cop[tikv] in(test_range_col_in.t1.dt, 2020-11-27 00:00:00.000000, 2020-11-28 00:00:00.000000)",
" └─IndexFullScan 10000.00 cop[tikv] table:t1, partition:p20201128, index:PRIMARY(id, dt) keep order:false, stats:pseudo"))

tk.MustExec(`insert into t1 values (1, "2020-11-25")`)
tk.MustExec(`insert into t1 values (2, "2020-11-26")`)
tk.MustExec(`insert into t1 values (3, "2020-11-27")`)
tk.MustExec(`insert into t1 values (4, "2020-11-28")`)
tk.MustQuery(`select id from t1 where dt in ('2020-11-27','2020-11-28') order by id`).Check(testkit.Rows("3", "4"))
tk.MustQuery(`select id from t1 where dt in (20201127,'2020-11-28') order by id`).Check(testkit.Rows("3", "4"))
tk.MustQuery(`select id from t1 where dt in (20201127,20201128) order by id`).Check(testkit.Rows("3", "4"))
tk.MustQuery(`select id from t1 where dt in (20201127,20201128,null) order by id`).Check(testkit.Rows("3", "4"))
tk.MustQuery(`select id from t1 where dt in ('2020-11-26','2020-11-25','2020-11-28') order by id`).Check(testkit.Rows("1", "2", "4"))
tk.MustQuery(`select id from t1 where dt in ('2020-11-26','wrong','2020-11-28') order by id`).Check(testkit.Rows("2", "4"))

// int
tk.MustExec(`create table t2 (a int) partition by range columns(a) (
partition p0 values less than (0),
partition p1 values less than (10),
partition p2 values less than (20))`)
tk.MustExec(`insert into t2 values (-1), (1), (11), (null)`)
tk.MustQuery(`select a from t2 where a in (-1, 1) order by a`).Check(testkit.Rows("-1", "1"))
tk.MustQuery(`select a from t2 where a in (1, 11, null) order by a`).Check(testkit.Rows("1", "11"))
tk.MustQuery(`explain format='brief' select a from t2 where a in (-1, 1)`).Check(testkit.Rows("PartitionUnion 40.00 root ",
"├─TableReader 20.00 root data:Selection",
"│ └─Selection 20.00 cop[tikv] in(test_range_col_in.t2.a, -1, 1)",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2, partition:p0 keep order:false, stats:pseudo",
"└─TableReader 20.00 root data:Selection",
" └─Selection 20.00 cop[tikv] in(test_range_col_in.t2.a, -1, 1)",
" └─TableFullScan 10000.00 cop[tikv] table:t2, partition:p1 keep order:false, stats:pseudo"))

tk.MustExec(`create table t3 (a varchar(10)) partition by range columns(a) (
partition p0 values less than ("aaa"),
partition p1 values less than ("bbb"),
partition p2 values less than ("ccc"))`)
tk.MustQuery(`explain format='brief' select a from t3 where a in ('aaa', 'aab')`).Check(testkit.Rows(
`TableReader 20.00 root data:Selection`,
`└─Selection 20.00 cop[tikv] in(test_range_col_in.t3.a, "aaa", "aab")`,
` └─TableFullScan 10000.00 cop[tikv] table:t3, partition:p1 keep order:false, stats:pseudo`))
tk.MustQuery(`explain format='brief' select a from t3 where a in ('aaa', 'bu')`).Check(testkit.Rows(
`PartitionUnion 40.00 root `,
`├─TableReader 20.00 root data:Selection`,
`│ └─Selection 20.00 cop[tikv] in(test_range_col_in.t3.a, "aaa", "bu")`,
`│ └─TableFullScan 10000.00 cop[tikv] table:t3, partition:p1 keep order:false, stats:pseudo`,
`└─TableReader 20.00 root data:Selection`,
` └─Selection 20.00 cop[tikv] in(test_range_col_in.t3.a, "aaa", "bu")`,
` └─TableFullScan 10000.00 cop[tikv] table:t3, partition:p2 keep order:false, stats:pseudo`))
}

func TestRangeColumnPartitionPruningForInString(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
tk := testkit.NewTestKit(t, store)
tk.MustExec("drop database if exists test_range_col_in_string")
tk.MustExec("create database test_range_col_in_string")

tk.MustExec("use test_range_col_in_string")
tk.MustExec("set names utf8mb4 collate utf8mb4_bin")
tk.MustExec("set @@session.tidb_partition_prune_mode='static'")

type testStruct struct {
sql string
partitions string
rows []string
}

extractPartitions := func(res *testkit.Result) string {
planStrings := testdata.ConvertRowsToStrings(res.Rows())
partitions := []string{}
for _, s := range planStrings {
parts := getFieldValue("partition:", s)
if parts != "" {
partitions = append(partitions, strings.Split(parts, ",")...)
}
}
return strings.Join(partitions, ",")
}
checkColumnStringPruningTests := func(tests []testStruct) {
modes := []string{"dynamic", "static"}
for _, mode := range modes {
tk.MustExec(`set @@tidb_partition_prune_mode = '` + mode + `'`)
for _, test := range tests {
explainResult := tk.MustQuery("explain format = 'brief' " + test.sql)
partitions := strings.ToLower(extractPartitions(explainResult))
require.Equal(t, test.partitions, partitions, "Mode: %s sql: %s", mode, test.sql)
tk.MustQuery(test.sql).Sort().Check(testkit.Rows(test.rows...))
}
}
}
tk.MustExec("create table t (a varchar(255) charset utf8mb4 collate utf8mb4_bin) partition by range columns(a)" +
`( partition pNull values less than (""),` +
`partition pAAAA values less than ("AAAA"),` +
`partition pCCC values less than ("CCC"),` +
`partition pShrimpsandwich values less than ("Räksmörgås"),` +
`partition paaa values less than ("aaa"),` +
`partition pSushi values less than ("🍣🍣🍣"),` +
`partition pMax values less than (MAXVALUE))`)
tk.MustExec(`insert into t values (NULL), ("a"), ("Räkmacka"), ("🍣 is life"), ("🍺 after work?"), ("🍺🍺🍺🍺🍺 for oktoberfest"),("AA"),("aa"),("AAA"),("aaa")`)
tests := []testStruct{
// Lower case partition names due to issue#32719
{sql: `select * from t where a IS NULL`, partitions: "pnull", rows: []string{"<nil>"}},
{sql: `select * from t where a = 'AA'`, partitions: "paaaa", rows: []string{"AA"}},
{sql: `select * from t where a = 'AA' collate utf8mb4_general_ci`, partitions: "paaaa", rows: []string{"AA"}}, // Notice that the it not uses _bin collation for partition => 'aa' not found! #32749
{sql: `select * from t where a = 'aa'`, partitions: "paaa", rows: []string{"aa"}},
{sql: `select * from t where a = 'aa' collate utf8mb4_general_ci`, partitions: "paaaa", rows: []string{"AA"}}, // Notice that the it not uses _bin collation for partition => 'aa' not found! #32749
{sql: `select * from t where a = 'AAA'`, partitions: "paaaa", rows: []string{"AAA"}},
{sql: `select * from t where a = 'AB'`, partitions: "pccc", rows: []string{}},
{sql: `select * from t where a = 'aB'`, partitions: "paaa", rows: []string{}},
{sql: `select * from t where a = '🍣'`, partitions: "psushi", rows: []string{}},
{sql: `select * from t where a in ('🍣 is life', "Räkmacka", "🍺🍺🍺🍺 after work?")`, partitions: "pshrimpsandwich,psushi,pmax", rows: []string{"Räkmacka", "🍣 is life"}},
{sql: `select * from t where a in ('AAA', 'aa')`, partitions: "paaaa,paaa", rows: []string{"AAA", "aa"}},
{sql: `select * from t where a in ('AAA' collate utf8mb4_general_ci, 'aa')`, partitions: "paaaa,paaa", rows: []string{"AA", "AAA", "aa"}}, // aaa missing due to #32749
{sql: `select * from t where a in ('AAA', 'aa' collate utf8mb4_general_ci)`, partitions: "paaaa", rows: []string{"AA", "AAA"}}, // aa, aaa missing due to #32749
}
checkColumnStringPruningTests(tests)
tk.MustExec(`set names utf8mb4 collate utf8mb4_general_ci`)
checkColumnStringPruningTests(tests)
tk.MustExec(`set names utf8mb4 collate utf8mb4_unicode_ci`)
checkColumnStringPruningTests(tests)
tk.MustExec("drop table t")
tk.MustExec("create table t (a varchar(255) charset utf8mb4 collate utf8mb4_general_ci) partition by range columns(a)" +
`( partition pNull values less than (""),` +
`partition paaa values less than ("aaa"),` +
`partition pAAAA values less than ("AAAA"),` +
`partition pCCC values less than ("CCC"),` +
`partition pShrimpsandwich values less than ("Räksmörgås"),` +
`partition pSushi values less than ("🍣🍣🍣"),` +
`partition pMax values less than (MAXVALUE))`)
tk.MustExec(`insert into t values (NULL), ("a"), ("Räkmacka"), ("🍣 is life"), ("🍺 after work?"), ("🍺🍺🍺🍺🍺 for oktoberfest"),("AA"),("aa"),("AAA"),("aaa")`)

tests = []testStruct{
// Lower case partition names due to issue#32719
{sql: `select * from t where a IS NULL`, partitions: "pnull", rows: []string{"<nil>"}},
{sql: `select * from t where a = 'AA'`, partitions: "paaa", rows: []string{"AA", "aa"}},
{sql: `select * from t where a = 'AA' collate utf8mb4_bin`, partitions: "paaa", rows: []string{"AA"}},
{sql: `select * from t where a = 'AAA'`, partitions: "paaaa", rows: []string{"AAA", "aaa"}},
{sql: `select * from t where a = 'AAA' collate utf8mb4_bin`, partitions: "paaa", rows: []string{}}, // Notice that the it uses _bin collation for partition => not found! #32749
{sql: `select * from t where a = 'AB'`, partitions: "pccc", rows: []string{}},
{sql: `select * from t where a = 'aB'`, partitions: "pccc", rows: []string{}},
{sql: `select * from t where a = '🍣'`, partitions: "psushi", rows: []string{}},
{sql: `select * from t where a in ('🍣 is life', "Räkmacka", "🍺🍺🍺🍺 after work?")`, partitions: "pshrimpsandwich,psushi,pmax", rows: []string{"Räkmacka", "🍣 is life"}},
{sql: `select * from t where a in ('AA', 'aaa')`, partitions: "paaa,paaaa", rows: []string{"AA", "AAA", "aa", "aaa"}},
{sql: `select * from t where a in ('AAA' collate utf8mb4_bin, 'aa')`, partitions: "paaa", rows: []string{"aa"}}, // AAA missing due to #32749, why is AA missing?
{sql: `select * from t where a in ('AAA', 'aa' collate utf8mb4_bin)`, partitions: "paaaa,psushi", rows: []string{"AAA"}}, // aa, aaa missing due to #32749 also all missing paaa
}

tk.MustExec(`set names utf8mb4 collate utf8mb4_bin`)
checkColumnStringPruningTests(tests)
tk.MustExec(`set names utf8mb4 collate utf8mb4_general_ci`)
checkColumnStringPruningTests(tests)
tk.MustExec(`set names utf8mb4 collate utf8mb4_unicode_ci`)
checkColumnStringPruningTests(tests)
}

func TestListPartitionPruner(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
tk := testkit.NewTestKit(t, store)
>>>>>>> 7bf5e4e23... planner: Columns in string pruning (#32626) (#32721)
tk.MustExec("drop database if exists test_partition;")
tk.MustExec("create database test_partition")
tk.MustExec("use test_partition")
Expand Down
40 changes: 40 additions & 0 deletions planner/core/rule_partition_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,46 @@ func partitionRangeForOrExpr(sctx sessionctx.Context, expr1, expr2 expression.Ex
return tmp1.union(tmp2)
}

<<<<<<< HEAD
=======
func partitionRangeColumnForInExpr(sctx sessionctx.Context, args []expression.Expression,
pruner *rangeColumnsPruner) partitionRangeOR {
col, ok := args[0].(*expression.Column)
if !ok || col.ID != pruner.partCol.ID {
return pruner.fullRange()
}

var result partitionRangeOR
for i := 1; i < len(args); i++ {
constExpr, ok := args[i].(*expression.Constant)
if !ok {
return pruner.fullRange()
}
switch constExpr.Value.Kind() {
case types.KindInt64, types.KindUint64, types.KindMysqlTime, types.KindString: // for safety, only support string,int and datetime now
case types.KindNull:
result = append(result, partitionRange{0, 1})
continue
default:
return pruner.fullRange()
}

// convert all elements to EQ-exprs and prune them one by one
sf, err := expression.NewFunction(sctx, ast.EQ, types.NewFieldType(types.KindInt64), []expression.Expression{col, args[i]}...)
if err != nil {
return pruner.fullRange()
}
start, end, ok := pruner.partitionRangeForExpr(sctx, sf)
if !ok {
return pruner.fullRange()
}
result = append(result, partitionRange{start, end})
}

return result.simplify()
}

>>>>>>> 7bf5e4e23... planner: Columns in string pruning (#32626) (#32721)
func partitionRangeForInExpr(sctx sessionctx.Context, args []expression.Expression,
pruner *rangePruner) partitionRangeOR {
col, ok := args[0].(*expression.Column)
Expand Down