diff --git a/executor/analyze.go b/executor/analyze.go index ff580346a9072..86f984f9881b9 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -1269,7 +1269,7 @@ workLoop: // When it's new collation data, we need to use its collate key instead of original value because only // the collate key can ensure the correct ordering. // This is also corresponding to similar operation in (*statistics.Column).GetColumnRowCount(). - if ft.EvalType() == types.ETString { + if ft.EvalType() == types.ETString && ft.Tp != mysql.TypeEnum && ft.Tp != mysql.TypeSet { val.SetBytes(collate.GetCollator(ft.Collate).Key(val.GetString())) } sampleItems = append(sampleItems, &statistics.SampleItem{ diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index a52af81d14caf..9ec6e377f2696 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -1094,6 +1094,12 @@ func (h *Handle) histogramFromStorage(reader *statsReader, tableID int64, colID } else { sc := &stmtctx.StatementContext{TimeZone: time.UTC} d := rows[i].GetDatum(2, &fields[2].Column.FieldType) + // When there's new collation data, the histogram bounds (which are collate keys) might be + // longer than the FieldType.Flen of this column. + // We change the type to TypeBlob to bypass the length check here. 
+ if tp.EvalType() == types.ETString && tp.Tp != mysql.TypeEnum && tp.Tp != mysql.TypeSet { + tp = types.NewFieldType(mysql.TypeBlob) + } lowerBound, err = d.ConvertTo(sc, tp) if err != nil { return nil, errors.Trace(err) diff --git a/statistics/handle/handle_test.go b/statistics/handle/handle_test.go index 746ec7b27638f..d1087ce9cafd7 100644 --- a/statistics/handle/handle_test.go +++ b/statistics/handle/handle_test.go @@ -2829,6 +2829,27 @@ func (s *testSerialStatsSuite) TestCorrelationWithDefinedCollate(c *C) { c.Assert(rows[0][5], Equals, "-1.000000") } +func (s *testSerialStatsSuite) TestLoadHistogramWithCollate(c *C) { + defer cleanEnv(c, s.store, s.do) + testKit := testkit.NewTestKit(c, s.store) + collate.SetNewCollationEnabledForTest(true) + defer collate.SetNewCollationEnabledForTest(false) + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a varchar(10) collate utf8mb4_unicode_ci);") + testKit.MustExec("insert into t values('abcdefghij');") + testKit.MustExec("insert into t values('abcdufghij');") + testKit.MustExec("analyze table t with 0 topn;") + do := s.do + h := do.StatsHandle() + is := do.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + tblInfo := tbl.Meta() + _, err = h.TableStatsFromStorage(tblInfo, tblInfo.ID, true, 0) + c.Assert(err, IsNil) +} + func (s *testSerialStatsSuite) TestFastAnalyzeColumnHistWithNullValue(c *C) { defer cleanEnv(c, s.store, s.do) testKit := testkit.NewTestKit(c, s.store)