Skip to content

Commit

Permalink
statistics: fix "data too long" error when dumping stats from table w…
Browse files Browse the repository at this point in the history
…ith new collation data (#27033) (#27301)
  • Loading branch information
ti-srebot authored Sep 16, 2021
1 parent f86eac8 commit 1f0a27c
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 1 deletion.
2 changes: 1 addition & 1 deletion executor/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -1270,7 +1270,7 @@ workLoop:
// When it's new collation data, we need to use its collate key instead of original value because only
// the collate key can ensure the correct ordering.
// This also corresponds to the similar operation in (*statistics.Column).GetColumnRowCount().
if ft.EvalType() == types.ETString {
if ft.EvalType() == types.ETString && ft.Tp != mysql.TypeEnum && ft.Tp != mysql.TypeSet {
val.SetBytes(collate.GetCollator(ft.Collate).Key(val.GetString()))
}
sampleItems = append(sampleItems, &statistics.SampleItem{
Expand Down
6 changes: 6 additions & 0 deletions statistics/handle/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -1094,6 +1094,12 @@ func (h *Handle) histogramFromStorage(reader *statsReader, tableID int64, colID
} else {
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
d := rows[i].GetDatum(2, &fields[2].Column.FieldType)
// When there's new collation data, the length of the histogram bounds (the collate keys) might be
// longer than the FieldType.Flen of this column.
// We change the type to TypeBlob to bypass the length check here.
if tp.EvalType() == types.ETString && tp.Tp != mysql.TypeEnum && tp.Tp != mysql.TypeSet {
tp = types.NewFieldType(mysql.TypeBlob)
}
lowerBound, err = d.ConvertTo(sc, tp)
if err != nil {
return nil, errors.Trace(err)
Expand Down
21 changes: 21 additions & 0 deletions statistics/handle/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2829,6 +2829,27 @@ func (s *testSerialStatsSuite) TestCorrelationWithDefinedCollate(c *C) {
c.Assert(rows[0][5], Equals, "-1.000000")
}

// TestLoadHistogramWithCollate enables new collation for the duration of the
// test, analyzes a table whose varchar(10) column uses utf8mb4_unicode_ci, and
// asserts that reading the table stats back from storage returns no error.
func (s *testSerialStatsSuite) TestLoadHistogramWithCollate(c *C) {
	defer cleanEnv(c, s.store, s.do)
	tk := testkit.NewTestKit(c, s.store)

	// Turn new collation on for this test only; the deferred call switches it off again.
	collate.SetNewCollationEnabledForTest(true)
	defer collate.SetNewCollationEnabledForTest(false)

	// The inserted values fill the declared column length (10 characters).
	// Analyze runs with 0 topn, as in the statement below.
	setupSQLs := []string{
		"use test",
		"drop table if exists t",
		"create table t(a varchar(10) collate utf8mb4_unicode_ci);",
		"insert into t values('abcdefghij');",
		"insert into t values('abcdufghij');",
		"analyze table t with 0 topn;",
	}
	for _, stmt := range setupSQLs {
		tk.MustExec(stmt)
	}

	statsHandle := s.do.StatsHandle()
	table, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	c.Assert(err, IsNil)

	// Loading the stats from storage must succeed.
	meta := table.Meta()
	_, err = statsHandle.TableStatsFromStorage(meta, meta.ID, true, 0)
	c.Assert(err, IsNil)
}

func (s *testSerialStatsSuite) TestFastAnalyzeColumnHistWithNullValue(c *C) {
defer cleanEnv(c, s.store, s.do)
testKit := testkit.NewTestKit(c, s.store)
Expand Down

0 comments on commit 1f0a27c

Please sign in to comment.