Skip to content

Commit

Permalink
statistics: fix "Invalid xxx character string" error when loading new…
Browse files Browse the repository at this point in the history
… collation stats (#36709) (#36724)

close #35208
  • Loading branch information
ti-srebot authored Jul 29, 2022
1 parent f0d4a96 commit 78f5732
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 12 deletions.
19 changes: 11 additions & 8 deletions statistics/handle/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,18 +302,21 @@ func TableStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, jsonTbl *J
}
hist := statistics.HistogramFromProto(jsonCol.Histogram)
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
// Deal with sortKey: the length of the sortKey may be longer than the column's length.
orgLen := colInfo.FieldType.GetFlen()
if types.IsString(colInfo.FieldType.GetType()) {
colInfo.SetFlen(types.UnspecifiedLength)
tmpFT := colInfo.FieldType
// For new collation data, when storing the bounds of the histogram, we store the collate key instead of the
// original value.
// But there's additional conversion logic for new collation data, and the collate key might be longer than
// the FieldType.flen.
// If we use the original FieldType here, there might be errors like "Invalid utf8mb4 character string"
// or "Data too long".
// So we change it to TypeBlob here to bypass that logic.
if colInfo.FieldType.EvalType() == types.ETString && colInfo.FieldType.GetType() != mysql.TypeEnum && colInfo.FieldType.GetType() != mysql.TypeSet {
tmpFT = *types.NewFieldType(mysql.TypeBlob)
}
hist, err := hist.ConvertTo(sc, &colInfo.FieldType)
hist, err := hist.ConvertTo(sc, &tmpFT)
if err != nil {
return nil, errors.Trace(err)
}
if types.IsString(colInfo.FieldType.GetType()) {
colInfo.SetFlen(orgLen)
}
cm, topN := statistics.CMSketchAndTopNFromProto(jsonCol.CMSketch)
fms := statistics.FMSketchFromProto(jsonCol.FMSketch)
hist.ID, hist.NullCount, hist.LastUpdateVersion, hist.TotColSize, hist.Correlation = colInfo.ID, jsonCol.NullCount, jsonCol.LastUpdateVersion, jsonCol.TotColSize, jsonCol.Correlation
Expand Down
2 changes: 1 addition & 1 deletion statistics/handle/dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ func TestLoadStatsForNewCollation(t *testing.T) {
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a int, b varchar(3) collate utf8mb4_unicode_ci)")
tk.MustExec("insert into t value(1, 'aaa'), (3, 'aab'), (5, 'bba'), (2, 'bbb'), (4, 'cca'), (6, 'ccc')")
tk.MustExec("insert into t value(1, 'aaa'), (1, 'aaa'), (3, 'aab'), (3, 'aab'), (5, 'bba'), (2, 'bbb'), (4, 'cca'), (6, 'ccc'), (7, 'Ste')")
// mark column stats as needed
tk.MustExec("select * from t where a = 3")
tk.MustExec("select * from t where b = 'bbb'")
Expand Down
10 changes: 7 additions & 3 deletions statistics/handle/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -1490,9 +1490,13 @@ func (h *Handle) histogramFromStorage(reader *statsReader, tableID int64, colID
} else {
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
d := rows[i].GetDatum(2, &fields[2].Column.FieldType)
// When there's new collation data, the length of the histogram bounds (the collate key) might be
// longer than the FieldType.flen of this column.
// We change it to TypeBlob to bypass the length check here.
// For new collation data, when storing the bounds of the histogram, we store the collate key instead of the
// original value.
// But there's additional conversion logic for new collation data, and the collate key might be longer than
// the FieldType.flen.
// If we use the original FieldType here, there might be errors like "Invalid utf8mb4 character string"
// or "Data too long".
// So we change it to TypeBlob here to bypass that logic.
if tp.EvalType() == types.ETString && tp.GetType() != mysql.TypeEnum && tp.GetType() != mysql.TypeSet {
tp = types.NewFieldType(mysql.TypeBlob)
}
Expand Down

0 comments on commit 78f5732

Please sign in to comment.