Skip to content

Commit

Permalink
infoschema, domain, ddl: fix upper cased charset names (pingcap#10272)
Browse files Browse the repository at this point in the history
  • Loading branch information
bb7133 committed May 5, 2019
1 parent 15667ef commit a520a53
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 15 deletions.
47 changes: 46 additions & 1 deletion ddl/db_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ func (s *testIntegrationSuite) TestChangingTableCharset(c *C) {
if rs != nil {
rs.Close()
}
c.Assert(err.Error(), Equals, "Unknown charset gbk")
c.Assert(err.Error(), Equals, "[parser:1115]Unknown character set: 'gbk'")
rs, err = tk.Exec("alter table t charset utf8 collate latin1_bin")
if rs != nil {
rs.Close()
Expand All @@ -286,6 +286,51 @@ func (s *testIntegrationSuite) TestCaseInsensitiveCharsetAndCollate(c *C) {
tk.MustExec("create table t2(id int) ENGINE=InnoDB DEFAULT CHARSET=Utf8 COLLATE=utf8_BIN;")
tk.MustExec("create table t3(id int) ENGINE=InnoDB DEFAULT CHARSET=Utf8mb4 COLLATE=utf8MB4_BIN;")
tk.MustExec("create table t4(id int) ENGINE=InnoDB DEFAULT CHARSET=Utf8mb4 COLLATE=utf8MB4_general_ci;")

tk.MustExec("create table t5(a varchar(20)) ENGINE=InnoDB DEFAULT CHARSET=UTF8MB4 COLLATE=UTF8MB4_GENERAL_CI;")
tk.MustExec("insert into t5 values ('特克斯和凯科斯群岛')")

db, ok := domain.GetDomain(tk.Se).InfoSchema().SchemaByName(model.NewCIStr("test_charset_collate"))
c.Assert(ok, IsTrue)
tbl := testGetTableByName(c, tk.Se, "test_charset_collate", "t5")
tblInfo := tbl.Meta().Clone()
c.Assert(tblInfo.Charset, Equals, "utf8mb4")
c.Assert(tblInfo.Columns[0].Charset, Equals, "utf8mb4")

tblInfo.Version = model.TableInfoVersion2
tblInfo.Charset = "UTF8MB4"

updateTableInfo := func(tblInfo *model.TableInfo) {
mockCtx := mock.NewContext()
mockCtx.Store = s.store
err := mockCtx.NewTxn()
c.Assert(err, IsNil)
txn, err := mockCtx.Txn(true)
c.Assert(err, IsNil)
mt := meta.NewMeta(txn)
c.Assert(ok, IsTrue)
err = mt.UpdateTable(db.ID, tblInfo)
c.Assert(err, IsNil)
err = txn.Commit(context.Background())
c.Assert(err, IsNil)
}
updateTableInfo(tblInfo)
tk.MustExec("alter table t5 add column b varchar(10);") // load latest schema.

tblInfo = testGetTableByName(c, tk.Se, "test_charset_collate", "t5").Meta()
c.Assert(tblInfo.Charset, Equals, "utf8mb4")
c.Assert(tblInfo.Columns[0].Charset, Equals, "utf8mb4")

// For model.TableInfoVersion3, all charsets / collations are assumed to be lower-cased already, so no case conversion is performed.
tblInfo = tblInfo.Clone()
tblInfo.Version = model.TableInfoVersion3
tblInfo.Charset = "UTF8MB4"
updateTableInfo(tblInfo)
tk.MustExec("alter table t5 add column c varchar(10);") // load latest schema.

tblInfo = testGetTableByName(c, tk.Se, "test_charset_collate", "t5").Meta()
c.Assert(tblInfo.Charset, Equals, "UTF8MB4")
c.Assert(tblInfo.Columns[0].Charset, Equals, "utf8mb4")
}

func newStoreWithBootstrap() (kv.Storage, *domain.Domain, error) {
Expand Down
2 changes: 1 addition & 1 deletion ddl/db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4475,7 +4475,7 @@ func (s *testDBSuite) TestCheckTooBigFieldLength(c *C) {
s.testErrorCode(c, "alter table tr_04 add column b varchar(20000) charset utf8mb4;", tmysql.ErrTooBigFieldlength)
s.testErrorCode(c, "alter table tr_04 convert to character set utf8mb4;", tmysql.ErrTooBigFieldlength)
s.testErrorCode(c, "create table tr_05 (id int, name varchar(30000), purchased date ) default charset=utf8 collate=utf8_bin;", tmysql.ErrTooBigFieldlength)
s.testErrorCode(c, "create table tr_05 (id int, name varchar(20000) charset utf8mb4, purchased date ) default charset=utf8 collate=utf8;", tmysql.ErrTooBigFieldlength)
s.testErrorCode(c, "create table tr_05 (id int, name varchar(20000) charset utf8mb4, purchased date ) default charset=utf8 collate=utf8_bin;", tmysql.ErrTooBigFieldlength)
s.testErrorCode(c, "create table tr_05 (id int, name varchar(65536), purchased date ) default charset=latin1;", tmysql.ErrTooBigFieldlength)

s.tk.MustExec("drop table if exists tr_05;")
Expand Down
1 change: 1 addition & 0 deletions domain/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ func (do *Domain) fetchSchemasWithTables(schemas []*model.DBInfo, m *meta.Meta,
// schema is not public, can't be used outside.
continue
}
infoschema.ConvertCharsetCollateToLowerCaseIfNeed(tbl)
di.Tables = append(di.Tables, tbl)
}
}
Expand Down
9 changes: 3 additions & 6 deletions expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ func (s *testIntegrationSuite) TestStringBuiltin(c *C) {
result.Check(testkit.Rows("'121' '0' '中文' <nil>"))

// for convert
result = tk.MustQuery(`select convert("123" using "866"), convert("123" using "binary"), convert("中文" using "binary"), convert("中文" using "utf8"), convert("中文" using "utf8mb4"), convert(cast("中文" as binary) using "utf8");`)
result = tk.MustQuery(`select convert("123" using "binary"), convert("123" using "binary"), convert("中文" using "binary"), convert("中文" using "utf8"), convert("中文" using "utf8mb4"), convert(cast("中文" as binary) using "utf8");`)
result.Check(testkit.Rows("123 123 中文 中文 中文 中文"))

// for insert
Expand Down Expand Up @@ -2286,11 +2286,8 @@ func (s *testIntegrationSuite) TestBuiltin(c *C) {
result.Check(testkit.Rows("ad\x01\x00Y"))
result = tk.MustQuery("select char(97, null, 100, 256, 89 using ascii)")
result.Check(testkit.Rows("ad\x01\x00Y"))
charRecordSet, err := tk.Exec("select char(97, null, 100, 256, 89 using tidb)")
c.Assert(err, IsNil)
c.Assert(charRecordSet, NotNil)
_, err = session.GetRows4Test(ctx, tk.Se, charRecordSet)
c.Assert(err.Error(), Equals, "unknown encoding: tidb")
_, err = tk.Exec("select char(97, null, 100, 256, 89 using tidb)")
c.Assert(err.Error(), Equals, "[parser:1115]Unknown character set: 'tidb'")

// issue 3884
tk.MustExec("drop table if exists t")
Expand Down
8 changes: 4 additions & 4 deletions expression/typeinfer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,10 @@ func (s *testInferTypeSuite) createTestCase4StrFuncs() []typeInferTestCase {
{"quote(c_float_d )", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 26, types.UnspecifiedLength},
{"quote(c_double_d )", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 46, types.UnspecifiedLength},

{"convert(c_double_d using c_text_d)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength},
{"convert(c_binary using c_text_d)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength},
{"convert(c_binary using c_binary)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength},
{"convert(c_text_d using c_binary)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength},
{"convert(c_double_d using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength},
{"convert(c_binary using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength},
{"convert(c_binary using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength},
{"convert(c_text_d using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength},

{"insert(c_varchar, c_int_d, c_int_d, c_varchar)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength},
{"insert(c_varchar, c_int_d, c_int_d, c_binary)", mysql.TypeLongBlob, charset.CharsetBin, mysql.BinaryFlag, mysql.MaxBlobWidth, types.UnspecifiedLength},
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ require (
github.com/pingcap/goleveldb v0.0.0-20171020084629-8d44bfdf1030
github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11
github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596
github.com/pingcap/parser v0.0.0-20190421035202-497ae72425c5
github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7
github.com/pingcap/pd v2.1.0-rc.4+incompatible
github.com/pingcap/tidb-tools v2.1.3-0.20190116051332-34c808eef588+incompatible
github.com/pingcap/tipb v0.0.0-20180910045846-371b48b15d93
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11 h1:e81flSfRbbMW5RU
github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11/go.mod h1:0gwbe1F2iBIjuQ9AH0DbQhL+Dpr5GofU8fgYyXk+ykk=
github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 h1:t2OQTpPJnrPDGlvA+3FwJptMTt6MEPdzK1Wt99oaefQ=
github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596/go.mod h1:WpHUKhNZ18v116SvGrmjkA9CBhYmuUTKL+p8JC9ANEw=
github.com/pingcap/parser v0.0.0-20190421035202-497ae72425c5 h1:csU8uAxq5yIK6SMRtuYkiazo7cFP9iio9vSK1uh23nU=
github.com/pingcap/parser v0.0.0-20190421035202-497ae72425c5/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA=
github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7 h1:cbTQGLE0X69qL2nrvtG9HP4u5sBdVGyoIJOhc+KtJXc=
github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA=
github.com/pingcap/pd v2.1.0-rc.4+incompatible h1:/buwGk04aHO5odk/+O8ZOXGs4qkUjYTJ2UpCJXna8NE=
github.com/pingcap/pd v2.1.0-rc.4+incompatible/go.mod h1:nD3+EoYes4+aNNODO99ES59V83MZSI+dFbhyr667a0E=
github.com/pingcap/tidb-tools v2.1.3-0.20190116051332-34c808eef588+incompatible h1:e9Gi/LP9181HT3gBfSOeSBA+5JfemuE4aEAhqNgoE4k=
Expand Down
16 changes: 16 additions & 0 deletions infoschema/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package infoschema
import (
"fmt"
"sort"
"strings"

"github.com/pingcap/errors"
"github.com/pingcap/parser/charset"
Expand Down Expand Up @@ -173,6 +174,7 @@ func (b *Builder) applyCreateTable(m *meta.Meta, dbInfo *model.DBInfo, tableID i
fmt.Sprintf("(Table ID %d)", tableID),
)
}
ConvertCharsetCollateToLowerCaseIfNeed(tblInfo)
ConvertOldVersionUTF8ToUTF8MB4IfNeed(tblInfo)

if alloc == nil {
Expand All @@ -198,6 +200,20 @@ func (b *Builder) applyCreateTable(m *meta.Meta, dbInfo *model.DBInfo, tableID i
return nil
}

// ConvertCharsetCollateToLowerCaseIfNeed lower-cases the charset and collation names stored on
// the table and on each of its columns. Tables whose version is model.TableInfoVersion3 or newer
// are left untouched.
func ConvertCharsetCollateToLowerCaseIfNeed(tbInfo *model.TableInfo) {
	if tbInfo.Version < model.TableInfoVersion3 {
		// Table-level names first, then every column.
		tbInfo.Charset, tbInfo.Collate = strings.ToLower(tbInfo.Charset), strings.ToLower(tbInfo.Collate)
		for i := range tbInfo.Columns {
			column := tbInfo.Columns[i]
			column.Charset = strings.ToLower(column.Charset)
			column.Collate = strings.ToLower(column.Collate)
		}
	}
}

// ConvertOldVersionUTF8ToUTF8MB4IfNeed converts old version UTF8 to UTF8MB4 if config.TreatOldVersionUTF8AsUTF8MB4 is enabled.
func ConvertOldVersionUTF8ToUTF8MB4IfNeed(tbInfo *model.TableInfo) {
if !config.GetGlobalConfig().TreatOldVersionUTF8AsUTF8MB4 || tbInfo.Version >= model.TableInfoVersion2 {
Expand Down

0 comments on commit a520a53

Please sign in to comment.