Skip to content

Commit

Permalink
cherry pick pingcap#30531 to release-5.3
Browse files Browse the repository at this point in the history
Signed-off-by: ti-srebot <[email protected]>
  • Loading branch information
WizardXiao authored and ti-srebot committed Jan 18, 2022
1 parent 83b273a commit 5837d5c
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 0 deletions.
122 changes: 122 additions & 0 deletions dumpling/export/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,128 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
return nil
}

<<<<<<< HEAD
=======
// adjustDatabaseCollation makes the collation of a CREATE DATABASE statement
// explicit.
//
// originSQL is parsed with the given parser. The statement is returned
// untouched (with a nil error) when it cannot be parsed, when it is not a
// CREATE DATABASE statement, when it already carries a collate option, or when
// charsetAndDefaultCollationMap has no entry for the statement's lower-cased
// charset. Otherwise a collate option holding the charset's default collation
// is appended and the statement is restored back to SQL text. An error is
// returned only when restoring the statement fails.
func adjustDatabaseCollation(tctx *tcontext.Context, parser *parser.Parser, originSQL string, charsetAndDefaultCollationMap map[string]string) (string, error) {
	node, parseErr := parser.ParseOneStmt(originSQL, "", "")
	if parseErr != nil {
		// Parse failures are tolerated: the original statement is dumped as is.
		tctx.L().Warn("parse create database error, maybe tidb parser doesn't support it", zap.String("originSQL", originSQL), log.ShortError(parseErr))
		return originSQL, nil
	}
	createDB, isCreateDB := node.(*ast.CreateDatabaseStmt)
	if !isCreateDB {
		return originSQL, nil
	}

	dbCharset := ""
	for _, opt := range createDB.Options {
		switch opt.Tp {
		case ast.DatabaseOptionCollate:
			// A collation is already spelled out; nothing to adjust.
			return originSQL, nil
		case ast.DatabaseOptionCharset:
			dbCharset = opt.Value
		}
	}

	// Look up the default collation of the database charset.
	lowerCharset := strings.ToLower(dbCharset)
	defaultCollation, found := charsetAndDefaultCollationMap[lowerCharset]
	if !found {
		tctx.L().Warn("not found database charset default collation.", zap.String("originSQL", originSQL), zap.String("charset", lowerCharset))
		return originSQL, nil
	}
	createDB.Options = append(createDB.Options, &ast.DatabaseOption{Tp: ast.DatabaseOptionCollate, Value: defaultCollation})

	// Render the modified statement back into SQL text.
	var out bytes.Buffer
	restoreCtx := &format.RestoreCtx{
		Flags: format.DefaultRestoreFlags | format.RestoreTiDBSpecialComment,
		In:    &out,
	}
	if restoreErr := createDB.Restore(restoreCtx); restoreErr != nil {
		return "", errors.Trace(restoreErr)
	}
	return out.String(), nil
}

// adjustTableCollation makes the collation of a CREATE TABLE statement, and of
// its columns, explicit.
//
// originSQL is parsed with the given parser. The statement is returned
// untouched (with a nil error) when it cannot be parsed, when it is not a
// CREATE TABLE statement, or when the table declares a charset without a
// collation and charsetAndDefaultCollationMap has no entry for that lower-cased
// charset. Otherwise a missing table collation is filled in from the map,
// adjustColumnsCollation is applied to the columns, and the statement is
// restored back to SQL text. An error is returned only when restoring fails.
func adjustTableCollation(tctx *tcontext.Context, parser *parser.Parser, originSQL string, charsetAndDefaultCollationMap map[string]string) (string, error) {
	node, parseErr := parser.ParseOneStmt(originSQL, "", "")
	if parseErr != nil {
		// Parse failures are tolerated: the original statement is dumped as is.
		tctx.L().Warn("parse create table error, maybe tidb parser doesn't support it", zap.String("originSQL", originSQL), log.ShortError(parseErr))
		return originSQL, nil
	}
	createTable, isCreateTable := node.(*ast.CreateTableStmt)
	if !isCreateTable {
		return originSQL, nil
	}

	tableCharset, tableCollation := "", ""
scanOptions:
	for _, opt := range createTable.Options {
		switch opt.Tp {
		case ast.TableOptionCollate:
			// The table already names a collation; stop scanning.
			tableCollation = opt.StrValue
			break scanOptions
		case ast.TableOptionCharset:
			tableCharset = opt.StrValue
		}
	}

	if tableCollation == "" && tableCharset != "" {
		// Only a charset is given: append its default collation.
		lowerCharset := strings.ToLower(tableCharset)
		defaultCollation, found := charsetAndDefaultCollationMap[lowerCharset]
		if !found {
			tctx.L().Warn("not found table charset default collation.", zap.String("originSQL", originSQL), zap.String("charset", lowerCharset))
			return originSQL, nil
		}
		createTable.Options = append(createTable.Options, &ast.TableOption{Tp: ast.TableOptionCollate, StrValue: defaultCollation})
	}

	// Columns are adjusted independently of the table-level options.
	adjustColumnsCollation(tctx, createTable, charsetAndDefaultCollationMap)

	// Render the modified statement back into SQL text.
	var out bytes.Buffer
	restoreCtx := &format.RestoreCtx{
		Flags: format.DefaultRestoreFlags | format.RestoreTiDBSpecialComment,
		In:    &out,
	}
	if restoreErr := createTable.Restore(restoreCtx); restoreErr != nil {
		return "", errors.Trace(restoreErr)
	}
	return out.String(), nil
}

// adjustColumnsCollation fills in the collation of every column of createStmt
// that declares a charset but no collation.
//
// A column is left alone when it carries an explicit collate column option,
// when its field type already has a collation, or when it declares no charset.
// For the remaining columns the default collation of the lower-cased charset is
// looked up in charsetAndDefaultCollationMap and stored on the column's field
// type; a missing map entry is logged as a warning and the column is skipped.
// The statement is modified in place.
func adjustColumnsCollation(tctx *tcontext.Context, createStmt *ast.CreateTableStmt, charsetAndDefaultCollationMap map[string]string) {
columnLoop:
	for _, col := range createStmt.Cols {
		for _, option := range col.Options {
			// The column already has an explicit 'Collation' option. The label is
			// required: a bare continue would only advance this inner loop and the
			// column would still receive a second, default collation below.
			if option.Tp == ast.ColumnOptionCollate {
				continue columnLoop
			}
		}
		fieldType := col.Tp
		if fieldType.Collate != "" {
			continue
		}
		if fieldType.Charset == "" {
			// Neither charset nor collation is declared; nothing to derive.
			continue
		}
		// Only the charset is declared: derive its default collation.
		lowerCharset := strings.ToLower(fieldType.Charset)
		collation, ok := charsetAndDefaultCollationMap[lowerCharset]
		if !ok {
			tctx.L().Warn("not found charset default collation for column.", zap.String("table", createStmt.Table.Name.String()), zap.String("column", col.Name.String()), zap.String("charset", lowerCharset))
			continue
		}
		fieldType.Collate = collation
	}
}

>>>>>>> 9f744cdf8... dumpling: fix default column collation with upstream when dump table (#30531)
func (d *Dumper) dumpTableData(tctx *tcontext.Context, conn *sql.Conn, meta TableMeta, taskChan chan<- Task) error {
conf := d.conf
if conf.NoData {
Expand Down
69 changes: 69 additions & 0 deletions dumpling/export/dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,72 @@ func TestGetListTableTypeByConf(t *testing.T) {
require.Equalf(t, x.expected, getListTableTypeByConf(conf), "server info: %s, consistency: %s", x.serverInfo, x.consistency)
}
}
<<<<<<< HEAD
=======

// TestAdjustDatabaseCollation checks adjustDatabaseCollation against a
// statement that already names a collation (expected back verbatim) and one
// that only names a charset (expected back restored with the charset's default
// collation appended).
func TestAdjustDatabaseCollation(t *testing.T) {
	t.Parallel()

	tctx, cancel := tcontext.Background().WithLogger(appLogger).WithCancel()
	defer cancel()
	sqlParser := parser.New()
	defaultCollations := map[string]string{"utf8mb4": "utf8mb4_general_ci"}

	cases := []struct {
		origin   string
		expected string
	}{
		{
			origin:   "create database `test` CHARACTER SET=utf8mb4 COLLATE=utf8mb4_general_ci",
			expected: "create database `test` CHARACTER SET=utf8mb4 COLLATE=utf8mb4_general_ci",
		},
		{
			origin:   "create database `test` CHARACTER SET=utf8mb4",
			expected: "CREATE DATABASE `test` CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci",
		},
	}

	for _, tc := range cases {
		got, err := adjustDatabaseCollation(tctx, sqlParser, tc.origin, defaultCollations)
		require.NoError(t, err)
		require.Equal(t, tc.expected, got)
	}
}

// TestAdjustTableCollation feeds adjustTableCollation CREATE TABLE statements
// that mix table-level charset/collation options with column-level charset and
// collate clauses, and compares the result with the expected restored SQL.
func TestAdjustTableCollation(t *testing.T) {
	t.Parallel()

	tctx, cancel := tcontext.Background().WithLogger(appLogger).WithCancel()
	defer cancel()

	sqlParser := parser.New()
	defaultCollations := map[string]string{"utf8mb4": "utf8mb4_general_ci"}

	cases := []struct {
		origin   string
		expected string
	}{
		// Table declares both charset and collation.
		{
			origin:   "create table `test`.`t1` (id int) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
			expected: "CREATE TABLE `test`.`t1` (`id` INT) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		// Table declares only a charset.
		{
			origin:   "create table `test`.`t1` (id int) CHARSET=utf8mb4",
			expected: "CREATE TABLE `test`.`t1` (`id` INT) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		// Table declares charset and collation, with varying column clauses.
		{
			origin:   "create table `test`.`t1` (id int, name varchar(20) CHARACTER SET utf8mb4, work varchar(20)) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ",
			expected: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin:   "create table `test`.`t1` (id int, name varchar(20), work varchar(20)) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
			expected: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20),`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin:   "create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20)) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
			expected: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin:   "create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20) CHARACTER SET utf8mb4) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
			expected: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		// Table declares only a charset, with varying column clauses.
		{
			origin:   "create table `test`.`t1` (id int, name varchar(20) CHARACTER SET utf8mb4, work varchar(20)) CHARSET=utf8mb4 ",
			expected: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin:   "create table `test`.`t1` (id int, name varchar(20), work varchar(20)) CHARSET=utf8mb4",
			expected: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20),`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin:   "create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20)) CHARSET=utf8mb4",
			expected: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin:   "create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20) CHARACTER SET utf8mb4) CHARSET=utf8mb4",
			expected: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
	}

	for _, tc := range cases {
		got, err := adjustTableCollation(tctx, sqlParser, tc.origin, defaultCollations)
		require.NoError(t, err)
		require.Equal(t, tc.expected, got)
	}
}
>>>>>>> 9f744cdf8... dumpling: fix default column collation with upstream when dump table (#30531)

0 comments on commit 5837d5c

Please sign in to comment.