From 5837d5c86bb0bb43ffa78989cb25321673e86896 Mon Sep 17 00:00:00 2001
From: WizardXiao <89761062+WizardXiao@users.noreply.github.com>
Date: Thu, 9 Dec 2021 13:51:57 +0800
Subject: [PATCH] cherry pick #30531 to release-5.3

Signed-off-by: ti-srebot
---
 dumpling/export/dump.go      | 137 +++++++++++++++++++++++++++++++++++
 dumpling/export/dump_test.go |  68 +++++++++++++++++
 2 files changed, 205 insertions(+)

diff --git a/dumpling/export/dump.go b/dumpling/export/dump.go
index 7cf20eb790270..b8039d4b2f418 100755
--- a/dumpling/export/dump.go
+++ b/dumpling/export/dump.go
@@ -344,6 +344,143 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
 	return nil
 }
 
+// adjustDatabaseCollation appends the charset's default collation to a CREATE
+// DATABASE statement that declares a charset but no collation.
+//
+// originSQL is returned unchanged when it cannot be parsed, is not a CREATE
+// DATABASE statement, already has a COLLATE option, or uses a charset that is
+// missing from charsetAndDefaultCollationMap (looked up by lower-cased name).
+// An error is returned only when restoring the rewritten statement fails.
+func adjustDatabaseCollation(tctx *tcontext.Context, parser *parser.Parser, originSQL string, charsetAndDefaultCollationMap map[string]string) (string, error) {
+	stmt, err := parser.ParseOneStmt(originSQL, "", "")
+	if err != nil {
+		tctx.L().Warn("parse create database error, maybe tidb parser doesn't support it", zap.String("originSQL", originSQL), log.ShortError(err))
+		return originSQL, nil
+	}
+	createStmt, ok := stmt.(*ast.CreateDatabaseStmt)
+	if !ok {
+		return originSQL, nil
+	}
+	var charset string
+	for _, createOption := range createStmt.Options {
+		// already have 'Collation'
+		if createOption.Tp == ast.DatabaseOptionCollate {
+			return originSQL, nil
+		}
+		if createOption.Tp == ast.DatabaseOptionCharset {
+			charset = createOption.Value
+		}
+	}
+	// get db collation
+	collation, ok := charsetAndDefaultCollationMap[strings.ToLower(charset)]
+	if !ok {
+		tctx.L().Warn("not found database charset default collation.", zap.String("originSQL", originSQL), zap.String("charset", strings.ToLower(charset)))
+		return originSQL, nil
+	}
+	// add collation
+	createStmt.Options = append(createStmt.Options, &ast.DatabaseOption{Tp: ast.DatabaseOptionCollate, Value: collation})
+	// rewrite sql
+	var bf bytes.Buffer
+	err = createStmt.Restore(&format.RestoreCtx{
+		Flags: format.DefaultRestoreFlags | format.RestoreTiDBSpecialComment,
+		In:    &bf,
+	})
+	if err != nil {
+		return "", errors.Trace(err)
+	}
+	return bf.String(), nil
+}
+
+// adjustTableCollation makes the collation of a CREATE TABLE statement explicit.
+//
+// When the table declares a charset but no collation, the charset's default
+// collation from charsetAndDefaultCollationMap is appended as a table option.
+// Columns are then adjusted by adjustColumnsCollation and the statement is
+// restored to SQL text.
+//
+// originSQL is returned unchanged when it cannot be parsed, is not a CREATE
+// TABLE statement, or the table charset has no entry in the map. An error is
+// returned only when restoring the rewritten statement fails.
+func adjustTableCollation(tctx *tcontext.Context, parser *parser.Parser, originSQL string, charsetAndDefaultCollationMap map[string]string) (string, error) {
+	stmt, err := parser.ParseOneStmt(originSQL, "", "")
+	if err != nil {
+		tctx.L().Warn("parse create table error, maybe tidb parser doesn't support it", zap.String("originSQL", originSQL), log.ShortError(err))
+		return originSQL, nil
+	}
+	createStmt, ok := stmt.(*ast.CreateTableStmt)
+	if !ok {
+		return originSQL, nil
+	}
+	var charset string
+	var collation string
+	for _, createOption := range createStmt.Options {
+		// already have 'Collation'
+		if createOption.Tp == ast.TableOptionCollate {
+			collation = createOption.StrValue
+			break
+		}
+		if createOption.Tp == ast.TableOptionCharset {
+			charset = createOption.StrValue
+		}
+	}
+
+	if collation == "" && charset != "" {
+		// look up the default collation of the table charset
+		defaultCollation, found := charsetAndDefaultCollationMap[strings.ToLower(charset)]
+		if !found {
+			tctx.L().Warn("not found table charset default collation.", zap.String("originSQL", originSQL), zap.String("charset", strings.ToLower(charset)))
+			return originSQL, nil
+		}
+
+		// add collation
+		createStmt.Options = append(createStmt.Options, &ast.TableOption{Tp: ast.TableOptionCollate, StrValue: defaultCollation})
+	}
+
+	// adjust columns collation
+	adjustColumnsCollation(tctx, createStmt, charsetAndDefaultCollationMap)
+
+	// rewrite sql
+	var bf bytes.Buffer
+	err = createStmt.Restore(&format.RestoreCtx{
+		Flags: format.DefaultRestoreFlags | format.RestoreTiDBSpecialComment,
+		In:    &bf,
+	})
+	if err != nil {
+		return "", errors.Trace(err)
+	}
+	return bf.String(), nil
+}
+
+// adjustColumnsCollation sets an explicit collation on every column that
+// declares a charset without one, using the charset's default collation from
+// charsetAndDefaultCollationMap. Columns that already have a collation, either
+// as a COLLATE column option or in their field type, are left untouched, and a
+// warning is logged for charsets missing from the map.
+func adjustColumnsCollation(tctx *tcontext.Context, createStmt *ast.CreateTableStmt, charsetAndDefaultCollationMap map[string]string) {
+ColumnLoop:
+	for _, col := range createStmt.Cols {
+		for _, options := range col.Options {
+			// already have 'Collation': skip the whole column, not just this option
+			if options.Tp == ast.ColumnOptionCollate {
+				continue ColumnLoop
+			}
+		}
+		fieldType := col.Tp
+		if fieldType.Collate != "" {
+			continue
+		}
+		if fieldType.Charset != "" {
+			// just have charset
+			collation, ok := charsetAndDefaultCollationMap[strings.ToLower(fieldType.Charset)]
+			if !ok {
+				tctx.L().Warn("not found charset default collation for column.", zap.String("table", createStmt.Table.Name.String()), zap.String("column", col.Name.String()), zap.String("charset", strings.ToLower(fieldType.Charset)))
+				continue
+			}
+			fieldType.Collate = collation
+		}
+	}
+}
+
 func (d *Dumper) dumpTableData(tctx *tcontext.Context, conn *sql.Conn, meta TableMeta, taskChan chan<- Task) error {
 	conf := d.conf
 	if conf.NoData {
diff --git a/dumpling/export/dump_test.go b/dumpling/export/dump_test.go
index cdf2deb2a51b3..df978537d0ada 100644
--- a/dumpling/export/dump_test.go
+++ b/dumpling/export/dump_test.go
@@ -127,3 +127,71 @@ func TestGetListTableTypeByConf(t *testing.T) {
 		require.Equalf(t, x.expected, getListTableTypeByConf(conf), "server info: %s, consistency: %s", x.serverInfo, x.consistency)
 	}
 }
+
+// TestAdjustDatabaseCollation checks that a missing database collation is filled in from the charset default.
+func TestAdjustDatabaseCollation(t *testing.T) {
+	t.Parallel()
+
+	tctx, cancel := tcontext.Background().WithLogger(appLogger).WithCancel()
+	defer cancel()
+	parser1 := parser.New()
+
+	originSQLs := []string{
+		"create database `test` CHARACTER SET=utf8mb4 COLLATE=utf8mb4_general_ci",
+		"create database `test` CHARACTER SET=utf8mb4",
+	}
+
+	expectedSQLs := []string{
+		"create database `test` CHARACTER SET=utf8mb4 COLLATE=utf8mb4_general_ci",
+		"CREATE DATABASE `test` CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci",
+	}
+	charsetAndDefaultCollationMap := map[string]string{"utf8mb4": "utf8mb4_general_ci"}
+	for i, originSQL := range originSQLs {
+		newSQL, err := adjustDatabaseCollation(tctx, parser1, originSQL, charsetAndDefaultCollationMap)
+		require.NoError(t, err)
+		require.Equal(t, expectedSQLs[i], newSQL)
+	}
+}
+
+// TestAdjustTableCollation checks that missing table and column collations are filled in from the charset default.
+func TestAdjustTableCollation(t *testing.T) {
+	t.Parallel()
+
+	tctx, cancel := tcontext.Background().WithLogger(appLogger).WithCancel()
+	defer cancel()
+
+	parser1 := parser.New()
+
+	originSQLs := []string{
+		"create table `test`.`t1` (id int) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
+		"create table `test`.`t1` (id int) CHARSET=utf8mb4",
+		"create table `test`.`t1` (id int, name varchar(20) CHARACTER SET utf8mb4, work varchar(20)) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ",
+		"create table `test`.`t1` (id int, name varchar(20), work varchar(20)) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
+		"create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20)) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
+		"create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20) CHARACTER SET utf8mb4) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
+		"create table `test`.`t1` (id int, name varchar(20) CHARACTER SET utf8mb4, work varchar(20)) CHARSET=utf8mb4 ",
+		"create table `test`.`t1` (id int, name varchar(20), work varchar(20)) CHARSET=utf8mb4",
+		"create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20)) CHARSET=utf8mb4",
+		"create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20) CHARACTER SET utf8mb4) CHARSET=utf8mb4",
+	}
+
+	expectedSQLs := []string{
+		"CREATE TABLE `test`.`t1` (`id` INT) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+		"CREATE TABLE `test`.`t1` (`id` INT) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+		"CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+		"CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20),`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+		"CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+		"CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+		"CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+		"CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20),`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+		"CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+		"CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
+	}
+
+	charsetAndDefaultCollationMap := map[string]string{"utf8mb4": "utf8mb4_general_ci"}
+	for i, originSQL := range originSQLs {
+		newSQL, err := adjustTableCollation(tctx, parser1, originSQL, charsetAndDefaultCollationMap)
+		require.NoError(t, err)
+		require.Equal(t, expectedSQLs[i], newSQL)
+	}
+}