From 9339955f0d84ce88bbafa2a0c8217fce77590f57 Mon Sep 17 00:00:00 2001
From: 3pointer
Date: Sat, 7 May 2022 18:30:32 +0800
Subject: [PATCH] backup: backup empty databases (#34385)

close pingcap/tidb#33866
---
 br/cmd/br/debug.go              |  7 ++++
 br/pkg/backup/client.go         |  4 +--
 br/pkg/backup/schema.go         | 60 +++++++++++++++++++--------------
 br/pkg/backup/schema_test.go    |  4 +--
 br/pkg/checksum/validate.go     |  4 +++
 br/pkg/metautil/metafile.go     | 36 +++++++++++++-------
 br/pkg/task/restore.go          | 15 +++++----
 br/tests/br_backup_empty/run.sh | 29 ++++++++++++----
 8 files changed, 105 insertions(+), 54 deletions(-)

diff --git a/br/cmd/br/debug.go b/br/cmd/br/debug.go
index 8525c6bd61993..e156ce5c8fda3 100644
--- a/br/cmd/br/debug.go
+++ b/br/cmd/br/debug.go
@@ -89,6 +89,9 @@ func newCheckSumCommand() *cobra.Command {
 			if err != nil {
 				return errors.Trace(err)
 			}
+			if schema.Table == nil {
+				continue
+			}
 			tblInfo := &model.TableInfo{}
 			err = json.Unmarshal(schema.Table, tblInfo)
 			if err != nil {
@@ -216,6 +219,10 @@ func newBackupMetaValidateCommand() *cobra.Command {
 			tableIDMap := make(map[int64]int64)
 			// Simulate to create table
 			for _, table := range tables {
+				if table.Info == nil {
+					// empty database.
+					continue
+				}
 				indexIDAllocator := mockid.NewIDAllocator()
 				newTable := new(model.TableInfo)
 				tableID, _ := tableIDAllocator.Alloc()
diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go
index 9b07f22324d40..8ba944cce281f 100644
--- a/br/pkg/backup/client.go
+++ b/br/pkg/backup/client.go
@@ -329,8 +329,8 @@ func BuildBackupRangeAndSchema(
 		}
 
 		if len(tables) == 0 {
-			log.Warn("It's not necessary for backing up empty database",
-				zap.Stringer("db", dbInfo.Name))
+			log.Info("backup empty database", zap.Stringer("db", dbInfo.Name))
+			backupSchemas.addSchema(dbInfo, nil)
 			continue
 		}
 
diff --git a/br/pkg/backup/schema.go b/br/pkg/backup/schema.go
index 4e653253cafcd..9eec6f01ae17c 100644
--- a/br/pkg/backup/schema.go
+++ b/br/pkg/backup/schema.go
@@ -55,6 +55,12 @@ func newBackupSchemas() *Schemas {
 func (ss *Schemas) addSchema(
 	dbInfo *model.DBInfo, tableInfo *model.TableInfo,
 ) {
+	if tableInfo == nil {
+		ss.schemas[utils.EncloseName(dbInfo.Name.L)] = &schemaInfo{
+			dbInfo: dbInfo,
+		}
+		return
+	}
 	name := fmt.Sprintf("%s.%s",
 		utils.EncloseName(dbInfo.Name.L), utils.EncloseName(tableInfo.Name.L))
 	ss.schemas[name] = &schemaInfo{
@@ -95,30 +101,31 @@ func (ss *Schemas) BackupSchemas(
 		}
 
 		workerPool.ApplyOnErrorGroup(errg, func() error {
-			logger := log.With(
-				zap.String("db", schema.dbInfo.Name.O),
-				zap.String("table", schema.tableInfo.Name.O),
-			)
-
-			if !skipChecksum {
-				logger.Info("table checksum start")
-				start := time.Now()
-				err := schema.calculateChecksum(ectx, store.GetClient(), backupTS, copConcurrency)
-				if err != nil {
-					return errors.Trace(err)
+			if schema.tableInfo != nil {
+				logger := log.With(
+					zap.String("db", schema.dbInfo.Name.O),
+					zap.String("table", schema.tableInfo.Name.O),
+				)
+
+				if !skipChecksum {
+					logger.Info("table checksum start")
+					start := time.Now()
+					err := schema.calculateChecksum(ectx, store.GetClient(), backupTS, copConcurrency)
+					if err != nil {
+						return errors.Trace(err)
+					}
+					logger.Info("table checksum finished",
+						zap.Uint64("Crc64Xor", schema.crc64xor),
+						zap.Uint64("TotalKvs", schema.totalKvs),
+						zap.Uint64("TotalBytes", schema.totalBytes),
+						zap.Duration("take", time.Since(start)))
 				}
-			logger.Info("table checksum finished",
-				zap.Uint64("Crc64Xor", schema.crc64xor),
-				zap.Uint64("TotalKvs", schema.totalKvs),
-				zap.Uint64("TotalBytes", schema.totalBytes),
-				zap.Duration("take", time.Since(start)))
-			}
-			if statsHandle != nil {
-				if err := schema.dumpStatsToJSON(statsHandle); err != nil {
-					logger.Error("dump table stats failed", logutil.ShortError(err))
+				if statsHandle != nil {
+					if err := schema.dumpStatsToJSON(statsHandle); err != nil {
+						logger.Error("dump table stats failed", logutil.ShortError(err))
+					}
 				}
 			}
-
 			// Send schema to metawriter
 			s, err := schema.encodeToSchema()
 			if err != nil {
@@ -187,11 +194,14 @@ func (s *schemaInfo) encodeToSchema() (*backuppb.Schema, error) {
 		return nil, errors.Trace(err)
 	}
 
-	tableBytes, err := json.Marshal(s.tableInfo)
-	if err != nil {
-		return nil, errors.Trace(err)
-	}
+	var tableBytes []byte
+	if s.tableInfo != nil {
+		tableBytes, err = json.Marshal(s.tableInfo)
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+	}
 	var statsBytes []byte
 	if s.stats != nil {
 		statsBytes, err = json.Marshal(s.stats)
diff --git a/br/pkg/backup/schema_test.go b/br/pkg/backup/schema_test.go
index 303eb318b2ed3..7ce6dc0c5a6b6 100644
--- a/br/pkg/backup/schema_test.go
+++ b/br/pkg/backup/schema_test.go
@@ -100,7 +100,7 @@ func TestBuildBackupRangeAndSchema(t *testing.T) {
 	_, backupSchemas, _, err := backup.BuildBackupRangeAndSchema(
 		m.Storage, testFilter, math.MaxUint64, false)
 	require.NoError(t, err)
-	require.Nil(t, backupSchemas)
+	require.NotNil(t, backupSchemas)
 
 	// Database is not exist.
 	fooFilter, err := filter.Parse([]string{"foo.t1"})
@@ -117,7 +117,7 @@ func TestBuildBackupRangeAndSchema(t *testing.T) {
 	_, backupSchemas, _, err = backup.BuildBackupRangeAndSchema(
 		m.Storage, noFilter, math.MaxUint64, false)
 	require.NoError(t, err)
-	require.Nil(t, backupSchemas)
+	require.NotNil(t, backupSchemas)
 
 	tk.MustExec("use test")
 	tk.MustExec("drop table if exists t1;")
diff --git a/br/pkg/checksum/validate.go b/br/pkg/checksum/validate.go
index a24c1f1f775a6..427d30200c073 100644
--- a/br/pkg/checksum/validate.go
+++ b/br/pkg/checksum/validate.go
@@ -54,6 +54,10 @@ func FastChecksum(
 		checksum := uint64(0)
 		totalKvs := uint64(0)
 		totalBytes := uint64(0)
+		if tbl.Info == nil {
+			// empty database
+			continue
+		}
 		for _, file := range tbl.Files {
 			checksum ^= file.Crc64Xor
 			totalKvs += file.TotalKvs
diff --git a/br/pkg/metautil/metafile.go b/br/pkg/metautil/metafile.go
index e40ab4576bb3e..5d24df35a3f9d 100644
--- a/br/pkg/metautil/metafile.go
+++ b/br/pkg/metautil/metafile.go
@@ -291,14 +291,18 @@ func (reader *MetaReader) ReadSchemasFiles(ctx context.Context, output chan<- *Table) error {
 	tableMap := make(map[int64]*Table, MaxBatchSize)
 	err := receiveBatch(ctx, errCh, ch, MaxBatchSize, func(item interface{}) error {
 		s := item.(*backuppb.Schema)
-		tableInfo := &model.TableInfo{}
-		if err := json.Unmarshal(s.Table, tableInfo); err != nil {
-			return errors.Trace(err)
-		}
 		dbInfo := &model.DBInfo{}
 		if err := json.Unmarshal(s.Db, dbInfo); err != nil {
 			return errors.Trace(err)
 		}
+
+		var tableInfo *model.TableInfo
+		if s.Table != nil {
+			tableInfo = &model.TableInfo{}
+			if err := json.Unmarshal(s.Table, tableInfo); err != nil {
+				return errors.Trace(err)
+			}
+		}
 		var stats *handle.JSONTable
 		if s.Stats != nil {
 			stats = &handle.JSONTable{}
@@ -306,6 +310,7 @@ func (reader *MetaReader) ReadSchemasFiles(ctx context.Context, output chan<- *Table) error {
 				return errors.Trace(err)
 			}
 		}
+
 		table := &Table{
 			DB:              dbInfo,
 			Info:            tableInfo,
 			TiFlashReplicas: int(s.TiflashReplicas),
 			Stats:           stats,
 		}
-		if files, ok := fileMap[tableInfo.ID]; ok {
-			table.Files = append(table.Files, files...)
-		}
-		if tableInfo.Partition != nil {
-			// Partition table can have many table IDs (partition IDs).
-			for _, p := range tableInfo.Partition.Definitions {
-				if files, ok := fileMap[p.ID]; ok {
-					table.Files = append(table.Files, files...)
+		if tableInfo != nil {
+			if files, ok := fileMap[tableInfo.ID]; ok {
+				table.Files = append(table.Files, files...)
+			}
+			if tableInfo.Partition != nil {
+				// Partition table can have many table IDs (partition IDs).
+				for _, p := range tableInfo.Partition.Definitions {
+					if files, ok := fileMap[p.ID]; ok {
+						table.Files = append(table.Files, files...)
+					}
 				}
 			}
+			tableMap[tableInfo.ID] = table
+		} else {
+			// empty database; key the entry by database ID so that
+			// multiple empty databases do not overwrite each other.
+			tableMap[dbInfo.ID] = table
 		}
-		tableMap[tableInfo.ID] = table
 		return nil
 	})
 	if err != nil {
diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go
index 656e2d6052c47..915a5d86c12bd 100644
--- a/br/pkg/task/restore.go
+++ b/br/pkg/task/restore.go
@@ -247,6 +247,10 @@ func CheckRestoreDBAndTable(client *restore.Client, cfg *RestoreConfig) error {
 		}
 		schemasMap[utils.EncloseName(dbName)] = struct{}{}
 		for _, table := range db.Tables {
+			if table.Info == nil {
+				// we may back up empty database.
+				continue
+			}
 			tablesMap[utils.EncloseDBAndTable(dbName, table.Info.Name.O)] = struct{}{}
 		}
 	}
@@ -583,19 +587,18 @@ func filterRestoreFiles(
 	cfg *RestoreConfig,
 ) (files []*backuppb.File, tables []*metautil.Table, dbs []*utils.Database) {
 	for _, db := range client.GetDatabases() {
-		createdDatabase := false
 		dbName := db.Info.Name.O
 		if name, ok := utils.GetSysDBName(db.Info.Name); utils.IsSysDB(name) && ok {
 			dbName = name
 		}
+		if !cfg.TableFilter.MatchSchema(dbName) {
+			continue
+		}
+		dbs = append(dbs, db)
 		for _, table := range db.Tables {
-			if !cfg.TableFilter.MatchTable(dbName, table.Info.Name.O) {
+			if table.Info == nil || !cfg.TableFilter.MatchTable(dbName, table.Info.Name.O) {
 				continue
 			}
-			if !createdDatabase {
-				dbs = append(dbs, db)
-				createdDatabase = true
-			}
 			files = append(files, table.Files...)
 			tables = append(tables, table)
 		}
diff --git a/br/tests/br_backup_empty/run.sh b/br/tests/br_backup_empty/run.sh
index 943acb9506c81..42d910f41fac4 100644
--- a/br/tests/br_backup_empty/run.sh
+++ b/br/tests/br_backup_empty/run.sh
@@ -18,16 +18,16 @@ set -eu
 DB="$TEST_NAME"
 
 # backup empty.
-echo "backup start..."
-run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/empty_db"
+echo "backup empty cluster start..."
+run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/empty_cluster"
 if [ $? -ne 0 ]; then
     echo "TEST: [$TEST_NAME] failed on backup empty cluster!"
     exit 1
 fi
 
 # restore empty.
-echo "restore start..."
-run_br restore full -s "local://$TEST_DIR/empty_db" --pd $PD_ADDR --ratelimit 1024
+echo "restore empty cluster start..."
+run_br restore full -s "local://$TEST_DIR/empty_cluster" --pd $PD_ADDR --ratelimit 1024
 if [ $? -ne 0 ]; then
     echo "TEST: [$TEST_NAME] failed on restore empty cluster!"
     exit 1
@@ -35,17 +35,34 @@ fi
 
 # backup and restore empty tables.
 run_sql "CREATE DATABASE $DB;"
+echo "backup empty db start..."
+run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/empty_db"
+if [ $? -ne 0 ]; then
+    echo "TEST: [$TEST_NAME] failed on backup empty db!"
+    exit 1
+fi
+
+run_sql "DROP DATABASE $DB"
+
+# restore empty db.
+echo "restore empty db start..."
+run_br restore full -s "local://$TEST_DIR/empty_db" --pd $PD_ADDR --ratelimit 1024
+if [ $? -ne 0 ]; then
+    echo "TEST: [$TEST_NAME] failed on restore empty db!"
+ exit 1 +fi + run_sql "CREATE TABLE $DB.usertable1 ( \ YCSB_KEY varchar(64) NOT NULL, \ FIELD0 varchar(1) DEFAULT NULL, \ PRIMARY KEY (YCSB_KEY) \ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;" -echo "backup start..." +echo "backup empty table start..." run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/empty_table" run_sql "DROP DATABASE $DB;" -echo "restore start..." +echo "restore empty table start..." run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/empty_table" # insert one row to make sure table is restored.
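
A note on the contract this patch introduces: a backuppb.Schema may now carry a Db blob with a nil Table blob, and every reader (the br debug commands, ReadSchemasFiles, checksum validation, restore filtering) has to tolerate the nil table. Below is a minimal regression-test sketch for that round trip, assuming it would live in br/pkg/backup beside schema.go; the file name, test name, and import paths are illustrative assumptions, and it reuses only identifiers shown in this patch (newBackupSchemas, addSchema, encodeToSchema, the schemas map). It is not part of the patch itself.

    // schema_empty_db_test.go -- a sketch, not part of this patch.
    package backup

    import (
        "encoding/json"
        "testing"

        "github.com/pingcap/tidb/parser/model"
        "github.com/stretchr/testify/require"
    )

    func TestEncodeEmptyDatabaseSchema(t *testing.T) {
        dbInfo := &model.DBInfo{Name: model.NewCIStr("empty_db")}

        ss := newBackupSchemas()
        // A nil *model.TableInfo marks an empty database (see addSchema).
        ss.addSchema(dbInfo, nil)

        for _, si := range ss.schemas {
            s, err := si.encodeToSchema()
            require.NoError(t, err)
            // The Table blob must stay nil so that readers such as
            // ReadSchemasFiles take the new empty-database branch.
            require.Nil(t, s.Table)
            // The Db blob must round-trip the database info.
            got := &model.DBInfo{}
            require.NoError(t, json.Unmarshal(s.Db, got))
            require.Equal(t, dbInfo.Name.L, got.Name.L)
        }
    }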