Skip to content

Commit c3b8485

Browse files
committed
br: restore checksum shouldn't rely on backup checksum (pingcap#56712)
close pingcap#56373 (cherry picked from commit 4f047be)
1 parent 65fd2ad commit c3b8485

File tree

15 files changed

+102
-55
lines changed

15 files changed

+102
-55
lines changed

br/cmd/br/backup.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@ import (
2222

2323
func runBackupCommand(command *cobra.Command, cmdName string) error {
2424
cfg := task.BackupConfig{Config: task.Config{LogProgress: HasLogFile()}}
25-
if err := cfg.ParseFromFlags(command.Flags()); err != nil {
25+
if err := cfg.ParseFromFlags(command.Flags(), false); err != nil {
2626
command.SilenceUsage = false
2727
return errors.Trace(err)
2828
}
29+
overrideDefaultBackupConfigIfNeeded(&cfg, command)
2930

3031
if err := metricsutil.RegisterMetricsForBR(cfg.PD, cfg.KeyspaceName); err != nil {
3132
return errors.Trace(err)
@@ -211,3 +212,10 @@ func newTxnBackupCommand() *cobra.Command {
211212
task.DefineTxnBackupFlags(command)
212213
return command
213214
}
215+
216+
func overrideDefaultBackupConfigIfNeeded(config *task.BackupConfig, cmd *cobra.Command) {
217+
// override only if flag not set by user
218+
if !cmd.Flags().Changed(task.FlagChecksum) {
219+
config.Checksum = false
220+
}
221+
}

br/cmd/br/cmd.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ func timestampLogFileName() string {
8181
return filepath.Join(os.TempDir(), time.Now().Format("br.log.2006-01-02T15.04.05Z0700"))
8282
}
8383

84-
// AddFlags adds flags to the given cmd.
85-
func AddFlags(cmd *cobra.Command) {
84+
// DefineCommonFlags defines the common flags for all BR cmd operations.
85+
func DefineCommonFlags(cmd *cobra.Command) {
8686
cmd.Version = build.Info()
8787
cmd.Flags().BoolP(flagVersion, flagVersionShort, false, "Display version information about BR")
8888
cmd.SetVersionTemplate("{{printf \"%s\" .Version}}\n")
@@ -99,6 +99,8 @@ func AddFlags(cmd *cobra.Command) {
9999
"Set whether to redact sensitive info in log")
100100
cmd.PersistentFlags().String(FlagStatusAddr, "",
101101
"Set the HTTP listening address for the status report service. Set to empty string to disable")
102+
103+
// define the BR task common flags, which are shared by cmd and sql (brie)
102104
task.DefineCommonFlags(cmd.PersistentFlags())
103105

104106
cmd.PersistentFlags().StringP(FlagSlowLogFile, "", "",

br/cmd/br/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ func main() {
2020
TraverseChildren: true,
2121
SilenceUsage: true,
2222
}
23-
AddFlags(rootCmd)
23+
DefineCommonFlags(rootCmd)
2424
SetDefaultContext(ctx)
2525
rootCmd.AddCommand(
2626
NewDebugCommand(),

br/cmd/br/restore.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import (
2525

2626
func runRestoreCommand(command *cobra.Command, cmdName string) error {
2727
cfg := task.RestoreConfig{Config: task.Config{LogProgress: HasLogFile()}}
28-
if err := cfg.ParseFromFlags(command.Flags()); err != nil {
28+
if err := cfg.ParseFromFlags(command.Flags(), false); err != nil {
2929
command.SilenceUsage = false
3030
return errors.Trace(err)
3131
}

br/pkg/backup/schema.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ func (ss *Schemas) BackupSchemas(
106106
}
107107

108108
var checksum *checkpoint.ChecksumItem
109-
var exists bool = false
109+
var exists = false
110110
if ss.checkpointChecksum != nil && schema.tableInfo != nil {
111111
checksum, exists = ss.checkpointChecksum[schema.tableInfo.ID]
112112
}
@@ -145,7 +145,7 @@ func (ss *Schemas) BackupSchemas(
145145
zap.Uint64("Crc64Xor", schema.crc64xor),
146146
zap.Uint64("TotalKvs", schema.totalKvs),
147147
zap.Uint64("TotalBytes", schema.totalBytes),
148-
zap.Duration("calculate-take", calculateCost))
148+
zap.Duration("TimeTaken", calculateCost))
149149
}
150150
}
151151
if statsHandle != nil {

br/pkg/metautil/metafile.go

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,6 @@ type Table struct {
171171
StatsFileIndexes []*backuppb.StatsFileIndex
172172
}
173173

174-
// NoChecksum checks whether the table has a calculated checksum.
175-
func (tbl *Table) NoChecksum() bool {
176-
return tbl.Crc64Xor == 0 && tbl.TotalKvs == 0 && tbl.TotalBytes == 0
177-
}
178-
179174
// MetaReader wraps a reader to read both old and new version of backupmeta.
180175
type MetaReader struct {
181176
storage storage.ExternalStorage
@@ -240,14 +235,38 @@ func (reader *MetaReader) readDataFiles(ctx context.Context, output func(*backup
240235
}
241236

242237
// ArchiveSize returns the total size of the archive data.
243-
func (*MetaReader) ArchiveSize(_ context.Context, files []*backuppb.File) uint64 {
238+
func ArchiveSize(files []*backuppb.File) uint64 {
244239
total := uint64(0)
245240
for _, file := range files {
246241
total += file.Size_
247242
}
248243
return total
249244
}
250245

246+
type ChecksumStats struct {
247+
Crc64Xor uint64
248+
TotalKvs uint64
249+
TotalBytes uint64
250+
}
251+
252+
func (stats ChecksumStats) ChecksumExists() bool {
253+
if stats.Crc64Xor == 0 && stats.TotalKvs == 0 && stats.TotalBytes == 0 {
254+
return false
255+
}
256+
return true
257+
}
258+
259+
// CalculateChecksumStatsOnFiles returns the ChecksumStats for the given files
260+
func CalculateChecksumStatsOnFiles(files []*backuppb.File) ChecksumStats {
261+
var stats ChecksumStats
262+
for _, file := range files {
263+
stats.Crc64Xor ^= file.Crc64Xor
264+
stats.TotalKvs += file.TotalKvs
265+
stats.TotalBytes += file.TotalBytes
266+
}
267+
return stats
268+
}
269+
251270
// ReadDDLs reads the ddls from the backupmeta.
252271
// This function is compatible with the old backupmeta.
253272
func (reader *MetaReader) ReadDDLs(ctx context.Context) ([]byte, error) {

br/pkg/task/backup.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ import (
4141
"github.com/spf13/pflag"
4242
"github.com/tikv/client-go/v2/oracle"
4343
kvutil "github.com/tikv/client-go/v2/util"
44-
"go.uber.org/multierr"
4544
"go.uber.org/zap"
4645
)
4746

@@ -159,7 +158,7 @@ func DefineBackupFlags(flags *pflag.FlagSet) {
159158
}
160159

161160
// ParseFromFlags parses the backup-related flags from the flag set.
162-
func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet) error {
161+
func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet, skipCommonConfig bool) error {
163162
timeAgo, err := flags.GetDuration(flagBackupTimeago)
164163
if err != nil {
165164
return errors.Trace(err)
@@ -212,9 +211,13 @@ func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet) error {
212211
}
213212
cfg.CompressionConfig = *compressionCfg
214213

215-
if err = cfg.Config.ParseFromFlags(flags); err != nil {
216-
return errors.Trace(err)
214+
// parse common flags if needed
215+
if !skipCommonConfig {
216+
if err = cfg.Config.ParseFromFlags(flags); err != nil {
217+
return errors.Trace(err)
218+
}
217219
}
220+
218221
cfg.RemoveSchedulers, err = flags.GetBool(flagRemoveSchedulers)
219222
if err != nil {
220223
return errors.Trace(err)
@@ -789,18 +792,15 @@ func ParseTSString(ts string, tzCheck bool) (uint64, error) {
789792
return oracle.GoTimeToTS(t1), nil
790793
}
791794

792-
func DefaultBackupConfig() BackupConfig {
795+
func DefaultBackupConfig(commonConfig Config) BackupConfig {
793796
fs := pflag.NewFlagSet("dummy", pflag.ContinueOnError)
794-
DefineCommonFlags(fs)
795797
DefineBackupFlags(fs)
796798
cfg := BackupConfig{}
797-
err := multierr.Combine(
798-
cfg.ParseFromFlags(fs),
799-
cfg.Config.ParseFromFlags(fs),
800-
)
799+
err := cfg.ParseFromFlags(fs, true)
801800
if err != nil {
802-
log.Panic("infallible operation failed.", zap.Error(err))
801+
log.Panic("failed to parse backup flags to config", zap.Error(err))
803802
}
803+
cfg.Config = commonConfig
804804
return cfg
805805
}
806806

br/pkg/task/common.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ const (
6464
flagRateLimit = "ratelimit"
6565
flagRateLimitUnit = "ratelimit-unit"
6666
flagConcurrency = "concurrency"
67-
flagChecksum = "checksum"
67+
FlagChecksum = "checksum"
6868
flagFilter = "filter"
6969
flagCaseSensitive = "case-sensitive"
7070
flagRemoveTiFlash = "remove-tiflash"
@@ -273,7 +273,7 @@ func DefineCommonFlags(flags *pflag.FlagSet) {
273273
flags.Uint(flagChecksumConcurrency, variable.DefChecksumTableConcurrency, "The concurrency of checksumming in one table")
274274

275275
flags.Uint64(flagRateLimit, unlimited, "The rate limit of the task, MB/s per node")
276-
flags.Bool(flagChecksum, true, "Run checksum at end of task")
276+
flags.Bool(FlagChecksum, true, "Run checksum at end of task")
277277
flags.Bool(flagRemoveTiFlash, true,
278278
"Remove TiFlash replicas before backup or restore, for unsupported versions of TiFlash")
279279

@@ -318,7 +318,7 @@ func DefineCommonFlags(flags *pflag.FlagSet) {
318318

319319
// HiddenFlagsForStream temporarily hides the flags that the stream cmd does not support.
320320
func HiddenFlagsForStream(flags *pflag.FlagSet) {
321-
_ = flags.MarkHidden(flagChecksum)
321+
_ = flags.MarkHidden(FlagChecksum)
322322
_ = flags.MarkHidden(flagLoadStats)
323323
_ = flags.MarkHidden(flagChecksumConcurrency)
324324
_ = flags.MarkHidden(flagRateLimit)
@@ -506,7 +506,7 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error {
506506
return errors.Trace(err)
507507
}
508508

509-
if cfg.Checksum, err = flags.GetBool(flagChecksum); err != nil {
509+
if cfg.Checksum, err = flags.GetBool(FlagChecksum); err != nil {
510510
return errors.Trace(err)
511511
}
512512
if cfg.ChecksumConcurrency, err = flags.GetUint(flagChecksumConcurrency); err != nil {
@@ -619,6 +619,11 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error {
619619
return cfg.normalizePDURLs()
620620
}
621621

622+
// OverrideDefaultForBackup overrides the common config defaults for backup tasks.
623+
func (cfg *Config) OverrideDefaultForBackup() {
624+
cfg.Checksum = false
625+
}
626+
622627
// NewMgr creates a new mgr at the given PD address.
623628
func NewMgr(ctx context.Context,
624629
g glue.Glue, pds []string,

br/pkg/task/common_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,10 @@ func expectedDefaultConfig() Config {
233233
}
234234

235235
func expectedDefaultBackupConfig() BackupConfig {
236+
defaultConfig := expectedDefaultConfig()
237+
defaultConfig.Checksum = false
236238
return BackupConfig{
237-
Config: expectedDefaultConfig(),
239+
Config: defaultConfig,
238240
GCTTL: utils.DefaultBRGCSafePointTTL,
239241
CompressionConfig: CompressionConfig{
240242
CompressionType: backup.CompressionType_ZSTD,
@@ -274,13 +276,16 @@ func TestDefault(t *testing.T) {
274276
}
275277

276278
func TestDefaultBackup(t *testing.T) {
277-
def := DefaultBackupConfig()
279+
commonConfig := DefaultConfig()
280+
commonConfig.OverrideDefaultForBackup()
281+
def := DefaultBackupConfig(commonConfig)
278282
defaultConfig := expectedDefaultBackupConfig()
279283
require.Equal(t, defaultConfig, def)
280284
}
281285

282286
func TestDefaultRestore(t *testing.T) {
283-
def := DefaultRestoreConfig()
287+
commonConfig := DefaultConfig()
288+
def := DefaultRestoreConfig(commonConfig)
284289
defaultConfig := expectedDefaultRestoreConfig()
285290
require.Equal(t, defaultConfig, def)
286291
}

br/pkg/task/restore.go

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ func (cfg *RestoreConfig) ParseStreamRestoreFlags(flags *pflag.FlagSet) error {
327327
}
328328

329329
// ParseFromFlags parses the restore-related flags from the flag set.
330-
func (cfg *RestoreConfig) ParseFromFlags(flags *pflag.FlagSet) error {
330+
func (cfg *RestoreConfig) ParseFromFlags(flags *pflag.FlagSet, skipCommonConfig bool) error {
331331
var err error
332332
cfg.NoSchema, err = flags.GetBool(flagNoSchema)
333333
if err != nil {
@@ -337,10 +337,15 @@ func (cfg *RestoreConfig) ParseFromFlags(flags *pflag.FlagSet) error {
337337
if err != nil {
338338
return errors.Trace(err)
339339
}
340-
err = cfg.Config.ParseFromFlags(flags)
341-
if err != nil {
342-
return errors.Trace(err)
340+
341+
// parse common config if needed
342+
if !skipCommonConfig {
343+
err = cfg.Config.ParseFromFlags(flags)
344+
if err != nil {
345+
return errors.Trace(err)
346+
}
343347
}
348+
344349
err = cfg.RestoreCommonConfig.ParseFromFlags(flags)
345350
if err != nil {
346351
return errors.Trace(err)
@@ -620,20 +625,16 @@ func removeCheckpointDataForLogRestore(ctx context.Context, storageName string,
620625
return errors.Trace(checkpoint.RemoveCheckpointDataForLogRestore(ctx, s, taskName, clusterID))
621626
}
622627

623-
func DefaultRestoreConfig() RestoreConfig {
628+
func DefaultRestoreConfig(commonConfig Config) RestoreConfig {
624629
fs := pflag.NewFlagSet("dummy", pflag.ContinueOnError)
625-
DefineCommonFlags(fs)
626630
DefineRestoreFlags(fs)
627631
cfg := RestoreConfig{}
628-
err := multierr.Combine(
629-
cfg.ParseFromFlags(fs),
630-
cfg.RestoreCommonConfig.ParseFromFlags(fs),
631-
cfg.Config.ParseFromFlags(fs),
632-
)
632+
err := cfg.ParseFromFlags(fs, true)
633633
if err != nil {
634-
log.Panic("infallible failed.", zap.Error(err))
634+
log.Panic("failed to parse restore flags to config", zap.Error(err))
635635
}
636636

637+
cfg.Config = commonConfig
637638
return cfg
638639
}
639640

@@ -785,7 +786,7 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
785786
return errors.Annotate(berrors.ErrRestoreInvalidBackup, "contain tables but no databases")
786787
}
787788

788-
archiveSize := reader.ArchiveSize(ctx, files)
789+
archiveSize := metautil.ArchiveSize(files)
789790
g.Record(summary.RestoreDataSize, archiveSize)
790791
//restore from tidb will fetch a general Size issue https://github.com/pingcap/tidb/issues/27247
791792
g.Record("Size", archiveSize)
@@ -1077,8 +1078,9 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
10771078
var finish <-chan struct{}
10781079
postHandleCh := afterTableRestoredCh
10791080

1080-
// pipeline checksum
1081-
if cfg.Checksum {
1081+
// pipeline checksum only when it is enabled and this is not an incremental snapshot restore, because an
1082+
// incremental backup doesn't have enough information in its backup meta to validate the checksum
1083+
if cfg.Checksum && !client.IsIncremental() {
10821084
postHandleCh = client.GoValidateChecksum(
10831085
ctx, postHandleCh, mgr.GetStorage().GetClient(), errCh, updateCh, cfg.ChecksumConcurrency)
10841086
}
@@ -1093,7 +1095,7 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
10931095

10941096
finish = dropToBlackhole(ctx, postHandleCh, errCh)
10951097

1096-
// Reset speed limit. ResetSpeedLimit must be called after client.InitBackupMeta has been called.
1098+
// Reset speed limit. ResetSpeedLimit must be called after client.LoadSchemaIfNeededAndInitClient has been called.
10971099
defer func() {
10981100
var resetErr error
10991101
// TODO: in the future we may need a mechanism to set the speed limit with a TTL, like what we do in switchmode.

0 commit comments

Comments
 (0)