Skip to content

Commit af330c1

Browse files
authored
br: restore checksum shouldn't rely on backup checksum (#56712) (#57503)
close #56373
1 parent c104575 commit af330c1

File tree

19 files changed

+169
-86
lines changed

19 files changed

+169
-86
lines changed

br/cmd/br/backup.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@ import (
2222

2323
func runBackupCommand(command *cobra.Command, cmdName string) error {
2424
cfg := task.BackupConfig{Config: task.Config{LogProgress: HasLogFile()}}
25-
if err := cfg.ParseFromFlags(command.Flags()); err != nil {
25+
if err := cfg.ParseFromFlags(command.Flags(), false); err != nil {
2626
command.SilenceUsage = false
2727
return errors.Trace(err)
2828
}
29+
overrideDefaultBackupConfigIfNeeded(&cfg, command)
2930

3031
if err := metricsutil.RegisterMetricsForBR(cfg.PD, cfg.KeyspaceName); err != nil {
3132
return errors.Trace(err)
@@ -211,3 +212,10 @@ func newTxnBackupCommand() *cobra.Command {
211212
task.DefineTxnBackupFlags(command)
212213
return command
213214
}
215+
216+
func overrideDefaultBackupConfigIfNeeded(config *task.BackupConfig, cmd *cobra.Command) {
217+
// override only if flag not set by user
218+
if !cmd.Flags().Changed(task.FlagChecksum) {
219+
config.Checksum = false
220+
}
221+
}

br/cmd/br/cmd.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ func timestampLogFileName() string {
8585
return filepath.Join(os.TempDir(), time.Now().Format("br.log.2006-01-02T15.04.05Z0700"))
8686
}
8787

88-
// AddFlags adds flags to the given cmd.
89-
func AddFlags(cmd *cobra.Command) {
88+
// DefineCommonFlags defines the common flags for all BR cmd operation.
89+
func DefineCommonFlags(cmd *cobra.Command) {
9090
cmd.Version = build.Info()
9191
cmd.Flags().BoolP(flagVersion, flagVersionShort, false, "Display version information about BR")
9292
cmd.SetVersionTemplate("{{printf \"%s\" .Version}}\n")
@@ -103,6 +103,8 @@ func AddFlags(cmd *cobra.Command) {
103103
"Set whether to redact sensitive info in log")
104104
cmd.PersistentFlags().String(FlagStatusAddr, "",
105105
"Set the HTTP listening address for the status report service. Set to empty string to disable")
106+
107+
// defines BR task common flags, this is shared by cmd and sql(brie)
106108
task.DefineCommonFlags(cmd.PersistentFlags())
107109

108110
cmd.PersistentFlags().StringP(FlagSlowLogFile, "", "",

br/cmd/br/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ func main() {
2020
TraverseChildren: true,
2121
SilenceUsage: true,
2222
}
23-
AddFlags(rootCmd)
23+
DefineCommonFlags(rootCmd)
2424
SetDefaultContext(ctx)
2525
rootCmd.AddCommand(
2626
NewDebugCommand(),

br/cmd/br/restore.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import (
2525

2626
func runRestoreCommand(command *cobra.Command, cmdName string) error {
2727
cfg := task.RestoreConfig{Config: task.Config{LogProgress: HasLogFile()}}
28-
if err := cfg.ParseFromFlags(command.Flags()); err != nil {
28+
if err := cfg.ParseFromFlags(command.Flags(), false); err != nil {
2929
command.SilenceUsage = false
3030
return errors.Trace(err)
3131
}

br/pkg/backup/schema.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ func (ss *Schemas) BackupSchemas(
106106
}
107107

108108
var checksum *checkpoint.ChecksumItem
109-
var exists bool = false
109+
var exists = false
110110
if ss.checkpointChecksum != nil && schema.tableInfo != nil {
111111
checksum, exists = ss.checkpointChecksum[schema.tableInfo.ID]
112112
}
@@ -145,7 +145,7 @@ func (ss *Schemas) BackupSchemas(
145145
zap.Uint64("Crc64Xor", schema.crc64xor),
146146
zap.Uint64("TotalKvs", schema.totalKvs),
147147
zap.Uint64("TotalBytes", schema.totalBytes),
148-
zap.Duration("calculate-take", calculateCost))
148+
zap.Duration("TimeTaken", calculateCost))
149149
}
150150
}
151151
if statsHandle != nil {

br/pkg/metautil/metafile.go

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,6 @@ type Table struct {
166166
StatsFileIndexes []*backuppb.StatsFileIndex
167167
}
168168

169-
// NoChecksum checks whether the table has a calculated checksum.
170-
func (tbl *Table) NoChecksum() bool {
171-
return tbl.Crc64Xor == 0 && tbl.TotalKvs == 0 && tbl.TotalBytes == 0
172-
}
173-
174169
// MetaReader wraps a reader to read both old and new version of backupmeta.
175170
type MetaReader struct {
176171
storage storage.ExternalStorage
@@ -235,14 +230,38 @@ func (reader *MetaReader) readDataFiles(ctx context.Context, output func(*backup
235230
}
236231

237232
// ArchiveSize return the size of Archive data
238-
func (*MetaReader) ArchiveSize(_ context.Context, files []*backuppb.File) uint64 {
233+
func ArchiveSize(files []*backuppb.File) uint64 {
239234
total := uint64(0)
240235
for _, file := range files {
241236
total += file.Size_
242237
}
243238
return total
244239
}
245240

241+
type ChecksumStats struct {
242+
Crc64Xor uint64
243+
TotalKvs uint64
244+
TotalBytes uint64
245+
}
246+
247+
func (stats ChecksumStats) ChecksumExists() bool {
248+
if stats.Crc64Xor == 0 && stats.TotalKvs == 0 && stats.TotalBytes == 0 {
249+
return false
250+
}
251+
return true
252+
}
253+
254+
// CalculateChecksumStatsOnFiles returns the ChecksumStats for the given files
255+
func CalculateChecksumStatsOnFiles(files []*backuppb.File) ChecksumStats {
256+
var stats ChecksumStats
257+
for _, file := range files {
258+
stats.Crc64Xor ^= file.Crc64Xor
259+
stats.TotalKvs += file.TotalKvs
260+
stats.TotalBytes += file.TotalBytes
261+
}
262+
return stats
263+
}
264+
246265
// ReadDDLs reads the ddls from the backupmeta.
247266
// This function is compatible with the old backupmeta.
248267
func (reader *MetaReader) ReadDDLs(ctx context.Context) ([]byte, error) {

br/pkg/restore/snap_client/client.go

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -467,8 +467,8 @@ func (rc *SnapClient) needLoadSchemas(backupMeta *backuppb.BackupMeta) bool {
467467
return !(backupMeta.IsRawKv || backupMeta.IsTxnKv)
468468
}
469469

470-
// InitBackupMeta loads schemas from BackupMeta to initialize RestoreClient.
471-
func (rc *SnapClient) InitBackupMeta(
470+
// LoadSchemaIfNeededAndInitClient loads schemas from BackupMeta to initialize RestoreClient.
471+
func (rc *SnapClient) LoadSchemaIfNeededAndInitClient(
472472
c context.Context,
473473
backupMeta *backuppb.BackupMeta,
474474
backend *backuppb.StorageBackend,
@@ -989,7 +989,7 @@ func (rc *SnapClient) setSpeedLimit(ctx context.Context, rateLimit uint64) error
989989
return nil
990990
}
991991

992-
func (rc *SnapClient) execChecksum(
992+
func (rc *SnapClient) execAndValidateChecksum(
993993
ctx context.Context,
994994
tbl *CreatedTable,
995995
kvClient kv.Client,
@@ -1000,13 +1000,14 @@ func (rc *SnapClient) execChecksum(
10001000
zap.String("table", tbl.OldTable.Info.Name.O),
10011001
)
10021002

1003-
if tbl.OldTable.NoChecksum() {
1003+
expectedChecksumStats := metautil.CalculateChecksumStatsOnFiles(tbl.OldTable.Files)
1004+
if !expectedChecksumStats.ChecksumExists() {
10041005
logger.Warn("table has no checksum, skipping checksum")
10051006
return nil
10061007
}
10071008

10081009
if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
1009-
span1 := span.Tracer().StartSpan("Client.execChecksum", opentracing.ChildOf(span.Context()))
1010+
span1 := span.Tracer().StartSpan("Client.execAndValidateChecksum", opentracing.ChildOf(span.Context()))
10101011
defer span1.Finish()
10111012
ctx = opentracing.ContextWithSpan(ctx, span1)
10121013
}
@@ -1046,21 +1047,24 @@ func (rc *SnapClient) execChecksum(
10461047
}
10471048
}
10481049
}
1049-
table := tbl.OldTable
1050-
if item.Crc64xor != table.Crc64Xor ||
1051-
item.TotalKvs != table.TotalKvs ||
1052-
item.TotalBytes != table.TotalBytes {
1050+
checksumMatch := item.Crc64xor == expectedChecksumStats.Crc64Xor &&
1051+
item.TotalKvs == expectedChecksumStats.TotalKvs &&
1052+
item.TotalBytes == expectedChecksumStats.TotalBytes
1053+
failpoint.Inject("full-restore-validate-checksum", func(_ failpoint.Value) {
1054+
checksumMatch = false
1055+
})
1056+
if !checksumMatch {
10531057
logger.Error("failed in validate checksum",
1054-
zap.Uint64("origin tidb crc64", table.Crc64Xor),
1058+
zap.Uint64("expected tidb crc64", expectedChecksumStats.Crc64Xor),
10551059
zap.Uint64("calculated crc64", item.Crc64xor),
1056-
zap.Uint64("origin tidb total kvs", table.TotalKvs),
1060+
zap.Uint64("expected tidb total kvs", expectedChecksumStats.TotalKvs),
10571061
zap.Uint64("calculated total kvs", item.TotalKvs),
1058-
zap.Uint64("origin tidb total bytes", table.TotalBytes),
1062+
zap.Uint64("expected tidb total bytes", expectedChecksumStats.TotalBytes),
10591063
zap.Uint64("calculated total bytes", item.TotalBytes),
10601064
)
10611065
return errors.Annotate(berrors.ErrRestoreChecksumMismatch, "failed to validate checksum")
10621066
}
1063-
logger.Info("success in validate checksum")
1067+
logger.Info("success in validating checksum")
10641068
return nil
10651069
}
10661070

br/pkg/restore/snap_client/pipeline_items.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ func (rc *SnapClient) GoValidateChecksum(
166166
elapsed := time.Since(start)
167167
summary.CollectSuccessUnit("table checksum", 1, elapsed)
168168
}()
169-
err := rc.execChecksum(c, tbl, kvClient, concurrency)
169+
err := rc.execAndValidateChecksum(c, tbl, kvClient, concurrency)
170170
if err != nil {
171171
return errors.Trace(err)
172172
}

br/pkg/task/backup.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ import (
4242
"github.com/spf13/pflag"
4343
"github.com/tikv/client-go/v2/oracle"
4444
kvutil "github.com/tikv/client-go/v2/util"
45-
"go.uber.org/multierr"
4645
"go.uber.org/zap"
4746
)
4847

@@ -159,7 +158,7 @@ func DefineBackupFlags(flags *pflag.FlagSet) {
159158
}
160159

161160
// ParseFromFlags parses the backup-related flags from the flag set.
162-
func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet) error {
161+
func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet, skipCommonConfig bool) error {
163162
timeAgo, err := flags.GetDuration(flagBackupTimeago)
164163
if err != nil {
165164
return errors.Trace(err)
@@ -212,9 +211,13 @@ func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet) error {
212211
}
213212
cfg.CompressionConfig = *compressionCfg
214213

215-
if err = cfg.Config.ParseFromFlags(flags); err != nil {
216-
return errors.Trace(err)
214+
// parse common flags if needed
215+
if !skipCommonConfig {
216+
if err = cfg.Config.ParseFromFlags(flags); err != nil {
217+
return errors.Trace(err)
218+
}
217219
}
220+
218221
cfg.RemoveSchedulers, err = flags.GetBool(flagRemoveSchedulers)
219222
if err != nil {
220223
return errors.Trace(err)
@@ -788,18 +791,15 @@ func ParseTSString(ts string, tzCheck bool) (uint64, error) {
788791
return oracle.GoTimeToTS(t1), nil
789792
}
790793

791-
func DefaultBackupConfig() BackupConfig {
794+
func DefaultBackupConfig(commonConfig Config) BackupConfig {
792795
fs := pflag.NewFlagSet("dummy", pflag.ContinueOnError)
793-
DefineCommonFlags(fs)
794796
DefineBackupFlags(fs)
795797
cfg := BackupConfig{}
796-
err := multierr.Combine(
797-
cfg.ParseFromFlags(fs),
798-
cfg.Config.ParseFromFlags(fs),
799-
)
798+
err := cfg.ParseFromFlags(fs, true)
800799
if err != nil {
801-
log.Panic("infallible operation failed.", zap.Error(err))
800+
log.Panic("failed to parse backup flags to config", zap.Error(err))
802801
}
802+
cfg.Config = commonConfig
803803
return cfg
804804
}
805805

br/pkg/task/common.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ const (
6464
flagRateLimit = "ratelimit"
6565
flagRateLimitUnit = "ratelimit-unit"
6666
flagConcurrency = "concurrency"
67-
flagChecksum = "checksum"
67+
FlagChecksum = "checksum"
6868
flagFilter = "filter"
6969
flagCaseSensitive = "case-sensitive"
7070
flagRemoveTiFlash = "remove-tiflash"
@@ -297,7 +297,7 @@ func DefineCommonFlags(flags *pflag.FlagSet) {
297297
flags.Uint(flagChecksumConcurrency, variable.DefChecksumTableConcurrency, "The concurrency of checksumming in one table")
298298

299299
flags.Uint64(flagRateLimit, unlimited, "The rate limit of the task, MB/s per node")
300-
flags.Bool(flagChecksum, true, "Run checksum at end of task")
300+
flags.Bool(FlagChecksum, true, "Run checksum at end of task")
301301
flags.Bool(flagRemoveTiFlash, true,
302302
"Remove TiFlash replicas before backup or restore, for unsupported versions of TiFlash")
303303

@@ -359,7 +359,7 @@ func DefineCommonFlags(flags *pflag.FlagSet) {
359359

360360
// HiddenFlagsForStream temporary hidden flags that stream cmd not support.
361361
func HiddenFlagsForStream(flags *pflag.FlagSet) {
362-
_ = flags.MarkHidden(flagChecksum)
362+
_ = flags.MarkHidden(FlagChecksum)
363363
_ = flags.MarkHidden(flagLoadStats)
364364
_ = flags.MarkHidden(flagChecksumConcurrency)
365365
_ = flags.MarkHidden(flagRateLimit)
@@ -609,7 +609,7 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error {
609609
return errors.Trace(err)
610610
}
611611

612-
if cfg.Checksum, err = flags.GetBool(flagChecksum); err != nil {
612+
if cfg.Checksum, err = flags.GetBool(FlagChecksum); err != nil {
613613
return errors.Trace(err)
614614
}
615615
if cfg.ChecksumConcurrency, err = flags.GetUint(flagChecksumConcurrency); err != nil {
@@ -777,6 +777,11 @@ func (cfg *Config) parseAndValidateMasterKeyInfo(hasPlaintextKey bool, flags *pf
777777
return nil
778778
}
779779

780+
// OverrideDefaultForBackup override common config for backup tasks
781+
func (cfg *Config) OverrideDefaultForBackup() {
782+
cfg.Checksum = false
783+
}
784+
780785
// NewMgr creates a new mgr at the given PD address.
781786
func NewMgr(ctx context.Context,
782787
g glue.Glue, pds []string,

0 commit comments

Comments
 (0)