Skip to content

Commit 738a65b

Browse files
authored
Revert "br: Add retry limit of connection err in fine grained backup … (pingcap#58503)
closes pingcap#57449
1 parent 6b8201d commit 738a65b

File tree

5 files changed

+5
-143
lines changed

5 files changed

+5
-143
lines changed

br/pkg/backup/BUILD.bazel

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,9 @@ go_test(
7272
embed = [":backup"],
7373
flaky = True,
7474
race = "on",
75-
shard_count = 14,
75+
shard_count = 13,
7676
deps = [
7777
"//br/pkg/conn",
78-
"//br/pkg/errors",
7978
"//br/pkg/gluetidb",
8079
"//br/pkg/metautil",
8180
"//br/pkg/mock",
@@ -92,7 +91,6 @@ go_test(
9291
"//pkg/util/codec",
9392
"//pkg/util/table-filter",
9493
"@com_github_golang_protobuf//proto",
95-
"@com_github_pingcap_errors//:errors",
9694
"@com_github_pingcap_kvproto//pkg/brpb",
9795
"@com_github_pingcap_kvproto//pkg/encryptionpb",
9896
"@com_github_pingcap_kvproto//pkg/errorpb",
@@ -105,6 +103,5 @@ go_test(
105103
"@io_opencensus_go//stats/view",
106104
"@org_golang_google_grpc//:grpc",
107105
"@org_uber_go_goleak//:goleak",
108-
"@org_uber_go_multierr//:multierr",
109106
],
110107
)

br/pkg/backup/client.go

Lines changed: 4 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -77,50 +77,12 @@ type Checksum struct {
7777
// ProgressUnit represents the unit of progress.
7878
type ProgressUnit string
7979

80-
func MakeStoreBasedErr(storeID uint64, err error) *StoreBasedErr {
81-
return &StoreBasedErr{storeID: storeID, err: err}
82-
}
83-
84-
type StoreBasedErr struct {
85-
storeID uint64
86-
err error
87-
}
88-
89-
func (e *StoreBasedErr) Error() string {
90-
return fmt.Sprintf("Store ID '%d': %v", e.storeID, e.err.Error())
91-
}
92-
93-
func (e *StoreBasedErr) Unwrap() error {
94-
return e.err
95-
}
96-
97-
func (e *StoreBasedErr) Cause() error {
98-
return e.Unwrap()
99-
}
100-
101-
// Errors implements errors.ErrorGroup.
102-
// For now `WalkDeep` cannot walk "subtree"s like:
103-
/* 1 - 2 - 5
104-
* |
105-
* + 3 - 4
106-
*/
107-
// It stops after walking `1` and then gave up.
108-
// This is a bug: see https://github.com/pingcap/errors/issues/72
109-
// We manually make this a multierr to workaround this...
110-
func (e *StoreBasedErr) Errors() []error {
111-
if errs, ok := e.err.(errors.ErrorGroup); ok {
112-
return errs.Errors()
113-
}
114-
return nil
115-
}
116-
11780
const (
11881
// backupFineGrainedMaxBackoff is 1 hour.
11982
// given it begins the fine-grained backup, there must be some problems in the cluster.
12083
// We need to be more patient.
12184
backupFineGrainedMaxBackoff = 3600000
12285
backupRetryTimes = 5
123-
disconnectRetryTimeout = 20000
12486
// RangeUnit represents the progress updated counter when a range finished.
12587
RangeUnit ProgressUnit = "range"
12688
// RegionUnit represents the progress updated counter when a region finished.
@@ -1160,7 +1122,6 @@ func (bc *Client) fineGrainedBackup(
11601122
})
11611123

11621124
bo := utils.AdaptTiKVBackoffer(ctx, backupFineGrainedMaxBackoff, berrors.ErrUnknown)
1163-
maxDisconnect := make(map[uint64]uint)
11641125
for {
11651126
// Step1, check whether there is any incomplete range
11661127
incomplete := pr.Res.GetIncompleteRange(req.StartKey, req.EndKey)
@@ -1208,19 +1169,8 @@ func (bc *Client) fineGrainedBackup(
12081169
for {
12091170
select {
12101171
case err := <-errCh:
1211-
if !berrors.Is(err, berrors.ErrFailedToConnect) {
1212-
return errors.Trace(err)
1213-
}
1214-
storeErr, ok := err.(*StoreBasedErr)
1215-
if !ok {
1216-
return errors.Trace(err)
1217-
}
1218-
1219-
storeID := storeErr.storeID
1220-
maxDisconnect[storeID]++
1221-
if maxDisconnect[storeID] > backupRetryTimes {
1222-
return errors.Annotatef(err, "Failed to connect to store %d more than %d times", storeID, backupRetryTimes)
1223-
}
1172+
// TODO: should we handle err here?
1173+
return errors.Trace(err)
12241174
case resp, ok := <-respCh:
12251175
if !ok {
12261176
// Finished.
@@ -1321,22 +1271,12 @@ func (bc *Client) handleFineGrained(
13211271
storeID := targetPeer.GetStoreId()
13221272
lockResolver := bc.mgr.GetLockResolver()
13231273
client, err := bc.mgr.GetBackupClient(ctx, storeID)
1324-
1325-
// inject a disconnect failpoint
1326-
failpoint.Inject("disconnect", func(_ failpoint.Value) {
1327-
logutil.CL(ctx).Warn("This is a injected disconnection error")
1328-
err = berrors.ErrFailedToConnect
1329-
})
1330-
13311274
if err != nil {
13321275
if berrors.Is(err, berrors.ErrFailedToConnect) {
13331276
// When the leader store is died,
13341277
// 20s for the default max duration before the raft election timer fires.
13351278
logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID))
1336-
return disconnectRetryTimeout, &StoreBasedErr{
1337-
storeID: storeID,
1338-
err: err,
1339-
}
1279+
return 20000, nil
13401280
}
13411281

13421282
logutil.CL(ctx).Error("fail to connect store", zap.Uint64("StoreID", storeID))
@@ -1375,10 +1315,7 @@ func (bc *Client) handleFineGrained(
13751315
// When the leader store is died,
13761316
// 20s for the default max duration before the raft election timer fires.
13771317
logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID))
1378-
return disconnectRetryTimeout, &StoreBasedErr{
1379-
storeID: storeID,
1380-
err: err,
1381-
}
1318+
return 20000, nil
13821319
}
13831320
logutil.CL(ctx).Error("failed to send fine-grained backup", zap.Uint64("storeID", storeID), logutil.ShortError(err))
13841321
return 0, errors.Annotatef(err, "failed to send fine-grained backup [%s, %s)",

br/pkg/backup/client_test.go

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,11 @@ import (
1010
"time"
1111

1212
"github.com/golang/protobuf/proto"
13-
"github.com/pingcap/errors"
1413
backuppb "github.com/pingcap/kvproto/pkg/brpb"
1514
"github.com/pingcap/kvproto/pkg/encryptionpb"
1615
"github.com/pingcap/kvproto/pkg/errorpb"
1716
"github.com/pingcap/tidb/br/pkg/backup"
1817
"github.com/pingcap/tidb/br/pkg/conn"
19-
berrors "github.com/pingcap/tidb/br/pkg/errors"
2018
"github.com/pingcap/tidb/br/pkg/gluetidb"
2119
"github.com/pingcap/tidb/br/pkg/metautil"
2220
"github.com/pingcap/tidb/br/pkg/mock"
@@ -36,7 +34,6 @@ import (
3634
"github.com/tikv/client-go/v2/txnkv/txnlock"
3735
pd "github.com/tikv/pd/client"
3836
"go.opencensus.io/stats/view"
39-
"go.uber.org/multierr"
4037
)
4138

4239
type testBackup struct {
@@ -395,11 +392,3 @@ func TestCheckBackupIsLocked(t *testing.T) {
395392
require.Error(t, err)
396393
require.Regexp(t, "backup lock file and sst file exist in(.+)", err.Error())
397394
}
398-
399-
func TestErr(t *testing.T) {
400-
serr := backup.MakeStoreBasedErr(42, multierr.Combine(
401-
errors.Annotate(berrors.ErrFailedToConnect, "oops"),
402-
berrors.ErrFailedToConnect.GenWithStack("whoa")))
403-
require.True(t, berrors.Is(serr, berrors.ErrFailedToConnect))
404-
require.False(t, berrors.Is(serr, berrors.ErrUnknown))
405-
}

br/tests/br_fine_grained_disconnect/run.sh

Lines changed: 0 additions & 49 deletions
This file was deleted.

br/tests/br_fine_grained_disconnect/workload

Lines changed: 0 additions & 12 deletions
This file was deleted.

0 commit comments

Comments
 (0)