From 8d4ff946dd4dba0d9563ecaf24e74f5c2f4e6ba2 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Tue, 22 Aug 2023 10:57:16 +0800 Subject: [PATCH 1/4] sync code Signed-off-by: lance6716 --- br/pkg/lightning/backend/local/region_job.go | 5 +++++ br/tests/lightning_local_backend/run.sh | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/br/pkg/lightning/backend/local/region_job.go b/br/pkg/lightning/backend/local/region_job.go index a07f6fa3c9bd6..5437c5931b0b3 100644 --- a/br/pkg/lightning/backend/local/region_job.go +++ b/br/pkg/lightning/backend/local/region_job.go @@ -408,6 +408,11 @@ func (local *Backend) writeToTiKV(ctx context.Context, j *regionJob) error { } } + failpoint.Inject("NoLeader", func() { + log.FromContext(ctx).Warn("enter failpoint NoLeader") + leaderPeerMetas = nil + }) + // if there is not leader currently, we don't forward the stage to wrote and let caller // handle the retry. if len(leaderPeerMetas) == 0 { diff --git a/br/tests/lightning_local_backend/run.sh b/br/tests/lightning_local_backend/run.sh index 3fdde7328da13..acb5b8268be9c 100755 --- a/br/tests/lightning_local_backend/run.sh +++ b/br/tests/lightning_local_backend/run.sh @@ -56,7 +56,7 @@ check_contains 'sum(c): 46' run_sql 'DROP DATABASE cpeng;' rm -f "/tmp/tidb_lightning_checkpoint_local_backend_test.pb" -export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=2*return("epochnotmatch")' +export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=2*return("epochnotmatch");github.com/pingcap/tidb/br/pkg/lightning/backend/local/NoLeader=1*return("")' run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "$CUR/config.toml" From 31df361e8b4dfb9feb77d05bc02693b37cfb8f3c Mon Sep 17 00:00:00 2001 From: lance6716 Date: Tue, 22 Aug 2023 10:57:46 +0800 Subject: [PATCH 2/4] try to fix another test Signed-off-by: lance6716 --- br/tests/lightning_local_backend/config.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/br/tests/lightning_local_backend/config.toml b/br/tests/lightning_local_backend/config.toml index 46ca06e09b4ab..73c54882430c7 100644 --- a/br/tests/lightning_local_backend/config.toml +++ b/br/tests/lightning_local_backend/config.toml @@ -1,5 +1,6 @@ [lightning] table-concurrency = 1 +index-concurrency = 1 [checkpoint] enable = true From 430f7545fbb0483f514dd1139c55efbc20742381 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Tue, 22 Aug 2023 11:21:21 +0800 Subject: [PATCH 3/4] sync code Signed-off-by: lance6716 --- br/pkg/lightning/backend/local/region_job.go | 3 +-- br/pkg/lightning/common/errors.go | 1 + br/pkg/lightning/common/retry.go | 1 + br/tests/lightning_local_backend/run.sh | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/br/pkg/lightning/backend/local/region_job.go b/br/pkg/lightning/backend/local/region_job.go index 5437c5931b0b3..5de40cca0e195 100644 --- a/br/pkg/lightning/backend/local/region_job.go +++ b/br/pkg/lightning/backend/local/region_job.go @@ -420,8 +420,7 @@ func (local *Backend) writeToTiKV(ctx context.Context, j *regionJob) error { logutil.Region(region), logutil.Leader(j.region.Leader), zap.Uint64("leader_id", leaderID), logutil.SSTMeta(meta), zap.Int64("kv_pairs", totalCount), zap.Int64("total_bytes", totalSize)) - return errors.Errorf("write to tikv with no leader returned, region '%d', leader: %d", - region.Id, leaderID) + return common.ErrNoLeader.GenWithStackByArgs(region.Id, leaderID) } takeTime := time.Since(begin) diff --git a/br/pkg/lightning/common/errors.go b/br/pkg/lightning/common/errors.go index 9ea4e7b2af63d..109f72755720f 100644 --- a/br/pkg/lightning/common/errors.go +++ b/br/pkg/lightning/common/errors.go @@ -83,6 +83,7 @@ var ( ErrKVReadIndexNotReady = errors.Normalize("read index not ready", errors.RFCCodeText("Lightning:KV:ReadIndexNotReady")) ErrKVIngestFailed = errors.Normalize("ingest tikv failed", errors.RFCCodeText("Lightning:KV:ErrKVIngestFailed")) ErrKVRaftProposalDropped = errors.Normalize("raft proposal dropped", errors.RFCCodeText("Lightning:KV:ErrKVRaftProposalDropped")) + ErrNoLeader = errors.Normalize("write to tikv with no leader returned, region '%d', leader: %d", errors.RFCCodeText("Lightning:KV:ErrNoLeader")) ErrUnknownBackend = errors.Normalize("unknown backend %s", errors.RFCCodeText("Lightning:Restore:ErrUnknownBackend")) ErrCheckLocalFile = errors.Normalize("cannot find local file for table: %s engineDir: %s", errors.RFCCodeText("Lightning:Restore:ErrCheckLocalFile")) diff --git a/br/pkg/lightning/common/retry.go b/br/pkg/lightning/common/retry.go index f6db6cda86407..c3bb979a9bd32 100644 --- a/br/pkg/lightning/common/retry.go +++ b/br/pkg/lightning/common/retry.go @@ -70,6 +70,7 @@ func IsRetryableError(err error) bool { var retryableErrorIDs = map[errors.ErrorID]struct{}{ ErrKVEpochNotMatch.ID(): {}, ErrKVNotLeader.ID(): {}, + ErrNoLeader.ID(): {}, ErrKVRegionNotFound.ID(): {}, // common.ErrKVServerIsBusy is a little duplication with tmysql.ErrTiKVServerBusy // it's because the response of sst.ingest gives us a sst.IngestResponse which doesn't contain error code, diff --git a/br/tests/lightning_local_backend/run.sh b/br/tests/lightning_local_backend/run.sh index acb5b8268be9c..8e8f9b8080fb5 100755 --- a/br/tests/lightning_local_backend/run.sh +++ b/br/tests/lightning_local_backend/run.sh @@ -56,7 +56,7 @@ check_contains 'sum(c): 46' run_sql 'DROP DATABASE cpeng;' rm -f "/tmp/tidb_lightning_checkpoint_local_backend_test.pb" -export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=2*return("epochnotmatch");github.com/pingcap/tidb/br/pkg/lightning/backend/local/NoLeader=1*return("")' +export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=2*return("epochnotmatch");github.com/pingcap/tidb/br/pkg/lightning/backend/local/NoLeader=1*return()' run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "$CUR/config.toml" From 8b9013599eb9c835fc48a2457e0e77559c923050 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Tue, 22 Aug 2023 11:44:25 +0800 Subject: [PATCH 4/4] fix errdoc Signed-off-by: lance6716 --- errors.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/errors.toml b/errors.toml index 6a2ca3036e5b7..afb49d8cd9f76 100644 --- a/errors.toml +++ b/errors.toml @@ -406,6 +406,11 @@ error = ''' raft proposal dropped ''' +["Lightning:KV:ErrNoLeader"] +error = ''' +write to tikv with no leader returned, region '%d', leader: %d +''' + ["Lightning:KV:NotLeader"] error = ''' not leader