@@ -18,6 +18,7 @@ import (
18
18
"container/heap"
19
19
"context"
20
20
"fmt"
21
+ "io"
21
22
"strings"
22
23
"sync"
23
24
"time"
@@ -179,12 +180,29 @@ func (j *regionJob) done(wg *sync.WaitGroup) {
179
180
}
180
181
181
182
// writeToTiKV writes the data to TiKV and mark this job as wrote stage.
182
- // if any write logic has error, writeToTiKV will set job to a proper stage and return nil. TODO: <-check this
183
+ // if any write logic has error, writeToTiKV will set job to a proper stage and return nil.
183
184
// if any underlying logic has error, writeToTiKV will return an error.
184
185
// we don't need to do cleanup for the pairs written to tikv if encounters an error,
185
186
// tikv will take the responsibility to do so.
186
187
// TODO: let client-go provide a high-level write interface.
187
188
func (local * Backend ) writeToTiKV (ctx context.Context , j * regionJob ) error {
189
+ err := local .doWrite (ctx , j )
190
+ if err == nil {
191
+ return nil
192
+ }
193
+ if ! common .IsRetryableError (err ) {
194
+ return err
195
+ }
196
+ // currently only one case will restart write
197
+ if strings .Contains (err .Error (), "RequestTooNew" ) {
198
+ j .convertStageTo (regionScanned )
199
+ return err
200
+ }
201
+ j .convertStageTo (needRescan )
202
+ return err
203
+ }
204
+
205
+ func (local * Backend ) doWrite (ctx context.Context , j * regionJob ) error {
188
206
if j .stage != regionScanned {
189
207
return nil
190
208
}
@@ -238,9 +256,25 @@ func (local *Backend) writeToTiKV(ctx context.Context, j *regionJob) error {
238
256
ApiVersion : apiVersion ,
239
257
}
240
258
241
- annotateErr := func (in error , peer * metapb.Peer ) error {
259
+ failpoint .Inject ("changeEpochVersion" , func (val failpoint.Value ) {
260
+ cloned := * meta .RegionEpoch
261
+ meta .RegionEpoch = & cloned
262
+ i := val .(int )
263
+ if i >= 0 {
264
+ meta .RegionEpoch .Version += uint64 (i )
265
+ } else {
266
+ meta .RegionEpoch .ConfVer -= uint64 (- i )
267
+ }
268
+ })
269
+
270
+ annotateErr := func (in error , peer * metapb.Peer , msg string ) error {
242
271
// annotate the error with peer/store/region info to help debug.
243
- return errors .Annotatef (in , "peer %d, store %d, region %d, epoch %s" , peer .Id , peer .StoreId , region .Id , region .RegionEpoch .String ())
272
+ return errors .Annotatef (
273
+ in ,
274
+ "peer %d, store %d, region %d, epoch %s, %s" ,
275
+ peer .Id , peer .StoreId , region .Id , region .RegionEpoch .String (),
276
+ msg ,
277
+ )
244
278
}
245
279
246
280
leaderID := j .region .Leader .GetId ()
@@ -260,17 +294,17 @@ func (local *Backend) writeToTiKV(ctx context.Context, j *regionJob) error {
260
294
for _ , peer := range region .GetPeers () {
261
295
cli , err := clientFactory .Create (ctx , peer .StoreId )
262
296
if err != nil {
263
- return annotateErr (err , peer )
297
+ return annotateErr (err , peer , "when create client" )
264
298
}
265
299
266
300
wstream , err := cli .Write (ctx )
267
301
if err != nil {
268
- return annotateErr (err , peer )
302
+ return annotateErr (err , peer , "when open write stream" )
269
303
}
270
304
271
305
// Bind uuid for this write request
272
306
if err = wstream .Send (req ); err != nil {
273
- return annotateErr (err , peer )
307
+ return annotateErr (err , peer , "when send meta" )
274
308
}
275
309
clients = append (clients , wstream )
276
310
allPeers = append (allPeers , peer )
@@ -309,7 +343,12 @@ func (local *Backend) writeToTiKV(ctx context.Context, j *regionJob) error {
309
343
return errors .Trace (err )
310
344
}
311
345
if err := clients [i ].SendMsg (preparedMsg ); err != nil {
312
- return annotateErr (err , allPeers [i ])
346
+ if err == io .EOF {
347
+ // if it's EOF, need RecvMsg to get the error
348
+ dummy := & sst.WriteResponse {}
349
+ err = clients [i ].RecvMsg (dummy )
350
+ }
351
+ return annotateErr (err , allPeers [i ], "when send data" )
313
352
}
314
353
}
315
354
failpoint .Inject ("afterFlushKVs" , func () {
@@ -383,10 +422,10 @@ func (local *Backend) writeToTiKV(ctx context.Context, j *regionJob) error {
383
422
for i , wStream := range clients {
384
423
resp , closeErr := wStream .CloseAndRecv ()
385
424
if closeErr != nil {
386
- return annotateErr (closeErr , allPeers [i ])
425
+ return annotateErr (closeErr , allPeers [i ], "when close write stream" )
387
426
}
388
427
if resp .Error != nil {
389
- return annotateErr (errors .New (resp .Error .Message ), allPeers [i ])
428
+ return annotateErr (errors .New (" resp error: " + resp .Error .Message ), allPeers [i ], "when close write stream" )
390
429
}
391
430
if leaderID == region .Peers [i ].GetId () {
392
431
leaderPeerMetas = resp .Metas
0 commit comments