@@ -80,6 +80,10 @@ const maxSplitKeysOnce = 10240
80
80
// rawKVBatchCount specifies the count of entries that the rawkv client puts into TiKV.
81
81
const rawKVBatchCount = 64
82
82
83
+ // defaultRepairIndexSessionCount is the session count for repairing ingest indexes. Currently only one TiDB node executes add-index jobs
84
+ // at the same time and the add-index job concurrency is about min(10, `TiDB CPUs / 4`).
85
+ const defaultRepairIndexSessionCount uint = 10
86
+
83
87
type LogClient struct {
84
88
cipher * backuppb.CipherInfo
85
89
pdClient pd.Client
@@ -223,10 +227,48 @@ func (rc *LogClient) StartCheckpointRunnerForLogRestore(ctx context.Context, g g
223
227
return runner , errors .Trace (err )
224
228
}
225
229
230
+ func createSession (ctx context.Context , g glue.Glue , store kv.Storage ) (glue.Session , error ) {
231
+ unsafeSession , err := g .CreateSession (store )
232
+ if err != nil {
233
+ return nil , errors .Trace (err )
234
+ }
235
+ // Set SQL mode to None for avoiding SQL compatibility problem
236
+ err = unsafeSession .Execute (ctx , "set @@sql_mode=''" )
237
+ if err != nil {
238
+ return nil , errors .Trace (err )
239
+ }
240
+ return unsafeSession , nil
241
+ }
242
+
243
+ func createSessions (ctx context.Context , g glue.Glue , store kv.Storage , count uint ) (createdUnsafeSessions []glue.Session , createErr error ) {
244
+ unsafeSessions := make ([]glue.Session , 0 , count )
245
+ defer func () {
246
+ if createErr != nil {
247
+ closeSessions (unsafeSessions )
248
+ }
249
+ }()
250
+ for range count {
251
+ unsafeSession , err := createSession (ctx , g , store )
252
+ if err != nil {
253
+ return nil , errors .Trace (err )
254
+ }
255
+ unsafeSessions = append (unsafeSessions , unsafeSession )
256
+ }
257
+ return unsafeSessions , nil
258
+ }
259
+
260
+ func closeSessions (sessions []glue.Session ) {
261
+ for _ , session := range sessions {
262
+ if session != nil {
263
+ session .Close ()
264
+ }
265
+ }
266
+ }
267
+
226
268
// Init create db connection and domain for storage.
227
- func (rc * LogClient ) Init (g glue.Glue , store kv.Storage ) error {
269
+ func (rc * LogClient ) Init (ctx context. Context , g glue.Glue , store kv.Storage ) error {
228
270
var err error
229
- rc .se , err = g . CreateSession ( store )
271
+ rc .se , err = createSession ( ctx , g , store )
230
272
if err != nil {
231
273
return errors .Trace (err )
232
274
}
@@ -1423,39 +1465,60 @@ func (rc *LogClient) RepairIngestIndex(ctx context.Context, ingestRecorder *inge
1423
1465
1424
1466
info := rc .dom .InfoSchema ()
1425
1467
console := glue .GetConsole (g )
1426
- NEXTSQL:
1427
- for _ , sql := range sqls {
1428
- progressTitle := fmt .Sprintf ("repair ingest index %s for table %s.%s" , sql .IndexName , sql .SchemaName , sql .TableName )
1429
-
1468
+ for i , sql := range sqls {
1430
1469
tableInfo , err := info .TableByName (ctx , sql .SchemaName , sql .TableName )
1431
1470
if err != nil {
1432
1471
return errors .Trace (err )
1433
1472
}
1434
- oldIndexIDFound := false
1473
+ sqls [i ].OldIndexIDFound = false
1474
+ sqls [i ].IndexRepaired = false
1435
1475
if fromCheckpoint {
1436
1476
for _ , idx := range tableInfo .Indices () {
1437
1477
indexInfo := idx .Meta ()
1438
1478
if indexInfo .ID == sql .IndexID {
1439
1479
// the original index id is not dropped
1440
- oldIndexIDFound = true
1480
+ sqls [ i ]. OldIndexIDFound = true
1441
1481
break
1442
1482
}
1443
1483
// what if index's state is not public?
1444
1484
if indexInfo .Name .O == sql .IndexName {
1485
+ progressTitle := fmt .Sprintf ("repair ingest index %s for table %s.%s" , sql .IndexName , sql .SchemaName , sql .TableName )
1445
1486
// find the same name index, but not the same index id,
1446
1487
// which means the repaired index id is created
1447
1488
if _ , err := fmt .Fprintf (console .Out (), "%s ... %s\n " , progressTitle , color .HiGreenString ("SKIPPED DUE TO CHECKPOINT MODE" )); err != nil {
1448
1489
return errors .Trace (err )
1449
1490
}
1450
- continue NEXTSQL
1491
+ sqls [i ].IndexRepaired = true
1492
+ break
1451
1493
}
1452
1494
}
1453
1495
}
1496
+ }
1454
1497
1455
- if err := func (sql checkpoint.CheckpointIngestIndexRepairSQL ) error {
1456
- w := console .StartProgressBar (progressTitle , glue .OnlyOneTask )
1457
- defer w .Close ()
1498
+ sessionCount := defaultRepairIndexSessionCount
1499
+ indexSessions , err := createSessions (ctx , g , rc .dom .Store (), sessionCount )
1500
+ if err != nil {
1501
+ return errors .Trace (err )
1502
+ }
1503
+ defer func () {
1504
+ closeSessions (indexSessions )
1505
+ }()
1506
+ workerpool := tidbutil .NewWorkerPool (sessionCount , "repair ingest index" )
1507
+ eg , ectx := errgroup .WithContext (ctx )
1508
+ mp := console .StartMultiProgress ()
1509
+ for _ , sql := range sqls {
1510
+ if sql .IndexRepaired {
1511
+ continue
1512
+ }
1513
+ if ectx .Err () != nil {
1514
+ break
1515
+ }
1516
+ progressTitle := fmt .Sprintf ("repair ingest index %s for table %s.%s" , sql .IndexName , sql .SchemaName , sql .TableName )
1517
+ w := mp .AddTextBar (progressTitle , 1 )
1518
+ workerpool .ApplyWithIDInErrorGroup (eg , func (id uint64 ) error {
1519
+ defer w .Done ()
1458
1520
1521
+ indexSession := indexSessions [id % uint64 (len (indexSessions ))]
1459
1522
// TODO: When the TiDB supports the DROP and CREATE the same name index in one SQL,
1460
1523
// the checkpoint for ingest recorder can be removed and directly use the SQL:
1461
1524
// ALTER TABLE db.tbl DROP INDEX `i_1`, ADD INDEX `i_1` ...
@@ -1466,8 +1529,8 @@ NEXTSQL:
1466
1529
// restored metakv and then skips repairing it.
1467
1530
1468
1531
// only when first execution or old index id is not dropped
1469
- if ! fromCheckpoint || oldIndexIDFound {
1470
- if err := rc . se . ExecuteInternal (ctx , alterTableDropIndexSQL , sql .SchemaName .O , sql .TableName .O , sql .IndexName ); err != nil {
1532
+ if ! fromCheckpoint || sql . OldIndexIDFound {
1533
+ if err := indexSession . ExecuteInternal (ectx , alterTableDropIndexSQL , sql .SchemaName .O , sql .TableName .O , sql .IndexName ); err != nil {
1471
1534
return errors .Trace (err )
1472
1535
}
1473
1536
}
@@ -1477,17 +1540,15 @@ NEXTSQL:
1477
1540
}
1478
1541
})
1479
1542
// create the repaired index when first execution or not found it
1480
- if err := rc .se .ExecuteInternal (ctx , sql .AddSQL , sql .AddArgs ... ); err != nil {
1481
- return errors .Trace (err )
1482
- }
1483
- w .Inc ()
1484
- if err := w .Wait (ctx ); err != nil {
1543
+ if err := indexSession .ExecuteInternal (ectx , sql .AddSQL , sql .AddArgs ... ); err != nil {
1485
1544
return errors .Trace (err )
1486
1545
}
1546
+ w .Increment ()
1487
1547
return nil
1488
- }(sql ); err != nil {
1489
- return errors .Trace (err )
1490
- }
1548
+ })
1549
+ }
1550
+ if err := eg .Wait (); err != nil {
1551
+ return errors .Trace (err )
1491
1552
}
1492
1553
1493
1554
return nil
0 commit comments