@@ -84,6 +84,10 @@ const maxSplitKeysOnce = 10240
84
84
// rawKVBatchCount specifies the count of entries that the rawkv client puts into TiKV.
85
85
const rawKVBatchCount = 64
86
86
87
+ // session count for repairing ingest indexes. Currently only one TiDB node executes adding index jobs
88
+ // at the same time and the add-index job concurrency is about min(10, `TiDB CPUs / 4`).
89
+ const defaultRepairIndexSessionCount uint = 10
90
+
87
91
// LogRestoreManager is a comprehensive wrapper that encapsulates all logic related to log restoration,
88
92
// including concurrency management, checkpoint handling, and file importing for efficient log processing.
89
93
type LogRestoreManager struct {
@@ -456,16 +460,48 @@ func (rc *LogClient) CleanUpKVFiles(
456
460
return rc .logRestoreManager .fileImporter .ClearFiles (ctx , rc .pdClient , "v1" )
457
461
}
458
462
459
- // Init create db connection and domain for storage.
460
- func (rc * LogClient ) Init (ctx context.Context , g glue.Glue , store kv.Storage ) error {
461
- var err error
462
- rc .unsafeSession , err = g .CreateSession (store )
463
+ func createSession (ctx context.Context , g glue.Glue , store kv.Storage ) (glue.Session , error ) {
464
+ unsafeSession , err := g .CreateSession (store )
463
465
if err != nil {
464
- return errors .Trace (err )
466
+ return nil , errors .Trace (err )
465
467
}
466
-
467
468
// Set SQL mode to None for avoiding SQL compatibility problem
468
- err = rc .unsafeSession .Execute (ctx , "set @@sql_mode=''" )
469
+ err = unsafeSession .Execute (ctx , "set @@sql_mode=''" )
470
+ if err != nil {
471
+ return nil , errors .Trace (err )
472
+ }
473
+ return unsafeSession , nil
474
+ }
475
+
476
+ func createSessions (ctx context.Context , g glue.Glue , store kv.Storage , count uint ) (createdUnsafeSessions []glue.Session , createErr error ) {
477
+ unsafeSessions := make ([]glue.Session , 0 , count )
478
+ defer func () {
479
+ if createErr != nil {
480
+ closeSessions (unsafeSessions )
481
+ }
482
+ }()
483
+ for range count {
484
+ unsafeSession , err := createSession (ctx , g , store )
485
+ if err != nil {
486
+ return nil , errors .Trace (err )
487
+ }
488
+ unsafeSessions = append (unsafeSessions , unsafeSession )
489
+ }
490
+ return unsafeSessions , nil
491
+ }
492
+
493
+ func closeSessions (sessions []glue.Session ) {
494
+ for _ , session := range sessions {
495
+ if session != nil {
496
+ session .Close ()
497
+ }
498
+ }
499
+ }
500
+
501
+ // Init create db connection and domain for storage.
502
+ func (rc * LogClient ) Init (ctx context.Context , g glue.Glue , store kv.Storage ) error {
503
+ var err error
504
+ rc .unsafeSession , err = createSession (ctx , g , store )
469
505
if err != nil {
470
506
return errors .Trace (err )
471
507
}
@@ -1765,39 +1801,60 @@ func (rc *LogClient) RepairIngestIndex(ctx context.Context, ingestRecorder *inge
1765
1801
1766
1802
info := rc .dom .InfoSchema ()
1767
1803
console := glue .GetConsole (g )
1768
- NEXTSQL:
1769
- for _ , sql := range sqls {
1770
- progressTitle := fmt .Sprintf ("repair ingest index %s for table %s.%s" , sql .IndexName , sql .SchemaName , sql .TableName )
1771
-
1804
+ for i , sql := range sqls {
1772
1805
tableInfo , err := info .TableByName (ctx , sql .SchemaName , sql .TableName )
1773
1806
if err != nil {
1774
1807
return errors .Trace (err )
1775
1808
}
1776
- oldIndexIDFound := false
1809
+ sqls [i ].OldIndexIDFound = false
1810
+ sqls [i ].IndexRepaired = false
1777
1811
if fromCheckpoint {
1778
1812
for _ , idx := range tableInfo .Indices () {
1779
1813
indexInfo := idx .Meta ()
1780
1814
if indexInfo .ID == sql .IndexID {
1781
1815
// the original index id is not dropped
1782
- oldIndexIDFound = true
1816
+ sqls [ i ]. OldIndexIDFound = true
1783
1817
break
1784
1818
}
1785
1819
// what if index's state is not public?
1786
1820
if indexInfo .Name .O == sql .IndexName {
1821
+ progressTitle := fmt .Sprintf ("repair ingest index %s for table %s.%s" , sql .IndexName , sql .SchemaName , sql .TableName )
1787
1822
// find the same name index, but not the same index id,
1788
1823
// which means the repaired index id is created
1789
1824
if _ , err := fmt .Fprintf (console .Out (), "%s ... %s\n " , progressTitle , color .HiGreenString ("SKIPPED DUE TO CHECKPOINT MODE" )); err != nil {
1790
1825
return errors .Trace (err )
1791
1826
}
1792
- continue NEXTSQL
1827
+ sqls [i ].IndexRepaired = true
1828
+ break
1793
1829
}
1794
1830
}
1795
1831
}
1832
+ }
1796
1833
1797
- if err := func (sql checkpoint.CheckpointIngestIndexRepairSQL ) error {
1798
- w := console .StartProgressBar (progressTitle , glue .OnlyOneTask )
1799
- defer w .Close ()
1834
+ sessionCount := defaultRepairIndexSessionCount
1835
+ indexSessions , err := createSessions (ctx , g , rc .dom .Store (), sessionCount )
1836
+ if err != nil {
1837
+ return errors .Trace (err )
1838
+ }
1839
+ defer func () {
1840
+ closeSessions (indexSessions )
1841
+ }()
1842
+ workerpool := tidbutil .NewWorkerPool (sessionCount , "repair ingest index" )
1843
+ eg , ectx := errgroup .WithContext (ctx )
1844
+ mp := console .StartMultiProgress ()
1845
+ for _ , sql := range sqls {
1846
+ if sql .IndexRepaired {
1847
+ continue
1848
+ }
1849
+ if ectx .Err () != nil {
1850
+ break
1851
+ }
1852
+ progressTitle := fmt .Sprintf ("repair ingest index %s for table %s.%s" , sql .IndexName , sql .SchemaName , sql .TableName )
1853
+ w := mp .AddTextBar (progressTitle , 1 )
1854
+ workerpool .ApplyWithIDInErrorGroup (eg , func (id uint64 ) error {
1855
+ defer w .Done ()
1800
1856
1857
+ indexSession := indexSessions [id % uint64 (len (indexSessions ))]
1801
1858
// TODO: When the TiDB supports the DROP and CREATE the same name index in one SQL,
1802
1859
// the checkpoint for ingest recorder can be removed and directly use the SQL:
1803
1860
// ALTER TABLE db.tbl DROP INDEX `i_1`, ADD IDNEX `i_1` ...
@@ -1808,8 +1865,8 @@ NEXTSQL:
1808
1865
// restored metakv and then skips repairing it.
1809
1866
1810
1867
// only when first execution or old index id is not dropped
1811
- if ! fromCheckpoint || oldIndexIDFound {
1812
- if err := rc . unsafeSession . ExecuteInternal (ctx , alterTableDropIndexSQL , sql .SchemaName .O , sql .TableName .O , sql .IndexName ); err != nil {
1868
+ if ! fromCheckpoint || sql . OldIndexIDFound {
1869
+ if err := indexSession . ExecuteInternal (ectx , alterTableDropIndexSQL , sql .SchemaName .O , sql .TableName .O , sql .IndexName ); err != nil {
1813
1870
return errors .Trace (err )
1814
1871
}
1815
1872
}
@@ -1819,17 +1876,15 @@ NEXTSQL:
1819
1876
}
1820
1877
})
1821
1878
// create the repaired index when first execution or not found it
1822
- if err := rc .unsafeSession .ExecuteInternal (ctx , sql .AddSQL , sql .AddArgs ... ); err != nil {
1823
- return errors .Trace (err )
1824
- }
1825
- w .Inc ()
1826
- if err := w .Wait (ctx ); err != nil {
1879
+ if err := indexSession .ExecuteInternal (ectx , sql .AddSQL , sql .AddArgs ... ); err != nil {
1827
1880
return errors .Trace (err )
1828
1881
}
1882
+ w .Increment ()
1829
1883
return nil
1830
- }(sql ); err != nil {
1831
- return errors .Trace (err )
1832
- }
1884
+ })
1885
+ }
1886
+ if err := eg .Wait (); err != nil {
1887
+ return errors .Trace (err )
1833
1888
}
1834
1889
1835
1890
return nil
0 commit comments