@@ -503,7 +503,10 @@ func (m *JobManager) reportMetrics(se session.Session) {
503
503
504
504
// checkNotOwnJob removes the running jobs whose current job owner is not this JobManager
505
505
func (m * JobManager ) checkNotOwnJob () {
506
- for _ , job := range m .runningJobs {
506
+ // iterate in reverse so that jobs can be removed safely inside the loop
507
+ for i := len (m .runningJobs ) - 1 ; i >= 0 ; i -- {
508
+ job := m .runningJobs [i ]
509
+
507
510
tableStatus := m .tableStatusCache .Tables [job .tbl .ID ]
508
511
if tableStatus == nil || tableStatus .CurrentJobOwnerID != m .id {
509
512
logger := logutil .Logger (m .ctx ).With (zap .String ("jobID" , job .id ))
@@ -517,8 +520,11 @@ func (m *JobManager) checkNotOwnJob() {
517
520
}
518
521
519
522
func (m * JobManager ) checkFinishedJob (se session.Session ) {
523
+ // iterate in reverse so that jobs can be removed safely inside the loop
520
524
j:
521
- for _ , job := range m .runningJobs {
525
+ for i := len (m .runningJobs ) - 1 ; i >= 0 ; i -- {
526
+ job := m .runningJobs [i ]
527
+
522
528
timeoutJobCtx , cancel := context .WithTimeout (m .ctx , ttlInternalSQLTimeout )
523
529
524
530
sql , args := cache .SelectFromTTLTaskWithJobID (job .id )
@@ -576,10 +582,24 @@ func (m *JobManager) rescheduleJobs(se session.Session, now time.Time) {
576
582
577
583
if ! variable .EnableTTLJob .Load () || ! timeutil .WithinDayTimePeriod (variable .TTLJobScheduleWindowStartTime .Load (), variable .TTLJobScheduleWindowEndTime .Load (), now ) {
578
584
if len (m .runningJobs ) > 0 {
585
+ << << << < HEAD
579
586
for _ , job := range m .runningJobs {
580
587
logutil .Logger (m .ctx ).Info ("cancel job because tidb_ttl_job_enable turned off" , zap .String ("jobID" , job .id ))
581
588
582
589
summary , err := summarizeErr (errors .New ("ttl job is disabled" ))
590
+ == == == =
591
+ // iterate in reverse so that jobs can be removed safely inside the loop
592
+ for i := len (m .runningJobs ) - 1 ; i >= 0 ; i -- {
593
+ job := m .runningJobs [i ]
594
+
595
+ logger := logutil .Logger (m .ctx ).With (
596
+ zap .String ("jobID" , job .id ),
597
+ zap .Int64 ("tableID" , job .tbl .ID ),
598
+ zap .String ("table" , job .tbl .FullName ()),
599
+ )
600
+ logger .Info (fmt .Sprintf ("cancel job because %s" , cancelReason ))
601
+ summary , err := summarizeErr (errors .New (cancelReason ))
602
+ >> >> >> > b7aafa67ec2 (ttl : fix the issue that the TTL jobs are skipped or handled multiple times in one iteration (#59348 ))
583
603
if err != nil {
584
604
logutil .Logger (m .ctx ).Warn ("fail to summarize job" , zap .Error (err ))
585
605
}
@@ -595,7 +615,10 @@ func (m *JobManager) rescheduleJobs(se session.Session, now time.Time) {
595
615
}
596
616
597
617
// if the table of a running job disappears, also cancel it
598
- for _ , job := range m .runningJobs {
618
+ // iterate in reverse so that jobs can be removed safely inside the loop
619
+ for i := len (m .runningJobs ) - 1 ; i >= 0 ; i -- {
620
+ job := m .runningJobs [i ]
621
+
599
622
_ , ok := m .infoSchemaCache .Tables [job .tbl .ID ]
600
623
if ok {
601
624
continue
0 commit comments