@@ -224,10 +224,7 @@ func (m *JobManager) jobLoop() error {
224
224
// Job Schedule loop:
225
225
case <- updateJobHeartBeatTicker :
226
226
updateHeartBeatCtx , cancel := context .WithTimeout (m .ctx , ttlInternalSQLTimeout )
227
- err = m .updateHeartBeat (updateHeartBeatCtx , se , now )
228
- if err != nil {
229
- logutil .Logger (m .ctx ).Warn ("fail to update job heart beat" , zap .Error (err ))
230
- }
227
+ m .updateHeartBeat (updateHeartBeatCtx , se , now )
231
228
cancel ()
232
229
case <- jobCheckTicker :
233
230
m .checkFinishedJob (se )
@@ -270,10 +267,7 @@ func (m *JobManager) jobLoop() error {
270
267
m .taskManager .resizeWorkersWithSysVar ()
271
268
case <- updateTaskHeartBeatTicker :
272
269
updateHeartBeatCtx , cancel := context .WithTimeout (m .ctx , ttlInternalSQLTimeout )
273
- err = m .taskManager .updateHeartBeat (updateHeartBeatCtx , se , now )
274
- if err != nil {
275
- logutil .Logger (m .ctx ).Warn ("fail to update task heart beat" , zap .Error (err ))
276
- }
270
+ m .taskManager .updateHeartBeat (updateHeartBeatCtx , se , now )
277
271
cancel ()
278
272
case <- checkScanTaskFinishedTicker :
279
273
if m .taskManager .handleScanFinishedTask () {
@@ -903,29 +897,42 @@ func (m *JobManager) appendLockedJob(id string, se session.Session, createTime t
903
897
}
904
898
905
899
// updateHeartBeat updates the heartbeat for all task with current instance as owner
906
- func (m * JobManager ) updateHeartBeat (ctx context.Context , se session.Session , now time.Time ) error {
900
+ func (m * JobManager ) updateHeartBeat (ctx context.Context , se session.Session , now time.Time ) {
907
901
for _ , job := range m .localJobs () {
908
- if job .createTime .Add (ttlJobTimeout ).Before (now ) {
909
- logutil .Logger (m .ctx ).Info ("job is timeout" , zap .String ("jobID" , job .id ))
910
- summary , err := summarizeErr (errors .New ("job is timeout" ))
911
- if err != nil {
912
- logutil .Logger (m .ctx ).Warn ("fail to summarize job" , zap .Error (err ))
913
- }
914
- err = job .finish (se , now , summary )
915
- if err != nil {
916
- logutil .Logger (m .ctx ).Warn ("fail to finish job" , zap .Error (err ))
917
- continue
918
- }
919
- m .removeJob (job )
902
+ err := m .updateHeartBeatForJob (ctx , se , now , job )
903
+ if err != nil {
904
+ logutil .Logger (m .ctx ).Warn ("fail to update heartbeat for job" , zap .Error (err ), zap .String ("jobID" , job .id ))
920
905
}
906
+ }
907
+ }
921
908
922
- intest .Assert (se .GetSessionVars ().TimeZone .String () == now .Location ().String ())
923
- sql , args := updateHeartBeatSQL (job .tbl .ID , now , m .id )
924
- _ , err := se .ExecuteSQL (ctx , sql , args ... )
909
+ func (m * JobManager ) updateHeartBeatForJob (ctx context.Context , se session.Session , now time.Time , job * ttlJob ) error {
910
+ if job .createTime .Add (ttlJobTimeout ).Before (now ) {
911
+ logutil .Logger (m .ctx ).Info ("job is timeout" , zap .String ("jobID" , job .id ))
912
+ summary , err := summarizeErr (errors .New ("job is timeout" ))
925
913
if err != nil {
926
- return errors .Wrapf (err , "execute sql: %s" , sql )
914
+ return errors .Wrapf (err , "fail to summarize job" )
927
915
}
916
+ err = job .finish (se , now , summary )
917
+ if err != nil {
918
+ return errors .Wrapf (err , "fail to finish job" )
919
+ }
920
+ m .removeJob (job )
921
+ return nil
928
922
}
923
+
924
+ intest .Assert (se .GetSessionVars ().TimeZone .String () == now .Location ().String ())
925
+ sql , args := updateHeartBeatSQL (job .tbl .ID , now , m .id )
926
+ _ , err := se .ExecuteSQL (ctx , sql , args ... )
927
+ if err != nil {
928
+ return errors .Wrapf (err , "execute sql: %s" , sql )
929
+ }
930
+
931
+ if se .GetSessionVars ().StmtCtx .AffectedRows () != 1 {
932
+ return errors .Errorf ("fail to update job heartbeat, maybe the owner is not myself (%s), affected rows: %d" ,
933
+ m .id , se .GetSessionVars ().StmtCtx .AffectedRows ())
934
+ }
935
+
929
936
return nil
930
937
}
931
938
0 commit comments