@@ -23,8 +23,9 @@ import (
 	"github.com/pingcap/errors"
 	sess "github.com/pingcap/tidb/pkg/ddl/session"
 	"github.com/pingcap/tidb/pkg/kv"
-	"github.com/pingcap/tidb/pkg/session/types"
+	"github.com/pingcap/tidb/pkg/owner"
 	"github.com/pingcap/tidb/pkg/sessionctx"
+	"github.com/pingcap/tidb/pkg/util"
 	"github.com/pingcap/tidb/pkg/util/intest"
 	"github.com/pingcap/tidb/pkg/util/logutil"
 	"go.uber.org/zap"
@@ -79,9 +80,23 @@ func (id HandlerID) String() string {
 	}
 }
 
+// Ensure DDLNotifier implements the owner.Listener interface.
+// The DDLNotifier is started only when the stats owner is elected to ensure consistency.
+// This design is crucial because:
+// 1. The stats handler (priority queue) processes DDLNotifier events in memory.
+// 2. Keeping the stats handler and DDLNotifier on the same node maintains data integrity.
+// 3. It prevents potential race conditions or inconsistencies that could arise from
+//    distributed processing of these events across multiple nodes.
+var _ owner.Listener = (*DDLNotifier)(nil)
+
 // DDLNotifier implements the subscription on DDL events.
 type DDLNotifier struct {
-	ownedSess types.Session
+	// The context is initialized in Start and canceled in Stop and Close.
+	ctx            context.Context
+	cancel         context.CancelFunc
+	wg             util.WaitGroupWrapper
+	sysSessionPool util.SessionPool
+
 	store        Store
 	handlers     map[HandlerID]SchemaChangeHandler
 	pollInterval time.Duration
@@ -90,19 +105,17 @@ type DDLNotifier struct {
 	handlersBitMap uint64
 }
93
- // NewDDLNotifier initializes the global DDLNotifier. It should be called only
94
- // once and before any RegisterHandler call. The ownership of the sctx is passed
95
- // to the DDLNotifier.
108
+ // NewDDLNotifier initializes the global DDLNotifier.
96
109
func NewDDLNotifier (
97
- sess types. Session ,
110
+ sysSessionPool util. SessionPool ,
98
111
store Store ,
99
112
pollInterval time.Duration ,
100
113
) * DDLNotifier {
101
114
return & DDLNotifier {
102
- ownedSess : sess ,
103
- store : store ,
104
- handlers : make (map [HandlerID ]SchemaChangeHandler ),
105
- pollInterval : pollInterval ,
115
+ sysSessionPool : sysSessionPool ,
116
+ store : store ,
117
+ handlers : make (map [HandlerID ]SchemaChangeHandler ),
118
+ pollInterval : pollInterval ,
106
119
}
107
120
}
108
121
@@ -125,13 +138,14 @@ func (n *DDLNotifier) RegisterHandler(id HandlerID, handler SchemaChangeHandler)
 	n.handlers[id] = handler
 }
 
-// Start starts the DDLNotifier. It will block until the context is canceled.
-func (n *DDLNotifier) Start(ctx context.Context) {
+// start starts the DDLNotifier. It will block until the context is canceled.
+// Do not call this function directly. Use owner.Listener interface instead.
+func (n *DDLNotifier) start() {
 	for id := range n.handlers {
 		n.handlersBitMap |= 1 << id
 	}
 
-	ctx = kv.WithInternalSourceType(ctx, kv.InternalDDLNotifier)
+	ctx := kv.WithInternalSourceType(n.ctx, kv.InternalDDLNotifier)
 	ctx = logutil.WithCategory(ctx, "ddl-notifier")
 	ticker := time.NewTicker(n.pollInterval)
 	defer ticker.Stop()
@@ -148,7 +162,14 @@ func (n *DDLNotifier) Start(ctx context.Context) {
 }
 
 func (n *DDLNotifier) processEvents(ctx context.Context) error {
-	changes, err := n.store.List(ctx, sess.NewSession(n.ownedSess))
+	sysSession, err := n.sysSessionPool.Get()
+	if err != nil {
+		return errors.Trace(err)
+	}
+	defer n.sysSessionPool.Put(sysSession)
+
+	session := sess.NewSession(sysSession.(sessionctx.Context))
+	changes, err := n.store.List(ctx, session)
 	if err != nil {
 		return errors.Trace(err)
 	}
@@ -161,7 +182,7 @@ func (n *DDLNotifier) processEvents(ctx context.Context) error {
 		if _, ok := skipHandlers[handlerID]; ok {
 			continue
 		}
-		if err2 := n.processEventForHandler(ctx, change, handlerID, handler); err2 != nil {
+		if err2 := n.processEventForHandler(ctx, session, change, handlerID, handler); err2 != nil {
 			skipHandlers[handlerID] = struct{}{}
 
 			if !goerr.Is(err2, ErrNotReadyRetryLater) {
@@ -187,7 +208,7 @@ func (n *DDLNotifier) processEvents(ctx context.Context) error {
 		if change.processedByFlag == n.handlersBitMap {
 			if err2 := n.store.DeleteAndCommit(
 				ctx,
-				sess.NewSession(n.ownedSess),
+				session,
 				change.ddlJobID,
 				int(change.multiSchemaChangeSeq),
 			); err2 != nil {
@@ -206,6 +227,7 @@ const slowHandlerLogThreshold = time.Second * 5
 
 func (n *DDLNotifier) processEventForHandler(
 	ctx context.Context,
+	session *sess.Session,
 	change *schemaChange,
 	handlerID HandlerID,
 	handler SchemaChangeHandler,
@@ -214,21 +236,19 @@ func (n *DDLNotifier) processEventForHandler(
 		return nil
 	}
 
-	se := sess.NewSession(n.ownedSess)
-
-	if err = se.Begin(ctx); err != nil {
+	if err = session.Begin(ctx); err != nil {
 		return errors.Trace(err)
 	}
 	defer func() {
 		if err == nil {
-			err = errors.Trace(se.Commit(ctx))
+			err = errors.Trace(session.Commit(ctx))
 		} else {
-			se.Rollback()
+			session.Rollback()
 		}
 	}()
 
 	now := time.Now()
-	if err = handler(ctx, n.ownedSess, change.event); err != nil {
+	if err = handler(ctx, session.Context, change.event); err != nil {
 		return errors.Trace(err)
 	}
 	if time.Since(now) > slowHandlerLogThreshold {
@@ -243,7 +263,7 @@ func (n *DDLNotifier) processEventForHandler(
 	newFlag := change.processedByFlag | (1 << handlerID)
 	if err = n.store.UpdateProcessed(
 		ctx,
-		se,
+		session,
 		change.ddlJobID,
 		change.multiSchemaChangeSeq,
 		newFlag,
@@ -255,7 +275,36 @@ func (n *DDLNotifier) processEventForHandler(
 	return nil
 }
 
-// Close releases the resources.
-func (n *DDLNotifier) Close() {
-	n.ownedSess.Close()
+ // Stop stops the background loop.
279
+ // Exposed for testing.
280
+ // Do not call this function directly. Use owner.Listener interface instead.
281
+ func (n * DDLNotifier ) Stop () {
282
+ // If the notifier is not started, the cancel function is nil.
283
+ if n .cancel == nil {
284
+ return
285
+ }
286
+ n .cancel ()
287
+ n .wg .Wait ()
288
+ }
289
+
290
+ // OnBecomeOwner implements the owner.Listener interface.
291
+ // We need to make sure only one DDLNotifier is running at any time.
292
+ func (n * DDLNotifier ) OnBecomeOwner () {
293
+ n .ctx , n .cancel = context .WithCancel (context .Background ())
294
+ n .wg .RunWithRecover (n .start , func (r any ) {
295
+ if r == nil {
296
+ return
297
+ }
298
+ // In unit tests, we want to panic directly to find the root cause.
299
+ if intest .InTest {
300
+ panic (r )
301
+ }
302
+ logutil .BgLogger ().Error ("panic in ddl notifier" , zap .Any ("recover" , r ), zap .Stack ("stack" ))
303
+ })
304
+ }
305
+
306
+ // OnRetireOwner implements the owner.Listener interface.
307
+ // After the owner is retired, we need to stop the DDLNotifier.
308
+ func (n * DDLNotifier ) OnRetireOwner () {
309
+ n .Stop ()
261
310
}
0 commit comments