Skip to content

Commit 0002c1a

Browse files
authored
statistics: fix wrong singleflight implementation for stats' syncload(#52301) (#52382) (#55726)
1 parent caa60c0 commit 0002c1a

File tree

5 files changed

+43
-64
lines changed

5 files changed

+43
-64
lines changed

parser/model/model.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1712,6 +1712,11 @@ type TableItemID struct {
17121712
IsIndex bool
17131713
}
17141714

1715+
// Key is used to generate unique key for TableItemID to use in the syncload
1716+
func (t TableItemID) Key() string {
1717+
return fmt.Sprintf("%d#%d#%t", t.ID, t.TableID, t.IsIndex)
1718+
}
1719+
17151720
// PolicyRefInfo is the struct to refer the placement policy.
17161721
type PolicyRefInfo struct {
17171722
ID int64 `json:"id"`

statistics/handle/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ go_library(
5353
"@com_github_pingcap_tipb//go-tipb",
5454
"@com_github_tikv_client_go_v2//oracle",
5555
"@org_golang_x_exp//slices",
56+
"@org_golang_x_sync//singleflight",
5657
"@org_uber_go_atomic//:atomic",
5758
"@org_uber_go_zap//:zap",
5859
],

statistics/handle/handle.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,6 @@ func NewHandle(ctx, initStatsCtx sessionctx.Context, lease time.Duration, pool s
499499
handle.StatsLoad.SubCtxs = make([]sessionctx.Context, cfg.Performance.StatsLoadConcurrency)
500500
handle.StatsLoad.NeededItemsCh = make(chan *NeededItemTask, cfg.Performance.StatsLoadQueueSize)
501501
handle.StatsLoad.TimeoutItemsCh = make(chan *NeededItemTask, cfg.Performance.StatsLoadQueueSize)
502-
handle.StatsLoad.WorkingColMap = map[model.TableItemID][]chan stmtctx.StatsLoadResult{}
503502
err := handle.RefreshVars()
504503
if err != nil {
505504
return nil, err

statistics/handle/handle_hist.go

Lines changed: 34 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"github.com/pingcap/tidb/util/logutil"
3535
"github.com/pingcap/tidb/util/sqlexec"
3636
"go.uber.org/zap"
37+
"golang.org/x/sync/singleflight"
3738
)
3839

3940
type statsWrapper struct {
@@ -47,7 +48,7 @@ type StatsLoad struct {
4748
SubCtxs []sessionctx.Context
4849
NeededItemsCh chan *NeededItemTask
4950
TimeoutItemsCh chan *NeededItemTask
50-
WorkingColMap map[model.TableItemID][]chan stmtctx.StatsLoadResult
51+
Singleflight singleflight.Group
5152
}
5253

5354
// NeededItemTask represents one needed column/indices with expire time.
@@ -235,49 +236,63 @@ func (h *Handle) HandleOneTask(sctx sessionctx.Context, lastTask *NeededItemTask
235236
} else {
236237
task = lastTask
237238
}
238-
return h.handleOneItemTask(task, readerCtx, ctx)
239+
resultChan := h.StatsLoad.Singleflight.DoChan(task.TableItemID.Key(), func() (any, error) {
240+
return h.handleOneItemTask(task, readerCtx, ctx)
241+
})
242+
timeout := time.Until(task.ToTimeout)
243+
select {
244+
case result := <-resultChan:
245+
if result.Err == nil {
246+
slr := result.Val.(*stmtctx.StatsLoadResult)
247+
if slr.Error != nil {
248+
return task, slr.Error
249+
}
250+
task.ResultCh <- *slr
251+
return nil, nil
252+
}
253+
return task, result.Err
254+
case <-time.After(timeout):
255+
return task, nil
256+
}
239257
}
240258

241-
func (h *Handle) handleOneItemTask(task *NeededItemTask, readerCtx *StatsReaderContext, ctx sqlexec.RestrictedSQLExecutor) (*NeededItemTask, error) {
242-
result := stmtctx.StatsLoadResult{Item: task.TableItemID}
259+
func (h *Handle) handleOneItemTask(task *NeededItemTask, readerCtx *StatsReaderContext, ctx sqlexec.RestrictedSQLExecutor) (result *stmtctx.StatsLoadResult, err error) {
260+
defer func() {
261+
// recover for each task, worker keeps working
262+
if r := recover(); r != nil {
263+
logutil.BgLogger().Error("handleOneItemTask panicked", zap.Any("recover", r), zap.Stack("stack"))
264+
err = errors.Errorf("stats loading panicked: %v", r)
265+
}
266+
}()
267+
result = &stmtctx.StatsLoadResult{Item: task.TableItemID}
243268
item := result.Item
244269
oldCache := h.statsCache.Load().(statsCache)
245270
tbl, ok := oldCache.Get(item.TableID)
246271
if !ok {
247-
h.writeToResultChan(task.ResultCh, result)
248-
return nil, nil
272+
return result, nil
249273
}
250-
var err error
251274
wrapper := &statsWrapper{}
252275
if item.IsIndex {
253276
index, ok := tbl.Indices[item.ID]
254277
if !ok || index.IsFullLoad() {
255-
h.writeToResultChan(task.ResultCh, result)
256-
return nil, nil
278+
return result, nil
257279
}
258280
wrapper.idx = index
259281
} else {
260282
col, ok := tbl.Columns[item.ID]
261283
if !ok || col.IsFullLoad() {
262-
h.writeToResultChan(task.ResultCh, result)
263-
return nil, nil
284+
return result, nil
264285
}
265286
wrapper.col = col
266287
}
267-
// to avoid duplicated handling in concurrent scenario
268-
working := h.setWorking(result.Item, task.ResultCh)
269-
if !working {
270-
h.writeToResultChan(task.ResultCh, result)
271-
return nil, nil
272-
}
273288
// refresh statsReader to get latest stats
274289
h.loadFreshStatsReader(readerCtx, ctx)
275290
t := time.Now()
276291
needUpdate := false
277292
wrapper, err = h.readStatsForOneItem(item, wrapper, readerCtx.reader)
278293
if err != nil {
279294
result.Error = err
280-
return task, err
295+
return result, err
281296
}
282297
if item.IsIndex {
283298
if wrapper.idx != nil {
@@ -290,9 +305,8 @@ func (h *Handle) handleOneItemTask(task *NeededItemTask, readerCtx *StatsReaderC
290305
}
291306
metrics.ReadStatsHistogram.Observe(float64(time.Since(t).Milliseconds()))
292307
if needUpdate && h.updateCachedItem(item, wrapper.col, wrapper.idx) {
293-
h.writeToResultChan(task.ResultCh, result)
308+
return result, nil
294309
}
295-
h.finishWorking(result)
296310
return nil, nil
297311
}
298312

@@ -509,32 +523,3 @@ func (h *Handle) updateCachedItem(item model.TableItemID, colHist *statistics.Co
509523
}
510524
return h.updateStatsCache(oldCache.update([]*statistics.Table{tbl}, nil, oldCache.version, WithTableStatsByQuery()))
511525
}
512-
513-
func (h *Handle) setWorking(item model.TableItemID, resultCh chan stmtctx.StatsLoadResult) bool {
514-
h.StatsLoad.Lock()
515-
defer h.StatsLoad.Unlock()
516-
chList, ok := h.StatsLoad.WorkingColMap[item]
517-
if ok {
518-
if chList[0] == resultCh {
519-
return true // just return for duplicate setWorking
520-
}
521-
h.StatsLoad.WorkingColMap[item] = append(chList, resultCh)
522-
return false
523-
}
524-
chList = []chan stmtctx.StatsLoadResult{}
525-
chList = append(chList, resultCh)
526-
h.StatsLoad.WorkingColMap[item] = chList
527-
return true
528-
}
529-
530-
func (h *Handle) finishWorking(result stmtctx.StatsLoadResult) {
531-
h.StatsLoad.Lock()
532-
defer h.StatsLoad.Unlock()
533-
if chList, ok := h.StatsLoad.WorkingColMap[result.Item]; ok {
534-
list := chList[1:]
535-
for _, ch := range list {
536-
h.writeToResultChan(ch, result)
537-
}
538-
}
539-
delete(h.StatsLoad.WorkingColMap, result.Item)
540-
}

statistics/handle/handle_hist_test.go

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -209,26 +209,15 @@ func TestConcurrentLoadHistWithPanicAndFail(t *testing.T) {
209209
task1, err1 := h.HandleOneTask(testKit.Session().(sessionctx.Context), nil, readerCtx, testKit.Session().(sqlexec.RestrictedSQLExecutor), exitCh)
210210
require.Error(t, err1)
211211
require.NotNil(t, task1)
212-
list, ok := h.StatsLoad.WorkingColMap[neededColumns[0]]
213-
require.True(t, ok)
214-
require.Len(t, list, 1)
215-
require.Equal(t, stmtCtx1.StatsLoad.ResultCh, list[0])
216-
217-
task2, err2 := h.HandleOneTask(testKit.Session().(sessionctx.Context), nil, readerCtx, testKit.Session().(sqlexec.RestrictedSQLExecutor), exitCh)
218-
require.Nil(t, err2)
219-
require.Nil(t, task2)
220-
list, ok = h.StatsLoad.WorkingColMap[neededColumns[0]]
221-
require.True(t, ok)
222-
require.Len(t, list, 2)
223-
require.Equal(t, stmtCtx2.StatsLoad.ResultCh, list[1])
224212

225213
require.NoError(t, failpoint.Disable(fp.failPath))
226214
task3, err3 := h.HandleOneTask(testKit.Session().(sessionctx.Context), task1, readerCtx, testKit.Session().(sqlexec.RestrictedSQLExecutor), exitCh)
227215
require.NoError(t, err3)
228216
require.Nil(t, task3)
229217

230-
require.Len(t, stmtCtx1.StatsLoad.ResultCh, 1)
231-
require.Len(t, stmtCtx2.StatsLoad.ResultCh, 1)
218+
task, err3 := h.HandleOneTask(testKit.Session().(sessionctx.Context), nil, readerCtx, testKit.Session().(sqlexec.RestrictedSQLExecutor), exitCh)
219+
require.NoError(t, err3)
220+
require.Nil(t, task)
232221

233222
rs1, ok1 := <-stmtCtx1.StatsLoad.ResultCh
234223
require.True(t, ok1)

0 commit comments

Comments
 (0)