Skip to content

Commit da5eed3

Browse files
authored
statistics: correct behavior of non-lite InitStats and stats sync load of no stats column (#57803) (#59590)
close #57804
1 parent 877671e commit da5eed3

File tree

7 files changed

+96
-54
lines changed

7 files changed

+96
-54
lines changed

pkg/statistics/handle/BUILD.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ go_test(
5959
embed = [":handle"],
6060
flaky = True,
6161
race = "on",
62-
shard_count = 11,
62+
shard_count = 12,
6363
deps = [
6464
"//pkg/config",
6565
"//pkg/parser/model",

pkg/statistics/handle/bootstrap.go

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,16 @@ func (h *Handle) initStatsMeta(is infoschema.InfoSchema) (util.StatsCache, error
117117
return tables, nil
118118
}
119119

120+
// initStatsHistogramsSQLGen generates the SQL to load all stats_histograms records.
// The statement selects a fixed column list from mysql.stats_histograms and always
// orders the result by table_id. Readers of the result set access columns by
// position (e.g. row.GetInt64(3) for distinct_count), so the order of the selected
// columns must stay in sync with those readers.
//
// When isPaging is false the statement takes no arguments and is not filtered.
// When isPaging is true the statement is limited to the half-open range
// "table_id >= %? and table_id < %?", so the caller must pass two arguments:
// the lower bound first, then the upper bound.
121+
func initStatsHistogramsSQLGen(isPaging bool) string {
122+
selectPrefix := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl) */ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms"
123+
orderSuffix := " order by table_id"
124+
if !isPaging {
125+
return selectPrefix + orderSuffix
126+
}
// Paging variant: same projection and ordering, plus the table_id range filter.
127+
return selectPrefix + " where table_id >= %? and table_id < %?" + orderSuffix
128+
}
129+
120130
func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache util.StatsCache, iter *chunk.Iterator4Chunk) {
121131
var table *statistics.Table
122132
for row := iter.Begin(); row != iter.End(); row = iter.Next() {
@@ -137,9 +147,9 @@ func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache u
137147
ndv := row.GetInt64(3)
138148
version := row.GetUint64(4)
139149
nullCount := row.GetInt64(5)
140-
statsVer := row.GetInt64(7)
141-
flag := row.GetInt64(9)
142-
lastAnalyzePos := row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))
150+
statsVer := row.GetInt64(8)
151+
flag := row.GetInt64(10)
152+
lastAnalyzePos := row.GetDatum(11, types.NewFieldType(mysql.TypeBlob))
143153
tbl, _ := h.TableInfoByID(is, table.PhysicalID)
144154
if isIndex > 0 {
145155
var idxInfo *model.IndexInfo
@@ -176,7 +186,7 @@ func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache u
176186
if colInfo == nil {
177187
continue
178188
}
179-
hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, row.GetInt64(6))
189+
hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, row.GetInt64(7))
180190
hist.Correlation = row.GetFloat64(8)
181191
col := &statistics.Column{
182192
Histogram: *hist,
@@ -288,7 +298,9 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache util.
288298
StatsVer: statsVer,
289299
}
290300
// primary key column has no stats info, because primary key's is_index is false. so it cannot load the topn
291-
col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
301+
if col.StatsAvailable() {
302+
col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
303+
}
292304
lastAnalyzePos.Copy(&col.LastAnalyzePos)
293305
table.Columns[hist.ID] = col
294306
}
@@ -299,7 +311,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache util.
299311
}
300312

301313
func (h *Handle) initStatsHistogramsLite(is infoschema.InfoSchema, cache util.StatsCache) error {
302-
sql := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms order by table_id"
314+
sql := initStatsHistogramsSQLGen(false)
303315
rc, err := util.Exec(h.initStatsCtx, sql)
304316
if err != nil {
305317
return errors.Trace(err)
@@ -322,7 +334,7 @@ func (h *Handle) initStatsHistogramsLite(is infoschema.InfoSchema, cache util.St
322334
}
323335

324336
func (h *Handle) initStatsHistograms(is infoschema.InfoSchema, cache util.StatsCache) error {
325-
sql := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms order by table_id"
337+
sql := initStatsHistogramsSQLGen(false)
326338
rc, err := util.Exec(h.initStatsCtx, sql)
327339
if err != nil {
328340
return errors.Trace(err)
@@ -359,10 +371,7 @@ func (h *Handle) initStatsHistogramsByPaging(is infoschema.InfoSchema, cache uti
359371
}()
360372

361373
sctx := se.(sessionctx.Context)
362-
// Why do we need to add `is_index=1` in the SQL?
363-
// because it is aligned to the `initStatsTopN` function, which only loads the topn of the index too.
364-
// the other will be loaded by sync load.
365-
sql := "select HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms where table_id >= %? and table_id < %? and is_index=1"
374+
sql := initStatsHistogramsSQLGen(true)
366375
rc, err := util.Exec(sctx, sql, task.StartTid, task.EndTid)
367376
if err != nil {
368377
return errors.Trace(err)
@@ -676,7 +685,7 @@ func (h *Handle) initStatsBuckets(cache util.StatsCache, totalMemory uint64) err
676685
return errors.Trace(err)
677686
}
678687
} else {
679-
sql := "select /*+ ORDER_INDEX(mysql.stats_buckets,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets order by table_id, is_index, hist_id, bucket_id"
688+
sql := "select /*+ ORDER_INDEX(mysql.stats_buckets,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where is_index=1 order by table_id, is_index, hist_id, bucket_id"
680689
rc, err := util.Exec(h.initStatsCtx, sql)
681690
if err != nil {
682691
return errors.Trace(err)
@@ -729,7 +738,7 @@ func (h *Handle) initStatsBucketsByPaging(cache util.StatsCache, task initstats.
729738
}
730739
}()
731740
sctx := se.(sessionctx.Context)
732-
sql := "select HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where table_id >= %? and table_id < %? order by table_id, is_index, hist_id, bucket_id"
741+
sql := "select HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where is_index = 1 and table_id >= %? and table_id < %? order by table_id, is_index, hist_id, bucket_id"
733742
rc, err := util.Exec(sctx, sql, task.StartTid, task.EndTid)
734743
if err != nil {
735744
return errors.Trace(err)

pkg/statistics/handle/handle_hist.go

Lines changed: 5 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
"github.com/pingcap/errors"
2424
"github.com/pingcap/failpoint"
2525
"github.com/pingcap/tidb/pkg/config"
26-
"github.com/pingcap/tidb/pkg/infoschema"
2726
"github.com/pingcap/tidb/pkg/metrics"
2827
"github.com/pingcap/tidb/pkg/parser/model"
2928
"github.com/pingcap/tidb/pkg/parser/mysql"
@@ -178,7 +177,7 @@ func (h *Handle) removeHistLoadedColumns(neededItems []model.TableItemID) []mode
178177
continue
179178
}
180179
colHist, ok := tbl.Columns[item.ID]
181-
if (ok && colHist.IsStatsInitialized() && !colHist.IsFullLoad()) || !ok {
180+
if ok && colHist.IsStatsInitialized() && !colHist.IsFullLoad() {
182181
remainedItems = append(remainedItems, item)
183182
}
184183
}
@@ -352,7 +351,7 @@ func (h *Handle) handleOneItemTask(task *NeededItemTask) (err error) {
352351
var errGetHistMeta = errors.New("fail to get stats version for this histogram")
353352

354353
// readStatsForOneItem reads hist for one column/index, TODO load data via kv-get asynchronously
355-
func (h *Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableItemID, w *statsWrapper) (*statsWrapper, error) {
354+
func (*Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableItemID, w *statsWrapper) (*statsWrapper, error) {
356355
failpoint.Inject("mockReadStatsForOnePanic", nil)
357356
failpoint.Inject("mockReadStatsForOneFail", func(val failpoint.Value) {
358357
if val.(bool) {
@@ -374,41 +373,9 @@ func (h *Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableIt
374373
return nil, errors.Trace(err)
375374
}
376375
} else {
377-
if c == nil {
378-
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
379-
tbl, ok := h.TableInfoByID(is, item.TableID)
380-
if !ok {
381-
return nil, errors.New("no table")
382-
}
383-
var colInfo *model.ColumnInfo
384-
for _, col := range tbl.Meta().Columns {
385-
if col.ID == item.ID {
386-
colInfo = col
387-
break
388-
}
389-
}
390-
if colInfo == nil {
391-
return nil, errors.New("no column")
392-
}
393-
hg, _, _, _, err = storage.HistMetaFromStorageWithHighPriority(sctx, &item, colInfo)
394-
if err != nil {
395-
return nil, err
396-
}
397-
if hg != nil {
398-
hg, err = storage.HistogramFromStorage(sctx, item.TableID, item.ID, &colInfo.FieldType, hg.NDV, int(isIndexFlag), hg.LastUpdateVersion, hg.NullCount, hg.TotColSize, hg.Correlation)
399-
if err != nil {
400-
return nil, errors.Trace(err)
401-
}
402-
}
403-
c = &statistics.Column{
404-
Info: colInfo,
405-
IsHandle: tbl.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
406-
}
407-
} else {
408-
hg, err = storage.HistogramFromStorage(sctx, item.TableID, item.ID, &c.Info.FieldType, c.Histogram.NDV, int(isIndexFlag), c.LastUpdateVersion, c.NullCount, c.TotColSize, c.Correlation)
409-
if err != nil {
410-
return nil, errors.Trace(err)
411-
}
376+
hg, err = storage.HistogramFromStorage(sctx, item.TableID, item.ID, &c.Info.FieldType, c.Histogram.NDV, int(isIndexFlag), c.LastUpdateVersion, c.NullCount, c.TotColSize, c.Correlation)
377+
if err != nil {
378+
return nil, errors.Trace(err)
412379
}
413380
}
414381
var cms *statistics.CMSketch

pkg/statistics/handle/handle_hist_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,3 +403,55 @@ func TestSendLoadRequestsWaitTooLong(t *testing.T) {
403403
require.Error(t, rs1.Err)
404404
}
405405
}
406+
407+
// TestSyncLoadOnObjectWhichCanNotFoundInStorage exercises stats loading for a table
// where only some columns have been analyzed.
//
// Steps, as performed below:
//  1. Create t(a, b, c) with primary key a, insert three rows, analyze only a and b,
//     run InitStatsLite, and expect exactly two column entries in the cached stats.
//  2. Add column d, handle its DDL event, update the handle, and expect three entries.
//  3. Run a query filtering on a, b, c and d; expect the entries checked for a, b and d
//     to be fully loaded while the entry for c stays nil.
//  4. Analyze a, b and c, run InitStatsLite again, repeat the query, and expect all
//     four entries to be fully loaded.
func TestSyncLoadOnObjectWhichCanNotFoundInStorage(t *testing.T) {
408+
store, dom := testkit.CreateMockStoreAndDomain(t)
409+
tk := testkit.NewTestKit(t, store)
410+
tk.MustExec("use test")
411+
tk.MustExec("create table t(a int, b int, c int, primary key(a))")
412+
h := dom.StatsHandle()
413+
// Skip create table event.
414+
<-h.DDLEventCh()
415+
tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)")
416+
// Column c is deliberately left out of the analyze statement.
tk.MustExec("analyze table t columns a, b")
417+
tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
418+
require.NoError(t, h.InitStatsLite(dom.InfoSchema()))
419+
// err here is still the result of the TableByName call above.
require.NoError(t, err)
420+
require.NotNil(t, tbl)
421+
tblInfo := tbl.Meta()
422+
statsTbl, ok := h.Get(tblInfo.ID)
423+
require.True(t, ok)
424+
// Only the two analyzed columns are expected in the cache at this point.
require.Equal(t, 2, len(statsTbl.Columns))
425+
// Add column d and let HandleDDLEvent process the resulting event.
// NOTE(review): the earlier wording of this comment mentioned a second DDL that is
// not handled by handleDDLEvent; only this one DDL statement appears in the test.
426+
tk.MustExec("alter table t add column d int default 2")
427+
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
428+
require.NoError(t, h.Update(dom.InfoSchema()))
429+
tbl, err = dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
430+
require.NoError(t, err)
431+
require.NotNil(t, tbl)
432+
tblInfo = tbl.Meta()
433+
statsTbl, ok = h.Get(tblInfo.ID)
434+
require.True(t, ok)
435+
require.Equal(t, 3, len(statsTbl.Columns))
436+
437+
// Try sync load.
438+
tk.MustExec("select * from t where a >= 1 and b = 2 and c = 3 and d = 4")
439+
statsTbl, ok = h.Get(tblInfo.ID)
440+
require.True(t, ok)
// tblInfo.Columns[0..3] are presumably a, b, c, d in declaration order — confirm.
441+
require.True(t, statsTbl.Columns[tblInfo.Columns[0].ID].IsFullLoad())
442+
require.True(t, statsTbl.Columns[tblInfo.Columns[1].ID].IsFullLoad())
443+
require.True(t, statsTbl.Columns[tblInfo.Columns[3].ID].IsFullLoad())
444+
// The never-analyzed column must still have no entry after the query.
require.Nil(t, statsTbl.Columns[tblInfo.Columns[2].ID])
445+
446+
// Analyze c then test sync load again
447+
tk.MustExec("analyze table t columns a, b, c")
448+
require.NoError(t, h.InitStatsLite(dom.InfoSchema()))
449+
tk.MustExec("select * from t where a >= 1 and b = 2 and c = 3 and d = 4")
450+
statsTbl, ok = h.Get(tblInfo.ID)
451+
require.True(t, ok)
452+
// a, b, d's status is not changed.
453+
require.True(t, statsTbl.Columns[tblInfo.Columns[0].ID].IsFullLoad())
454+
require.True(t, statsTbl.Columns[tblInfo.Columns[1].ID].IsFullLoad())
455+
require.True(t, statsTbl.Columns[tblInfo.Columns[3].ID].IsFullLoad())
456+
// c now has an entry as well and is expected to be fully loaded.
require.True(t, statsTbl.Columns[tblInfo.Columns[2].ID].IsFullLoad())
457+
}

pkg/statistics/handle/handletest/handle_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1606,7 +1606,12 @@ func TestInitStatsLite(t *testing.T) {
16061606
statsTbl1 := h.GetTableStats(tblInfo)
16071607
checkAllEvicted(t, statsTbl1)
16081608
internal.AssertTableEqual(t, statsTbl0, statsTbl1)
1609-
1609+
for _, col := range statsTbl1.Columns {
1610+
require.Equal(t, int64(statistics.Version2), col.StatsVer)
1611+
}
1612+
for _, idx := range statsTbl1.Indices {
1613+
require.Equal(t, int64(statistics.Version2), idx.StatsVer)
1614+
}
16101615
// async stats load
16111616
tk.MustExec("set @@tidb_stats_load_sync_wait = 0")
16121617
tk.MustExec("explain select * from t where b > 1")

pkg/statistics/handle/handletest/initstats/load_stats_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ func testConcurrentlyInitStats(t *testing.T) {
8686
tk.MustQuery(fmt.Sprintf("explain select * from t%v where b = 1", i)).CheckNotContain("pseudo")
8787
}
8888
for i := 1; i < 10; i++ {
89-
tk.MustQuery(fmt.Sprintf("explain select * from t%v where c = 1", i)).CheckNotContain("pseudo")
89+
tk.MustQuery(fmt.Sprintf("explain select * from t%v where c >= 1", i)).CheckNotContain("pseudo")
9090
}
9191
for i := 1; i < 10; i++ {
9292
tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr(fmt.Sprintf("t%v", i)))

pkg/statistics/handle/handletest/statstest/stats_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,9 @@ func TestInitStats(t *testing.T) {
270270
require.NoError(t, h.Update(is))
271271
// Index and pk are loaded.
272272
needed := fmt.Sprintf(`Table:%v RealtimeCount:6
273+
column:1 ndv:6 totColSize:0
274+
column:2 ndv:6 totColSize:6
275+
column:3 ndv:6 totColSize:6
273276
index:1 ndv:6
274277
num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0
275278
num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0
@@ -312,6 +315,12 @@ func TestInitStats2(t *testing.T) {
312315
h.Clear()
313316
require.NoError(t, h.Update(is))
314317
table1 := h.GetTableStats(tbl.Meta())
318+
// stats of pk will be loaded.
319+
require.Equal(t, true, table0.Columns[1].IsAllEvicted())
320+
require.Equal(t, true, table1.Columns[1].IsFullLoad())
321+
delete(table0.Columns, 1)
322+
delete(table1.Columns, 1)
323+
// result part is not changed.
315324
internal.AssertTableEqual(t, table0, table1)
316325
h.SetLease(0)
317326
}

0 commit comments

Comments
 (0)