Skip to content

Commit 21e9d3c

Browse files
planner, statistics: use the correct column ID when recording stats loading status (#52208)
close #52207
1 parent b4c8b52 commit 21e9d3c

14 files changed

+178
-60
lines changed

build/nogo_config.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@
175175
"fieldalignment": {
176176
"exclude_files": {
177177
"pkg/parser/parser.go": "parser/parser.go code",
178+
"pkg/statistics/table.go": "disable this limitation that prevents us from splitting struct fields for clarity",
178179
"external/": "no need to vet third party code",
179180
".*_generated\\.go$": "ignore generated code",
180181
".*mock.go$": "ignore generated code",

pkg/planner/cardinality/cross_estimation.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ func crossEstimateRowCount(sctx context.PlanContext,
139139
if col == nil || len(path.AccessConds) > 0 {
140140
return 0, false, corr
141141
}
142-
colID := col.UniqueID
142+
colUniqueID := col.UniqueID
143143
if corr < 0 {
144144
desc = !desc
145145
}
@@ -152,11 +152,11 @@ func crossEstimateRowCount(sctx context.PlanContext,
152152
return 0, err == nil, corr
153153
}
154154
idxID := int64(-1)
155-
idxIDs, idxExists := dsStatsInfo.HistColl.ColID2IdxIDs[colID]
155+
idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colUniqueID]
156156
if idxExists && len(idxIDs) > 0 {
157157
idxID = idxIDs[0]
158158
}
159-
rangeCounts, ok := getColumnRangeCounts(sctx, colID, ranges, dsTableStats.HistColl, idxID)
159+
rangeCounts, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
160160
if !ok {
161161
return 0, false, corr
162162
}
@@ -168,7 +168,7 @@ func crossEstimateRowCount(sctx context.PlanContext,
168168
if idxExists {
169169
rangeCount, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges)
170170
} else {
171-
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colID, convertedRanges)
171+
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges)
172172
}
173173
if err != nil {
174174
return 0, false, corr

pkg/planner/cardinality/row_count_column.go

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,27 @@ func init() {
3333
}
3434

3535
// GetRowCountByColumnRanges estimates the row count by a slice of Range.
36-
func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colID int64, colRanges []*ranger.Range) (result float64, err error) {
36+
func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, colRanges []*ranger.Range) (result float64, err error) {
3737
var name string
3838
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
3939
debugtrace.EnterContextCommon(sctx)
40-
debugTraceGetRowCountInput(sctx, colID, colRanges)
40+
debugTraceGetRowCountInput(sctx, colUniqueID, colRanges)
4141
defer func() {
4242
debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
4343
debugtrace.LeaveContextCommon(sctx)
4444
}()
4545
}
4646
sc := sctx.GetSessionVars().StmtCtx
47-
c, ok := coll.Columns[colID]
48-
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
47+
c, ok := coll.Columns[colUniqueID]
48+
colInfoID := colUniqueID
49+
if len(coll.UniqueID2colInfoID) > 0 {
50+
colInfoID = coll.UniqueID2colInfoID[colUniqueID]
51+
}
52+
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
4953
if c != nil && c.Info != nil {
5054
name = c.Info.Name.O
5155
}
52-
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colID) {
56+
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
5357
result, err = getPseudoRowCountByColumnRanges(sc.TypeCtx(), float64(coll.RealtimeCount), colRanges, 0)
5458
if err == nil && sc.EnableOptimizerCETrace && ok {
5559
ceTraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats-Pseudo", uint64(result))
@@ -71,23 +75,27 @@ func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistCo
7175
}
7276

7377
// GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.
74-
func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error) {
78+
func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.HistColl, colUniqueID int64, intRanges []*ranger.Range) (result float64, err error) {
7579
var name string
7680
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
7781
debugtrace.EnterContextCommon(sctx)
78-
debugTraceGetRowCountInput(sctx, colID, intRanges)
82+
debugTraceGetRowCountInput(sctx, colUniqueID, intRanges)
7983
defer func() {
8084
debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
8185
debugtrace.LeaveContextCommon(sctx)
8286
}()
8387
}
8488
sc := sctx.GetSessionVars().StmtCtx
85-
c, ok := coll.Columns[colID]
86-
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
89+
c, ok := coll.Columns[colUniqueID]
90+
colInfoID := colUniqueID
91+
if len(coll.UniqueID2colInfoID) > 0 {
92+
colInfoID = coll.UniqueID2colInfoID[colUniqueID]
93+
}
94+
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID)
8795
if c != nil && c.Info != nil {
8896
name = c.Info.Name.O
8997
}
90-
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colID) {
98+
if statistics.ColumnStatsIsInvalid(c, sctx, coll, colUniqueID) {
9199
if len(intRanges) == 0 {
92100
return 0, nil
93101
}

pkg/planner/cardinality/row_count_index.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -170,19 +170,19 @@ func getIndexRowCountForStatsV1(sctx context.PlanContext, coll *statistics.HistC
170170
}
171171
var count float64
172172
var err error
173-
colIDs := coll.Idx2ColumnIDs[idxID]
174-
var colID int64
175-
if rangePosition >= len(colIDs) {
176-
colID = -1
173+
colUniqueIDs := coll.Idx2ColUniqueIDs[idxID]
174+
var colUniqueID int64
175+
if rangePosition >= len(colUniqueIDs) {
176+
colUniqueID = -1
177177
} else {
178-
colID = colIDs[rangePosition]
178+
colUniqueID = colUniqueIDs[rangePosition]
179179
}
180180
// prefer index stats over column stats
181-
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 {
181+
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
182182
idxID := idxIDs[0]
183183
count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang})
184184
} else {
185-
count, err = GetRowCountByColumnRanges(sctx, coll, colID, []*ranger.Range{&rang})
185+
count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang})
186186
}
187187
if err != nil {
188188
return 0, errors.Trace(err)
@@ -422,7 +422,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll
422422
Collators: make([]collate.Collator, 1),
423423
},
424424
}
425-
colsIDs := coll.Idx2ColumnIDs[idx.Histogram.ID]
425+
colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID]
426426
singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal))
427427
// The following code uses Exponential Backoff to reduce the impact of the independence assumption. It works like:
428428
// 1. Calc the selectivity of each column.
@@ -449,7 +449,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll
449449
count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan)
450450
selectivity = count / float64(coll.RealtimeCount)
451451
}
452-
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
452+
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 {
453453
// Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call
454454
// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This
455455
// check avoids infinite recursion.

pkg/planner/cardinality/selectivity.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ func Selectivity(
182182
})
183183
continue
184184
}
185-
idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColumnIDs[id], id2Paths[idxStats.ID])
185+
idxCols := findPrefixOfIndexByCol(ctx, extractedCols, coll.Idx2ColUniqueIDs[id], id2Paths[idxStats.ID])
186186
if len(idxCols) > 0 {
187187
lengths := make([]int, 0, len(idxCols))
188188
for i := 0; i < len(idxCols) && i < len(idxStats.Info.Columns); i++ {
@@ -919,7 +919,7 @@ func findAvailableStatsForCol(sctx context.PlanContext, coll *statistics.HistCol
919919
return false, uniqueID
920920
}
921921
// try to find available stats in single column index stats (except for prefix index)
922-
for idxStatsIdx, cols := range coll.Idx2ColumnIDs {
922+
for idxStatsIdx, cols := range coll.Idx2ColUniqueIDs {
923923
if len(cols) == 1 && cols[0] == uniqueID {
924924
idxStats := coll.Indices[idxStatsIdx]
925925
if !statistics.IndexStatsIsInvalid(sctx, idxStats, coll, idxStatsIdx) &&
@@ -968,7 +968,7 @@ func getEqualCondSelectivity(sctx context.PlanContext, coll *statistics.HistColl
968968
return outOfRangeEQSelectivity(sctx, idx.NDV, realtimeCnt, int64(idx.TotalRowCount())), nil
969969
}
970970
// The equal condition only uses prefix columns of the index.
971-
colIDs := coll.Idx2ColumnIDs[idx.ID]
971+
colIDs := coll.Idx2ColUniqueIDs[idx.ID]
972972
var ndv int64
973973
for i, colID := range colIDs {
974974
if i >= usedColsLen {
@@ -1050,7 +1050,7 @@ func crossValidationSelectivity(
10501050
}()
10511051
}
10521052
minRowCount = math.MaxFloat64
1053-
cols := coll.Idx2ColumnIDs[idx.ID]
1053+
cols := coll.Idx2ColUniqueIDs[idx.ID]
10541054
crossValidationSelectivity = 1.0
10551055
totalRowCount := idx.TotalRowCount()
10561056
for i, colID := range cols {

pkg/planner/cardinality/selectivity_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -893,8 +893,8 @@ func generateMapsForMockStatsTbl(statsTbl *statistics.Table) {
893893
for _, idxIDs := range colID2IdxIDs {
894894
slices.Sort(idxIDs)
895895
}
896-
statsTbl.Idx2ColumnIDs = idx2Columns
897-
statsTbl.ColID2IdxIDs = colID2IdxIDs
896+
statsTbl.Idx2ColUniqueIDs = idx2Columns
897+
statsTbl.ColUniqueID2IdxIDs = colID2IdxIDs
898898
}
899899

900900
func TestIssue39593(t *testing.T) {

pkg/planner/core/casetest/planstats/BUILD.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ go_test(
99
],
1010
data = glob(["testdata/**"]),
1111
flaky = True,
12-
shard_count = 4,
12+
shard_count = 5,
1313
deps = [
1414
"//pkg/config",
1515
"//pkg/domain",

pkg/planner/core/casetest/planstats/plan_stats_test.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,3 +405,48 @@ func TestCollectDependingVirtualCols(t *testing.T) {
405405
require.Equal(t, output[i].OutputColNames, cols)
406406
}
407407
}
408+
409+
func TestPartialStatsInExplain(t *testing.T) {
410+
store, dom := testkit.CreateMockStoreAndDomain(t)
411+
tk := testkit.NewTestKit(t, store)
412+
tk.MustExec("use test")
413+
tk.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))")
414+
tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)")
415+
tk.MustExec("create table t2(a int, primary key(a))")
416+
tk.MustExec("insert into t2 values (1),(2),(3)")
417+
tk.MustExec(
418+
"create table tp(a int, b int, c int, index ic(c)) partition by range(a)" +
419+
"(partition p0 values less than (10)," +
420+
"partition p1 values less than (20)," +
421+
"partition p2 values less than maxvalue)",
422+
)
423+
tk.MustExec("insert into tp values (1,1,1),(2,2,2),(13,13,13),(14,14,14),(25,25,25),(36,36,36)")
424+
425+
oriLease := dom.StatsHandle().Lease()
426+
dom.StatsHandle().SetLease(1)
427+
defer func() {
428+
dom.StatsHandle().SetLease(oriLease)
429+
}()
430+
tk.MustExec("analyze table t")
431+
tk.MustExec("analyze table t2")
432+
tk.MustExec("analyze table tp")
433+
tk.RequireNoError(dom.StatsHandle().Update(dom.InfoSchema()))
434+
tk.MustQuery("explain select * from tp where a = 1")
435+
tk.MustExec("set @@tidb_stats_load_sync_wait = 0")
436+
var (
437+
input []string
438+
output []struct {
439+
Query string
440+
Result []string
441+
}
442+
)
443+
testData := GetPlanStatsData()
444+
testData.LoadTestCases(t, &input, &output)
445+
for i, sql := range input {
446+
testdata.OnRecord(func() {
447+
output[i].Query = input[i]
448+
output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows())
449+
})
450+
tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...))
451+
}
452+
}

pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_in.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,13 @@
6262
]
6363
}
6464
]
65+
},
66+
{
67+
"name": "TestPartialStatsInExplain",
68+
"cases": [
69+
"explain format = brief select * from tp where b = 10",
70+
"explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
71+
"explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c"
72+
]
6573
}
6674
]

pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,5 +101,47 @@
101101
]
102102
}
103103
]
104+
},
105+
{
106+
"Name": "TestPartialStatsInExplain",
107+
"Cases": [
108+
{
109+
"Query": "explain format = brief select * from tp where b = 10",
110+
"Result": [
111+
"TableReader 0.01 root partition:all data:Selection",
112+
"└─Selection 0.01 cop[tikv] eq(test.tp.b, 10)",
113+
" └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[b:allEvicted]"
114+
]
115+
},
116+
{
117+
"Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
118+
"Result": [
119+
"Projection 0.00 root test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c",
120+
"└─HashJoin 0.00 root inner join, equal:[eq(test.tp.c, test.t.b)]",
121+
" ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
122+
" │ └─Selection 0.00 cop[tikv] eq(test.tp.a, 10), not(isnull(test.tp.c))",
123+
" │ └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
124+
" └─TableReader(Probe) 3.00 root data:Selection",
125+
" └─Selection 3.00 cop[tikv] not(isnull(test.t.b))",
126+
" └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]"
127+
]
128+
},
129+
{
130+
"Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
131+
"Result": [
132+
"HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]",
133+
"├─IndexJoin(Build) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
134+
"│ ├─TableReader(Build) 0.33 root data:Selection",
135+
"│ │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))",
136+
"│ │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
137+
"│ └─IndexLookUp(Probe) 0.33 root partition:p0 ",
138+
"│ ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))",
139+
"│ │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
140+
"│ └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
141+
"└─TableReader(Probe) 1.00 root data:TableRangeScan",
142+
" └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
143+
]
144+
}
145+
]
104146
}
105147
]

0 commit comments

Comments
 (0)