Skip to content
Merged
Show file tree
Hide file tree
Changes from 41 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
a452f59
planner: Ensure index without stats survives for skyline pruning
terry1purcell Jan 7, 2025
d8267f0
revision after customer issue added
terry1purcell Jan 7, 2025
f962732
revision2
terry1purcell Jan 7, 2025
83e161f
Merge branch 'pingcap:master' into indexpseudo
terry1purcell Jan 7, 2025
a507959
revision3
terry1purcell Jan 7, 2025
a537a0d
new testcase
terry1purcell Jan 7, 2025
b9ad6ce
bazel
terry1purcell Jan 7, 2025
12e5604
testcase2
terry1purcell Jan 7, 2025
6c6aeae
revision3
terry1purcell Jan 8, 2025
75517e4
revision4
terry1purcell Jan 9, 2025
1b0a0ce
revision5
terry1purcell Jan 9, 2025
52a29b3
Merge branch 'pingcap:master' into indexpseudo
terry1purcell Jan 9, 2025
7278040
Merge branch 'pingcap:master' into indexpseudo
terry1purcell Jan 13, 2025
c759a18
significant revision after review comments
terry1purcell Jan 13, 2025
a0abe12
testcase after revision1
terry1purcell Jan 13, 2025
613d938
testcase after revision2
terry1purcell Jan 13, 2025
1ee1866
testcase after revision3
terry1purcell Jan 14, 2025
02fae63
testcase after revision4
terry1purcell Jan 14, 2025
28ead94
testcase after revision5
terry1purcell Jan 14, 2025
08390e6
regenerate tests1
terry1purcell Jan 15, 2025
4271328
regenerate tests2
terry1purcell Jan 15, 2025
5056fd9
regenerate tests3
terry1purcell Jan 15, 2025
50c1f9f
regenerate tests4
terry1purcell Jan 15, 2025
9022afb
Merge branch 'pingcap:master' into indexpseudo
terry1purcell Jan 15, 2025
7866441
refactor to limit scope
terry1purcell Jan 15, 2025
bcf0dbb
after refactor testcase1
terry1purcell Jan 15, 2025
469be20
after refactor testcase2
terry1purcell Jan 15, 2025
3f9ce12
after refactor testcase3
terry1purcell Jan 16, 2025
9cb6092
after refactor testcase4
terry1purcell Jan 16, 2025
0ba176a
after refactor testcase5
terry1purcell Jan 16, 2025
1aa46c9
after refactor testcase6
terry1purcell Jan 16, 2025
27f6f89
Merge branch 'pingcap:master' into indexpseudo
terry1purcell Jan 17, 2025
ce0dcf9
after refactor testcase7
terry1purcell Jan 17, 2025
5a4f44d
correct code for testcase1
terry1purcell Jan 18, 2025
c993b02
correct code for testcase2
terry1purcell Jan 18, 2025
6a5da89
correct code for testcase3
terry1purcell Jan 19, 2025
47c5756
correct code for testcase4
terry1purcell Jan 19, 2025
3460f82
Merge branch 'pingcap:master' into indexpseudo
terry1purcell Jan 19, 2025
49f6562
correct code for testcase5
terry1purcell Jan 19, 2025
081ee0e
add nil check
terry1purcell Jan 20, 2025
513fa70
update comment
terry1purcell Jan 20, 2025
549a7bf
update for review comments
terry1purcell Jan 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/planner/cardinality/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":cardinality"],
flaky = True,
shard_count = 30,
shard_count = 31,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
21 changes: 21 additions & 0 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,9 @@ func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
}

func TestNewIndexWithoutStats(t *testing.T) {
// Test where there exist multiple indexes - but (at least) one index does not have statistics
// Test 1) Prioritize an index with stats over one without - when both have the same number of equal predicates
// Test 2) Prioritize the index with more equal predicates regardless
store, _ := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
testKit.MustExec("use test")
Expand All @@ -363,6 +366,24 @@ func TestNewIndexWithoutStats(t *testing.T) {
testKit.MustQuery("explain format='brief' select * from t where a = 5 and b = 5").CheckContain("idxab(a, b)")
}

// TestIssue57948 is the single-index counterpart of TestNewIndexWithoutStats.
// The table is analyzed first and idxb is only created afterwards, so idxb is
// the sole index on the table and is missing statistics. The plan for an
// equality predicate on b is still expected to reference idxb.
func TestIssue57948(t *testing.T) {
	store, _ := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	// Statements run in order; the index is deliberately created after ANALYZE.
	setup := []string{
		"use test",
		"drop table if exists t",
		"create table t(a int, b int, c int)",
		"set @@tidb_analyze_version=2",
		"set @@global.tidb_enable_auto_analyze='OFF'",
		"insert into t values (1, 1, 1)",
		"insert into t select mod(a,250), mod(a,10), mod(a,100) from (with recursive x as (select 1 as a union all select a + 1 AS a from x where a < 500) select a from x) as subquery",
		"analyze table t",
		"create index idxb on t(b)",
	}
	for _, stmt := range setup {
		tk.MustExec(stmt)
	}

	// Skyline pruning should keep idxb as a candidate so that it is chosen here.
	tk.MustQuery("explain format='brief' select * from t where b = 5").CheckContain("idxb(b)")
}

func TestEstimationUniqueKeyEqualConds(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
Expand Down
6 changes: 3 additions & 3 deletions pkg/planner/cardinality/testdata/cardinality_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
{
"Start": 800,
"End": 900,
"Count": 775.7583333101078
"Count": 767.5083333101078
},
{
"Start": 900,
Expand Down Expand Up @@ -79,7 +79,7 @@
{
"Start": 800,
"End": 1000,
"Count": 1692.1437391478842
"Count": 1683.8937391478842
},
{
"Start": 900,
Expand All @@ -104,7 +104,7 @@
{
"Start": 200,
"End": 400,
"Count": 1678.5576419798163
"Count": 1699.5576419798163
},
{
"Start": 200,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,8 @@
"Plan": [
" TableReader root ",
" └─ExchangeSender cop[tiflash] ",
" └─Selection cop[tiflash] gt(test.t1.a, ?), gt(test.t1.c, ?), or(gt(test.t1.a, ?), lt(test.t1.b, ?))",
" └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.b, ?), keep order:false"
" └─Selection cop[tiflash] gt(test.t1.b, ?), gt(test.t1.c, ?), or(gt(test.t1.a, ?), lt(test.t1.b, ?))",
" └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.a, ?), keep order:false"
]
},
{
Expand Down
141 changes: 91 additions & 50 deletions pkg/planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ import (
h "github.com/pingcap/tidb/pkg/util/hint"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/pingcap/tidb/pkg/util/ranger"
"github.com/pingcap/tidb/pkg/util/tracing"
"github.com/pingcap/tipb/go-tipb"
"go.uber.org/zap"
Expand Down Expand Up @@ -711,34 +710,73 @@ func compareGlobalIndex(lhs, rhs *candidatePath) int {

// compareCandidates is the core of skyline pruning, which is used to decide which candidate path is better.
// The return value is 1 if lhs is better, -1 if rhs is better, 0 if they are equivalent or not comparable.
func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *property.PhysicalProperty, lhs, rhs *candidatePath) int {
func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableInfo *model.TableInfo, prop *property.PhysicalProperty, lhs, rhs *candidatePath, preferRange bool) (int, bool) {
// Due to #50125, full scan on MVIndex has been disabled, so MVIndex path might lead to 'can't find a proper plan' error at the end.
// Avoid MVIndex path to exclude all other paths and leading to 'can't find a proper plan' error, see #49438 for an example.
if isMVIndexPath(lhs.path) || isMVIndexPath(rhs.path) {
return 0
return 0, false
}
// lhsPseudo == lhs has pseudo (no) stats for the table or index for the lhs path.
// rhsPseudo == rhs has pseudo (no) stats for the table or index for the rhs path.
//
// For the return value - if lhs wins (1), we return lhsPseudo. If rhs wins (-1), we return rhsPseudo.
// If there is no winner (0), we return false.
//
// This return value is used later in SkyLinePruning to determine whether we should preference an index scan
// over a table scan. Allowing indexes without statistics to survive means they can win via heuristics where
// they otherwise would have lost on cost.
lhsPseudo, rhsPseudo, tablePseudo := false, false, false
lhsFullScan := lhs.path.IsFullScanRange(tableInfo)
rhsFullScan := rhs.path.IsFullScanRange(tableInfo)
if statsTbl != nil {
lhsPseudo, rhsPseudo, tablePseudo = statsTbl.HistColl.Pseudo, statsTbl.HistColl.Pseudo, statsTbl.HistColl.Pseudo
if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 {
if !lhsFullScan && lhs.path.Index != nil {
if statsTbl.ColAndIdxExistenceMap.HasAnalyzed(lhs.path.Index.ID, true) {
lhsPseudo = false // We have statistics for the lhs index
} else {
lhsPseudo = true
}
}
if !rhsFullScan && rhs.path.Index != nil {
if statsTbl.ColAndIdxExistenceMap.HasAnalyzed(rhs.path.Index.ID, true) {
rhsPseudo = false // We have statistics on the rhs index
} else {
rhsPseudo = true
}
}
}
}

// If one index has statistics and the other does not, choose the index with statistics if it
// has the same or higher number of equal/IN predicates.
lhsHasStatistics := statsTbl.Pseudo
if statsTbl != nil && lhs.path.Index != nil {
lhsHasStatistics = statsTbl.ColAndIdxExistenceMap.HasAnalyzed(lhs.path.Index.ID, true)
}
rhsHasStatistics := statsTbl.Pseudo
if statsTbl != nil && rhs.path.Index != nil {
rhsHasStatistics = statsTbl.ColAndIdxExistenceMap.HasAnalyzed(rhs.path.Index.ID, true)
}
if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() && // Not a table scan
(lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
(!lhsHasStatistics || !rhsHasStatistics) && // At least one index doesn't have statistics
len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 { // not IndexMerge due to unreliability
lhsTotalEqual := lhs.path.EqCondCount + lhs.path.EqOrInCondCount
rhsTotalEqual := rhs.path.EqCondCount + rhs.path.EqOrInCondCount
if lhsHasStatistics && lhsTotalEqual > 0 && lhsTotalEqual >= rhsTotalEqual {
return 1
matchResult, globalResult := compareBool(lhs.isMatchProp, rhs.isMatchProp), compareGlobalIndex(lhs, rhs)
accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
scanResult, comparable2 := compareIndexBack(lhs, rhs)
sum := accessResult + scanResult + matchResult + globalResult

// First rules apply when an index doesn't have statistics and another object (index or table) has statistics
if (lhsPseudo != rhsPseudo || ((lhsPseudo || rhsPseudo) && !tablePseudo)) && !lhsFullScan && !rhsFullScan { // At least one index doesn't have statistics
// If one index has statistics and the other does not, choose the index with statistics if it
// has the same or higher number of equal/IN predicates.
if !lhsPseudo && globalResult >= 0 && sum >= 0 &&
lhs.path.EqOrInCondCount > 0 && lhs.path.EqOrInCondCount >= rhs.path.EqOrInCondCount {
return 1, false // left wins and has statistics
}
if !rhsPseudo && globalResult <= 0 && sum <= 0 &&
rhs.path.EqOrInCondCount > 0 && rhs.path.EqOrInCondCount >= lhs.path.EqOrInCondCount {
return -1, false // right wins and has statistics
}
if rhsHasStatistics && rhsTotalEqual > 0 && rhsTotalEqual >= lhsTotalEqual {
return -1
if preferRange {
// keep an index without statistics if that index has more equal/IN predicates, AND:
// 1) there are at least 2 equal/INs
// 2) OR - it's a full index match for all index predicates
if lhsPseudo && lhs.path.EqOrInCondCount > rhs.path.EqOrInCondCount && globalResult >= 0 && sum >= 0 &&
(lhs.path.EqOrInCondCount > 1 || (lhs.path.EqOrInCondCount > 0 && len(lhs.indexCondsColMap) >= len(lhs.path.Index.Columns))) {
return 1, true // left wins and does NOT have statistics
}
if rhsPseudo && rhs.path.EqOrInCondCount > lhs.path.EqOrInCondCount && globalResult <= 0 && sum <= 0 &&
(rhs.path.EqOrInCondCount > 1 || (rhs.path.EqOrInCondCount > 0 && len(rhs.indexCondsColMap) >= len(rhs.path.Index.Columns))) {
return -1, true // right wins and does NOT have statistics
}
}
}

Expand All @@ -750,38 +788,34 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *
threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
if threshold > 0 { // set it to 0 to disable this rule
if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold {
return -1
return -1, false
}
if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold {
return 1
return 1, false
}
}
}

// Below compares the two candidate paths on three dimensions:
// Below compares the two candidate paths on four dimensions:
// (1): the set of columns that occurred in the access condition,
// (2): does it require a double scan,
// (3): whether or not it matches the physical property,
// (4): it's a global index path or not.
// If `x` is not worse than `y` at all factors,
// and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
if !comparable1 {
return 0
return 0, false
}
scanResult, comparable2 := compareIndexBack(lhs, rhs)
if !comparable2 {
return 0
return 0, false
}
matchResult, globalResult := compareBool(lhs.isMatchProp, rhs.isMatchProp), compareGlobalIndex(lhs, rhs)
sum := accessResult + scanResult + matchResult + globalResult
if accessResult >= 0 && scanResult >= 0 && matchResult >= 0 && globalResult >= 0 && sum > 0 {
return 1
return 1, false
}
if accessResult <= 0 && scanResult <= 0 && matchResult <= 0 && globalResult <= 0 && sum < 0 {
return -1
return -1, false
}
return 0
return 0, false
}

func isMatchProp(ds *logicalop.DataSource, path *util.AccessPath, prop *property.PhysicalProperty) bool {
Expand Down Expand Up @@ -1128,6 +1162,9 @@ func getIndexMergeCandidate(ds *logicalop.DataSource, path *util.AccessPath, pro
// there exists a path that is not worse than it at all factors and there is at least one better factor.
func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) []*candidatePath {
candidates := make([]*candidatePath, 0, 4)
idxMissingStats := false
// tidb_opt_prefer_range_scan is the master switch to control index preferencing
preferRange := ds.SCtx().GetSessionVars().GetAllowPreferRangeScan()
for _, path := range ds.PossibleAccessPaths {
// We should check whether the possible access path is valid first.
if path.StoreType != kv.TiFlash && prop.IsFlashProp() {
Expand Down Expand Up @@ -1168,7 +1205,12 @@ func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) [
if candidates[i].path.StoreType == kv.TiFlash {
continue
}
result := compareCandidates(ds.SCtx(), ds.StatisticTable, prop, candidates[i], currentCandidate)
var result int
currentMissingStats := false
result, currentMissingStats = compareCandidates(ds.SCtx(), ds.StatisticTable, ds.TableInfo, prop, candidates[i], currentCandidate, preferRange)
if currentMissingStats {
idxMissingStats = true
}
if result == 1 {
pruned = true
// We can break here because the current candidate cannot prune others anymore.
Expand All @@ -1188,28 +1230,23 @@ func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) [
fixcontrol.Fix52869,
false,
)
// tidb_opt_prefer_range_scan is the master switch to control index preferencing
preferRange := ds.SCtx().GetSessionVars().GetAllowPreferRangeScan() &&
(preferMerge || (ds.TableStats.HistColl.Pseudo || ds.TableStats.RowCount < 1))
if preferRange {
// Override preferRange with the following limitations to scope
preferRange = preferMerge || idxMissingStats || ds.TableStats.HistColl.Pseudo || ds.TableStats.RowCount < 1
}
if preferRange && len(candidates) > 1 {
// If a candidate path is TiFlash-path or forced-path or MV index, we just keep them. For other candidate paths, if there exists
// any range scan path, we remove full scan paths and keep range scan paths.
// If a candidate path is TiFlash-path or forced-path or MV index or global index, we just keep them. For other
// candidate paths, if there exists any range scan path, we remove full scan paths and keep range scan paths.
preferredPaths := make([]*candidatePath, 0, len(candidates))
var hasRangeScanPath bool
for _, c := range candidates {
if c.path.Forced || c.path.StoreType == kv.TiFlash || (c.path.Index != nil && c.path.Index.MVIndex) {
if c.path.Forced || c.path.StoreType == kv.TiFlash || (c.path.Index != nil && (c.path.Index.Global || c.path.Index.MVIndex)) {
preferredPaths = append(preferredPaths, c)
continue
}
var unsignedIntHandle bool
if c.path.IsIntHandlePath && ds.TableInfo.PKIsHandle {
if pkColInfo := ds.TableInfo.GetPkColInfo(); pkColInfo != nil {
unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
}
}
if !ranger.HasFullRange(c.path.Ranges, unsignedIntHandle) {
if !c.path.IsFullScanRange(ds.TableInfo) {
// Preference plans with equals/IN predicates or where there is more filtering in the index than against the table
indexFilters := c.path.EqCondCount > 0 || c.path.EqOrInCondCount > 0 || len(c.path.TableFilters) < len(c.path.IndexFilters)
indexFilters := c.path.EqOrInCondCount > 0 || len(c.path.TableFilters) < len(c.path.IndexFilters)
if preferMerge || (indexFilters && (prop.IsSortItemEmpty() || c.isMatchProp)) {
preferredPaths = append(preferredPaths, c)
hasRangeScanPath = true
Expand Down Expand Up @@ -1407,6 +1444,10 @@ func findBestTask4LogicalDataSource(lp base.LogicalPlan, prop *property.Physical
if ds.PreferStoreType&h.PreferTiFlash != 0 && path.StoreType == kv.TiKV {
continue
}
// prefer tikv, while current table path is tiflash, skip it.
if ds.PreferStoreType&h.PreferTiKV != 0 && path.StoreType == kv.TiFlash {
continue
}
idxMergeTask, err := convertToIndexMergeScan(ds, prop, candidate, opt)
if err != nil {
return nil, 0, err
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/core/logical_plans_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2031,7 +2031,7 @@ func TestSkylinePruning(t *testing.T) {
},
{
sql: "select * from pt2_global_index where b > 1 and c > 1",
result: "b_c_global", // will prune `b_c`
result: "PRIMARY_KEY,c_d_e,b_c_global", // will prune `b_c`
},
{
sql: "select * from pt2_global_index where b > 1 and c > 1 and d > 1",
Expand Down
16 changes: 16 additions & 0 deletions pkg/planner/util/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/meta/model"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/planner/planctx"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/collate"
Expand Down Expand Up @@ -406,3 +407,18 @@ func (path *AccessPath) GetCol2LenFromAccessConds(ctx planctx.PlanContext) Col2L
}
return ExtractCol2Len(ctx.GetExprCtx().GetEvalCtx(), path.AccessConds, path.IdxCols, path.IdxColLens)
}

// IsFullScanRange reports whether path.Ranges is a full range as decided by
// ranger.HasFullRange, i.e. the path has no filtering that limits the range of
// the scan.
//
// When the path is an int-handle path and the table's primary key is the
// handle, the unsigned flag of the primary key column is forwarded to
// ranger.HasFullRange; in every other case the handle is treated as not
// unsigned. A nil tableInfo is tolerated and is likewise treated as not
// unsigned instead of causing a nil-pointer dereference.
func (path *AccessPath) IsFullScanRange(tableInfo *model.TableInfo) bool {
	unsignedIntHandle := false
	if path.IsIntHandlePath && tableInfo != nil && tableInfo.PKIsHandle {
		if pkColInfo := tableInfo.GetPkColInfo(); pkColInfo != nil {
			unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
		}
	}
	return ranger.HasFullRange(path.Ranges, unsignedIntHandle)
}