planner: add skew risk ratio for range pred (pingcap#62035)

ichen032 · web-flow · commit 81370d4c725e · 2025-07-07T20:32:30.000Z
close pingcap#62093
diff --git a/pkg/planner/cardinality/BUILD.bazel b/pkg/planner/cardinality/BUILD.bazel
@@ -60,7 +60,7 @@ go_test(
     data = glob(["testdata/**"]),
     embed = [":cardinality"],
     flaky = True,
-    shard_count = 33,
+    shard_count = 34,
     deps = [
         "//pkg/config",
         "//pkg/domain",
diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go
@@ -600,7 +600,7 @@ func matchPrefix(row chunk.Row, colIdx int, ad *types.Datum) bool {
 }
 
 // betweenRowCountOnIndex estimates the row count for interval [l, r).
-// The input sctx is just for debug trace, you can pass nil safely if that's not needed.
+// The input sctx is required for stats version 2. For version 1, it is just for debug trace, you can pass nil safely.
 func betweenRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, l, r types.Datum) float64 {
 	histBetweenCnt := idx.Histogram.BetweenRowCount(sctx, l, r)
 	if idx.StatsVer == statistics.Version1 {
diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go
@@ -1578,3 +1578,56 @@ func TestRiskEqSkewRatio(t *testing.T) {
 	// Reset global variable to default.
 	testKit.MustExec("set @@global.tidb_opt_risk_eq_skew_ratio = default")
 }
+
+func TestRiskRangeSkewRatio(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	testKit := testkit.NewTestKit(t, store)
+
+	testKit.MustExec("use test")
+	testKit.MustExec("drop table if exists t")
+	testKit.MustExec("create table t(a int, index idx(a))")
+	is := dom.InfoSchema()
+	tb, err := is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t"))
+	require.NoError(t, err)
+	tblInfo := tb.Meta()
+
+	// Insert enough rows to produce a skewed distribution.
+	testKit.MustExec("insert into t values (1), (1), (1), (1), (2), (2), (3), (4), (5), (5)")
+	// Do not collect topn and only collect 1 bucket to ensure later queries will be within a bucket.
+	testKit.MustExec(`analyze table t with 0 topn, 1 buckets`)
+	h := dom.StatsHandle()
+	require.NoError(t, h.DumpStatsDeltaToKV(true))
+
+	sctx := testKit.Session()
+	idxID := tblInfo.Indices[0].ID
+	statsTbl := h.GetTableStats(tb.Meta())
+	// Search for the range from 2 to 3, since there is only one bucket it will be a query within
+	// a bucket.
+	testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = 0")
+	count, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3))
+	require.NoError(t, err)
+	testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = 0.5")
+	count2, _, err2 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3))
+	require.NoError(t, err2)
+	// Result of count2 should be larger than count because the risk ratio is higher
+	require.Less(t, count, count2)
+	testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = 1")
+	count3, _, err3 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3))
+	require.NoError(t, err3)
+	// Result of count3 should be larger because the risk ratio is higher
+	require.Less(t, count2, count3)
+	// Repeat the prior test by setting the global variable instead of the session variable. This should have no effect because the session value (1) still applies, so count4 stays larger than count2.
+	testKit.MustExec("set @@global.tidb_opt_risk_range_skew_ratio = 0.5")
+	count4, _, err4 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3))
+	require.NoError(t, err4)
+	require.Less(t, count2, count4)
+	// Repeat the prior test by setting the session variable to the default. Count4 should inherit the global
+	// variable and be less than count3.
+	testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = default")
+	count4, _, err4 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3))
+	require.NoError(t, err4)
+	require.Less(t, count4, count3)
+	// Reset global and session variables to default.
+	testKit.MustExec("set @@global.tidb_opt_risk_range_skew_ratio = default")
+	testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = default")
+}
diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go
@@ -326,6 +326,9 @@ const (
 	// TiDBOptRiskEqSkewRatio controls the amount of skew is applied to equal predicate estimation when a value is not found in TopN/buckets.
 	TiDBOptRiskEqSkewRatio = "tidb_opt_risk_eq_skew_ratio"
 
+	// TiDBOptRiskRangeSkewRatio controls the amount of skew that is applied to range predicate estimation when a range falls within a bucket.
+	TiDBOptRiskRangeSkewRatio = "tidb_opt_risk_range_skew_ratio"
+
 	// TiDBOptCPUFactor is the CPU cost of processing one expression for one row.
 	TiDBOptCPUFactor = "tidb_opt_cpu_factor"
 	// TiDBOptCopCPUFactor is the CPU cost of processing one expression for one row in coprocessor.
@@ -1327,6 +1330,7 @@ const (
 	DefOptCorrelationThreshold              = 0.9
 	DefOptCorrelationExpFactor              = 1
 	DefOptRiskEqSkewRatio                   = 0.0
+	DefOptRiskRangeSkewRatio                = 0.0
 	DefOptCPUFactor                         = 3.0
 	DefOptCopCPUFactor                      = 3.0
 	DefOptTiFlashConcurrencyFactor          = 24.0
diff --git a/pkg/sessionctx/variable/session.go b/pkg/sessionctx/variable/session.go
@@ -1021,6 +1021,9 @@ type SessionVars struct {
 	// RiskEqSkewRatio is used to control the ratio of skew that is applied to equal predicates not found in TopN/buckets.
 	RiskEqSkewRatio float64
 
+	// RiskRangeSkewRatio is used to control the ratio of skew that is applied to range predicates that fall within a single bucket.
+	RiskRangeSkewRatio float64
+
 	// cpuFactor is the CPU cost of processing one expression for one row.
 	cpuFactor float64
 	// copCPUFactor is the CPU cost of processing one expression for one row in coprocessor.
@@ -2196,6 +2199,7 @@ func NewSessionVars(hctx HookContext) *SessionVars {
 		CorrelationThreshold:          vardef.DefOptCorrelationThreshold,
 		CorrelationExpFactor:          vardef.DefOptCorrelationExpFactor,
 		RiskEqSkewRatio:               vardef.DefOptRiskEqSkewRatio,
+		RiskRangeSkewRatio:            vardef.DefOptRiskRangeSkewRatio,
 		cpuFactor:                     vardef.DefOptCPUFactor,
 		copCPUFactor:                  vardef.DefOptCopCPUFactor,
 		CopTiFlashConcurrencyFactor:   vardef.DefOptTiFlashConcurrencyFactor,
diff --git a/pkg/sessionctx/variable/setvar_affect.go b/pkg/sessionctx/variable/setvar_affect.go
@@ -72,6 +72,7 @@ var isHintUpdatableVerified = map[string]struct{}{
 	"tidb_opt_table_tiflash_scan_cost_factor":         {},
 	"tidb_opt_topn_cost_factor":                       {},
 	"tidb_opt_skew_ratio":                             {},
+	"tidb_opt_range_ratio":                            {},
 	"tidb_index_join_batch_size":                      {},
 	"tidb_index_lookup_size":                          {},
 	"tidb_index_serial_scan_concurrency":              {},
diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go
@@ -2034,6 +2034,10 @@ var defaultSysVars = []*SysVar{
 		s.RiskEqSkewRatio = tidbOptFloat64(val, vardef.DefOptRiskEqSkewRatio)
 		return nil
 	}},
+	{Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptRiskRangeSkewRatio, Value: strconv.FormatFloat(vardef.DefOptRiskRangeSkewRatio, 'f', -1, 64), Type: vardef.TypeFloat, MinValue: 0, MaxValue: 1, SetSession: func(s *SessionVars, val string) error {
+		s.RiskRangeSkewRatio = tidbOptFloat64(val, vardef.DefOptRiskRangeSkewRatio)
+		return nil
+	}},
 	{Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptCPUFactor, Value: strconv.FormatFloat(vardef.DefOptCPUFactor, 'f', -1, 64), Type: vardef.TypeFloat, MinValue: 0, MaxValue: math.MaxUint64, SetSession: func(s *SessionVars, val string) error {
 		s.cpuFactor = tidbOptFloat64(val, vardef.DefOptCPUFactor)
 		return nil
diff --git a/pkg/statistics/handle/globalstats/global_stats_internal_test.go b/pkg/statistics/handle/globalstats/global_stats_internal_test.go
@@ -405,6 +405,8 @@ func testGlobalStatsAndSQLBinding(tk *testkit.TestKit) {
 	tk.MustExec("use test_global_stats")
 	tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic'")
 	tk.MustExec("set tidb_cost_model_version=2")
+	// Disable auto analyze to ensure that stats are not automatically collected
+	tk.MustExec("set @@global.tidb_enable_auto_analyze='OFF'")
 
 	// hash and range and list partition
 	tk.MustExec("create table thash(a int, b int, key(a)) partition by hash(a) partitions 4")
@@ -439,8 +441,6 @@ func testGlobalStatsAndSQLBinding(tk *testkit.TestKit) {
 	tk.MustExec("insert into trange values " + strings.Join(vals, ","))
 	tk.MustExec("insert into tlist values " + strings.Join(listVals, ","))
 
-	// Disable auto analyze to ensure that stats are not automatically collected
-	tk.MustExec("set @@global.tidb_enable_auto_analyze='OFF'")
 	// before analyzing, the planner will choose TableScan to access the 1% of records
 	tk.MustHavePlan("select * from thash where a<100", "TableFullScan")
 	tk.MustHavePlan("select * from trange where a<100", "TableFullScan")
diff --git a/pkg/statistics/histogram.go b/pkg/statistics/histogram.go
@@ -561,19 +561,40 @@ func (hg *Histogram) LessRowCount(sctx planctx.PlanContext, value types.Datum) f
 }
 
 // BetweenRowCount estimates the row count where column greater or equal to a and less than b.
-// The input sctx is just for debug trace, you can pass nil safely if that's not needed.
+// The input sctx is required for stats version 2. For version 1, it is just for debug trace, you can pass nil safely.
 func (hg *Histogram) BetweenRowCount(sctx planctx.PlanContext, a, b types.Datum) float64 {
-	lessCountA := hg.LessRowCount(sctx, a)
-	lessCountB := hg.LessRowCount(sctx, b)
+	lessCountA, bktIndexA := hg.LessRowCountWithBktIdx(sctx, a)
+	lessCountB, bktIndexB := hg.LessRowCountWithBktIdx(sctx, b)
 	rangeEst := lessCountB - lessCountA
 	lowEqual, _ := hg.EqualRowCount(sctx, a, false)
 	ndvAvg := hg.NotNullCount() / float64(hg.NDV)
 	// If values fall in the same bucket, we may underestimate the fractional result. So estimate the low value (a) as an equals, and
 	// estimate the high value as the default (because the input high value may be "larger" than the true high value). The range should
 	// not be less than both the low+high - or the lesser of the estimate for the individual range of a or b is used as a bound.
-	if rangeEst < math.Max(lowEqual, ndvAvg) && hg.NDV > 0 {
-		result := math.Min(lessCountB, hg.NotNullCount()-lessCountA)
-		return math.Min(result, lowEqual+ndvAvg)
+	if rangeEst < max(lowEqual, ndvAvg) && hg.NDV > 0 {
+		result := min(lessCountB, hg.NotNullCount()-lessCountA)
+		rangeEst = min(result, lowEqual+ndvAvg)
+	}
+	// If both values fall in the same bucket, use skewRatio to adjust the range estimate to account for potential skew.
+	if len(hg.Buckets) != 0 && bktIndexA == bktIndexB {
+		// sctx may be nil for stats version 1
+		if sctx != nil {
+			skewRatio := sctx.GetSessionVars().RiskRangeSkewRatio
+			sctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptRiskRangeSkewRatio)
+			if skewRatio > 0 {
+				// Worst case skew is if the range includes all the rows in the bucket
+				skewEstimate := hg.Buckets[bktIndexA].Count
+				if bktIndexA > 0 {
+					skewEstimate -= hg.Buckets[bktIndexA-1].Count
+				}
+				// If range does not include last value of its bucket, remove the repeat count from the skew estimate.
+				if lessCountB <= float64(hg.Buckets[bktIndexA].Count-hg.Buckets[bktIndexA].Repeat) {
+					skewEstimate -= hg.Buckets[bktIndexA].Repeat
+				}
+				// Add a scaled ratio of the worst case skewed estimate to our regular estimate
+				return rangeEst + max(0, (float64(skewEstimate)-rangeEst)*skewRatio)
+			}
+		}
 	}
 	return rangeEst
 }
diff --git a/pkg/statistics/statistics_test.go b/pkg/statistics/statistics_test.go
@@ -521,7 +521,7 @@ func SubTestBuild() func(*testing.T) {
 		require.Equal(t, 0.0, count)
 		count, _ = col.EqualRowCount(nil, types.NewIntDatum(200000000), false)
 		require.Equal(t, 0.0, count)
-		count = col.BetweenRowCount(nil, types.NewIntDatum(3000), types.NewIntDatum(3500))
+		count = col.BetweenRowCount(ctx, types.NewIntDatum(3000), types.NewIntDatum(3500))
 		require.Equal(t, 4994, int(count))
 		count = col.LessRowCount(nil, types.NewIntDatum(1))
 		require.Equal(t, 5, int(count))
@@ -547,7 +547,7 @@ func SubTestBuild() func(*testing.T) {
 		require.Equal(t, 90010, int(count))
 		count = colv2.GreaterRowCount(types.NewIntDatum(200000000))
 		require.Equal(t, 0.0, count)
-		count = colv2.BetweenRowCount(nil, types.NewIntDatum(3000), types.NewIntDatum(3500))
+		count = colv2.BetweenRowCount(ctx, types.NewIntDatum(3000), types.NewIntDatum(3500))
 		require.Equal(t, 5001, int(count))
 		count = colv2.LessRowCount(nil, types.NewIntDatum(1))
 		require.Equal(t, 0, int(count))
@@ -579,9 +579,9 @@ func SubTestBuild() func(*testing.T) {
 		require.Equal(t, 1, int(count))
 		count = col.LessRowCount(nil, encodeKey(types.NewIntDatum(20000)))
 		require.Equal(t, 19999, int(count))
-		count = col.BetweenRowCount(nil, encodeKey(types.NewIntDatum(30000)), encodeKey(types.NewIntDatum(35000)))
+		count = col.BetweenRowCount(ctx, encodeKey(types.NewIntDatum(30000)), encodeKey(types.NewIntDatum(35000)))
 		require.Equal(t, 4999, int(count))
-		count = col.BetweenRowCount(nil, encodeKey(types.MinNotNullDatum()), encodeKey(types.NewIntDatum(0)))
+		count = col.BetweenRowCount(ctx, encodeKey(types.MinNotNullDatum()), encodeKey(types.NewIntDatum(0)))
 		require.Equal(t, 0, int(count))
 		count = col.LessRowCount(nil, encodeKey(types.NewIntDatum(0)))
 		require.Equal(t, 0, int(count))
@@ -596,7 +596,7 @@ func SubTestBuild() func(*testing.T) {
 		require.Equal(t, 1, int(count))
 		count = col.LessRowCount(nil, types.NewIntDatum(20000))
 		require.Equal(t, 20000, int(count))
-		count = col.BetweenRowCount(nil, types.NewIntDatum(30000), types.NewIntDatum(35000))
+		count = col.BetweenRowCount(ctx, types.NewIntDatum(30000), types.NewIntDatum(35000))
 		require.Equal(t, 5000, int(count))
 		count = col.GreaterRowCount(types.NewIntDatum(1001))
 		require.Equal(t, 98998, int(count))

Original file line number	Diff line number	Diff line change
`@@ -600,7 +600,7 @@ func matchPrefix(row chunk.Row, colIdx int, ad *types.Datum) bool {`
`600`	`600`	`}`
`601`	`601`
`602`	`602`	`// betweenRowCountOnIndex estimates the row count for interval [l, r).`
`603`		`-// The input sctx is just for debug trace, you can pass nil safely if that's not needed.`
	`603`	`+// The input sctx is required for stats version 2. For version 1, it is just for debug trace, you can pass nil safely.`
`604`	`604`	`func betweenRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, l, r types.Datum) float64 {`
`605`	`605`	`histBetweenCnt := idx.Histogram.BetweenRowCount(sctx, l, r)`
`606`	`606`	`if idx.StatsVer == statistics.Version1 {`