Skip to content

Commit 469af9d

Browse files
authored
planner: Use realtimeRowCount when all topN collected (pingcap#56848) (pingcap#58174)
close pingcap#47400
1 parent 899e5cc commit 469af9d

File tree

4 files changed

+87
-5
lines changed

4 files changed

+87
-5
lines changed

pkg/planner/cardinality/BUILD.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ go_test(
6060
data = glob(["testdata/**"]),
6161
embed = [":cardinality"],
6262
flaky = True,
63-
shard_count = 26,
63+
shard_count = 27,
6464
deps = [
6565
"//pkg/config",
6666
"//pkg/domain",

pkg/planner/cardinality/row_count_column.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
package cardinality
1616

1717
import (
18+
"math"
19+
1820
"github.com/pingcap/errors"
1921
"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
2022
"github.com/pingcap/tidb/pkg/planner/util/fixcontrol"
@@ -173,12 +175,27 @@ func equalRowCountOnColumn(sctx sessionctx.Context, c *statistics.Column, val ty
173175
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
174176
histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
175177
if histNDV <= 0 {
176-
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
178+
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
179+
// c.NotNullCount rather than c.Histogram.NotNullCount() since the histograms are empty.
180+
//
181+
// If the table hasn't been modified, it's safe to return 0.
177182
if modifyCount == 0 {
178183
return 0, nil
179184
}
180-
return 1, nil
185+
// ELSE calculate an approximate estimate based upon newly inserted rows.
186+
//
187+
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
188+
if c.Histogram.NDV > 0 {
189+
histNDV = float64(c.Histogram.NDV)
190+
} else {
191+
histNDV = math.Sqrt(max(c.NotNullCount(), float64(realtimeRowCount)))
192+
}
193+
// As a conservative estimate - take the smaller of the original totalRows or the additions.
194+
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
195+
totalRowCount := min(c.NotNullCount(), float64(realtimeRowCount)-c.NotNullCount())
196+
return max(1, totalRowCount/histNDV), nil
181197
}
198+
// return the average histogram rows (which excludes topN) and NDV that excluded topN
182199
return c.Histogram.NotNullCount() / histNDV, nil
183200
}
184201

pkg/planner/cardinality/row_count_index.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,12 +402,27 @@ func equalRowCountOnIndex(sctx sessionctx.Context, idx *statistics.Index, b []by
402402
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
403403
histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
404404
if histNDV <= 0 {
405-
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
405+
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
406+
// idx.TotalRowCount rather than idx.Histogram.NotNullCount() since the histograms are empty.
407+
//
408+
// If the table hasn't been modified, it's safe to return 0.
406409
if modifyCount == 0 {
407410
return 0
408411
}
409-
return 1
412+
// ELSE calculate an approximate estimate based upon newly inserted rows.
413+
//
414+
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
415+
if idx.Histogram.NDV > 0 {
416+
histNDV = float64(idx.Histogram.NDV)
417+
} else {
418+
histNDV = math.Sqrt(max(idx.TotalRowCount(), float64(realtimeRowCount)))
419+
}
420+
// As a conservative estimate - take the smaller of the original totalRows or the additions.
421+
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
422+
totalRowCount := min(idx.TotalRowCount(), float64(realtimeRowCount)-idx.TotalRowCount())
423+
return max(1, totalRowCount/histNDV)
410424
}
425+
// return the average histogram rows (which excludes topN) and NDV that excluded topN
411426
return idx.Histogram.NotNullCount() / histNDV
412427
}
413428

pkg/planner/cardinality/selectivity_test.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,3 +1192,53 @@ func TestIgnoreRealtimeStats(t *testing.T) {
11921192
testKit.MustExec("set @@tidb_opt_objective = 'determinate'")
11931193
testKit.MustQuery("explain select * from t where a = 1 and b > 2").Check(testkit.Rows(analyzedPlan...))
11941194
}
1195+
1196+
func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
1197+
store, dom := testkit.CreateMockStoreAndDomain(t)
1198+
testKit := testkit.NewTestKit(t, store)
1199+
testKit.MustExec("use test")
1200+
testKit.MustExec("drop table if exists t")
1201+
testKit.MustExec("create table t(a int, key idx(a))")
1202+
testKit.MustExec("set @@tidb_analyze_version=2")
1203+
testKit.MustExec("set @@global.tidb_enable_auto_analyze='OFF'")
1204+
for i := 1; i <= 10; i++ {
1205+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
1206+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
1207+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
1208+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
1209+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
1210+
testKit.MustExec(fmt.Sprintf("insert into t select a from t where a = %d", i))
1211+
}
1212+
testKit.MustExec("analyze table t")
1213+
h := dom.StatsHandle()
1214+
require.Nil(t, h.DumpStatsDeltaToKV(true))
1215+
1216+
table, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
1217+
require.NoError(t, err)
1218+
statsTbl := h.GetTableStats(table.Meta())
1219+
1220+
// Search for a found value == 10.0
1221+
sctx := mock.NewContext()
1222+
col := statsTbl.Columns[1]
1223+
count, err := cardinality.GetColumnRowCount(sctx, col, getRange(5, 5), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
1224+
require.NoError(t, err)
1225+
require.Equal(t, 10.0, count)
1226+
1227+
// Search for a not found value with zero modifyCount. Defaults to count == 1.0
1228+
count, err = cardinality.GetColumnRowCount(sctx, col, getRange(11, 11), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
1229+
require.NoError(t, err)
1230+
require.Equal(t, 1.0, count)
1231+
1232+
// Add another 200 rows to the table
1233+
testKit.MustExec("insert into t select a+10 from t")
1234+
testKit.MustExec("insert into t select a+10 from t where a <= 10")
1235+
require.Nil(t, h.DumpStatsDeltaToKV(true))
1236+
require.Nil(t, h.Update(dom.InfoSchema()))
1237+
statsTblnew := h.GetTableStats(table.Meta())
1238+
1239+
// Search for a not found value based upon statistics - count should be >= 10 and <=40
1240+
count, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), statsTblnew.RealtimeCount, statsTblnew.ModifyCount, false)
1241+
require.NoError(t, err)
1242+
require.Truef(t, count < 41, "expected: between 10 to 40, got: %v", count)
1243+
require.Truef(t, count > 9, "expected: between 10 to 40, got: %v", count)
1244+
}

0 commit comments

Comments
 (0)