Skip to content

Commit cec48bb

Browse files
planner: Use realtimeRowCount when all topN collected (#56848)
close #47400
1 parent e2505e9 commit cec48bb

File tree

4 files changed

+87
-5
lines changed

4 files changed

+87
-5
lines changed

pkg/planner/cardinality/BUILD.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ go_test(
5959
data = glob(["testdata/**"]),
6060
embed = [":cardinality"],
6161
flaky = True,
62-
shard_count = 28,
62+
shard_count = 29,
6363
deps = [
6464
"//pkg/config",
6565
"//pkg/domain",

pkg/planner/cardinality/row_count_column.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
package cardinality
1616

1717
import (
18+
"math"
19+
1820
"github.com/pingcap/errors"
1921
"github.com/pingcap/tidb/pkg/planner/planctx"
2022
"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
@@ -173,12 +175,27 @@ func equalRowCountOnColumn(sctx planctx.PlanContext, c *statistics.Column, val t
173175
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
174176
histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
175177
if histNDV <= 0 {
176-
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
178+
// If histNDV is zero, every distinct value is already in the TopN and the histogram is empty. This function uses
179+
// c.NotNullCount rather than c.Histogram.NotNullCount() since the histograms are empty.
180+
//
181+
// If the table hasn't been modified, it's safe to return 0.
177182
if modifyCount == 0 {
178183
return 0, nil
179184
}
180-
return 1, nil
185+
// ELSE calculate an approximate estimate based upon newly inserted rows.
186+
//
187+
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
188+
if c.Histogram.NDV > 0 {
189+
histNDV = float64(c.Histogram.NDV)
190+
} else {
191+
histNDV = math.Sqrt(max(c.NotNullCount(), float64(realtimeRowCount)))
192+
}
193+
// As a conservative estimate - take the smaller of the original totalRows or the additions.
194+
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
195+
totalRowCount := min(c.NotNullCount(), float64(realtimeRowCount)-c.NotNullCount())
196+
return max(1, totalRowCount/histNDV), nil
181197
}
198+
// return the average number of histogram rows per distinct value (both the row count and the NDV exclude topN)
182199
return c.Histogram.NotNullCount() / histNDV, nil
183200
}
184201

pkg/planner/cardinality/row_count_index.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,12 +415,27 @@ func equalRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, b []b
415415
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
416416
histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
417417
if histNDV <= 0 {
418-
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
418+
// If histNDV is zero, every distinct value is already in the TopN and the histogram is empty. This function uses
419+
// idx.TotalRowCount rather than idx.Histogram.NotNullCount() since the histograms are empty.
420+
//
421+
// If the table hasn't been modified, it's safe to return 0.
419422
if modifyCount == 0 {
420423
return 0
421424
}
422-
return 1
425+
// ELSE calculate an approximate estimate based upon newly inserted rows.
426+
//
427+
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
428+
if idx.Histogram.NDV > 0 {
429+
histNDV = float64(idx.Histogram.NDV)
430+
} else {
431+
histNDV = math.Sqrt(max(idx.TotalRowCount(), float64(realtimeRowCount)))
432+
}
433+
// As a conservative estimate - take the smaller of the original totalRows or the additions.
434+
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
435+
totalRowCount := min(idx.TotalRowCount(), float64(realtimeRowCount)-idx.TotalRowCount())
436+
return max(1, totalRowCount/histNDV)
423437
}
438+
// return the average number of histogram rows per distinct value (both the row count and the NDV exclude topN)
424439
return idx.Histogram.NotNullCount() / histNDV
425440
}
426441

pkg/planner/cardinality/selectivity_test.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,56 @@ func TestEstimationForUnknownValues(t *testing.T) {
289289
require.Equal(t, 0.0, count)
290290
}
291291

292+
func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
293+
store, dom := testkit.CreateMockStoreAndDomain(t)
294+
testKit := testkit.NewTestKit(t, store)
295+
testKit.MustExec("use test")
296+
testKit.MustExec("drop table if exists t")
297+
testKit.MustExec("create table t(a int, key idx(a))")
298+
testKit.MustExec("set @@tidb_analyze_version=2")
299+
testKit.MustExec("set @@global.tidb_enable_auto_analyze='OFF'")
300+
for i := 1; i <= 10; i++ {
301+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
302+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
303+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
304+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
305+
testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i))
306+
testKit.MustExec(fmt.Sprintf("insert into t select a from t where a = %d", i))
307+
}
308+
testKit.MustExec("analyze table t")
309+
h := dom.StatsHandle()
310+
require.Nil(t, h.DumpStatsDeltaToKV(true))
311+
312+
table, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
313+
require.NoError(t, err)
314+
statsTbl := h.GetTableStats(table.Meta())
315+
316+
// Search for a found value == 10.0
317+
sctx := mock.NewContext()
318+
col := statsTbl.GetCol(table.Meta().Columns[0].ID)
319+
count, err := cardinality.GetColumnRowCount(sctx, col, getRange(5, 5), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
320+
require.NoError(t, err)
321+
require.Equal(t, 10.0, count)
322+
323+
// Search for a not found value with zero modifyCount. Defaults to count == 1.0
324+
count, err = cardinality.GetColumnRowCount(sctx, col, getRange(11, 11), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
325+
require.NoError(t, err)
326+
require.Equal(t, 1.0, count)
327+
328+
// Add another 200 rows to the table
329+
testKit.MustExec("insert into t select a+10 from t")
330+
testKit.MustExec("insert into t select a+10 from t where a <= 10")
331+
require.Nil(t, h.DumpStatsDeltaToKV(true))
332+
require.Nil(t, h.Update(context.Background(), dom.InfoSchema()))
333+
statsTblnew := h.GetTableStats(table.Meta())
334+
335+
// Search for a not found value based upon statistics - count should be >= 10 and <=40
336+
count, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), statsTblnew.RealtimeCount, statsTblnew.ModifyCount, false)
337+
require.NoError(t, err)
338+
require.Truef(t, count < 41, "expected: between 10 to 40, got: %v", count)
339+
require.Truef(t, count > 9, "expected: between 10 to 40, got: %v", count)
340+
}
341+
292342
func TestEstimationUniqueKeyEqualConds(t *testing.T) {
293343
store, dom := testkit.CreateMockStoreAndDomain(t)
294344
testKit := testkit.NewTestKit(t, store)

0 commit comments

Comments
 (0)