Skip to content

Commit 47a990e

Browse files
planner: adjust estimated rows to account for modified rows (#50970)
close #47523
1 parent 893104a commit 47a990e

File tree

4 files changed

+53
-45
lines changed

4 files changed

+53
-45
lines changed

pkg/planner/cardinality/row_count_column.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -282,15 +282,17 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
282282
cnt = mathutil.Clamp(cnt, 0, c.TotalRowCount())
283283

284284
// If the current table row count has changed, we should scale the row count accordingly.
285-
cnt *= c.GetIncreaseFactor(realtimeRowCount)
285+
increaseFactor := c.GetIncreaseFactor(realtimeRowCount)
286+
cnt *= increaseFactor
286287

287-
histNDV := c.NDV
288-
if c.StatsVer == statistics.Version2 {
289-
histNDV = histNDV - int64(c.TopN.Num())
290-
}
291288
// handling the out-of-range part
292289
if (c.OutOfRange(lowVal) && !lowVal.IsNull()) || c.OutOfRange(highVal) {
293-
cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, modifyCount, histNDV)
290+
histNDV := c.NDV
291+
// Exclude the TopN values from the NDV in Stats Version 2
292+
if c.StatsVer == statistics.Version2 {
293+
histNDV -= int64(c.TopN.Num())
294+
}
295+
cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, modifyCount, histNDV, increaseFactor)
294296
}
295297

296298
if debugTrace {

pkg/planner/cardinality/row_count_index.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,
222222
defer debugtrace.LeaveContextCommon(sctx)
223223
}
224224
totalCount := float64(0)
225-
isSingleCol := len(idx.Info.Columns) == 1
225+
isSingleColIdx := len(idx.Info.Columns) == 1
226226
for _, indexRange := range indexRanges {
227227
var count float64
228228
lb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...)
@@ -278,7 +278,7 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,
278278
l := types.NewBytesDatum(lb)
279279
r := types.NewBytesDatum(rb)
280280
lowIsNull := bytes.Equal(lb, nullKeyBytes)
281-
if isSingleCol && lowIsNull {
281+
if isSingleColIdx && lowIsNull {
282282
count += float64(idx.Histogram.NullCount)
283283
}
284284
expBackoffSuccess := false
@@ -325,15 +325,24 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,
325325
}
326326

327327
// If the current table row count has changed, we should scale the row count accordingly.
328-
count *= idx.GetIncreaseFactor(realtimeRowCount)
328+
increaseFactor := idx.GetIncreaseFactor(realtimeRowCount)
329+
count *= increaseFactor
329330

330-
histNDV := idx.NDV
331-
if idx.StatsVer == statistics.Version2 {
332-
histNDV = histNDV - int64(idx.TopN.Num())
333-
}
334331
// handling the out-of-range part
335-
if (outOfRangeOnIndex(idx, l) && !(isSingleCol && lowIsNull)) || outOfRangeOnIndex(idx, r) {
336-
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV)
332+
if (outOfRangeOnIndex(idx, l) && !(isSingleColIdx && lowIsNull)) || outOfRangeOnIndex(idx, r) {
333+
histNDV := idx.NDV
334+
// Exclude the TopN in Stats Version 2
335+
if idx.StatsVer == statistics.Version2 {
336+
c, ok := coll.Columns[idx.Histogram.ID]
337+
// If this is a single-column range on a multi-column index, use the column's NDV rather than the index NDV
338+
isSingleColRange := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == 1
339+
if isSingleColRange && !isSingleColIdx && ok && c != nil && c.Histogram.NDV > 0 {
340+
histNDV = c.Histogram.NDV - int64(c.TopN.Num())
341+
} else {
342+
histNDV -= int64(idx.TopN.Num())
343+
}
344+
}
345+
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV, increaseFactor)
337346
}
338347

339348
if debugTrace {

pkg/planner/cardinality/testdata/cardinality_suite_out.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
{
2525
"Start": 800,
2626
"End": 900,
27-
"Count": 723.504166655054
27+
"Count": 735.504166655054
2828
},
2929
{
3030
"Start": 900,
@@ -79,7 +79,7 @@
7979
{
8080
"Start": 800,
8181
"End": 1000,
82-
"Count": 1181.696869573942
82+
"Count": 1193.696869573942
8383
},
8484
{
8585
"Start": 900,
@@ -104,7 +104,7 @@
104104
{
105105
"Start": 200,
106106
"End": 400,
107-
"Count": 1190.2788209899081
107+
"Count": 1237.5288209899081
108108
},
109109
{
110110
"Start": 200,

pkg/statistics/histogram.go

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -937,7 +937,7 @@ func (hg *Histogram) OutOfRange(val types.Datum) bool {
937937
func (hg *Histogram) OutOfRangeRowCount(
938938
sctx context.PlanContext,
939939
lDatum, rDatum *types.Datum,
940-
modifyCount, histNDV int64,
940+
modifyCount, histNDV int64, increaseFactor float64,
941941
) (result float64) {
942942
debugTrace := sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace
943943
if debugTrace {
@@ -1052,38 +1052,35 @@ func (hg *Histogram) OutOfRangeRowCount(
10521052
rightPercent = (math.Pow(boundR-actualL, 2) - math.Pow(boundR-actualR, 2)) / math.Pow(histWidth, 2)
10531053
}
10541054

1055-
totalPercent := leftPercent*0.5 + rightPercent*0.5
1056-
if totalPercent > 1 {
1057-
totalPercent = 1
1058-
}
1055+
totalPercent := min(leftPercent*0.5+rightPercent*0.5, 1.0)
10591056
rowCount = totalPercent * hg.NotNullCount()
10601057

1061-
// Upper bound logic
1058+
// Upper & lower bound logic.
1059+
upperBound := rowCount
1060+
if histNDV > 0 {
1061+
upperBound = hg.NotNullCount() / float64(histNDV)
1062+
}
10621063

10631064
allowUseModifyCount := sctx.GetSessionVars().GetOptObjective() != variable.OptObjectiveDeterminate
1064-
// Use the modifyCount as the upper bound. Note that modifyCount contains insert, delete and update. So this is
1065-
// a rather loose upper bound.
1066-
// There are some scenarios where we need to handle out-of-range estimation after both insert and delete happen.
1067-
// But we don't know how many increases are in the modifyCount. So we have to use this loose bound to ensure it
1068-
// can produce a reasonable results in this scenario.
1069-
if rowCount > float64(modifyCount) && allowUseModifyCount {
1070-
return float64(modifyCount)
1071-
}
1072-
1073-
// In OptObjectiveDeterminate mode, we can't rely on the modify count anymore.
1074-
// An upper bound is necessary to make the estimation make sense for predicates with bound on only one end, like a > 1.
1075-
// But it's impossible to have a reliable upper bound in all cases.
1076-
// We use 1/NDV here (only the Histogram part is considered) and it seems reasonable and good enough for now.
1065+
10771066
if !allowUseModifyCount {
1078-
var upperBound float64
1079-
if histNDV > 0 {
1080-
upperBound = hg.NotNullCount() / float64(histNDV)
1081-
}
1082-
if rowCount > upperBound {
1083-
return upperBound
1084-
}
1067+
// In OptObjectiveDeterminate mode, we can't rely on the modify count anymore.
1068+
// An upper bound is necessary to make the estimation make sense for predicates with bound on only one end, like a > 1.
1069+
// We use 1/NDV here (only the Histogram part is considered) and it seems reasonable and good enough for now.
1070+
return min(rowCount, upperBound)
1071+
}
1072+
1073+
// If the modifyCount is large (compared to the original table rows), then any out-of-range estimate is unreliable.
1074+
// Assume at least 1/NDV of the not-null rows (NotNullCount / histNDV) is returned.
1075+
if float64(modifyCount) > hg.NotNullCount() && rowCount < upperBound {
1076+
rowCount = upperBound
1077+
} else if rowCount < upperBound {
1078+
// Adjust by increaseFactor if our estimate is low
1079+
rowCount *= increaseFactor
10851080
}
1086-
return rowCount
1081+
1082+
// Use modifyCount as a final bound
1083+
return min(rowCount, float64(modifyCount))
10871084
}
10881085

10891086
// Copy deep copies the histogram.

0 commit comments

Comments
 (0)