From 00b15fbd6ed01297a0fc7577a9df361a76385e91 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 14 Oct 2024 15:14:53 +0800 Subject: [PATCH 1/7] planner: set min for high risk plan steps --- pkg/planner/cardinality/row_size.go | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pkg/planner/cardinality/row_size.go b/pkg/planner/cardinality/row_size.go index b57939d010ad0..3e6ec882ebd73 100644 --- a/pkg/planner/cardinality/row_size.go +++ b/pkg/planner/cardinality/row_size.go @@ -54,6 +54,7 @@ func GetTableAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols size += 8 /* row_id length */ } } + size = max(0, size) return } @@ -80,6 +81,8 @@ func GetAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols []*e } } } + // Avoid errors related to size less than zero + size = max(0, size) if sessionVars.EnableChunkRPC && !isForScan { // Add 1/8 byte for each column's nullBitMap byte. return size + float64(len(cols))/8 @@ -107,7 +110,7 @@ func GetAvgRowSizeDataInDiskByRows(coll *statistics.HistColl, cols []*expression } } // Add 8 byte for each column's size record. See `DataInDiskByRows` for details. - return size + float64(8*len(cols)) + return max(0, size+float64(8*len(cols))) } // AvgColSize is the average column size of the histogram. These sizes are derived from function `encode` @@ -126,7 +129,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 { histCount := c.TotalRowCount() notNullRatio := 1.0 if histCount > 0 { - notNullRatio = 1.0 - float64(c.NullCount)/histCount + notNullRatio = max(0, 1.0-float64(c.NullCount)/histCount) } switch c.Histogram.Tp.GetType() { case mysql.TypeFloat, mysql.TypeDouble, mysql.TypeDuration, mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp: @@ -137,7 +140,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 { } } // Keep two decimal place. 
- return math.Round(float64(c.TotColSize)/float64(count)*100) / 100 + return max(0, math.Round(float64(c.TotColSize)/float64(count)*100)/100) } // AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode` @@ -147,7 +150,7 @@ func AvgColSizeChunkFormat(c *statistics.Column, count int64) float64 { return 0 } fixedLen := chunk.GetFixedLen(c.Histogram.Tp) - if fixedLen != -1 { + if fixedLen >= 0 { return float64(fixedLen) } // Keep two decimal place. @@ -155,9 +158,9 @@ func AvgColSizeChunkFormat(c *statistics.Column, count int64) float64 { // Minus Log2(avgSize) for unfixed-len type LEN. avgSize := float64(c.TotColSize) / float64(count) if avgSize < 1 { - return math.Round(avgSize*100)/100 + 8 + return max(0, math.Round(avgSize*100)/100) + 8 } - return math.Round((avgSize-math.Log2(avgSize))*100)/100 + 8 + return max(0, math.Round((avgSize-math.Log2(avgSize))*100)/100) + 8 } // AvgColSizeDataInDiskByRows is the average column size of the histogram. These sizes are derived @@ -172,14 +175,14 @@ func AvgColSizeDataInDiskByRows(c *statistics.Column, count int64) float64 { notNullRatio = 1.0 - float64(c.NullCount)/histCount } size := chunk.GetFixedLen(c.Histogram.Tp) - if size != -1 { + if size >= 0 { return float64(size) * notNullRatio } // Keep two decimal place. // Minus Log2(avgSize) for unfixed-len type LEN. 
avgSize := float64(c.TotColSize) / float64(count) if avgSize < 1 { - return math.Round((avgSize)*100) / 100 + return max(0, math.Round((avgSize)*100)/100) } return math.Round((avgSize-math.Log2(avgSize))*100) / 100 } From ef856fabf276eae090c5d58a6026694247a5b2f2 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 14 Oct 2024 15:47:48 +0800 Subject: [PATCH 2/7] add costver --- pkg/planner/cardinality/row_size.go | 1 + pkg/planner/core/plan_cost_ver1.go | 4 +- pkg/planner/core/plan_cost_ver2.go | 58 +++++++++++++++-------------- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/pkg/planner/cardinality/row_size.go b/pkg/planner/cardinality/row_size.go index 3e6ec882ebd73..fd2e9220cb5d7 100644 --- a/pkg/planner/cardinality/row_size.go +++ b/pkg/planner/cardinality/row_size.go @@ -54,6 +54,7 @@ func GetTableAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols size += 8 /* row_id length */ } } + // Avoid errors related to size less than zero size = max(0, size) return } diff --git a/pkg/planner/core/plan_cost_ver1.go b/pkg/planner/core/plan_cost_ver1.go index f6bfc1c236195..c94c406a5bef9 100644 --- a/pkg/planner/core/plan_cost_ver1.go +++ b/pkg/planner/core/plan_cost_ver1.go @@ -1251,10 +1251,10 @@ func getCardinality(operator base.PhysicalPlan, costFlag uint64) float64 { if actualProbeCnt == 0 { return 0 } - return getOperatorActRows(operator) / float64(actualProbeCnt) + return max(0, getOperatorActRows(operator)/float64(actualProbeCnt)) } rows := operator.StatsCount() - if rows == 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 { + if rows <= 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 { // 0 est-row can lead to 0 operator cost which makes plan choice unstable. 
rows = 1 } diff --git a/pkg/planner/core/plan_cost_ver2.go b/pkg/planner/core/plan_cost_ver2.go index c5d5221f3e379..95adaffd1e712 100644 --- a/pkg/planner/core/plan_cost_ver2.go +++ b/pkg/planner/core/plan_cost_ver2.go @@ -103,6 +103,17 @@ func (p *PhysicalProjection) GetPlanCostVer2(taskType property.TaskType, option return p.PlanCostVer2, nil } +const ( + // MinNumRows provides a minimum to avoid underestimation + MinNumRows = 1.0 + // MinRowSize provides a minimum to avoid underestimation + MinRowSize = 2.0 + // TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans + TiFlashStartupRowPenalty = 10000 + // MaxPenaltyRowCount applies a penalty for high risk scans + MaxPenaltyRowCount = 1000 +) + // GetPlanCostVer2 returns the plan-cost of this sub-plan, which is: // plan-cost = rows * log2(row-size) * scan-factor // log2(row-size) is from experiments. @@ -112,7 +123,7 @@ func (p *PhysicalIndexScan) GetPlanCostVer2(taskType property.TaskType, option * } rows := getCardinality(p, option.CostFlag) - rowSize := math.Max(getAvgRowSize(p.StatsInfo(), p.schema.Columns), 2.0) // consider all index columns + rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns) // consider all index columns scanFactor := getTaskScanFactorVer2(p, kv.TiKV, taskType) p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor) @@ -120,15 +131,6 @@ func (p *PhysicalIndexScan) GetPlanCostVer2(taskType property.TaskType, option * return p.PlanCostVer2, nil } -const ( - // MinRowSize provides a minimum to avoid underestimation - MinRowSize = 2.0 - // TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans - TiFlashStartupRowPenalty = 10000 - // MaxPenaltyRowCount applies a penalty for high risk scans - MaxPenaltyRowCount = 1000 -) - // GetPlanCostVer2 returns the plan-cost of this sub-plan, which is: // plan-cost = rows * log2(row-size) * scan-factor // log2(row-size) is from 
experiments. @@ -137,17 +139,19 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option * return p.PlanCostVer2, nil } - rows := getCardinality(p, option.CostFlag) - var columns []*expression.Column if p.StoreType == kv.TiKV { // Assume all columns for TiKV columns = p.tblCols } else { // TiFlash columns = p.schema.Columns } + rows := getCardinality(p, option.CostFlag) rowSize := getAvgRowSize(p.StatsInfo(), columns) - // Ensure rowSize has a reasonable minimum value to avoid underestimation - rowSize = math.Max(rowSize, MinRowSize) + // Ensure rows and rowSize have a reasonable minimum value to avoid underestimation + if !p.isChildOfIndexLookUp { + rows = max(MinNumRows, rows) + rowSize = max(rowSize, MinRowSize) + } scanFactor := getTaskScanFactorVer2(p, p.StoreType, taskType) p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor) @@ -177,7 +181,7 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option * shouldApplyPenalty := hasFullRangeScan && (preferRangeScanCondition || hasHighModifyCount || hasLowEstimate) if shouldApplyPenalty { - newRowCount := math.Min(MaxPenaltyRowCount, math.Max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount))) + newRowCount := math.Min(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount))) p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, newRowCount, rowSize, scanFactor)) } } @@ -235,7 +239,7 @@ func (p *PhysicalTableReader) GetPlanCostVer2(taskType property.TaskType, option } rows := getCardinality(p.tablePlan, option.CostFlag) - rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns) + rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.schema.Columns)) netFactor := getTaskNetFactorVer2(p, taskType) concurrency := float64(p.SCtx().GetSessionVars().DistSQLScanConcurrency()) childType := property.CopSingleReadTaskType @@ -395,8 +399,8 @@ func (p *PhysicalSort) 
GetPlanCostVer2(taskType property.TaskType, option *optim return p.PlanCostVer2, nil } - rows := math.Max(getCardinality(p.Children()[0], option.CostFlag), 1) - rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns) + rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag)) + rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns)) cpuFactor := getTaskCPUFactorVer2(p, taskType) memFactor := getTaskMemFactorVer2(p, taskType) diskFactor := defaultVer2Factors.TiDBDisk @@ -443,14 +447,14 @@ func (p *PhysicalTopN) GetPlanCostVer2(taskType property.TaskType, option *optim return p.PlanCostVer2, nil } - rows := getCardinality(p.Children()[0], option.CostFlag) + rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag)) n := max(1, float64(p.Count+p.Offset)) if n > 10000 { // It's only used to prevent some extreme cases, e.g. `select * from t order by a limit 18446744073709551615`. // For normal cases, considering that `rows` may be under-estimated, better to keep `n` unchanged. 
n = min(n, rows) } - rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns) + rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns)) cpuFactor := getTaskCPUFactorVer2(p, taskType) memFactor := getTaskMemFactorVer2(p, taskType) @@ -499,9 +503,9 @@ func (p *PhysicalHashAgg) GetPlanCostVer2(taskType property.TaskType, option *op return p.PlanCostVer2, nil } - inputRows := getCardinality(p.Children()[0], option.CostFlag) - outputRows := getCardinality(p, option.CostFlag) - outputRowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns) + inputRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag)) + outputRows := max(MinNumRows, getCardinality(p, option.CostFlag)) + outputRowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns)) cpuFactor := getTaskCPUFactorVer2(p, taskType) memFactor := getTaskMemFactorVer2(p, taskType) concurrency := float64(p.SCtx().GetSessionVars().HashAggFinalConcurrency()) @@ -531,8 +535,8 @@ func (p *PhysicalMergeJoin) GetPlanCostVer2(taskType property.TaskType, option * return p.PlanCostVer2, nil } - leftRows := getCardinality(p.Children()[0], option.CostFlag) - rightRows := getCardinality(p.Children()[1], option.CostFlag) + leftRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag)) + rightRows := max(MinNumRows, getCardinality(p.Children()[1], option.CostFlag)) cpuFactor := getTaskCPUFactorVer2(p, taskType) filterCost := costusage.SumCostVer2(filterCostVer2(option, leftRows, p.LeftConditions, cpuFactor), @@ -570,9 +574,9 @@ func (p *PhysicalHashJoin) GetPlanCostVer2(taskType property.TaskType, option *o build, probe = probe, build buildFilters, probeFilters = probeFilters, buildFilters } - buildRows := getCardinality(build, option.CostFlag) + buildRows := max(MinNumRows, getCardinality(build, option.CostFlag)) probeRows := getCardinality(probe, option.CostFlag) - buildRowSize := getAvgRowSize(build.StatsInfo(), build.Schema().Columns) + buildRowSize := 
max(MinRowSize, getAvgRowSize(build.StatsInfo(), build.Schema().Columns)) tidbConcurrency := float64(p.Concurrency) mppConcurrency := float64(3) // TODO: remove this empirical value cpuFactor := getTaskCPUFactorVer2(p, taskType) From ae3ce06217e136b6d316c15a14a107741ec2a3b2 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 14 Oct 2024 16:10:35 +0800 Subject: [PATCH 3/7] testcase1 --- .../integrationtest/r/explain_complex.result | 26 +++++++++---------- .../r/planner/core/plan_cost_ver2.result | 6 ++--- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/integrationtest/r/explain_complex.result b/tests/integrationtest/r/explain_complex.result index c4e83774e1355..297cbfec24365 100644 --- a/tests/integrationtest/r/explain_complex.result +++ b/tests/integrationtest/r/explain_complex.result @@ -251,19 +251,19 @@ Sort 1.00 root explain_complex.org_department.left_value └─HashAgg 1.00 root group by:explain_complex.org_department.id, funcs:firstrow(explain_complex.org_department.id)->explain_complex.org_department.id, funcs:firstrow(explain_complex.org_department.ctx)->explain_complex.org_department.ctx, funcs:firstrow(explain_complex.org_department.name)->explain_complex.org_department.name, funcs:firstrow(explain_complex.org_department.left_value)->explain_complex.org_department.left_value, funcs:firstrow(explain_complex.org_department.right_value)->explain_complex.org_department.right_value, funcs:firstrow(explain_complex.org_department.depth)->explain_complex.org_department.depth, funcs:firstrow(explain_complex.org_department.leader_id)->explain_complex.org_department.leader_id, funcs:firstrow(explain_complex.org_department.status)->explain_complex.org_department.status, funcs:firstrow(explain_complex.org_department.created_on)->explain_complex.org_department.created_on, funcs:firstrow(explain_complex.org_department.updated_on)->explain_complex.org_department.updated_on └─Selection 0.01 root or(eq(explain_complex.org_employee_position.user_id, 62), 
or(eq(explain_complex.org_department.id, 20), eq(explain_complex.org_department.id, 20))) └─HashJoin 0.02 root left outer join, equal:[eq(explain_complex.org_position.id, explain_complex.org_employee_position.position_id)] - ├─IndexJoin(Build) 0.01 root left outer join, inner:IndexLookUp, outer key:explain_complex.org_department.id, inner key:explain_complex.org_position.department_id, equal cond:eq(explain_complex.org_department.id, explain_complex.org_position.department_id) - │ ├─IndexLookUp(Build) 0.01 root - │ │ ├─IndexRangeScan(Build) 10.00 cop[tikv] table:d, index:org_department_ctx_index(ctx) range:[1,1], keep order:false, stats:pseudo - │ │ └─Selection(Probe) 0.01 cop[tikv] eq(explain_complex.org_department.status, 1000) - │ │ └─TableRowIDScan 10.00 cop[tikv] table:d keep order:false, stats:pseudo - │ └─IndexLookUp(Probe) 0.01 root - │ ├─Selection(Build) 12.50 cop[tikv] not(isnull(explain_complex.org_position.department_id)) - │ │ └─IndexRangeScan 12.51 cop[tikv] table:p, index:org_position_department_id_index(department_id) range: decided by [eq(explain_complex.org_position.department_id, explain_complex.org_department.id)], keep order:false, stats:pseudo - │ └─Selection(Probe) 0.01 cop[tikv] eq(explain_complex.org_position.status, 1000) - │ └─TableRowIDScan 12.50 cop[tikv] table:p keep order:false, stats:pseudo - └─TableReader(Probe) 9.99 root data:Selection - └─Selection 9.99 cop[tikv] eq(explain_complex.org_employee_position.status, 1000), not(isnull(explain_complex.org_employee_position.position_id)) - └─TableFullScan 10000.00 cop[tikv] table:ep keep order:false, stats:pseudo + ├─TableReader(Build) 9.99 root data:Selection + │ └─Selection 9.99 cop[tikv] eq(explain_complex.org_employee_position.status, 1000), not(isnull(explain_complex.org_employee_position.position_id)) + │ └─TableFullScan 10000.00 cop[tikv] table:ep keep order:false, stats:pseudo + └─IndexJoin(Probe) 0.01 root left outer join, inner:IndexLookUp, outer 
key:explain_complex.org_department.id, inner key:explain_complex.org_position.department_id, equal cond:eq(explain_complex.org_department.id, explain_complex.org_position.department_id) + ├─IndexLookUp(Build) 0.01 root + │ ├─IndexRangeScan(Build) 10.00 cop[tikv] table:d, index:org_department_ctx_index(ctx) range:[1,1], keep order:false, stats:pseudo + │ └─Selection(Probe) 0.01 cop[tikv] eq(explain_complex.org_department.status, 1000) + │ └─TableRowIDScan 10.00 cop[tikv] table:d keep order:false, stats:pseudo + └─IndexLookUp(Probe) 0.01 root + ├─Selection(Build) 12.50 cop[tikv] not(isnull(explain_complex.org_position.department_id)) + │ └─IndexRangeScan 12.51 cop[tikv] table:p, index:org_position_department_id_index(department_id) range: decided by [eq(explain_complex.org_position.department_id, explain_complex.org_department.id)], keep order:false, stats:pseudo + └─Selection(Probe) 0.01 cop[tikv] eq(explain_complex.org_position.status, 1000) + └─TableRowIDScan 12.50 cop[tikv] table:p keep order:false, stats:pseudo set tidb_cost_model_version=1; create table Tab_A (id int primary key,bid int,cid int,name varchar(20),type varchar(20),num int,amt decimal(11,2)); create table Tab_B (id int primary key,name varchar(20)); diff --git a/tests/integrationtest/r/planner/core/plan_cost_ver2.result b/tests/integrationtest/r/planner/core/plan_cost_ver2.result index 50f366d8b6229..0da0be65c36af 100644 --- a/tests/integrationtest/r/planner/core/plan_cost_ver2.result +++ b/tests/integrationtest/r/planner/core/plan_cost_ver2.result @@ -254,7 +254,7 @@ explain format='true_card_cost' select * from t; Error 1105 (HY000): 'explain format=true_card_cost' cannot work without 'analyze', please use 'explain analyze format=true_card_cost' explain analyze format='true_card_cost' select * from t where a<3; id estRows estCost costFormula actRows task access object execution info operator info memory disk -TableReader_7 3323.33 13566.67 (((cpu(0*filters(1)*tikv_cpu_factor(49.9))) + 
((scan(0*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7))))) + (net(0*rowsize(16)*tidb_kv_net_factor(3.96))))/15.00 0 root -└─Selection_6 3323.33 203500.00 (cpu(0*filters(1)*tikv_cpu_factor(49.9))) + ((scan(0*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7)))) 0 cop[tikv] - └─TableFullScan_5 10000.00 203500.00 (scan(0*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7))) 0 cop[tikv] table:t +TableReader_7 3323.33 13580.23 (((cpu(0*filters(1)*tikv_cpu_factor(49.9))) + ((scan(1*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7))))) + (net(0*rowsize(16)*tidb_kv_net_factor(3.96))))/15.00 0 root +└─Selection_6 3323.33 203703.50 (cpu(0*filters(1)*tikv_cpu_factor(49.9))) + ((scan(1*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7)))) 0 cop[tikv] + └─TableFullScan_5 10000.00 203703.50 (scan(1*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7))) 0 cop[tikv] table:t set @@tidb_cost_model_version=DEFAULT; From c9b9581a150d1348c33b1daf8970a6f62205cb5d Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 14 Oct 2024 16:56:52 +0800 Subject: [PATCH 4/7] testcase2 --- .../core/casetest/testdata/integration_suite_out.json | 2 +- .../core/casetest/testdata/plan_normalized_suite_out.json | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/planner/core/casetest/testdata/integration_suite_out.json b/pkg/planner/core/casetest/testdata/integration_suite_out.json index ffe733cb7fec8..c90d518f16176 100644 --- a/pkg/planner/core/casetest/testdata/integration_suite_out.json +++ b/pkg/planner/core/casetest/testdata/integration_suite_out.json @@ -165,7 +165,7 @@ { "SQL": "explain format = 'verbose' select (2) in (select /*+ read_from_storage(tiflash[t1]) */ count(*) from t1) from (select t.b < (select /*+ 
read_from_storage(tiflash[t2]) */ t.b from t2 limit 1 ) from t3 t) t; -- we do generate the agg pushed-down plan of mpp, but cost-cmp failed", "Plan": [ - "HashJoin_17 3.00 32770.77 root CARTESIAN left outer semi join", + "HashJoin_17 3.00 32781.07 root CARTESIAN left outer semi join", "├─Selection_22(Build) 0.80 31149.25 root eq(2, Column#18)", "│ └─StreamAgg_29 1.00 31099.35 root funcs:count(1)->Column#18", "│ └─TableReader_41 3.00 30949.65 root MppVersion: 2, data:ExchangeSender_40", diff --git a/pkg/planner/core/casetest/testdata/plan_normalized_suite_out.json b/pkg/planner/core/casetest/testdata/plan_normalized_suite_out.json index eace2a1ef0f2f..a6ffb4afdc27a 100644 --- a/pkg/planner/core/casetest/testdata/plan_normalized_suite_out.json +++ b/pkg/planner/core/casetest/testdata/plan_normalized_suite_out.json @@ -419,8 +419,8 @@ " TableReader root ", " └─ExchangeSender cop[tiflash] ", " └─Projection cop[tiflash] test.t1.a", - " └─Selection cop[tiflash] gt(test.t1.a, ?)", - " └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.b, ?), keep order:false" + " └─Selection cop[tiflash] gt(test.t1.b, ?)", + " └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.a, ?), keep order:false" ] }, { @@ -445,8 +445,8 @@ "Plan": [ " TableReader root ", " └─ExchangeSender cop[tiflash] ", - " └─Selection cop[tiflash] gt(test.t1.a, ?), or(lt(test.t1.a, ?), lt(test.t1.b, ?))", - " └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.b, ?), keep order:false" + " └─Selection cop[tiflash] gt(test.t1.b, ?), or(lt(test.t1.a, ?), lt(test.t1.b, ?))", + " └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.a, ?), keep order:false" ] }, { From 0300107d5510eae61660118934224abb70c408fa Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 14 Oct 2024 17:56:39 +0800 Subject: [PATCH 5/7] testcase3 --- .../partition/testdata/partition_pruner_out.json | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json index 6e154007ec9d3..11d9e2650f85a 100644 --- a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json +++ b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json @@ -470,12 +470,12 @@ "Plan": [ "Projection 0.00 root test_partition.t1.id, test_partition.t1.a, test_partition.t1.b, test_partition.t2.id, test_partition.t2.a, test_partition.t2.b", "└─HashJoin 0.00 root CARTESIAN inner join", - " ├─TableReader(Build) 0.00 root partition:p1 data:Selection", - " │ └─Selection 0.00 cop[tikv] eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)", - " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", - " └─TableReader(Probe) 0.01 root partition:p0 data:Selection", - " └─Selection 0.01 cop[tikv] eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))", - " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + " ├─TableReader(Build) 0.01 root partition:p0 data:Selection", + " │ └─Selection 0.01 cop[tikv] eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 0.00 root partition:p1 data:Selection", + " └─Selection 0.00 cop[tikv] eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" ], "IndexPlan": [ "HashJoin 0.03 root CARTESIAN inner join", From 02e01c5fc36d07c19356c3560079976c586fb4b1 Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 14 Oct 2024 18:31:41 +0800 Subject: [PATCH 6/7] testcase4 --- 
.../testdata/plan_stats_suite_out.json | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json index e61f53ded7ca2..f71b3e2fed5f0 100644 --- a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json +++ b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json @@ -130,16 +130,16 @@ "Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c", "Result": [ "HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]", - "├─IndexJoin(Build) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)", - "│ ├─TableReader(Build) 0.33 root data:Selection", - "│ │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))", - "│ │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]", - "│ └─IndexLookUp(Probe) 0.33 root partition:p0 ", - "│ ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))", - "│ │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]", - "│ └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]", - "└─TableReader(Probe) 1.00 root data:TableRangeScan", - " └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]" + "├─TableReader(Build) 1.00 root data:TableRangeScan", + "│ └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]", + "└─IndexJoin(Probe) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)", + " 
├─TableReader(Build) 0.33 root data:Selection", + " │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))", + " │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]", + " └─IndexLookUp(Probe) 0.33 root partition:p0 ", + " ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))", + " │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]", + " └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]" ] } ] From 865eba1d7a1f29d03e2381f85f8b0221eab59f3d Mon Sep 17 00:00:00 2001 From: tpp Date: Mon, 28 Oct 2024 06:18:47 -0700 Subject: [PATCH 7/7] review comments --- pkg/planner/core/plan_cost_ver2.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/planner/core/plan_cost_ver2.go b/pkg/planner/core/plan_cost_ver2.go index 95adaffd1e712..f62628f1abd84 100644 --- a/pkg/planner/core/plan_cost_ver2.go +++ b/pkg/planner/core/plan_cost_ver2.go @@ -104,9 +104,12 @@ func (p *PhysicalProjection) GetPlanCostVer2(taskType property.TaskType, option } const ( - // MinNumRows provides a minimum to avoid underestimation + // MinNumRows provides a minimum to avoid underestimation. As selectivity estimation approaches + // zero, all plan choices result in a low cost - making it difficult to differentiate plan choices. + // A low value of 1.0 here is used for most accesses (non-probe accesses) to reduce this risk. MinNumRows = 1.0 - // MinRowSize provides a minimum to avoid underestimation + // MinRowSize provides a minimum row size to ensure that any adjustment or calculation + // in costing does not go below this value. 2.0 is used as a reasonable lowest row size. 
MinRowSize = 2.0 // TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans TiFlashStartupRowPenalty = 10000