From 00b15fbd6ed01297a0fc7577a9df361a76385e91 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Mon, 14 Oct 2024 15:14:53 +0800
Subject: [PATCH 1/7] planner: set min for high risk plan steps

---
 pkg/planner/cardinality/row_size.go | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/pkg/planner/cardinality/row_size.go b/pkg/planner/cardinality/row_size.go
index b57939d010ad0..3e6ec882ebd73 100644
--- a/pkg/planner/cardinality/row_size.go
+++ b/pkg/planner/cardinality/row_size.go
@@ -54,6 +54,7 @@ func GetTableAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols
 			size += 8 /* row_id length */
 		}
 	}
+	size = max(0, size)
 	return
 }
 
@@ -80,6 +81,8 @@ func GetAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols []*e
 			}
 		}
 	}
+	// Avoid errors related to size less than zero
+	size = max(0, size)
 	if sessionVars.EnableChunkRPC && !isForScan {
 		// Add 1/8 byte for each column's nullBitMap byte.
 		return size + float64(len(cols))/8
@@ -107,7 +110,7 @@ func GetAvgRowSizeDataInDiskByRows(coll *statistics.HistColl, cols []*expression
 		}
 	}
 	// Add 8 byte for each column's size record. See `DataInDiskByRows` for details.
-	return size + float64(8*len(cols))
+	return max(0, size+float64(8*len(cols)))
 }
 
 // AvgColSize is the average column size of the histogram. These sizes are derived from function `encode`
@@ -126,7 +129,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 {
 	histCount := c.TotalRowCount()
 	notNullRatio := 1.0
 	if histCount > 0 {
-		notNullRatio = 1.0 - float64(c.NullCount)/histCount
+		notNullRatio = max(0, 1.0-float64(c.NullCount)/histCount)
 	}
 	switch c.Histogram.Tp.GetType() {
 	case mysql.TypeFloat, mysql.TypeDouble, mysql.TypeDuration, mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
@@ -137,7 +140,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 {
 		}
 	}
 	// Keep two decimal place.
-	return math.Round(float64(c.TotColSize)/float64(count)*100) / 100
+	return max(0, math.Round(float64(c.TotColSize)/float64(count)*100)/100)
 }
 
 // AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode`
@@ -147,7 +150,7 @@ func AvgColSizeChunkFormat(c *statistics.Column, count int64) float64 {
 		return 0
 	}
 	fixedLen := chunk.GetFixedLen(c.Histogram.Tp)
-	if fixedLen != -1 {
+	if fixedLen >= 0 {
 		return float64(fixedLen)
 	}
 	// Keep two decimal place.
@@ -155,9 +158,9 @@ func AvgColSizeChunkFormat(c *statistics.Column, count int64) float64 {
 	// Minus Log2(avgSize) for unfixed-len type LEN.
 	avgSize := float64(c.TotColSize) / float64(count)
 	if avgSize < 1 {
-		return math.Round(avgSize*100)/100 + 8
+		return max(0, math.Round(avgSize*100)/100) + 8
 	}
-	return math.Round((avgSize-math.Log2(avgSize))*100)/100 + 8
+	return max(0, math.Round((avgSize-math.Log2(avgSize))*100)/100) + 8
 }
 
 // AvgColSizeDataInDiskByRows is the average column size of the histogram. These sizes are derived
@@ -172,14 +175,14 @@ func AvgColSizeDataInDiskByRows(c *statistics.Column, count int64) float64 {
 		notNullRatio = 1.0 - float64(c.NullCount)/histCount
 	}
 	size := chunk.GetFixedLen(c.Histogram.Tp)
-	if size != -1 {
+	if size >= 0 {
 		return float64(size) * notNullRatio
 	}
 	// Keep two decimal place.
 	// Minus Log2(avgSize) for unfixed-len type LEN.
 	avgSize := float64(c.TotColSize) / float64(count)
 	if avgSize < 1 {
-		return math.Round((avgSize)*100) / 100
+		return max(0, math.Round((avgSize)*100)/100)
 	}
 	return math.Round((avgSize-math.Log2(avgSize))*100) / 100
 }

From ef856fabf276eae090c5d58a6026694247a5b2f2 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Mon, 14 Oct 2024 15:47:48 +0800
Subject: [PATCH 2/7] add costver

---
 pkg/planner/cardinality/row_size.go |  1 +
 pkg/planner/core/plan_cost_ver1.go  |  4 +-
 pkg/planner/core/plan_cost_ver2.go  | 58 +++++++++++++++--------------
 3 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/pkg/planner/cardinality/row_size.go b/pkg/planner/cardinality/row_size.go
index 3e6ec882ebd73..fd2e9220cb5d7 100644
--- a/pkg/planner/cardinality/row_size.go
+++ b/pkg/planner/cardinality/row_size.go
@@ -54,6 +54,7 @@ func GetTableAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols
 			size += 8 /* row_id length */
 		}
 	}
+	// Avoid errors related to size less than zero
 	size = max(0, size)
 	return
 }
diff --git a/pkg/planner/core/plan_cost_ver1.go b/pkg/planner/core/plan_cost_ver1.go
index f6bfc1c236195..c94c406a5bef9 100644
--- a/pkg/planner/core/plan_cost_ver1.go
+++ b/pkg/planner/core/plan_cost_ver1.go
@@ -1251,10 +1251,10 @@ func getCardinality(operator base.PhysicalPlan, costFlag uint64) float64 {
 		if actualProbeCnt == 0 {
 			return 0
 		}
-		return getOperatorActRows(operator) / float64(actualProbeCnt)
+		return max(0, getOperatorActRows(operator)/float64(actualProbeCnt))
 	}
 	rows := operator.StatsCount()
-	if rows == 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 {
+	if rows <= 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 {
 		// 0 est-row can lead to 0 operator cost which makes plan choice unstable.
 		rows = 1
 	}
diff --git a/pkg/planner/core/plan_cost_ver2.go b/pkg/planner/core/plan_cost_ver2.go
index c5d5221f3e379..95adaffd1e712 100644
--- a/pkg/planner/core/plan_cost_ver2.go
+++ b/pkg/planner/core/plan_cost_ver2.go
@@ -103,6 +103,17 @@ func (p *PhysicalProjection) GetPlanCostVer2(taskType property.TaskType, option
 	return p.PlanCostVer2, nil
 }
 
+const (
+	// MinNumRows provides a minimum to avoid underestimation
+	MinNumRows = 1.0
+	// MinRowSize provides a minimum to avoid underestimation
+	MinRowSize = 2.0
+	// TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans
+	TiFlashStartupRowPenalty = 10000
+	// MaxPenaltyRowCount applies a penalty for high risk scans
+	MaxPenaltyRowCount = 1000
+)
+
 // GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
 // plan-cost = rows * log2(row-size) * scan-factor
 // log2(row-size) is from experiments.
@@ -112,7 +123,7 @@ func (p *PhysicalIndexScan) GetPlanCostVer2(taskType property.TaskType, option *
 	}
 
 	rows := getCardinality(p, option.CostFlag)
-	rowSize := math.Max(getAvgRowSize(p.StatsInfo(), p.schema.Columns), 2.0) // consider all index columns
+	rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns) // consider all index columns
 	scanFactor := getTaskScanFactorVer2(p, kv.TiKV, taskType)
 
 	p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
@@ -120,15 +131,6 @@ func (p *PhysicalIndexScan) GetPlanCostVer2(taskType property.TaskType, option *
 	return p.PlanCostVer2, nil
 }
 
-const (
-	// MinRowSize provides a minimum to avoid underestimation
-	MinRowSize = 2.0
-	// TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans
-	TiFlashStartupRowPenalty = 10000
-	// MaxPenaltyRowCount applies a penalty for high risk scans
-	MaxPenaltyRowCount = 1000
-)
-
 // GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
 // plan-cost = rows * log2(row-size) * scan-factor
 // log2(row-size) is from experiments.
@@ -137,17 +139,19 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
 		return p.PlanCostVer2, nil
 	}
 
-	rows := getCardinality(p, option.CostFlag)
-
 	var columns []*expression.Column
 	if p.StoreType == kv.TiKV { // Assume all columns for TiKV
 		columns = p.tblCols
 	} else { // TiFlash
 		columns = p.schema.Columns
 	}
+	rows := getCardinality(p, option.CostFlag)
 	rowSize := getAvgRowSize(p.StatsInfo(), columns)
-	// Ensure rowSize has a reasonable minimum value to avoid underestimation
-	rowSize = math.Max(rowSize, MinRowSize)
+	// Ensure rows and rowSize have a reasonable minimum value to avoid underestimation
+	if !p.isChildOfIndexLookUp {
+		rows = max(MinNumRows, rows)
+		rowSize = max(rowSize, MinRowSize)
+	}
 
 	scanFactor := getTaskScanFactorVer2(p, p.StoreType, taskType)
 	p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
@@ -177,7 +181,7 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
 
 		shouldApplyPenalty := hasFullRangeScan && (preferRangeScanCondition || hasHighModifyCount || hasLowEstimate)
 		if shouldApplyPenalty {
-			newRowCount := math.Min(MaxPenaltyRowCount, math.Max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
+			newRowCount := math.Min(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
 			p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, newRowCount, rowSize, scanFactor))
 		}
 	}
@@ -235,7 +239,7 @@ func (p *PhysicalTableReader) GetPlanCostVer2(taskType property.TaskType, option
 	}
 
 	rows := getCardinality(p.tablePlan, option.CostFlag)
-	rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns)
+	rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.schema.Columns))
 	netFactor := getTaskNetFactorVer2(p, taskType)
 	concurrency := float64(p.SCtx().GetSessionVars().DistSQLScanConcurrency())
 	childType := property.CopSingleReadTaskType
@@ -395,8 +399,8 @@ func (p *PhysicalSort) GetPlanCostVer2(taskType property.TaskType, option *optim
 		return p.PlanCostVer2, nil
 	}
 
-	rows := math.Max(getCardinality(p.Children()[0], option.CostFlag), 1)
-	rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
+	rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+	rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 	memFactor := getTaskMemFactorVer2(p, taskType)
 	diskFactor := defaultVer2Factors.TiDBDisk
@@ -443,14 +447,14 @@ func (p *PhysicalTopN) GetPlanCostVer2(taskType property.TaskType, option *optim
 		return p.PlanCostVer2, nil
 	}
 
-	rows := getCardinality(p.Children()[0], option.CostFlag)
+	rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
 	n := max(1, float64(p.Count+p.Offset))
 	if n > 10000 {
 		// It's only used to prevent some extreme cases, e.g. `select * from t order by a limit 18446744073709551615`.
 		// For normal cases, considering that `rows` may be under-estimated, better to keep `n` unchanged.
 		n = min(n, rows)
 	}
-	rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
+	rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 	memFactor := getTaskMemFactorVer2(p, taskType)
 
@@ -499,9 +503,9 @@ func (p *PhysicalHashAgg) GetPlanCostVer2(taskType property.TaskType, option *op
 		return p.PlanCostVer2, nil
 	}
 
-	inputRows := getCardinality(p.Children()[0], option.CostFlag)
-	outputRows := getCardinality(p, option.CostFlag)
-	outputRowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
+	inputRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+	outputRows := max(MinNumRows, getCardinality(p, option.CostFlag))
+	outputRowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 	memFactor := getTaskMemFactorVer2(p, taskType)
 	concurrency := float64(p.SCtx().GetSessionVars().HashAggFinalConcurrency())
@@ -531,8 +535,8 @@ func (p *PhysicalMergeJoin) GetPlanCostVer2(taskType property.TaskType, option *
 		return p.PlanCostVer2, nil
 	}
 
-	leftRows := getCardinality(p.Children()[0], option.CostFlag)
-	rightRows := getCardinality(p.Children()[1], option.CostFlag)
+	leftRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
+	rightRows := max(MinNumRows, getCardinality(p.Children()[1], option.CostFlag))
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)
 
 	filterCost := costusage.SumCostVer2(filterCostVer2(option, leftRows, p.LeftConditions, cpuFactor),
@@ -570,9 +574,9 @@ func (p *PhysicalHashJoin) GetPlanCostVer2(taskType property.TaskType, option *o
 		build, probe = probe, build
 		buildFilters, probeFilters = probeFilters, buildFilters
 	}
-	buildRows := getCardinality(build, option.CostFlag)
+	buildRows := max(MinNumRows, getCardinality(build, option.CostFlag))
 	probeRows := getCardinality(probe, option.CostFlag)
-	buildRowSize := getAvgRowSize(build.StatsInfo(), build.Schema().Columns)
+	buildRowSize := max(MinRowSize, getAvgRowSize(build.StatsInfo(), build.Schema().Columns))
 	tidbConcurrency := float64(p.Concurrency)
 	mppConcurrency := float64(3) // TODO: remove this empirical value
 	cpuFactor := getTaskCPUFactorVer2(p, taskType)

From ae3ce06217e136b6d316c15a14a107741ec2a3b2 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Mon, 14 Oct 2024 16:10:35 +0800
Subject: [PATCH 3/7] testcase1

---
 .../integrationtest/r/explain_complex.result  | 26 +++++++++----------
 .../r/planner/core/plan_cost_ver2.result      |  6 ++---
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tests/integrationtest/r/explain_complex.result b/tests/integrationtest/r/explain_complex.result
index c4e83774e1355..297cbfec24365 100644
--- a/tests/integrationtest/r/explain_complex.result
+++ b/tests/integrationtest/r/explain_complex.result
@@ -251,19 +251,19 @@ Sort	1.00	root		explain_complex.org_department.left_value
 └─HashAgg	1.00	root		group by:explain_complex.org_department.id, funcs:firstrow(explain_complex.org_department.id)->explain_complex.org_department.id, funcs:firstrow(explain_complex.org_department.ctx)->explain_complex.org_department.ctx, funcs:firstrow(explain_complex.org_department.name)->explain_complex.org_department.name, funcs:firstrow(explain_complex.org_department.left_value)->explain_complex.org_department.left_value, funcs:firstrow(explain_complex.org_department.right_value)->explain_complex.org_department.right_value, funcs:firstrow(explain_complex.org_department.depth)->explain_complex.org_department.depth, funcs:firstrow(explain_complex.org_department.leader_id)->explain_complex.org_department.leader_id, funcs:firstrow(explain_complex.org_department.status)->explain_complex.org_department.status, funcs:firstrow(explain_complex.org_department.created_on)->explain_complex.org_department.created_on, funcs:firstrow(explain_complex.org_department.updated_on)->explain_complex.org_department.updated_on
   └─Selection	0.01	root		or(eq(explain_complex.org_employee_position.user_id, 62), or(eq(explain_complex.org_department.id, 20), eq(explain_complex.org_department.id, 20)))
     └─HashJoin	0.02	root		left outer join, equal:[eq(explain_complex.org_position.id, explain_complex.org_employee_position.position_id)]
-      ├─IndexJoin(Build)	0.01	root		left outer join, inner:IndexLookUp, outer key:explain_complex.org_department.id, inner key:explain_complex.org_position.department_id, equal cond:eq(explain_complex.org_department.id, explain_complex.org_position.department_id)
-      │ ├─IndexLookUp(Build)	0.01	root		
-      │ │ ├─IndexRangeScan(Build)	10.00	cop[tikv]	table:d, index:org_department_ctx_index(ctx)	range:[1,1], keep order:false, stats:pseudo
-      │ │ └─Selection(Probe)	0.01	cop[tikv]		eq(explain_complex.org_department.status, 1000)
-      │ │   └─TableRowIDScan	10.00	cop[tikv]	table:d	keep order:false, stats:pseudo
-      │ └─IndexLookUp(Probe)	0.01	root		
-      │   ├─Selection(Build)	12.50	cop[tikv]		not(isnull(explain_complex.org_position.department_id))
-      │   │ └─IndexRangeScan	12.51	cop[tikv]	table:p, index:org_position_department_id_index(department_id)	range: decided by [eq(explain_complex.org_position.department_id, explain_complex.org_department.id)], keep order:false, stats:pseudo
-      │   └─Selection(Probe)	0.01	cop[tikv]		eq(explain_complex.org_position.status, 1000)
-      │     └─TableRowIDScan	12.50	cop[tikv]	table:p	keep order:false, stats:pseudo
-      └─TableReader(Probe)	9.99	root		data:Selection
-        └─Selection	9.99	cop[tikv]		eq(explain_complex.org_employee_position.status, 1000), not(isnull(explain_complex.org_employee_position.position_id))
-          └─TableFullScan	10000.00	cop[tikv]	table:ep	keep order:false, stats:pseudo
+      ├─TableReader(Build)	9.99	root		data:Selection
+      │ └─Selection	9.99	cop[tikv]		eq(explain_complex.org_employee_position.status, 1000), not(isnull(explain_complex.org_employee_position.position_id))
+      │   └─TableFullScan	10000.00	cop[tikv]	table:ep	keep order:false, stats:pseudo
+      └─IndexJoin(Probe)	0.01	root		left outer join, inner:IndexLookUp, outer key:explain_complex.org_department.id, inner key:explain_complex.org_position.department_id, equal cond:eq(explain_complex.org_department.id, explain_complex.org_position.department_id)
+        ├─IndexLookUp(Build)	0.01	root		
+        │ ├─IndexRangeScan(Build)	10.00	cop[tikv]	table:d, index:org_department_ctx_index(ctx)	range:[1,1], keep order:false, stats:pseudo
+        │ └─Selection(Probe)	0.01	cop[tikv]		eq(explain_complex.org_department.status, 1000)
+        │   └─TableRowIDScan	10.00	cop[tikv]	table:d	keep order:false, stats:pseudo
+        └─IndexLookUp(Probe)	0.01	root		
+          ├─Selection(Build)	12.50	cop[tikv]		not(isnull(explain_complex.org_position.department_id))
+          │ └─IndexRangeScan	12.51	cop[tikv]	table:p, index:org_position_department_id_index(department_id)	range: decided by [eq(explain_complex.org_position.department_id, explain_complex.org_department.id)], keep order:false, stats:pseudo
+          └─Selection(Probe)	0.01	cop[tikv]		eq(explain_complex.org_position.status, 1000)
+            └─TableRowIDScan	12.50	cop[tikv]	table:p	keep order:false, stats:pseudo
 set tidb_cost_model_version=1;
 create table Tab_A (id int primary key,bid int,cid int,name varchar(20),type varchar(20),num int,amt decimal(11,2));
 create table Tab_B (id int primary key,name varchar(20));
diff --git a/tests/integrationtest/r/planner/core/plan_cost_ver2.result b/tests/integrationtest/r/planner/core/plan_cost_ver2.result
index 50f366d8b6229..0da0be65c36af 100644
--- a/tests/integrationtest/r/planner/core/plan_cost_ver2.result
+++ b/tests/integrationtest/r/planner/core/plan_cost_ver2.result
@@ -254,7 +254,7 @@ explain format='true_card_cost' select * from t;
 Error 1105 (HY000): 'explain format=true_card_cost' cannot work without 'analyze', please use 'explain analyze format=true_card_cost'
 explain analyze format='true_card_cost' select * from t where a<3;
 id	estRows	estCost	costFormula	actRows	task	access object	execution info	operator info	memory	disk
-TableReader_7	3323.33	13566.67	(((cpu(0*filters(1)*tikv_cpu_factor(49.9))) + ((scan(0*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7))))) + (net(0*rowsize(16)*tidb_kv_net_factor(3.96))))/15.00	0	root		<execution_info>	<operator_info>	<memory>	<disk>
-└─Selection_6	3323.33	203500.00	(cpu(0*filters(1)*tikv_cpu_factor(49.9))) + ((scan(0*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7))))	0	cop[tikv]		<execution_info>	<operator_info>	<memory>	<disk>
-  └─TableFullScan_5	10000.00	203500.00	(scan(0*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7)))	0	cop[tikv]	table:t	<execution_info>	<operator_info>	<memory>	<disk>
+TableReader_7	3323.33	13580.23	(((cpu(0*filters(1)*tikv_cpu_factor(49.9))) + ((scan(1*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7))))) + (net(0*rowsize(16)*tidb_kv_net_factor(3.96))))/15.00	0	root		<execution_info>	<operator_info>	<memory>	<disk>
+└─Selection_6	3323.33	203703.50	(cpu(0*filters(1)*tikv_cpu_factor(49.9))) + ((scan(1*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7))))	0	cop[tikv]		<execution_info>	<operator_info>	<memory>	<disk>
+  └─TableFullScan_5	10000.00	203703.50	(scan(1*logrowsize(32)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(32)*tikv_scan_factor(40.7)))	0	cop[tikv]	table:t	<execution_info>	<operator_info>	<memory>	<disk>
 set @@tidb_cost_model_version=DEFAULT;

From c9b9581a150d1348c33b1daf8970a6f62205cb5d Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Mon, 14 Oct 2024 16:56:52 +0800
Subject: [PATCH 4/7] testcase2

---
 .../core/casetest/testdata/integration_suite_out.json     | 2 +-
 .../core/casetest/testdata/plan_normalized_suite_out.json | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pkg/planner/core/casetest/testdata/integration_suite_out.json b/pkg/planner/core/casetest/testdata/integration_suite_out.json
index ffe733cb7fec8..c90d518f16176 100644
--- a/pkg/planner/core/casetest/testdata/integration_suite_out.json
+++ b/pkg/planner/core/casetest/testdata/integration_suite_out.json
@@ -165,7 +165,7 @@
       {
         "SQL": "explain format = 'verbose' select (2) in (select /*+ read_from_storage(tiflash[t1]) */ count(*) from t1) from (select t.b < (select /*+ read_from_storage(tiflash[t2]) */ t.b from t2 limit 1 )  from t3 t) t; -- we do generate the agg pushed-down plan of mpp, but cost-cmp failed",
         "Plan": [
-          "HashJoin_17 3.00 32770.77 root  CARTESIAN left outer semi join",
+          "HashJoin_17 3.00 32781.07 root  CARTESIAN left outer semi join",
           "├─Selection_22(Build) 0.80 31149.25 root  eq(2, Column#18)",
           "│ └─StreamAgg_29 1.00 31099.35 root  funcs:count(1)->Column#18",
           "│   └─TableReader_41 3.00 30949.65 root  MppVersion: 2, data:ExchangeSender_40",
diff --git a/pkg/planner/core/casetest/testdata/plan_normalized_suite_out.json b/pkg/planner/core/casetest/testdata/plan_normalized_suite_out.json
index eace2a1ef0f2f..a6ffb4afdc27a 100644
--- a/pkg/planner/core/casetest/testdata/plan_normalized_suite_out.json
+++ b/pkg/planner/core/casetest/testdata/plan_normalized_suite_out.json
@@ -419,8 +419,8 @@
           " TableReader           root         ",
           " └─ExchangeSender      cop[tiflash] ",
           "   └─Projection        cop[tiflash] test.t1.a",
-          "     └─Selection       cop[tiflash] gt(test.t1.a, ?)",
-          "       └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.b, ?), keep order:false"
+          "     └─Selection       cop[tiflash] gt(test.t1.b, ?)",
+          "       └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.a, ?), keep order:false"
         ]
       },
       {
@@ -445,8 +445,8 @@
         "Plan": [
           " TableReader         root         ",
           " └─ExchangeSender    cop[tiflash] ",
-          "   └─Selection       cop[tiflash] gt(test.t1.a, ?), or(lt(test.t1.a, ?), lt(test.t1.b, ?))",
-          "     └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.b, ?), keep order:false"
+          "   └─Selection       cop[tiflash] gt(test.t1.b, ?), or(lt(test.t1.a, ?), lt(test.t1.b, ?))",
+          "     └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.a, ?), keep order:false"
         ]
       },
       {

From 0300107d5510eae61660118934224abb70c408fa Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Mon, 14 Oct 2024 17:56:39 +0800
Subject: [PATCH 5/7] testcase3

---
 .../partition/testdata/partition_pruner_out.json     | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json
index 6e154007ec9d3..11d9e2650f85a 100644
--- a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json
+++ b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json
@@ -470,12 +470,12 @@
         "Plan": [
           "Projection 0.00 root  test_partition.t1.id, test_partition.t1.a, test_partition.t1.b, test_partition.t2.id, test_partition.t2.a, test_partition.t2.b",
           "└─HashJoin 0.00 root  CARTESIAN inner join",
-          "  ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
-          "  │ └─Selection 0.00 cop[tikv]  eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)",
-          "  │   └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
-          "  └─TableReader(Probe) 0.01 root partition:p0 data:Selection",
-          "    └─Selection 0.01 cop[tikv]  eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))",
-          "      └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
+          "  ├─TableReader(Build) 0.01 root partition:p0 data:Selection",
+          "  │ └─Selection 0.01 cop[tikv]  eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))",
+          "  │   └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
+          "  └─TableReader(Probe) 0.00 root partition:p1 data:Selection",
+          "    └─Selection 0.00 cop[tikv]  eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)",
+          "      └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
         ],
         "IndexPlan": [
           "HashJoin 0.03 root  CARTESIAN inner join",

From 02e01c5fc36d07c19356c3560079976c586fb4b1 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Mon, 14 Oct 2024 18:31:41 +0800
Subject: [PATCH 6/7] testcase4

---
 .../testdata/plan_stats_suite_out.json        | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json
index e61f53ded7ca2..f71b3e2fed5f0 100644
--- a/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json
+++ b/pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json
@@ -130,16 +130,16 @@
         "Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
         "Result": [
           "HashJoin 0.33 root  inner join, equal:[eq(test.tp.c, test.t2.a)]",
-          "├─IndexJoin(Build) 0.33 root  inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
-          "│ ├─TableReader(Build) 0.33 root  data:Selection",
-          "│ │ └─Selection 0.33 cop[tikv]  gt(test.t.b, 10), not(isnull(test.t.b))",
-          "│ │   └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
-          "│ └─IndexLookUp(Probe) 0.33 root partition:p0 ",
-          "│   ├─Selection(Build) 0.33 cop[tikv]  gt(test.tp.c, 10), not(isnull(test.tp.c))",
-          "│   │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
-          "│   └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
-          "└─TableReader(Probe) 1.00 root  data:TableRangeScan",
-          "  └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
+          "├─TableReader(Build) 1.00 root  data:TableRangeScan",
+          "│ └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]",
+          "└─IndexJoin(Probe) 0.33 root  inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
+          "  ├─TableReader(Build) 0.33 root  data:Selection",
+          "  │ └─Selection 0.33 cop[tikv]  gt(test.t.b, 10), not(isnull(test.t.b))",
+          "  │   └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
+          "  └─IndexLookUp(Probe) 0.33 root partition:p0 ",
+          "    ├─Selection(Build) 0.33 cop[tikv]  gt(test.tp.c, 10), not(isnull(test.tp.c))",
+          "    │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
+          "    └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]"
         ]
       }
     ]

From 865eba1d7a1f29d03e2381f85f8b0221eab59f3d Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Mon, 28 Oct 2024 06:18:47 -0700
Subject: [PATCH 7/7] review comments

---
 pkg/planner/core/plan_cost_ver2.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pkg/planner/core/plan_cost_ver2.go b/pkg/planner/core/plan_cost_ver2.go
index 95adaffd1e712..f62628f1abd84 100644
--- a/pkg/planner/core/plan_cost_ver2.go
+++ b/pkg/planner/core/plan_cost_ver2.go
@@ -104,9 +104,12 @@ func (p *PhysicalProjection) GetPlanCostVer2(taskType property.TaskType, option
 }
 
 const (
-	// MinNumRows provides a minimum to avoid underestimation
+	// MinNumRows provides a minimum to avoid underestimation. As selectivity estimation approaches
+	// zero, all plan choices result in a low cost - making it difficult to differentiate plan choices.
+	// A low value of 1.0 here is used for most (non-probe accesses) to reduce this risk.
 	MinNumRows = 1.0
-	// MinRowSize provides a minimum to avoid underestimation
+	// MinRowSize provides a minimum row size to ensure that any adjustment or calculation
+	// in costing does not go below this value. 2.0 is used as a reasonable lowest row size.
 	MinRowSize = 2.0
 	// TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans
 	TiFlashStartupRowPenalty = 10000