diff --git a/pkg/planner/core/stats.go b/pkg/planner/core/stats.go
index d44e20103a807..6359d047269e3 100644
--- a/pkg/planner/core/stats.go
+++ b/pkg/planner/core/stats.go
@@ -140,8 +140,222 @@ func (p *baseLogicalPlan) recursiveDeriveStats(colGroups [][]*expression.Column)
 return p.self.DeriveStats(childStats, p.self.Schema(), childSchema, colGroups)
 }
+<<<<<<< HEAD
 // ExtractColGroups implements LogicalPlan ExtractColGroups interface.
 func (*baseLogicalPlan) ExtractColGroups(_ [][]*expression.Column) [][]*expression.Column {
+=======
+func fillIndexPath(ds *logicalop.DataSource, path *util.AccessPath, conds []expression.Expression) error {
+ if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+ debugtrace.EnterContextCommon(ds.SCtx())
+ defer debugtrace.LeaveContextCommon(ds.SCtx())
+ }
+ path.Ranges = ranger.FullRange()
+ path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
+ path.CorrCountAfterAccess = 0
+ path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
+ path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
+ if !path.Index.Unique && !path.Index.Primary && len(path.Index.Columns) == len(path.IdxCols) {
+ handleCol := ds.GetPKIsHandleCol()
+ if handleCol != nil && !mysql.HasUnsignedFlag(handleCol.RetType.GetFlag()) {
+ alreadyHandle := false
+ for _, col := range path.IdxCols {
+ if col.ID == model.ExtraHandleID || col.EqualColumn(handleCol) {
+ alreadyHandle = true
+ }
+ }
+ // Don't add one column twice to the index. May cause unexpected errors.
+ if !alreadyHandle {
+ path.FullIdxCols = append(path.FullIdxCols, handleCol)
+ path.FullIdxColLens = append(path.FullIdxColLens, types.UnspecifiedLength)
+ path.IdxCols = append(path.IdxCols, handleCol)
+ path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength)
+ // Also updates the map that maps the index id to its prefix column ids.
+ if len(ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) {
+ ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], handleCol.UniqueID)
+ }
+ }
+ }
+ }
+ err := detachCondAndBuildRangeForPath(ds.SCtx(), path, conds, ds.TableStats.HistColl)
+ return err
+}
+
+// deriveIndexPathStats will fulfill the information that the AccessPath need.
+// conds is the conditions used to generate the DetachRangeResult for path.
+// isIm indicates whether this function is called to generate the partial path for IndexMerge.
+func deriveIndexPathStats(ds *logicalop.DataSource, path *util.AccessPath, _ []expression.Expression, isIm bool) {
+ if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+ debugtrace.EnterContextCommon(ds.SCtx())
+ defer debugtrace.LeaveContextCommon(ds.SCtx())
+ }
+ if path.EqOrInCondCount == len(path.AccessConds) {
+ accesses, remained := path.SplitCorColAccessCondFromFilters(ds.SCtx(), path.EqOrInCondCount)
+ path.AccessConds = append(path.AccessConds, accesses...)
+ path.TableFilters = remained
+ if len(accesses) > 0 && ds.StatisticTable.Pseudo {
+ path.CountAfterAccess = cardinality.PseudoAvgCountPerValue(ds.StatisticTable)
+ } else {
+ selectivity := path.CountAfterAccess / float64(ds.StatisticTable.RealtimeCount)
+ for i := range accesses {
+ col := path.IdxCols[path.EqOrInCondCount+i]
+ ndv := cardinality.EstimateColumnNDV(ds.StatisticTable, col.ID)
+ ndv *= selectivity
+ if ndv < 1 {
+ ndv = 1.0
+ }
+ path.CountAfterAccess = path.CountAfterAccess / ndv
+ }
+ }
+ }
+ var indexFilters []expression.Expression
+ indexFilters, path.TableFilters = splitIndexFilterConditions(ds, path.TableFilters, path.FullIdxCols, path.FullIdxColLens)
+ path.IndexFilters = append(path.IndexFilters, indexFilters...)
+ // If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
+ // We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
+ // Add an arbitrary tolerance factor to account for comparison with floating point
+ if (path.CountAfterAccess+cost.ToleranceFactor) < ds.StatsInfo().RowCount && !isIm {
+ path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
+ }
+ if path.IndexFilters != nil {
+ selectivity, _, err := cardinality.Selectivity(ds.SCtx(), ds.TableStats.HistColl, path.IndexFilters, nil)
+ if err != nil {
+ logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err))
+ selectivity = cost.SelectionFactor
+ }
+ if isIm {
+ path.CountAfterIndex = path.CountAfterAccess * selectivity
+ } else {
+ path.CountAfterIndex = math.Max(path.CountAfterAccess*selectivity, ds.StatsInfo().RowCount)
+ }
+ } else {
+ path.CountAfterIndex = path.CountAfterAccess
+ }
+}
+
+// deriveTablePathStats will fulfill the information that the AccessPath need.
+// isIm indicates whether this function is called to generate the partial path for IndexMerge.
+func deriveTablePathStats(ds *logicalop.DataSource, path *util.AccessPath, conds []expression.Expression, isIm bool) error {
+ if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
+ debugtrace.EnterContextCommon(ds.SCtx())
+ defer debugtrace.LeaveContextCommon(ds.SCtx())
+ }
+ if path.IsCommonHandlePath {
+ return deriveCommonHandleTablePathStats(ds, path, conds, isIm)
+ }
+ var err error
+ path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
+ path.TableFilters = conds
+ var pkCol *expression.Column
+ isUnsigned := false
+ if ds.TableInfo.PKIsHandle {
+ if pkColInfo := ds.TableInfo.GetPkColInfo(); pkColInfo != nil {
+ isUnsigned = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
+ pkCol = expression.ColInfo2Col(ds.Schema().Columns, pkColInfo)
+ }
+ } else {
+ pkCol = ds.Schema().GetExtraHandleColumn()
+ }
+ if pkCol == nil {
+ path.Ranges = ranger.FullIntRange(isUnsigned)
+ return nil
+ }
+
+ path.Ranges = ranger.FullIntRange(isUnsigned)
+ if len(conds) == 0 {
+ return nil
+ }
+ // for cnf condition combination, c=1 and c=2 and (1 member of (a)),
+ // c=1 and c=2 will derive invalid range represented by an access condition as constant of 0 (false).
+ // later this constant of 0 will be built as empty range.
+ path.AccessConds, path.TableFilters = ranger.DetachCondsForColumn(ds.SCtx().GetRangerCtx(), conds, pkCol)
+ // If there's no access cond, we try to find that whether there's expression containing correlated column that
+ // can be used to access data.
+ corColInAccessConds := false + if len(path.AccessConds) == 0 { + for i, filter := range path.TableFilters { + eqFunc, ok := filter.(*expression.ScalarFunction) + if !ok || eqFunc.FuncName.L != ast.EQ { + continue + } + lCol, lOk := eqFunc.GetArgs()[0].(*expression.Column) + if lOk && lCol.Equal(ds.SCtx().GetExprCtx().GetEvalCtx(), pkCol) { + _, rOk := eqFunc.GetArgs()[1].(*expression.CorrelatedColumn) + if rOk { + path.AccessConds = append(path.AccessConds, filter) + path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...) + corColInAccessConds = true + break + } + } + rCol, rOk := eqFunc.GetArgs()[1].(*expression.Column) + if rOk && rCol.Equal(ds.SCtx().GetExprCtx().GetEvalCtx(), pkCol) { + _, lOk := eqFunc.GetArgs()[0].(*expression.CorrelatedColumn) + if lOk { + path.AccessConds = append(path.AccessConds, filter) + path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...) + corColInAccessConds = true + break + } + } + } + } + if corColInAccessConds { + path.CountAfterAccess = 1 + return nil + } + var remainedConds []expression.Expression + path.Ranges, path.AccessConds, remainedConds, err = ranger.BuildTableRange(path.AccessConds, ds.SCtx().GetRangerCtx(), pkCol.RetType, ds.SCtx().GetSessionVars().RangeMaxSize) + path.TableFilters = append(path.TableFilters, remainedConds...) + if err != nil { + return err + } + path.CountAfterAccess, err = cardinality.GetRowCountByIntColumnRanges(ds.SCtx(), &ds.StatisticTable.HistColl, pkCol.ID, path.Ranges) + // If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info. + // We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity. + // Add an arbitrary tolerance factor to account for comparison with floating point + if (path.CountAfterAccess+cost.ToleranceFactor) < ds.StatsInfo().RowCount && !isIm { + path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount)) + } + return err +} + +func deriveCommonHandleTablePathStats(ds *logicalop.DataSource, path *util.AccessPath, conds []expression.Expression, isIm bool) error { + path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount) + path.Ranges = ranger.FullNotNullRange() + path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index) + path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index) + if len(conds) == 0 { + return nil + } + if err := detachCondAndBuildRangeForPath(ds.SCtx(), path, conds, ds.TableStats.HistColl); err != nil { + return err + } + if path.EqOrInCondCount == len(path.AccessConds) { + accesses, remained := path.SplitCorColAccessCondFromFilters(ds.SCtx(), path.EqOrInCondCount) + path.AccessConds = append(path.AccessConds, accesses...) + path.TableFilters = remained + if len(accesses) > 0 && ds.StatisticTable.Pseudo { + path.CountAfterAccess = cardinality.PseudoAvgCountPerValue(ds.StatisticTable) + } else { + selectivity := path.CountAfterAccess / float64(ds.StatisticTable.RealtimeCount) + for i := range accesses { + col := path.IdxCols[path.EqOrInCondCount+i] + ndv := cardinality.EstimateColumnNDV(ds.StatisticTable, col.ID) + ndv *= selectivity + if ndv < 1 { + ndv = 1.0 + } + path.CountAfterAccess = path.CountAfterAccess / ndv + } + } + } + // If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info. 
+ // We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
+ // Add an arbitrary tolerance factor to account for comparison with floating point
+ if (path.CountAfterAccess+cost.ToleranceFactor) < ds.StatsInfo().RowCount && !isIm {
+ path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
+ }
+>>>>>>> f66e8b1e796 (planner: fix the possible panic when fixcontrol#44855 enabled (#59763))
 return nil
 }
diff --git a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result
index 8389b574b7c89..b325b272139fe 100644
--- a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result
+++ b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result
@@ -681,6 +681,7 @@ JOIN tceb7972c ON tceb7972c.col_19 = t61a85298.col_71
 WHERE 16739493649928310215 MEMBER OF (derived_table.col_60767) OR NOT (JSON_CONTAINS(derived_table.col_60767, '6019730272580550835'));
 id estRows task access object operator info
+<<<<<<< HEAD
 Projection_12 10000.00 root 1->Column#19
 └─HashJoin_13 10000.00 root inner join, equal:[eq(planner__core__issuetest__planner_issue.tceb7972c.col_19, Column#20)]
 ├─Projection_19(Build) 10000.00 root cast(planner__core__issuetest__planner_issue.t61a85298.col_71, double BINARY)->Column#20
@@ -691,3 +692,88 @@ Projection_12 10000.00 root 1->Column#19
 └─Selection_17 8000.00 cop[tikv] or(json_memberof(cast(16739493649928310215, json BINARY), planner__core__issuetest__planner_issue.tceb7972c.col_17), not(istrue_with_null(json_contains(planner__core__issuetest__planner_issue.tceb7972c.col_17, cast("6019730272580550835", json BINARY)))))
 └─TableFullScan_16 10000.00 cop[tikv] table:tceb7972c keep order:false, stats:pseudo
 set @@tidb_enable_global_index=0;
+=======
+Projection_11 6.00 root 1->Column#18
+└─HashJoin_13 6.00 root inner join, equal:[eq(test.tceb7972c.col_19, Column#19)]
+ ├─TableReader_16(Build) 4.80 root partition:all data:Selection_15
+ │ └─Selection_15 4.80 cop[tikv] or(json_memberof(cast(16739493649928310215, json BINARY), test.tceb7972c.col_17), not(istrue_with_null(json_contains(test.tceb7972c.col_17, cast("6019730272580550835", json BINARY)))))
+ │ └─TableFullScan_14 6.00 cop[tikv] table:tceb7972c keep order:false, stats:partial[col_17:missing]
+ └─Projection_17(Probe) 10000.00 root cast(test.t61a85298.col_71, double BINARY)->Column#19
+ └─TableReader_19 10000.00 root data:TableFullScan_18
+ └─TableFullScan_18 10000.00 cop[tikv] table:t61a85298 keep order:false, stats:pseudo
+drop table if exists t0, t1;
+CREATE TABLE t0(c0 int);
+CREATE TABLE t1(c0 int);
+SELECT t0.c0, t1.c0 FROM t0 NATURAL JOIN t1 WHERE '1' AND (t0.c0 IN (SELECT c0 FROM t0));
+c0 c0
+drop table if exists t1, t2, t3, t4;
+CREATE TABLE t1 (a int, b int, c int);
+CREATE TABLE t2 (a int, b int, c int);
+CREATE TABLE t3 (a int, b int, c int);
+CREATE TABLE t4 (a int, b int, c int);
+INSERT INTO t1 VALUES (1,3,0), (2,2,0), (3,2,0);
+INSERT INTO t2 VALUES (3,3,0), (4,2,0), (5,3,0);
+INSERT INTO t3 VALUES (1,2,0), (2,2,0);
+INSERT INTO t4 VALUES (3,2,0), (4,2,0);
+CREATE INDEX idx_b ON t2(b);
+SELECT t2.a,t2.b,t3.a,t3.b,t4.a,t4.b
+FROM (t3,t4)
+LEFT JOIN
+(t1,t2)
+ON t3.a=1 AND t3.b=t2.b AND t2.b=t4.b order by 1, 2, 3, 4, 5;
+a b a b a b
+NULL NULL 2 2 3 2
+NULL NULL 2 2 4 2
+4 2 1 2 3 2
+4 2 1 2 3 2
+4 2 1 2 3 2
+4 2 1 2 4 2
+4 2 1 2 4 2
+4 2 1 2 4 2
+show warnings;
+Level Code Message
+drop table if exists t1, t2, t3, t4;
+drop table if exists t0, v0;
+drop view if exists v0;
+CREATE TABLE t0(c0 INTEGER);
+CREATE VIEW v0(c0) AS SELECT 'a' FROM t0 WHERE (CASE t0.c0 WHEN t0.c0 THEN false END );
+SELECT t0.c0 FROM v0, t0 WHERE RAND();
+c0
+drop table if exists tl6e913fb9;
+CREATE TABLE `tl6e913fb9` (
+`col_36` varchar(175) COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT 'asMF',
+KEY `idx_35_5` (`col_36`(1)),
+PRIMARY KEY (`col_36`) /*T![clustered_index] NONCLUSTERED */,
+KEY `idx_65` (`col_36`(5))
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
+with cte_192 ( col_1101,col_1102,col_1103,col_1104 ) AS ( select /*+ use_index_merge( tl6e913fb9 ) */ replace( tl6e913fb9.col_36 , tl6e913fb9.col_36 , tl6e913fb9.col_36 ) as r0 , space( 0 ) as r1 , min( distinct tl6e913fb9.col_36 ) as r2 , count( distinct tl6e913fb9.col_36 ) as r3 from tl6e913fb9 where tl6e913fb9.col_36 between 'n92ok$B%W#UU%O' and '()c=KVQ=T%-vzGJ' and tl6e913fb9.col_36 in ( 'T+kf' ,'Lvluod2H' ,'3#Omx@pC^fFkeH' ,'=b$z' ) group by tl6e913fb9.col_36 having tl6e913fb9.col_36 = 'xjV@' or IsNull( tl6e913fb9.col_36 ) ) ( select 1,col_1101,col_1102,col_1103,col_1104 from cte_192 where not( IsNull( cte_192.col_1102 ) ) order by 1,2,3,4,5 limit 72850972 );
+1 col_1101 col_1102 col_1103 col_1104
+drop table if exists t;
+create table t (id int unique key, c int);
+insert into t values (1, 10);
+insert into t values (2, 20);
+insert into t values (3, 30);
+select _tidb_rowid from t where id in (1, 2, 3);
+_tidb_rowid
+1
+2
+3
+drop table if exists t, t1;
+create table t(a int);
+create table t1(a int primary key, b int, index idx(b));
+insert into t values(1), (2), (123);
+insert into t1 values(2, 123), (123, 2);
+set tidb_opt_fix_control='44855:on';
+explain select /*+ inl_join(t1), use_index(t1, idx) */ * from t join t1 on t.a = t1.a and t1.b = 123;
+id estRows task access object operator info
+Projection_9 12.50 root test.t.a, test.t1.a, test.t1.b
+└─IndexJoin_12 12.50 root inner join, inner:IndexReader_11, outer key:test.t.a, inner key:test.t1.a, equal cond:eq(test.t.a, test.t1.a)
+ ├─TableReader_20(Build) 9990.00 root data:Selection_19
+ │ └─Selection_19 9990.00 cop[tikv] not(isnull(test.t.a))
+ │ └─TableFullScan_18 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
+ └─IndexReader_11(Probe) 12.50 root index:IndexRangeScan_10
+ └─IndexRangeScan_10 12.50 cop[tikv] table:t1, index:idx(b) range: decided by [eq(test.t1.a, test.t.a) eq(test.t1.b, 123)], keep order:false, stats:pseudo
+select /*+ inl_join(t1), use_index(t1, idx) */ * from t join t1 on t.a = t1.a and t1.b = 123;
+a a b
+2 2 123
+>>>>>>> f66e8b1e796 (planner: fix the possible panic when fixcontrol#44855 enabled (#59763))
diff --git a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test
index 64abe19dd5f76..47fef2437a603 100644
--- a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test
+++ b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test
@@ -477,4 +477,67 @@ FROM (
 ) AS derived_table
 WHERE 16739493649928310215 MEMBER OF (derived_table.col_60767) OR NOT (JSON_CONTAINS(derived_table.col_60767, '6019730272580550835'));
+<<<<<<< HEAD
 set @@tidb_enable_global_index=0;
+=======
+
+# TestIssue53766
+drop table if exists t0, t1;
+CREATE TABLE t0(c0 int);
+CREATE TABLE t1(c0 int);
+SELECT t0.c0, t1.c0 FROM t0 NATURAL JOIN t1 WHERE '1' AND (t0.c0 IN (SELECT c0 FROM t0));
+
+# TestIssue56472
+drop table if exists t1, t2, t3, t4;
+CREATE TABLE t1 (a int, b int, c int);
+CREATE TABLE t2 (a int, b int, c int);
+CREATE TABLE t3 (a int, b int, c int);
+CREATE TABLE t4 (a int, b int, c int);
+INSERT INTO t1 VALUES (1,3,0), (2,2,0), (3,2,0);
+INSERT INTO t2 VALUES (3,3,0), (4,2,0), (5,3,0);
+INSERT INTO t3 VALUES (1,2,0), (2,2,0);
+INSERT INTO t4 VALUES (3,2,0), (4,2,0);
+CREATE INDEX idx_b ON t2(b);
+SELECT t2.a,t2.b,t3.a,t3.b,t4.a,t4.b
+ FROM (t3,t4)
+ LEFT JOIN
+ (t1,t2)
+ ON t3.a=1 AND t3.b=t2.b AND t2.b=t4.b order by 1, 2, 3, 4, 5;
+show warnings;
+drop table if exists t1, t2, t3, t4;
+
+# TestIssue56270
+drop table if exists t0, v0;
+drop view if exists v0;
+CREATE TABLE t0(c0 INTEGER);
+CREATE VIEW v0(c0) AS SELECT 'a' FROM t0 WHERE (CASE t0.c0 WHEN t0.c0 THEN false END );
+SELECT t0.c0 FROM v0, t0 WHERE RAND();
+
+# TestIssue56479
+drop table if exists tl6e913fb9;
+CREATE TABLE `tl6e913fb9` (
+ `col_36` varchar(175) COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT 'asMF',
+ KEY `idx_35_5` (`col_36`(1)),
+ PRIMARY KEY (`col_36`) /*T![clustered_index] NONCLUSTERED */,
+ KEY `idx_65` (`col_36`(5))
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
+with cte_192 ( col_1101,col_1102,col_1103,col_1104 ) AS ( select /*+ use_index_merge( tl6e913fb9 ) */ replace( tl6e913fb9.col_36 , tl6e913fb9.col_36 , tl6e913fb9.col_36 ) as r0 , space( 0 ) as r1 , min( distinct tl6e913fb9.col_36 ) as r2 , count( distinct tl6e913fb9.col_36 ) as r3 from tl6e913fb9 where tl6e913fb9.col_36 between 'n92ok$B%W#UU%O' and '()c=KVQ=T%-vzGJ' and tl6e913fb9.col_36 in ( 'T+kf' ,'Lvluod2H' ,'3#Omx@pC^fFkeH' ,'=b$z' ) group by tl6e913fb9.col_36 having tl6e913fb9.col_36 = 'xjV@' or IsNull( tl6e913fb9.col_36 ) ) ( select 1,col_1101,col_1102,col_1103,col_1104 from cte_192 where not( IsNull( cte_192.col_1102 ) ) order by 1,2,3,4,5 limit 72850972 );
+
+# TestIssue58581
+drop table if exists t;
+create table t (id int unique key, c int);
+insert into t values (1, 10);
+insert into t values (2, 20);
+insert into t values (3, 30);
+select _tidb_rowid from t where id in (1, 2, 3);
+
+# TestIssue59762
+drop table if exists t, t1;
+create table t(a int);
+create table t1(a int primary key, b int, index idx(b));
+insert into t values(1), (2), (123);
+insert into t1 values(2, 123), (123, 2);
+set tidb_opt_fix_control='44855:on';
+explain select /*+ inl_join(t1), use_index(t1, idx) */ * from t join t1 on t.a = t1.a and t1.b = 123;
+select /*+ inl_join(t1), use_index(t1, idx) */ * from t join t1 on t.a = t1.a and t1.b = 123;
+>>>>>>> f66e8b1e796 (planner: fix the possible panic when fixcontrol#44855 enabled (#59763))