Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 214 additions & 0 deletions pkg/planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,222 @@ func (p *baseLogicalPlan) recursiveDeriveStats(colGroups [][]*expression.Column)
return p.self.DeriveStats(childStats, p.self.Schema(), childSchema, colGroups)
}

<<<<<<< HEAD
// ExtractColGroups implements LogicalPlan ExtractColGroups interface.
func (*baseLogicalPlan) ExtractColGroups(_ [][]*expression.Column) [][]*expression.Column {
=======
// fillIndexPath initializes an index AccessPath with "scan everything" defaults
// (full range, realtime row count, zero correlated count), resolves the index
// columns against the DataSource schema, and then lets
// detachCondAndBuildRangeForPath refine the path using conds.
//
// For an index that is neither unique nor primary and whose columns were all
// resolved, the signed integer handle column (if the table has one and the
// index does not already contain it) is appended as a trailing index column.
//
// The returned error is whatever detachCondAndBuildRangeForPath reports.
func fillIndexPath(ds *logicalop.DataSource, path *util.AccessPath, conds []expression.Expression) error {
	if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.EnterContextCommon(ds.SCtx())
		defer debugtrace.LeaveContextCommon(ds.SCtx())
	}

	// Defaults before any condition has been looked at.
	path.Ranges = ranger.FullRange()
	path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
	path.CorrCountAfterAccess = 0
	path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
	path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)

	if !path.Index.Unique && !path.Index.Primary && len(path.Index.Columns) == len(path.IdxCols) {
		if pkHandle := ds.GetPKIsHandleCol(); pkHandle != nil && !mysql.HasUnsignedFlag(pkHandle.RetType.GetFlag()) {
			// Check whether the handle is already one of the index columns.
			handleInIndex := false
			for _, idxCol := range path.IdxCols {
				if idxCol.ID == model.ExtraHandleID || idxCol.EqualColumn(pkHandle) {
					handleInIndex = true
				}
			}
			// Never add the same column to the index twice; that may cause unexpected errors.
			if !handleInIndex {
				path.FullIdxCols = append(path.FullIdxCols, pkHandle)
				path.FullIdxColLens = append(path.FullIdxColLens, types.UnspecifiedLength)
				path.IdxCols = append(path.IdxCols, pkHandle)
				path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength)
				// Keep the index-id -> column-unique-ids map in sync, but only while it
				// still holds exactly the declared index columns, so the handle is not
				// recorded more than once.
				if len(ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) {
					ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], pkHandle.UniqueID)
				}
			}
		}
	}

	return detachCondAndBuildRangeForPath(ds.SCtx(), path, conds, ds.TableStats.HistColl)
}

// deriveIndexPathStats completes the estimation of an index AccessPath. It may
// extend the access conditions with conditions split out of the filters, divides
// the remaining filters into index filters and table filters, and sets
// CountAfterAccess and CountAfterIndex.
//
// The third parameter (a condition list) is accepted but not used by this function.
// isIm indicates whether this function is called to generate the partial path for
// IndexMerge; when it is true the counts are not adjusted towards
// ds.StatsInfo().RowCount.
func deriveIndexPathStats(ds *logicalop.DataSource, path *util.AccessPath, _ []expression.Expression, isIm bool) {
	if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.EnterContextCommon(ds.SCtx())
		defer debugtrace.LeaveContextCommon(ds.SCtx())
	}

	// Only when every access condition is an eq/in condition do we try to pull
	// further access conditions out of the filters.
	if len(path.AccessConds) == path.EqOrInCondCount {
		corAccesses, restFilters := path.SplitCorColAccessCondFromFilters(ds.SCtx(), path.EqOrInCondCount)
		path.AccessConds = append(path.AccessConds, corAccesses...)
		path.TableFilters = restFilters
		if len(corAccesses) > 0 && ds.StatisticTable.Pseudo {
			path.CountAfterAccess = cardinality.PseudoAvgCountPerValue(ds.StatisticTable)
		} else {
			// Scale each column's NDV by the fraction of rows that survive the
			// current access conditions, never letting it drop below 1, and
			// divide the count by it.
			accessRatio := path.CountAfterAccess / float64(ds.StatisticTable.RealtimeCount)
			for offset := range corAccesses {
				idxCol := path.IdxCols[path.EqOrInCondCount+offset]
				scaledNDV := math.Max(cardinality.EstimateColumnNDV(ds.StatisticTable, idxCol.ID)*accessRatio, 1)
				path.CountAfterAccess /= scaledNDV
			}
		}
	}

	idxFilters, tblFilters := splitIndexFilterConditions(ds, path.TableFilters, path.FullIdxCols, path.FullIdxColLens)
	path.TableFilters = tblFilters
	path.IndexFilters = append(path.IndexFilters, idxFilters...)

	// A `CountAfterAccess` below `stats.RowCount` means the stats are inconsistent.
	// `stats.RowCount` is preferred because it could use more stats info to
	// calculate the selectivity. The tolerance factor absorbs floating point noise
	// in the comparison.
	if (path.CountAfterAccess+cost.ToleranceFactor) < ds.StatsInfo().RowCount && !isIm {
		path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
	}

	// Without index filters nothing is removed after the access step.
	if path.IndexFilters == nil {
		path.CountAfterIndex = path.CountAfterAccess
		return
	}

	filterSel, _, err := cardinality.Selectivity(ds.SCtx(), ds.TableStats.HistColl, path.IndexFilters, nil)
	if err != nil {
		logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err))
		filterSel = cost.SelectionFactor
	}
	path.CountAfterIndex = path.CountAfterAccess * filterSel
	if !isIm {
		// Outside IndexMerge the estimate is floored at the DataSource row count.
		path.CountAfterIndex = math.Max(path.CountAfterIndex, ds.StatsInfo().RowCount)
	}
}

// deriveTablePathStats fills in the information that a table AccessPath needs:
// its Ranges, AccessConds, TableFilters and CountAfterAccess.
// Paths with IsCommonHandlePath set are delegated to deriveCommonHandleTablePathStats.
// conds are the conditions that get split into access conditions and table filters.
// isIm indicates whether this function is called to generate the partial path for IndexMerge;
// when it is true, CountAfterAccess is not adjusted towards ds.StatsInfo().RowCount.
// It returns the error produced while building the table range or estimating the row count, if any.
func deriveTablePathStats(ds *logicalop.DataSource, path *util.AccessPath, conds []expression.Expression, isIm bool) error {
if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(ds.SCtx())
defer debugtrace.LeaveContextCommon(ds.SCtx())
}
if path.IsCommonHandlePath {
return deriveCommonHandleTablePathStats(ds, path, conds, isIm)
}
var err error
// Defaults: every row is accessed and all conditions are table filters.
path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
path.TableFilters = conds
// Pick the handle column: the integer primary key when PKIsHandle is set,
// otherwise the extra handle column of the schema.
var pkCol *expression.Column
isUnsigned := false
if ds.TableInfo.PKIsHandle {
if pkColInfo := ds.TableInfo.GetPkColInfo(); pkColInfo != nil {
isUnsigned = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
pkCol = expression.ColInfo2Col(ds.Schema().Columns, pkColInfo)
}
} else {
pkCol = ds.Schema().GetExtraHandleColumn()
}
// Without a handle column no range can be derived; keep the full range.
if pkCol == nil {
path.Ranges = ranger.FullIntRange(isUnsigned)
return nil
}

path.Ranges = ranger.FullIntRange(isUnsigned)
// Nothing to detach when there are no conditions.
if len(conds) == 0 {
return nil
}
// For a CNF condition combination such as c=1 and c=2 and (1 member of (a)),
// c=1 and c=2 derive an invalid range, represented by an access condition that is the constant 0 (false).
// Later this constant 0 is built into an empty range.
path.AccessConds, path.TableFilters = ranger.DetachCondsForColumn(ds.SCtx().GetRangerCtx(), conds, pkCol)
// If there is no access condition, look for an equality between the handle column and a
// correlated column (in either operand order) that can be used to access data.
corColInAccessConds := false
if len(path.AccessConds) == 0 {
for i, filter := range path.TableFilters {
eqFunc, ok := filter.(*expression.ScalarFunction)
if !ok || eqFunc.FuncName.L != ast.EQ {
continue
}
// Case 1: pkCol = <correlated column>.
lCol, lOk := eqFunc.GetArgs()[0].(*expression.Column)
if lOk && lCol.Equal(ds.SCtx().GetExprCtx().GetEvalCtx(), pkCol) {
_, rOk := eqFunc.GetArgs()[1].(*expression.CorrelatedColumn)
if rOk {
path.AccessConds = append(path.AccessConds, filter)
// Removes element i in place; the loop breaks right after, so the
// ongoing range iteration never observes the shifted elements.
path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...)
corColInAccessConds = true
break
}
}
// Case 2: <correlated column> = pkCol.
rCol, rOk := eqFunc.GetArgs()[1].(*expression.Column)
if rOk && rCol.Equal(ds.SCtx().GetExprCtx().GetEvalCtx(), pkCol) {
_, lOk := eqFunc.GetArgs()[0].(*expression.CorrelatedColumn)
if lOk {
path.AccessConds = append(path.AccessConds, filter)
// Same in-place removal as above, again followed by an immediate break.
path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...)
corColInAccessConds = true
break
}
}
}
}
// With a correlated-column equality on the handle, the count is set to one row and
// path.Ranges is left as the full int range assigned above.
if corColInAccessConds {
path.CountAfterAccess = 1
return nil
}
var remainedConds []expression.Expression
path.Ranges, path.AccessConds, remainedConds, err = ranger.BuildTableRange(path.AccessConds, ds.SCtx().GetRangerCtx(), pkCol.RetType, ds.SCtx().GetSessionVars().RangeMaxSize)
// The remaining conditions are appended to the table filters before the error is checked.
path.TableFilters = append(path.TableFilters, remainedConds...)
if err != nil {
return err
}
// The error from this estimation is returned at the end, after the adjustment below.
path.CountAfterAccess, err = cardinality.GetRowCountByIntColumnRanges(ds.SCtx(), &ds.StatisticTable.HistColl, pkCol.ID, path.Ranges)
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
// Add an arbitrary tolerance factor to account for comparison with floating point
if (path.CountAfterAccess+cost.ToleranceFactor) < ds.StatsInfo().RowCount && !isIm {
path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
}
return err
}

// deriveCommonHandleTablePathStats fills in Ranges, the index column lists, the access
// conditions, the table filters and CountAfterAccess for a table path whose columns are
// derived from path.Index. deriveTablePathStats calls it when path.IsCommonHandlePath is set.
// conds are the conditions handed to detachCondAndBuildRangeForPath.
// isIm indicates whether the path is a partial path for IndexMerge; when it is true,
// CountAfterAccess is not adjusted towards ds.StatsInfo().RowCount.
// It returns the error from detachCondAndBuildRangeForPath, or nil.
func deriveCommonHandleTablePathStats(ds *logicalop.DataSource, path *util.AccessPath, conds []expression.Expression, isIm bool) error {
// Defaults: every row is accessed through the full not-null range.
path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
path.Ranges = ranger.FullNotNullRange()
path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
// Nothing to detach when there are no conditions.
if len(conds) == 0 {
return nil
}
if err := detachCondAndBuildRangeForPath(ds.SCtx(), path, conds, ds.TableStats.HistColl); err != nil {
return err
}
// Only when every access condition is counted in EqOrInCondCount do we try to split
// further access conditions out of the filters.
if path.EqOrInCondCount == len(path.AccessConds) {
accesses, remained := path.SplitCorColAccessCondFromFilters(ds.SCtx(), path.EqOrInCondCount)
path.AccessConds = append(path.AccessConds, accesses...)
path.TableFilters = remained
if len(accesses) > 0 && ds.StatisticTable.Pseudo {
path.CountAfterAccess = cardinality.PseudoAvgCountPerValue(ds.StatisticTable)
} else {
// Fraction of the table that survives the current access conditions.
selectivity := path.CountAfterAccess / float64(ds.StatisticTable.RealtimeCount)
for i := range accesses {
col := path.IdxCols[path.EqOrInCondCount+i]
ndv := cardinality.EstimateColumnNDV(ds.StatisticTable, col.ID)
// Scale the column NDV by that fraction, but never below 1, so the
// division below cannot increase the count.
ndv *= selectivity
if ndv < 1 {
ndv = 1.0
}
path.CountAfterAccess = path.CountAfterAccess / ndv
}
}
}
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
// Add an arbitrary tolerance factor to account for comparison with floating point
if (path.CountAfterAccess+cost.ToleranceFactor) < ds.StatsInfo().RowCount && !isIm {
path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
}
// NOTE(review): the next line is an unresolved merge-conflict marker inside this function
// body; the conflict must be resolved before this file can compile.
>>>>>>> f66e8b1e796 (planner: fix the possible panic when fixcontrol#44855 enabled (#59763))
return nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,7 @@ JOIN tceb7972c ON tceb7972c.col_19 = t61a85298.col_71
WHERE 16739493649928310215 MEMBER OF (derived_table.col_60767)
OR NOT (JSON_CONTAINS(derived_table.col_60767, '6019730272580550835'));
id estRows task access object operator info
<<<<<<< HEAD
Projection_12 10000.00 root 1->Column#19
└─HashJoin_13 10000.00 root inner join, equal:[eq(planner__core__issuetest__planner_issue.tceb7972c.col_19, Column#20)]
├─Projection_19(Build) 10000.00 root cast(planner__core__issuetest__planner_issue.t61a85298.col_71, double BINARY)->Column#20
Expand All @@ -691,3 +692,88 @@ Projection_12 10000.00 root 1->Column#19
└─Selection_17 8000.00 cop[tikv] or(json_memberof(cast(16739493649928310215, json BINARY), planner__core__issuetest__planner_issue.tceb7972c.col_17), not(istrue_with_null(json_contains(planner__core__issuetest__planner_issue.tceb7972c.col_17, cast("6019730272580550835", json BINARY)))))
└─TableFullScan_16 10000.00 cop[tikv] table:tceb7972c keep order:false, stats:pseudo
set @@tidb_enable_global_index=0;
=======
Projection_11 6.00 root 1->Column#18
└─HashJoin_13 6.00 root inner join, equal:[eq(test.tceb7972c.col_19, Column#19)]
├─TableReader_16(Build) 4.80 root partition:all data:Selection_15
│ └─Selection_15 4.80 cop[tikv] or(json_memberof(cast(16739493649928310215, json BINARY), test.tceb7972c.col_17), not(istrue_with_null(json_contains(test.tceb7972c.col_17, cast("6019730272580550835", json BINARY)))))
│ └─TableFullScan_14 6.00 cop[tikv] table:tceb7972c keep order:false, stats:partial[col_17:missing]
└─Projection_17(Probe) 10000.00 root cast(test.t61a85298.col_71, double BINARY)->Column#19
└─TableReader_19 10000.00 root data:TableFullScan_18
└─TableFullScan_18 10000.00 cop[tikv] table:t61a85298 keep order:false, stats:pseudo
drop table if exists t0, t1;
CREATE TABLE t0(c0 int);
CREATE TABLE t1(c0 int);
SELECT t0.c0, t1.c0 FROM t0 NATURAL JOIN t1 WHERE '1' AND (t0.c0 IN (SELECT c0 FROM t0));
c0 c0
drop table if exists t1, t2, t3, t4;
CREATE TABLE t1 (a int, b int, c int);
CREATE TABLE t2 (a int, b int, c int);
CREATE TABLE t3 (a int, b int, c int);
CREATE TABLE t4 (a int, b int, c int);
INSERT INTO t1 VALUES (1,3,0), (2,2,0), (3,2,0);
INSERT INTO t2 VALUES (3,3,0), (4,2,0), (5,3,0);
INSERT INTO t3 VALUES (1,2,0), (2,2,0);
INSERT INTO t4 VALUES (3,2,0), (4,2,0);
CREATE INDEX idx_b ON t2(b);
SELECT t2.a,t2.b,t3.a,t3.b,t4.a,t4.b
FROM (t3,t4)
LEFT JOIN
(t1,t2)
ON t3.a=1 AND t3.b=t2.b AND t2.b=t4.b order by 1, 2, 3, 4, 5;
a b a b a b
NULL NULL 2 2 3 2
NULL NULL 2 2 4 2
4 2 1 2 3 2
4 2 1 2 3 2
4 2 1 2 3 2
4 2 1 2 4 2
4 2 1 2 4 2
4 2 1 2 4 2
show warnings;
Level Code Message
drop table if exists t1, t2, t3, t4;
drop table if exists t0, v0;
drop view if exists v0;
CREATE TABLE t0(c0 INTEGER);
CREATE VIEW v0(c0) AS SELECT 'a' FROM t0 WHERE (CASE t0.c0 WHEN t0.c0 THEN false END );
SELECT t0.c0 FROM v0, t0 WHERE RAND();
c0
drop table if exists tl6e913fb9;
CREATE TABLE `tl6e913fb9` (
`col_36` varchar(175) COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT 'asMF',
KEY `idx_35_5` (`col_36`(1)),
PRIMARY KEY (`col_36`) /*T![clustered_index] NONCLUSTERED */,
KEY `idx_65` (`col_36`(5))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
with cte_192 ( col_1101,col_1102,col_1103,col_1104 ) AS ( select /*+ use_index_merge( tl6e913fb9 ) */ replace( tl6e913fb9.col_36 , tl6e913fb9.col_36 , tl6e913fb9.col_36 ) as r0 , space( 0 ) as r1 , min( distinct tl6e913fb9.col_36 ) as r2 , count( distinct tl6e913fb9.col_36 ) as r3 from tl6e913fb9 where tl6e913fb9.col_36 between 'n92ok$B%W#UU%O' and '()c=KVQ=T%-vzGJ' and tl6e913fb9.col_36 in ( 'T+kf' ,'Lvluod2H' ,'3#Omx@pC^fFkeH' ,'=b$z' ) group by tl6e913fb9.col_36 having tl6e913fb9.col_36 = 'xjV@' or IsNull( tl6e913fb9.col_36 ) ) ( select 1,col_1101,col_1102,col_1103,col_1104 from cte_192 where not( IsNull( cte_192.col_1102 ) ) order by 1,2,3,4,5 limit 72850972 );
1 col_1101 col_1102 col_1103 col_1104
drop table if exists t;
create table t (id int unique key, c int);
insert into t values (1, 10);
insert into t values (2, 20);
insert into t values (3, 30);
select _tidb_rowid from t where id in (1, 2, 3);
_tidb_rowid
1
2
3
drop table if exists t, t1;
create table t(a int);
create table t1(a int primary key, b int, index idx(b));
insert into t values(1), (2), (123);
insert into t1 values(2, 123), (123, 2);
set tidb_opt_fix_control='44855:on';
explain select /*+ inl_join(t1), use_index(t1, idx) */ * from t join t1 on t.a = t1.a and t1.b = 123;
id estRows task access object operator info
Projection_9 12.50 root test.t.a, test.t1.a, test.t1.b
└─IndexJoin_12 12.50 root inner join, inner:IndexReader_11, outer key:test.t.a, inner key:test.t1.a, equal cond:eq(test.t.a, test.t1.a)
├─TableReader_20(Build) 9990.00 root data:Selection_19
│ └─Selection_19 9990.00 cop[tikv] not(isnull(test.t.a))
│ └─TableFullScan_18 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
└─IndexReader_11(Probe) 12.50 root index:IndexRangeScan_10
└─IndexRangeScan_10 12.50 cop[tikv] table:t1, index:idx(b) range: decided by [eq(test.t1.a, test.t.a) eq(test.t1.b, 123)], keep order:false, stats:pseudo
select /*+ inl_join(t1), use_index(t1, idx) */ * from t join t1 on t.a = t1.a and t1.b = 123;
a a b
2 2 123
>>>>>>> f66e8b1e796 (planner: fix the possible panic when fixcontrol#44855 enabled (#59763))
63 changes: 63 additions & 0 deletions tests/integrationtest/t/planner/core/issuetest/planner_issue.test
Original file line number Diff line number Diff line change
Expand Up @@ -477,4 +477,67 @@ FROM (
) AS derived_table
WHERE 16739493649928310215 MEMBER OF (derived_table.col_60767)
OR NOT (JSON_CONTAINS(derived_table.col_60767, '6019730272580550835'));
<<<<<<< HEAD
set @@tidb_enable_global_index=0;
=======

# TestIssue53766
drop table if exists t0, t1;
CREATE TABLE t0(c0 int);
CREATE TABLE t1(c0 int);
SELECT t0.c0, t1.c0 FROM t0 NATURAL JOIN t1 WHERE '1' AND (t0.c0 IN (SELECT c0 FROM t0));

# TestIssue56472
drop table if exists t1, t2, t3, t4;
CREATE TABLE t1 (a int, b int, c int);
CREATE TABLE t2 (a int, b int, c int);
CREATE TABLE t3 (a int, b int, c int);
CREATE TABLE t4 (a int, b int, c int);
INSERT INTO t1 VALUES (1,3,0), (2,2,0), (3,2,0);
INSERT INTO t2 VALUES (3,3,0), (4,2,0), (5,3,0);
INSERT INTO t3 VALUES (1,2,0), (2,2,0);
INSERT INTO t4 VALUES (3,2,0), (4,2,0);
CREATE INDEX idx_b ON t2(b);
SELECT t2.a,t2.b,t3.a,t3.b,t4.a,t4.b
FROM (t3,t4)
LEFT JOIN
(t1,t2)
ON t3.a=1 AND t3.b=t2.b AND t2.b=t4.b order by 1, 2, 3, 4, 5;
show warnings;
drop table if exists t1, t2, t3, t4;

# TestIssue56270
drop table if exists t0, v0;
drop view if exists v0;
CREATE TABLE t0(c0 INTEGER);
CREATE VIEW v0(c0) AS SELECT 'a' FROM t0 WHERE (CASE t0.c0 WHEN t0.c0 THEN false END );
SELECT t0.c0 FROM v0, t0 WHERE RAND();

# TestIssue56479
drop table if exists tl6e913fb9;
CREATE TABLE `tl6e913fb9` (
`col_36` varchar(175) COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT 'asMF',
KEY `idx_35_5` (`col_36`(1)),
PRIMARY KEY (`col_36`) /*T![clustered_index] NONCLUSTERED */,
KEY `idx_65` (`col_36`(5))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
with cte_192 ( col_1101,col_1102,col_1103,col_1104 ) AS ( select /*+ use_index_merge( tl6e913fb9 ) */ replace( tl6e913fb9.col_36 , tl6e913fb9.col_36 , tl6e913fb9.col_36 ) as r0 , space( 0 ) as r1 , min( distinct tl6e913fb9.col_36 ) as r2 , count( distinct tl6e913fb9.col_36 ) as r3 from tl6e913fb9 where tl6e913fb9.col_36 between 'n92ok$B%W#UU%O' and '()c=KVQ=T%-vzGJ' and tl6e913fb9.col_36 in ( 'T+kf' ,'Lvluod2H' ,'3#Omx@pC^fFkeH' ,'=b$z' ) group by tl6e913fb9.col_36 having tl6e913fb9.col_36 = 'xjV@' or IsNull( tl6e913fb9.col_36 ) ) ( select 1,col_1101,col_1102,col_1103,col_1104 from cte_192 where not( IsNull( cte_192.col_1102 ) ) order by 1,2,3,4,5 limit 72850972 );

# TestIssue58581
drop table if exists t;
create table t (id int unique key, c int);
insert into t values (1, 10);
insert into t values (2, 20);
insert into t values (3, 30);
select _tidb_rowid from t where id in (1, 2, 3);

# TestIssue59762
drop table if exists t, t1;
create table t(a int);
create table t1(a int primary key, b int, index idx(b));
insert into t values(1), (2), (123);
insert into t1 values(2, 123), (123, 2);
set tidb_opt_fix_control='44855:on';
explain select /*+ inl_join(t1), use_index(t1, idx) */ * from t join t1 on t.a = t1.a and t1.b = 123;
select /*+ inl_join(t1), use_index(t1, idx) */ * from t join t1 on t.a = t1.a and t1.b = 123;
>>>>>>> f66e8b1e796 (planner: fix the possible panic when fixcontrol#44855 enabled (#59763))