Skip to content
Merged
46 changes: 46 additions & 0 deletions pkg/planner/core/rule_predicate_simplification.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ func splitCNF(conditions []expression.Expression) []expression.Expression {
func applyPredicateSimplification(sctx base.PlanContext, predicates []expression.Expression) []expression.Expression {
simplifiedPredicate := shortCircuitLogicalConstants(sctx, predicates)
simplifiedPredicate = mergeInAndNotEQLists(sctx, simplifiedPredicate)
removeRedundantORBranch(sctx, simplifiedPredicate)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any particular reason for the order of the sub-rules?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no strong reasons. Actually, when I put this new sub-rule at a different position in the order, the results of the new test cases didn't change.
I have several minor considerations though:

  1. I try not to change the order of existing sub-rules.
  2. mergeInAndNotEQLists() simplifies the predicates by merging some of them, i.e., constructing some new expressions. There will probably be new redundant expressions after that sub-rule, so putting the new sub-rule after it might be a good idea.
  3. pruneEmptyORBranches() just removes useless OR branches, and should not produce new redundant expressions. So probably it's useless to put the new sub-rule after it.

pruneEmptyORBranches(sctx, simplifiedPredicate)
simplifiedPredicate = splitCNF(simplifiedPredicate)
return simplifiedPredicate
Expand Down Expand Up @@ -424,6 +425,51 @@ func shortCircuitLogicalConstants(sctx base.PlanContext, predicates []expression
return finalResult
}

// removeRedundantORBranch walks the given list of predicates and replaces each one with the result of
// recursiveRemoveRedundantORBranch, which looks for OR lists (including OR lists nested inside AND
// items) and removes redundant items from each OR list.
// The input slice is updated in place; nothing is returned.
func removeRedundantORBranch(sctx base.PlanContext, predicates []expression.Expression) {
	for idx := range predicates {
		predicates[idx] = recursiveRemoveRedundantORBranch(sctx, predicates[idx])
	}
}

func recursiveRemoveRedundantORBranch(sctx base.PlanContext, predicate expression.Expression) expression.Expression {
_, tp := FindPredicateType(sctx, predicate)
if tp != orPredicate {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a recursive function (indirectly, through removeRedundantORBranch). If you intend to cover the AND case (as the code in the function does), then you just need general logic that handles both OR and AND lists.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't intend to cover the AND case (since that's already covered by other existing logic); I only want to handle OR lists nested in another AND list.

return predicate
}
orFunc := predicate.(*expression.ScalarFunction)
orList := expression.SplitDNFItems(orFunc)

dedupMap := make(map[string]struct{}, len(orList))
newORList := make([]expression.Expression, 0, len(orList))

for _, orItem := range orList {
_, tp := FindPredicateType(sctx, orItem)
// 1. If it's an AND predicate, we recursively call removeRedundantORBranch() on it.
if tp == andPredicate {
andFunc := orItem.(*expression.ScalarFunction)
andList := expression.SplitCNFItems(andFunc)
removeRedundantORBranch(sctx, andList)
newORList = append(newORList, expression.ComposeCNFCondition(sctx.GetExprCtx(), andList...))
} else {
// 2. Otherwise, we check if it's a duplicate predicate by checking HashCode().
hashCode := string(orItem.HashCode())
// 2-1. If it's not a duplicate, we need to keep this predicate.
if _, ok := dedupMap[hashCode]; !ok {
dedupMap[hashCode] = struct{}{}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: the value of the map can be anything and may be just a simple constant like True.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, yes. But I think using map[T]struct{} as a set has become a common practice in Golang.
https://stackoverflow.com/a/47544821

newORList = append(newORList, orItem)
} else if expression.IsMutableEffectsExpr(orItem) {
// 2-2. If it's a duplicate, but it's nondeterministic or has side effects, we also need to keep it.
newORList = append(newORList, orItem)
}
// 2-3. Otherwise, we remove it.
}
}
return expression.ComposeDNFCondition(sctx.GetExprCtx(), newORList...)
}

// Name implements base.LogicalOptRule.<1st> interface.
func (*PredicateSimplification) Name() string {
return "predicate_simplification"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ a b
explain format='brief' SELECT * FROM t WHERE 1 = 0 OR a = -1 OR a = 0 OR a = 1 OR a = 2;
id estRows task access object operator info
TableReader 3.00 root partition:p0,p1,p2 data:Selection
└─Selection 3.00 cop[tikv] or(eq(executor__partition__partition_boundaries.t.a, -1), or(eq(executor__partition__partition_boundaries.t.a, 0), or(eq(executor__partition__partition_boundaries.t.a, 1), eq(executor__partition__partition_boundaries.t.a, 2))))
└─Selection 3.00 cop[tikv] or(or(eq(executor__partition__partition_boundaries.t.a, -1), eq(executor__partition__partition_boundaries.t.a, 0)), or(eq(executor__partition__partition_boundaries.t.a, 1), eq(executor__partition__partition_boundaries.t.a, 2)))
└─TableFullScan 7.00 cop[tikv] table:t keep order:false
SELECT * FROM t WHERE 1 = 0 OR a = -1 OR a = 0 OR a = 1 OR a = 2;
a b
Expand Down Expand Up @@ -563,7 +563,7 @@ a b
explain format='brief' SELECT * FROM t WHERE 1 = 0 OR a = -1 OR a = 0 OR a = 1 OR a = 2 OR a = 3 OR a = 4;
id estRows task access object operator info
TableReader 5.00 root partition:p0,p1,p2,p3,p4 data:Selection
└─Selection 5.00 cop[tikv] or(or(eq(executor__partition__partition_boundaries.t.a, -1), eq(executor__partition__partition_boundaries.t.a, 0)), or(or(eq(executor__partition__partition_boundaries.t.a, 1), eq(executor__partition__partition_boundaries.t.a, 2)), or(eq(executor__partition__partition_boundaries.t.a, 3), eq(executor__partition__partition_boundaries.t.a, 4))))
└─Selection 5.00 cop[tikv] or(or(eq(executor__partition__partition_boundaries.t.a, -1), or(eq(executor__partition__partition_boundaries.t.a, 0), eq(executor__partition__partition_boundaries.t.a, 1))), or(eq(executor__partition__partition_boundaries.t.a, 2), or(eq(executor__partition__partition_boundaries.t.a, 3), eq(executor__partition__partition_boundaries.t.a, 4))))
└─TableFullScan 7.00 cop[tikv] table:t keep order:false
SELECT * FROM t WHERE 1 = 0 OR a = -1 OR a = 0 OR a = 1 OR a = 2 OR a = 3 OR a = 4;
a b
Expand Down Expand Up @@ -644,7 +644,7 @@ a b
explain format='brief' SELECT * FROM t WHERE 1 = 0 OR a = -1 OR a = 0 OR a = 1 OR a = 2 OR a = 3 OR a = 4 OR a = 5 OR a = 6;
id estRows task access object operator info
TableReader 7.00 root partition:all data:Selection
└─Selection 7.00 cop[tikv] or(or(eq(executor__partition__partition_boundaries.t.a, -1), or(eq(executor__partition__partition_boundaries.t.a, 0), eq(executor__partition__partition_boundaries.t.a, 1))), or(or(eq(executor__partition__partition_boundaries.t.a, 2), eq(executor__partition__partition_boundaries.t.a, 3)), or(eq(executor__partition__partition_boundaries.t.a, 4), or(eq(executor__partition__partition_boundaries.t.a, 5), eq(executor__partition__partition_boundaries.t.a, 6)))))
└─Selection 7.00 cop[tikv] or(or(or(eq(executor__partition__partition_boundaries.t.a, -1), eq(executor__partition__partition_boundaries.t.a, 0)), or(eq(executor__partition__partition_boundaries.t.a, 1), eq(executor__partition__partition_boundaries.t.a, 2))), or(or(eq(executor__partition__partition_boundaries.t.a, 3), eq(executor__partition__partition_boundaries.t.a, 4)), or(eq(executor__partition__partition_boundaries.t.a, 5), eq(executor__partition__partition_boundaries.t.a, 6))))
└─TableFullScan 7.00 cop[tikv] table:t keep order:false
SELECT * FROM t WHERE 1 = 0 OR a = -1 OR a = 0 OR a = 1 OR a = 2 OR a = 3 OR a = 4 OR a = 5 OR a = 6;
a b
Expand Down Expand Up @@ -686,7 +686,7 @@ a b
explain format='brief' SELECT * FROM t WHERE 1 = 0 OR a = -1 OR a = 0 OR a = 1 OR a = 2 OR a = 3 OR a = 4 OR a = 5 OR a = 6 OR a = 7;
id estRows task access object operator info
TableReader 7.00 root partition:all data:Selection
└─Selection 7.00 cop[tikv] or(or(eq(executor__partition__partition_boundaries.t.a, -1), or(eq(executor__partition__partition_boundaries.t.a, 0), or(eq(executor__partition__partition_boundaries.t.a, 1), eq(executor__partition__partition_boundaries.t.a, 2)))), or(or(eq(executor__partition__partition_boundaries.t.a, 3), eq(executor__partition__partition_boundaries.t.a, 4)), or(eq(executor__partition__partition_boundaries.t.a, 5), or(eq(executor__partition__partition_boundaries.t.a, 6), eq(executor__partition__partition_boundaries.t.a, 7)))))
└─Selection 7.00 cop[tikv] or(or(or(eq(executor__partition__partition_boundaries.t.a, -1), eq(executor__partition__partition_boundaries.t.a, 0)), or(eq(executor__partition__partition_boundaries.t.a, 1), eq(executor__partition__partition_boundaries.t.a, 2))), or(or(eq(executor__partition__partition_boundaries.t.a, 3), eq(executor__partition__partition_boundaries.t.a, 4)), or(eq(executor__partition__partition_boundaries.t.a, 5), or(eq(executor__partition__partition_boundaries.t.a, 6), eq(executor__partition__partition_boundaries.t.a, 7)))))
└─TableFullScan 7.00 cop[tikv] table:t keep order:false
SELECT * FROM t WHERE 1 = 0 OR a = -1 OR a = 0 OR a = 1 OR a = 2 OR a = 3 OR a = 4 OR a = 5 OR a = 6 OR a = 7;
a b
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -571,3 +571,87 @@ count(*)
SELECT count(*) FROM t1 WHERE (1 = 0 AND a1 = 1) OR (b1 = 2);
count(*)
1
drop table if exists t1;
create table t1 (a int, b decimal(65,30), c int);
explain format=brief select * from t1 where a = 1 or a = 2 or a = 5 or a = 5 or a = 5;
id estRows task access object operator info
TableReader 30.00 root data:Selection
└─Selection 30.00 cop[tikv] or(eq(planner__core__casetest__predicate_simplification.t1.a, 1), or(eq(planner__core__casetest__predicate_simplification.t1.a, 2), eq(planner__core__casetest__predicate_simplification.t1.a, 5)))
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
explain format=brief select * from t1 where a = 1 or a = 2 or a = 5 or a = 5 or a = 5 or b = 1.1 or b = 2.2 or b = 3.3 or b = 3.3;
id estRows task access object operator info
TableReader 59.91 root data:Selection
└─Selection 59.91 cop[tikv] or(or(eq(planner__core__casetest__predicate_simplification.t1.a, 1), or(eq(planner__core__casetest__predicate_simplification.t1.a, 2), eq(planner__core__casetest__predicate_simplification.t1.a, 5))), or(eq(planner__core__casetest__predicate_simplification.t1.b, 1.1), or(eq(planner__core__casetest__predicate_simplification.t1.b, 2.2), eq(planner__core__casetest__predicate_simplification.t1.b, 3.3))))
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
explain format=brief select * from t1 where a = 1 and (b = 1.1 or b = 2.2 or b = 3.3 or b = 3.3);
id estRows task access object operator info
TableReader 0.03 root data:Selection
└─Selection 0.03 cop[tikv] eq(planner__core__casetest__predicate_simplification.t1.a, 1), or(eq(planner__core__casetest__predicate_simplification.t1.b, 1.1), or(eq(planner__core__casetest__predicate_simplification.t1.b, 2.2), eq(planner__core__casetest__predicate_simplification.t1.b, 3.3)))
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
explain format=brief select * from t1 where a = 1 or (b = 1.1 and (a = 1 or a = 2 or a = 5 or a = 5 or a = 5));
id estRows task access object operator info
TableReader 10.03 root data:Selection
└─Selection 10.03 cop[tikv] or(eq(planner__core__casetest__predicate_simplification.t1.a, 1), and(eq(planner__core__casetest__predicate_simplification.t1.b, 1.1), or(eq(planner__core__casetest__predicate_simplification.t1.a, 1), or(eq(planner__core__casetest__predicate_simplification.t1.a, 2), eq(planner__core__casetest__predicate_simplification.t1.a, 5)))))
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
explain format=brief select * from t1 where (a = 1 and (b = 2.2 or (c = 1 and (b = 1 or b = 1)))) or (b = 1.1 and b = 1.1 and (a = 1 or a = 2 or a = 5 or a = 5 or a = 5));
id estRows task access object operator info
TableReader 0.04 root data:Selection
└─Selection 0.04 cop[tikv] or(and(eq(planner__core__casetest__predicate_simplification.t1.a, 1), or(eq(planner__core__casetest__predicate_simplification.t1.b, 2.2), and(eq(planner__core__casetest__predicate_simplification.t1.c, 1), eq(planner__core__casetest__predicate_simplification.t1.b, 1)))), and(eq(planner__core__casetest__predicate_simplification.t1.b, 1.1), or(eq(planner__core__casetest__predicate_simplification.t1.a, 1), or(eq(planner__core__casetest__predicate_simplification.t1.a, 2), eq(planner__core__casetest__predicate_simplification.t1.a, 5)))))
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
explain format=brief select * from t1 where (c = 10 or (b + 1 > 10 and (a + 1 < 5 or a + 1 < 5 or a = 20))) and c + 1 < 10 and (a = 1 or a = 2 or a = 5 or a = 5 or b = 5 or b = 5);
id estRows task access object operator info
TableReader 20.48 root data:Selection
└─Selection 20.48 cop[tikv] lt(plus(planner__core__casetest__predicate_simplification.t1.c, 1), 10), or(eq(planner__core__casetest__predicate_simplification.t1.c, 10), and(gt(plus(planner__core__casetest__predicate_simplification.t1.b, 1), 10), or(lt(plus(planner__core__casetest__predicate_simplification.t1.a, 1), 5), eq(planner__core__casetest__predicate_simplification.t1.a, 20)))), or(or(eq(planner__core__casetest__predicate_simplification.t1.a, 1), eq(planner__core__casetest__predicate_simplification.t1.a, 2)), or(eq(planner__core__casetest__predicate_simplification.t1.a, 5), eq(planner__core__casetest__predicate_simplification.t1.b, 5)))
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
explain format=brief select * from t1 where (rand() * 10 = 1) or (rand() * 10 = 1);
id estRows task access object operator info
Selection 8000.00 root or(eq(mul(rand(), 10), 1), eq(mul(rand(), 10), 1))
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
explain format=brief select * from t1 where (@a:=@a+1) or (@a:=@a+1);
id estRows task access object operator info
Selection 8000.00 root or(istrue_with_null(setvar("a", plus(cast(getvar("a"), double BINARY), 1))), istrue_with_null(setvar("a", plus(getvar("a"), 1))))
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
drop table if exists t2;
create table t2 (a datetime(6), b timestamp(6), index ia(a), index iab(a,b));
explain format=brief select * from t2 where a = '2025-01-01 00:00:00' or a = '2025-01-01 00:00:00' or a = '2025-01-01 00:00:00';
id estRows task access object operator info
IndexReader 10.00 root index:IndexRangeScan
└─IndexRangeScan 10.00 cop[tikv] table:t2, index:iab(a, b) range:[2025-01-01 00:00:00.000000,2025-01-01 00:00:00.000000], keep order:false, stats:pseudo
explain format=brief select * from t2 where (a = '2025-01-01 00:00:00' or a = '2025-01-01 00:00:00' or a = '2025-01-01 00:00:00') and (b = '2025-01-01 00:00:00' or b = '2025-01-01 00:00:00' or b = '2025-01-01 00:00:00');
id estRows task access object operator info
IndexReader 0.10 root index:IndexRangeScan
└─IndexRangeScan 0.10 cop[tikv] table:t2, index:iab(a, b) range:[2025-01-01 00:00:00.000000 2025-01-01 00:00:00.000000,2025-01-01 00:00:00.000000 2025-01-01 00:00:00.000000], keep order:false, stats:pseudo
drop table if exists t3;
create table t3 (a varchar(10) collate utf8mb4_general_ci, b varchar(10) collate utf8mb4_bin, index ia(a), index ib(b));
explain format=brief select * from t3 where a = 'a' or a = 'a' or a = 'A';
id estRows task access object operator info
IndexLookUp 10.00 root
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t3, index:ia(a) range:["\x00A","\x00A"], keep order:false, stats:pseudo
└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t3 keep order:false, stats:pseudo
explain format=brief select * from t3 where a = 'a' or a = 'a' or a = 'A' or b = _utf8mb4'b' or b = _latin1'b' or b = 'B';
id estRows task access object operator info
IndexMerge 29.98 root type: union
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t3, index:ia(a) range:["\x00A","\x00A"], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t3, index:ia(a) range:["\x00A","\x00A"], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t3, index:ib(b) range:["b","b"], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t3, index:ib(b) range:["B","B"], keep order:false, stats:pseudo
└─TableRowIDScan(Probe) 29.98 cop[tikv] table:t3 keep order:false, stats:pseudo
explain format=brief select * from t3 where a = _utf8mb4'a' collate utf8mb4_unicode_ci or a = _utf8mb4'a' collate utf8mb4_0900_ai_ci or a = 'A' or b = 'b' or b = 'b' or b = 'B';
id estRows task access object operator info
TableReader 8006.00 root data:Selection
└─Selection 8006.00 cop[tikv] or(or(eq(planner__core__casetest__predicate_simplification.t3.a, "a"), eq(planner__core__casetest__predicate_simplification.t3.a, "A")), or(eq(planner__core__casetest__predicate_simplification.t3.b, "b"), eq(planner__core__casetest__predicate_simplification.t3.b, "B")))
└─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo
drop table if exists t4;
create table t4(a int, b int, c int, d int, index iab(a,b), index iac(a,c), index iad(a,d));
explain format=brief select /*+ use_index_merge(t4) */ * from t4 where a = 1 and (b = 2 or c = 4 or b = 12 or c = 5 or d = 6 or c = 4 or c = 5 or d = 6);
id estRows task access object operator info
IndexMerge 0.00 root type: union
├─IndexRangeScan(Build) 0.10 cop[tikv] table:t4, index:iab(a, b) range:[1 2,1 2], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 0.10 cop[tikv] table:t4, index:iac(a, c) range:[1 4,1 4], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 0.10 cop[tikv] table:t4, index:iab(a, b) range:[1 12,1 12], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 0.10 cop[tikv] table:t4, index:iac(a, c) range:[1 5,1 5], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 0.10 cop[tikv] table:t4, index:iad(a, d) range:[1 6,1 6], keep order:false, stats:pseudo
└─Selection(Probe) 0.00 cop[tikv] eq(planner__core__casetest__predicate_simplification.t4.a, 1), or(or(eq(planner__core__casetest__predicate_simplification.t4.b, 2), eq(planner__core__casetest__predicate_simplification.t4.c, 4)), or(eq(planner__core__casetest__predicate_simplification.t4.b, 12), or(eq(planner__core__casetest__predicate_simplification.t4.c, 5), eq(planner__core__casetest__predicate_simplification.t4.d, 6))))
└─TableRowIDScan 8.00 cop[tikv] table:t4 keep order:false, stats:pseudo
3 changes: 1 addition & 2 deletions tests/integrationtest/r/planner/core/indexmerge_path.result
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,7 @@ EXPLAIN format = brief SELECT * FROM t WHERE a = 1 AND (c = 13 OR c = 15 OR c =
id estRows task access object operator info
IndexLookUp 0.05 root
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:ia(a) range:[1,1], keep order:false, stats:pseudo
└─Selection(Probe) 0.05 cop[tikv] or(or(eq(planner__core__indexmerge_path.t.c, 13), or(eq(planner__core__indexmerge_path.t.c, 15), eq(planner__core__indexmerge_path.t.c, 5))), or(eq(planner__core__indexmerge_path.t.b, "12"), or(eq(planner__core__indexmerge_path.t.c, 13), eq(planner__core__indexmerge_path.t.b, "11"))))
└─Selection(Probe) 0.05 cop[tikv] or(or(eq(planner__core__indexmerge_path.t.c, 13), eq(planner__core__indexmerge_path.t.c, 15)), or(eq(planner__core__indexmerge_path.t.c, 5), or(eq(planner__core__indexmerge_path.t.b, "12"), eq(planner__core__indexmerge_path.t.b, "11"))))
└─TableRowIDScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo
SET @@tidb_opt_fix_control = '52869:on';
EXPLAIN format = brief SELECT * FROM t WHERE a = 1 AND (b = '2' OR c = 3 OR d = '4');
Expand Down Expand Up @@ -1058,7 +1058,6 @@ IndexMerge 0.05 root type: union
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:ic(c) range:[15,15], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:ic(c) range:[5,5], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:ib(b) range:["12","12"], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:ic(c) range:[13,13], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:ib(b) range:["11","11"], keep order:false, stats:pseudo
└─Selection(Probe) 0.05 cop[tikv] eq(planner__core__indexmerge_path.t.a, 1)
└─TableRowIDScan 49.94 cop[tikv] table:t keep order:false, stats:pseudo
Expand Down
Loading