Skip to content

Commit d73e5f2

Browse files
seawindedataroaring
authored andcommitted
[improvement](mtmv) Not roll up when aggregate rewrite if roll up group by expr is uniform (#38387)
## Proposed changes

Do not roll up during aggregate rewrite if the view group by expressions that would need to be rolled up are uniform.

For example, given a materialized view named mv3_0 that is defined as:

```sql
CREATE MATERIALIZED VIEW mv3_0 BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS select o_orderdate, o_shippriority, o_comment, sum(o_totalprice) as sum_total, max(o_totalprice) as max_total, min(o_totalprice) as min_total, count(*) as count_all from orders group by o_orderdate, o_shippriority, o_comment;
```

and the following query:

```sql
select o_comment, sum(o_totalprice), max(o_totalprice), min(o_totalprice), count(*) from orders where o_orderdate = '2023-12-09' and o_shippriority = 1 group by o_comment;
```

The equality filters pin o_orderdate and o_shippriority to a single value each, so every o_comment group of the query corresponds to exactly one row of mv3_0. After rewrite the plan is as follows; no additional aggregate needs to be added:

```
PhysicalResultSink
--filter((mv3_0.o_orderdate = '2023-12-09') and (mv3_0.o_shippriority = 1))
----PhysicalOlapScan[mv3_0]
```
1 parent eb17f8a commit d73e5f2

File tree

5 files changed

+774
-89
lines changed

5 files changed

+774
-89
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java

Lines changed: 113 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.doris.common.Pair;
2121
import org.apache.doris.nereids.CascadesContext;
2222
import org.apache.doris.nereids.jobs.executor.Rewriter;
23+
import org.apache.doris.nereids.properties.DataTrait;
2324
import org.apache.doris.nereids.rules.analysis.NormalizeRepeat;
2425
import org.apache.doris.nereids.rules.exploration.mv.AbstractMaterializedViewAggregateRule.AggregateExpressionRewriteContext.ExpressionRewriteMode;
2526
import org.apache.doris.nereids.rules.exploration.mv.StructInfo.PlanCheckContext;
@@ -45,6 +46,8 @@
4546
import org.apache.doris.nereids.trees.plans.Plan;
4647
import org.apache.doris.nereids.trees.plans.algebra.Repeat;
4748
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
49+
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
50+
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
4851
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
4952
import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat;
5053
import org.apache.doris.nereids.trees.plans.visitor.ExpressionLineageReplacer;
@@ -113,7 +116,7 @@ protected Plan rewriteQueryByView(MatchMode matchMode,
113116
boolean queryContainsGroupSets = queryAggregate.getSourceRepeat().isPresent();
114117
// If group by expression between query and view is equals, try to rewrite expression directly
115118
if (!queryContainsGroupSets && isGroupByEquals(queryTopPlanAndAggPair, viewTopPlanAndAggPair,
116-
viewToQuerySlotMapping, queryStructInfo, viewStructInfo, materializationContext,
119+
viewToQuerySlotMapping, queryStructInfo, viewStructInfo, tempRewritedPlan, materializationContext,
117120
cascadesContext)) {
118121
List<Expression> rewrittenQueryExpressions = rewriteExpression(queryTopPlan.getOutput(),
119122
queryTopPlan,
@@ -324,18 +327,21 @@ private boolean isGroupByEquals(Pair<Plan, LogicalAggregate<Plan>> queryTopPlanA
324327
SlotMapping viewToQuerySlotMapping,
325328
StructInfo queryStructInfo,
326329
StructInfo viewStructInfo,
330+
Plan tempRewrittenPlan,
327331
MaterializationContext materializationContext,
328332
CascadesContext cascadesContext) {
333+
334+
if (materializationContext instanceof SyncMaterializationContext) {
335+
// For data correctness, should always add aggregate node if rewritten by sync materialized view
336+
return false;
337+
}
329338
Plan queryTopPlan = queryTopPlanAndAggPair.key();
330339
Plan viewTopPlan = viewTopPlanAndAggPair.key();
331340
LogicalAggregate<Plan> queryAggregate = queryTopPlanAndAggPair.value();
332341
LogicalAggregate<Plan> viewAggregate = viewTopPlanAndAggPair.value();
333342

334-
Set<Expression> queryGroupShuttledExpression = new HashSet<>();
335-
for (Expression queryExpression : ExpressionUtils.shuttleExpressionWithLineage(
336-
queryAggregate.getGroupByExpressions(), queryTopPlan, queryStructInfo.getTableBitSet())) {
337-
queryGroupShuttledExpression.add(queryExpression);
338-
}
343+
Set<Expression> queryGroupShuttledExpression = new HashSet<>(ExpressionUtils.shuttleExpressionWithLineage(
344+
queryAggregate.getGroupByExpressions(), queryTopPlan, queryStructInfo.getTableBitSet()));
339345

340346
// try to eliminate group by dimension by function dependency if group by expression is not in query
341347
Map<Expression, Expression> viewShuttledExpressionQueryBasedToGroupByExpressionMap = new HashMap<>();
@@ -355,22 +361,112 @@ private boolean isGroupByEquals(Pair<Plan, LogicalAggregate<Plan>> queryTopPlanA
355361
viewGroupExpressionQueryBased
356362
);
357363
}
358-
if (queryGroupShuttledExpression.equals(viewShuttledExpressionQueryBasedToGroupByExpressionMap.values())) {
364+
if (queryGroupShuttledExpression.equals(viewShuttledExpressionQueryBasedToGroupByExpressionMap.keySet())) {
359365
// return true, if equals directly
360366
return true;
361367
}
368+
369+
boolean isGroupByEquals = false;
370+
// check is equals by group by eliminate
371+
isGroupByEquals |= isGroupByEqualsAfterGroupByEliminate(queryGroupShuttledExpression,
372+
viewShuttledExpressionQueryBasedToGroupByExpressionMap,
373+
groupByExpressionToViewShuttledExpressionQueryBasedMap,
374+
viewAggregate,
375+
cascadesContext);
376+
// check is equals by equal filter eliminate
377+
Optional<LogicalFilter<Plan>> filterOptional = tempRewrittenPlan.collectFirst(LogicalFilter.class::isInstance);
378+
if (!filterOptional.isPresent()) {
379+
return isGroupByEquals;
380+
}
381+
isGroupByEquals |= isGroupByEqualsAfterEqualFilterEliminate(
382+
(LogicalPlan) tempRewrittenPlan,
383+
queryGroupShuttledExpression,
384+
viewShuttledExpressionQueryBasedToGroupByExpressionMap,
385+
materializationContext);
386+
return isGroupByEquals;
387+
}
388+
389+
/**
390+
* Check whether the group by expressions of query and view are equal after equal filter elimination.
391+
* For example, the query is: select a, b, c from t1 where a = 1 and d = 'xx' group by a, b, c;
392+
* and the mv is: select a, b, c, d from t1 group by a, b, c, d;
393+
* The extra view group by expression d is uniform because of the equal filter d = 'xx', so the group by
394+
* expressions of query and view are equal after elimination and aggregate roll up should not be done.
395+
* */
396+
private static boolean isGroupByEqualsAfterEqualFilterEliminate(
397+
LogicalPlan tempRewrittenPlan,
398+
Set<Expression> queryGroupShuttledExpression,
399+
Map<Expression, Expression> viewShuttledExprQueryBasedToViewGroupByExprMap,
400+
MaterializationContext materializationContext) {
401+
402+
Map<Expression, Expression> viewShuttledExprToScanExprMapping =
403+
materializationContext.getShuttledExprToScanExprMapping().flattenMap().get(0);
404+
Set<Expression> viewShuttledExprQueryBasedSet = viewShuttledExprQueryBasedToViewGroupByExprMap.keySet();
405+
// view group by expr can not cover query group by expr
406+
if (!viewShuttledExprQueryBasedSet.containsAll(queryGroupShuttledExpression)) {
407+
return false;
408+
}
409+
Set<Expression> viewShouldUniformExpressionSet = new HashSet<>();
410+
// calc the group by expr which is needed to roll up and should be uniform
411+
for (Map.Entry<Expression, Expression> expressionEntry :
412+
viewShuttledExprQueryBasedToViewGroupByExprMap.entrySet()) {
413+
if (queryGroupShuttledExpression.contains(expressionEntry.getKey())) {
414+
// the group expr which query has, do not require uniform
415+
continue;
416+
}
417+
viewShouldUniformExpressionSet.add(expressionEntry.getValue());
418+
}
419+
420+
DataTrait dataTrait = tempRewrittenPlan.computeDataTrait();
421+
for (Expression shouldUniformExpr : viewShouldUniformExpressionSet) {
422+
Expression viewScanExpression = viewShuttledExprToScanExprMapping.get(shouldUniformExpr);
423+
if (viewScanExpression == null) {
424+
return false;
425+
}
426+
if (!(viewScanExpression instanceof Slot)) {
427+
return false;
428+
}
429+
if (!dataTrait.isUniform((Slot) viewScanExpression)) {
430+
return false;
431+
}
432+
}
433+
return true;
434+
}
435+
436+
/**
437+
* Check group by is equal or not after group by eliminate by functional dependency
438+
* Such as query group by expression is (l_orderdate#1, l_supperkey#2)
439+
* materialized view is group by expression is (l_orderdate#4, l_supperkey#5, l_partkey#6)
440+
* materialized view expression mapping is
441+
* {l_orderdate#4:l_orderdate#10, l_supperkey#5:l_supperkey#11, l_partkey#6:l_partkey#12}
442+
* 1. viewShuttledExpressionQueryBasedToGroupByExpressionMap
443+
* is {l_orderdate#1:l_orderdate#10, l_supperkey#2:l_supperkey#11}
444+
* groupByExpressionToViewShuttledExpressionQueryBasedMap
445+
* is {l_orderdate#10:l_orderdate#1, l_supperkey#11:l_supperkey#2:}
446+
* 2. construct projects query used by view group expressions
447+
* projects (l_orderdate#10, l_supperkey#11)
448+
* 3. try to eliminate materialized view group expression
449+
* projects (l_orderdate#10, l_supperkey#11)
450+
* viewAggregate
451+
* 4. check the viewAggregate group by expression is equals queryAggregate expression or not
452+
*/
453+
private static boolean isGroupByEqualsAfterGroupByEliminate(Set<Expression> queryGroupShuttledExpression,
454+
Map<Expression, Expression> viewShuttledExpressionQueryBasedToGroupByExpressionMap,
455+
Map<Expression, Expression> groupByExpressionToViewShuttledExpressionQueryBasedMap,
456+
LogicalAggregate<Plan> viewAggregate,
457+
CascadesContext cascadesContext) {
362458
List<NamedExpression> projects = new ArrayList<>();
459+
// construct projects query used by view group expressions
363460
for (Expression expression : queryGroupShuttledExpression) {
364-
if (!viewShuttledExpressionQueryBasedToGroupByExpressionMap.containsKey(expression)) {
365-
// query group expression is not in view group by expression
461+
Expression chosenExpression = viewShuttledExpressionQueryBasedToGroupByExpressionMap.get(expression);
462+
if (chosenExpression == null) {
366463
return false;
367464
}
368-
Expression chosenExpression = viewShuttledExpressionQueryBasedToGroupByExpressionMap.get(expression);
369465
projects.add(chosenExpression instanceof NamedExpression
370466
? (NamedExpression) chosenExpression : new Alias(chosenExpression));
371467
}
372468
LogicalProject<LogicalAggregate<Plan>> project = new LogicalProject<>(projects, viewAggregate);
373-
// try to eliminate group by expression which is not in query group by expression
469+
// try to eliminate view group by expression which is not in query group by expression
374470
Plan rewrittenPlan = MaterializedViewUtils.rewriteByRules(cascadesContext,
375471
childContext -> {
376472
Rewriter.getCteChildrenRewriter(childContext,
@@ -383,20 +479,21 @@ private boolean isGroupByEquals(Pair<Plan, LogicalAggregate<Plan>> queryTopPlanA
383479
if (!aggreagateOptional.isPresent()) {
384480
return false;
385481
}
482+
// check result after view group by eliminate by functional dependency
386483
List<Expression> viewEliminatedGroupByExpressions = aggreagateOptional.get().getGroupByExpressions();
387484
if (viewEliminatedGroupByExpressions.size() != queryGroupShuttledExpression.size()) {
388485
return false;
389486
}
390487
Set<Expression> viewGroupShuttledExpressionQueryBased = new HashSet<>();
391488
for (Expression viewExpression : aggreagateOptional.get().getGroupByExpressions()) {
392-
if (!groupByExpressionToViewShuttledExpressionQueryBasedMap.containsKey(viewExpression)) {
489+
Expression viewExpressionQueryBased =
490+
groupByExpressionToViewShuttledExpressionQueryBasedMap.get(viewExpression);
491+
if (viewExpressionQueryBased == null) {
393492
return false;
394493
}
395-
viewGroupShuttledExpressionQueryBased.add(
396-
groupByExpressionToViewShuttledExpressionQueryBasedMap.get(viewExpression));
494+
viewGroupShuttledExpressionQueryBased.add(viewExpressionQueryBased);
397495
}
398-
return materializationContext instanceof SyncMaterializationContext ? false
399-
: queryGroupShuttledExpression.equals(viewGroupShuttledExpressionQueryBased);
496+
return queryGroupShuttledExpression.equals(viewGroupShuttledExpressionQueryBased);
400497
}
401498

402499
/**
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !query1_0_before --
3+
2023-12-10 46.00 33.50 2 0
4+
5+
-- !shape1_0_after --
6+
PhysicalResultSink
7+
--hashAgg[GLOBAL]
8+
----hashAgg[LOCAL]
9+
------filter((mv1_0.o_custkey = 1) and (mv1_0.o_orderkey = 3))
10+
--------PhysicalOlapScan[mv1_0]
11+
12+
-- !query1_0_after --
13+
2023-12-10 46.00 33.50 2 0
14+
15+
-- !query2_0_before --
16+
2 2 2 1.0 1.0 1 1
17+
18+
-- !shape2_0_after --
19+
PhysicalResultSink
20+
--hashAgg[DISTINCT_LOCAL]
21+
----hashAgg[GLOBAL]
22+
------hashAgg[LOCAL]
23+
--------filter((mv2_0.o_orderkey = 1) and (mv2_0.o_orderstatus = 'o'))
24+
----------PhysicalOlapScan[mv2_0]
25+
26+
-- !query2_0_after --
27+
2 2 2 1.0 1.0 1 1
28+
29+
-- !query3_0_before --
30+
yy 11.50 11.50 11.50 1
31+
32+
-- !shape3_0_after --
33+
PhysicalResultSink
34+
--filter((mv3_0.o_orderdate = '2023-12-09') and (mv3_0.o_shippriority = 1))
35+
----PhysicalOlapScan[mv3_0]
36+
37+
-- !query3_0_after --
38+
yy 11.50 11.50 11.50 1
39+
40+
-- !query3_1_before --
41+
mi 56.20 56.20 56.20 1
42+
43+
-- !shape3_1_after --
44+
PhysicalResultSink
45+
--hashAgg[GLOBAL]
46+
----hashAgg[LOCAL]
47+
------filter((orders.o_orderdate = '2023-12-12') and (orders.o_shippriority = 2) and (orders.o_totalprice = 56.20))
48+
--------PhysicalOlapScan[orders]
49+
50+
-- !query3_1_after --
51+
mi 56.20 56.20 56.20 1
52+
53+
-- !query4_0_before --
54+
yy 11.50 11.50 11.50 1
55+
56+
-- !query4_0_after --
57+
yy 11.50 11.50 11.50 1
58+
59+
-- !query5_0_before --
60+
3 2023-12-12 57.40 56.20 2 0
61+
62+
-- !shape5_0_after --
63+
PhysicalResultSink
64+
--hashAgg[GLOBAL]
65+
----hashAgg[LOCAL]
66+
------filter((mv5_0.l_partkey = 2) and (mv5_0.l_shipdate = '2023-12-12'))
67+
--------PhysicalOlapScan[mv5_0]
68+
69+
-- !query5_0_after --
70+
3 2023-12-12 57.40 56.20 2 0
71+
72+
-- !query6_0_before --
73+
2 2 2 2 1.0 1.0 1 1
74+
75+
-- !shape6_0_after --
76+
PhysicalResultSink
77+
--hashAgg[DISTINCT_LOCAL]
78+
----hashAgg[GLOBAL]
79+
------hashAgg[LOCAL]
80+
--------filter((mv6_0.o_orderkey = 1) and (mv6_0.o_orderstatus = 'o'))
81+
----------PhysicalOlapScan[mv6_0]
82+
83+
-- !query6_0_after --
84+
2 2 2 2 1.0 1.0 1 1
85+
86+
-- !query7_0_before --
87+
yy 4 11.50 11.50 11.50 1
88+
89+
-- !shape7_0_after --
90+
PhysicalResultSink
91+
--filter((mv7_0.o_orderdate = '2023-12-09') and (mv7_0.o_shippriority = 1))
92+
----PhysicalOlapScan[mv7_0]
93+
94+
-- !query7_0_after --
95+
yy 4 11.50 11.50 11.50 1
96+
97+
-- !query7_1_before --
98+
yy 4 11.50 11.50 11.50 1
99+
100+
-- !shape7_1_after --
101+
PhysicalResultSink
102+
--hashAgg[GLOBAL]
103+
----hashAgg[LOCAL]
104+
------hashJoin[INNER_JOIN] hashCondition=((t1.l_orderkey = orders.o_orderkey) and (t1.l_shipdate = orders.o_orderdate)) otherCondition=()
105+
--------filter((t1.l_shipdate = '2023-12-09'))
106+
----------PhysicalOlapScan[lineitem]
107+
--------filter((orders.o_orderdate = '2023-12-09') and (orders.o_shippriority = 1) and (orders.o_totalprice = 11.50))
108+
----------PhysicalOlapScan[orders]
109+
110+
-- !query7_1_after --
111+
yy 4 11.50 11.50 11.50 1
112+
113+
-- !query8_0_before --
114+
yy 4 11.50 11.50 11.50 1
115+
116+
-- !shape8_0_after --
117+
PhysicalResultSink
118+
--hashAgg[GLOBAL]
119+
----hashAgg[LOCAL]
120+
------filter((mv8_0.o_orderdate = '2023-12-09'))
121+
--------PhysicalOlapScan[mv8_0]
122+
123+
-- !query8_0_after --
124+
yy 4 11.50 11.50 11.50 1
125+

0 commit comments

Comments
 (0)