Skip to content

Commit 72dff44

Browse files
committed
[improvement](mtmv) Not roll up when aggregate rewrite if roll up group by expr is uniform
1 parent ed4abc7 commit 72dff44

File tree

3 files changed

+326
-33
lines changed

3 files changed

+326
-33
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java

Lines changed: 112 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.doris.common.Pair;
2121
import org.apache.doris.nereids.CascadesContext;
2222
import org.apache.doris.nereids.jobs.executor.Rewriter;
23+
import org.apache.doris.nereids.properties.DataTrait;
2324
import org.apache.doris.nereids.rules.analysis.NormalizeRepeat;
2425
import org.apache.doris.nereids.rules.exploration.mv.AbstractMaterializedViewAggregateRule.AggregateExpressionRewriteContext.ExpressionRewriteMode;
2526
import org.apache.doris.nereids.rules.exploration.mv.StructInfo.PlanCheckContext;
@@ -45,6 +46,8 @@
4546
import org.apache.doris.nereids.trees.plans.Plan;
4647
import org.apache.doris.nereids.trees.plans.algebra.Repeat;
4748
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
49+
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
50+
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
4851
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
4952
import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat;
5053
import org.apache.doris.nereids.trees.plans.visitor.ExpressionLineageReplacer;
@@ -113,7 +116,7 @@ protected Plan rewriteQueryByView(MatchMode matchMode,
113116
boolean queryContainsGroupSets = queryAggregate.getSourceRepeat().isPresent();
114117
// If group by expression between query and view is equals, try to rewrite expression directly
115118
if (!queryContainsGroupSets && isGroupByEquals(queryTopPlanAndAggPair, viewTopPlanAndAggPair,
116-
viewToQuerySlotMapping, queryStructInfo, viewStructInfo, materializationContext,
119+
viewToQuerySlotMapping, queryStructInfo, viewStructInfo, tempRewritedPlan, materializationContext,
117120
cascadesContext)) {
118121
List<Expression> rewrittenQueryExpressions = rewriteExpression(queryTopPlan.getOutput(),
119122
queryTopPlan,
@@ -325,8 +328,13 @@ private boolean isGroupByEquals(Pair<Plan, LogicalAggregate<Plan>> queryTopPlanA
325328
SlotMapping viewToQuerySlotMapping,
326329
StructInfo queryStructInfo,
327330
StructInfo viewStructInfo,
331+
Plan tempRewrittenPlan,
328332
MaterializationContext materializationContext,
329333
CascadesContext cascadesContext) {
334+
335+
if (materializationContext instanceof SyncMaterializationContext) {
336+
return false;
337+
}
330338
Plan queryTopPlan = queryTopPlanAndAggPair.key();
331339
Plan viewTopPlan = viewTopPlanAndAggPair.key();
332340
LogicalAggregate<Plan> queryAggregate = queryTopPlanAndAggPair.value();
@@ -360,18 +368,111 @@ private boolean isGroupByEquals(Pair<Plan, LogicalAggregate<Plan>> queryTopPlanA
360368
// return true, if equals directly
361369
return true;
362370
}
371+
372+
// The group by expressions are not directly equal. They can still be treated as equal in either of
// the two cases below, in which case no aggregate roll up is needed.
//
// Case 1: equal after eliminating the view group by expressions which the query does not use.
// Return as soon as this succeeds: previously a missing filter in the rewritten plan made the method
// return false and silently discarded a positive result from this check.
if (isGroupByEqualsAfterGroupByEliminate(queryGroupShuttledExpression,
        viewShuttledExpressionQueryBasedToGroupByExpressionMap,
        groupByExpressionToViewShuttledExpressionQueryBasedMap,
        viewAggregate,
        cascadesContext)) {
    return true;
}
// Case 2: equal after equal filter eliminate. This is only attempted when the rewritten plan
// contains at least one filter; without a filter there is nothing left to check.
Optional<LogicalFilter<Plan>> filterOptional = tempRewrittenPlan.collectFirst(LogicalFilter.class::isInstance);
if (!filterOptional.isPresent()) {
    return false;
}
return isGroupByEqualsAfterEqualFilterEliminate(
        (LogicalPlan) tempRewrittenPlan,
        queryGroupShuttledExpression,
        viewShuttledExpressionQueryBasedToGroupByExpressionMap,
        materializationContext);
390+
}
391+
392+
/**
393+
* Check group by is equals by equal filter eliminate
394+
* For example query is select a, b, c from t1 where a = 1 and d = 'xx' group by a, b, c;
395+
* mv is select a, b, c, d from t1 group by a, b, c, d;
396+
* the group by expression between query and view is equals after equal filter eliminate
397+
* should not aggregate roll up
398+
* */
399+
private static boolean isGroupByEqualsAfterEqualFilterEliminate(
400+
LogicalPlan tempRewrittenPlan,
401+
Set<Expression> queryGroupShuttledExpression,
402+
Map<Expression, Expression> viewShuttledExprQueryBasedToViewGroupByExprMap,
403+
MaterializationContext materializationContext) {
404+
405+
Map<Expression, Expression> viewShuttledExprToScanExprMapping =
406+
materializationContext.getShuttledExprToScanExprMapping().flattenMap().get(0);
407+
Set<Expression> viewShuttledExprQueryBasedSet = viewShuttledExprQueryBasedToViewGroupByExprMap.keySet();
408+
// view group by expr can not cover query group by expr
409+
if (!viewShuttledExprQueryBasedSet.containsAll(queryGroupShuttledExpression)) {
410+
return false;
411+
}
412+
Set<Expression> viewShouldUniformExpressionSet = new HashSet<>();
413+
for (Map.Entry<Expression, Expression> expressionEntry :
414+
viewShuttledExprQueryBasedToViewGroupByExprMap.entrySet()) {
415+
if (queryGroupShuttledExpression.contains(expressionEntry.getKey())) {
416+
// the group expr which query has, do not require uniform
417+
continue;
418+
}
419+
viewShouldUniformExpressionSet.add(expressionEntry.getValue());
420+
}
421+
422+
DataTrait dataTrait = tempRewrittenPlan.computeDataTrait();
423+
for (Expression shouldUniformExpr : viewShouldUniformExpressionSet) {
424+
Expression viewScanExpression = viewShuttledExprToScanExprMapping.get(shouldUniformExpr);
425+
if (viewScanExpression == null) {
426+
return false;
427+
}
428+
if (!(viewScanExpression instanceof Slot)) {
429+
return false;
430+
}
431+
if (!dataTrait.isUniform((Slot) viewScanExpression)) {
432+
return false;
433+
}
434+
}
435+
return true;
436+
}
437+
438+
/**
439+
* Check group by is equal or not after group by eliminate
440+
* Such as query group by expression is (l_orderdate#1, l_supperkey#2)
441+
* materialized view group by expression is (l_orderdate#4, l_supperkey#5, l_partkey#6)
442+
* materialized view expression mapping is
443+
* {l_orderdate#4:l_orderdate#10, l_supperkey#5:l_supperkey#11, l_partkey#6:l_partkey#12}
444+
*
445+
* 1. viewShuttledExpressionQueryBasedToGroupByExpressionMap
446+
* is {l_orderdate#1:l_orderdate#10, l_supperkey#2:l_supperkey#11}
447+
* groupByExpressionToViewShuttledExpressionQueryBasedMap
448+
* is {l_orderdate#10:l_orderdate#1, l_supperkey#11:l_supperkey#2}
449+
*
450+
* 2. construct projects query used by view group expressions
451+
* projects (l_orderdate#10, l_supperkey#11)
452+
*
453+
* 3. try to eliminate materialized view group expression
454+
* projects (l_orderdate#10, l_supperkey#11)
455+
* viewAggregate
456+
*
457+
* 4. check the viewAggregate group by expression is equals queryAggregate expression or not
458+
*/
459+
private static boolean isGroupByEqualsAfterGroupByEliminate(Set<Expression> queryGroupShuttledExpression,
460+
Map<Expression, Expression> viewShuttledExpressionQueryBasedToGroupByExpressionMap,
461+
Map<Expression, Expression> groupByExpressionToViewShuttledExpressionQueryBasedMap,
462+
LogicalAggregate<Plan> viewAggregate,
463+
CascadesContext cascadesContext) {
363464
List<NamedExpression> projects = new ArrayList<>();
465+
// construct projects query used by view group expressions
364466
for (Expression expression : queryGroupShuttledExpression) {
365-
if (!viewShuttledExpressionQueryBasedToGroupByExpressionMap.containsKey(expression)) {
366-
// query group expression is not in view group by expression
467+
Expression chosenExpression = viewShuttledExpressionQueryBasedToGroupByExpressionMap.get(expression);
468+
if (chosenExpression == null) {
367469
return false;
368470
}
369-
Expression chosenExpression = viewShuttledExpressionQueryBasedToGroupByExpressionMap.get(expression);
370471
projects.add(chosenExpression instanceof NamedExpression
371472
? (NamedExpression) chosenExpression : new Alias(chosenExpression));
372473
}
373474
LogicalProject<LogicalAggregate<Plan>> project = new LogicalProject<>(projects, viewAggregate);
374-
// try to eliminate group by expression which is not in query group by expression
475+
// try to eliminate view group by expression which is not in query group by expression
375476
Plan rewrittenPlan = MaterializedViewUtils.rewriteByRules(cascadesContext,
376477
childContext -> {
377478
Rewriter.getCteChildrenRewriter(childContext,
@@ -384,20 +485,21 @@ private boolean isGroupByEquals(Pair<Plan, LogicalAggregate<Plan>> queryTopPlanA
384485
if (!aggreagateOptional.isPresent()) {
385486
return false;
386487
}
488+
// check result after view group by eliminate by functional dependency
387489
List<Expression> viewEliminatedGroupByExpressions = aggreagateOptional.get().getGroupByExpressions();
388490
if (viewEliminatedGroupByExpressions.size() != queryGroupShuttledExpression.size()) {
389491
return false;
390492
}
391493
Set<Expression> viewGroupShuttledExpressionQueryBased = new HashSet<>();
392494
for (Expression viewExpression : aggreagateOptional.get().getGroupByExpressions()) {
393-
if (!groupByExpressionToViewShuttledExpressionQueryBasedMap.containsKey(viewExpression)) {
495+
Expression viewExpressionQueryBased =
496+
groupByExpressionToViewShuttledExpressionQueryBasedMap.get(viewExpression);
497+
if (viewExpressionQueryBased == null) {
394498
return false;
395499
}
396-
viewGroupShuttledExpressionQueryBased.add(
397-
groupByExpressionToViewShuttledExpressionQueryBasedMap.get(viewExpression));
500+
viewGroupShuttledExpressionQueryBased.add(viewExpressionQueryBased);
398501
}
399-
return materializationContext instanceof SyncMaterializationContext ? false
400-
: queryGroupShuttledExpression.equals(viewGroupShuttledExpressionQueryBased);
502+
return queryGroupShuttledExpression.equals(viewGroupShuttledExpressionQueryBased);
401503
}
402504

403505
/**
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
package mv.agg_optimize_when_uniform
2+
// Licensed to the Apache Software Foundation (ASF) under one
3+
// or more contributor license agreements. See the NOTICE file
4+
// distributed with this work for additional information
5+
// regarding copyright ownership. The ASF licenses this file
6+
// to you under the Apache License, Version 2.0 (the
7+
// "License"); you may not use this file except in compliance
8+
// with the License. You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing,
13+
// software distributed under the License is distributed on an
14+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
// KIND, either express or implied. See the License for the
16+
// specific language governing permissions and limitations
17+
// under the License.
18+
19+
suite("agg_optimize_when_uniform") {
20+
String db = context.config.getDbNameByFile(context.file)
21+
sql "use ${db}"
22+
sql "set runtime_filter_mode=OFF";
23+
sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
24+
25+
sql """
26+
drop table if exists orders
27+
"""
28+
29+
sql """
30+
CREATE TABLE IF NOT EXISTS orders (
31+
o_orderkey INTEGER NOT NULL,
32+
o_custkey INTEGER NOT NULL,
33+
o_orderstatus CHAR(1) NOT NULL,
34+
o_totalprice DECIMALV3(15,2) NOT NULL,
35+
o_orderdate DATE NOT NULL,
36+
o_orderpriority CHAR(15) NOT NULL,
37+
o_clerk CHAR(15) NOT NULL,
38+
o_shippriority INTEGER NOT NULL,
39+
O_COMMENT VARCHAR(79) NOT NULL
40+
)
41+
DUPLICATE KEY(o_orderkey, o_custkey)
42+
PARTITION BY RANGE(o_orderdate) (
43+
PARTITION `day_2` VALUES LESS THAN ('2023-12-9'),
44+
PARTITION `day_3` VALUES LESS THAN ("2023-12-11"),
45+
PARTITION `day_4` VALUES LESS THAN ("2023-12-30")
46+
)
47+
DISTRIBUTED BY HASH(o_orderkey) BUCKETS 3
48+
PROPERTIES (
49+
"replication_num" = "1"
50+
);
51+
"""
52+
53+
sql """
54+
drop table if exists lineitem
55+
"""
56+
57+
sql"""
58+
CREATE TABLE IF NOT EXISTS lineitem (
59+
l_orderkey INTEGER NOT NULL,
60+
l_partkey INTEGER NOT NULL,
61+
l_suppkey INTEGER NOT NULL,
62+
l_linenumber INTEGER NOT NULL,
63+
l_quantity DECIMALV3(15,2) NOT NULL,
64+
l_extendedprice DECIMALV3(15,2) NOT NULL,
65+
l_discount DECIMALV3(15,2) NOT NULL,
66+
l_tax DECIMALV3(15,2) NOT NULL,
67+
l_returnflag CHAR(1) NOT NULL,
68+
l_linestatus CHAR(1) NOT NULL,
69+
l_shipdate DATE NOT NULL,
70+
l_commitdate DATE NOT NULL,
71+
l_receiptdate DATE NOT NULL,
72+
l_shipinstruct CHAR(25) NOT NULL,
73+
l_shipmode CHAR(10) NOT NULL,
74+
l_comment VARCHAR(44) NOT NULL
75+
)
76+
DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber)
77+
PARTITION BY RANGE(l_shipdate) (
78+
PARTITION `day_1` VALUES LESS THAN ('2023-12-9'),
79+
PARTITION `day_2` VALUES LESS THAN ("2023-12-11"),
80+
PARTITION `day_3` VALUES LESS THAN ("2023-12-30"))
81+
DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3
82+
PROPERTIES (
83+
"replication_num" = "1"
84+
)
85+
"""
86+
87+
sql """
88+
drop table if exists partsupp
89+
"""
90+
91+
sql """
92+
CREATE TABLE IF NOT EXISTS partsupp (
93+
ps_partkey INTEGER NOT NULL,
94+
ps_suppkey INTEGER NOT NULL,
95+
ps_availqty INTEGER NOT NULL,
96+
ps_supplycost DECIMALV3(15,2) NOT NULL,
97+
ps_comment VARCHAR(199) NOT NULL
98+
)
99+
DUPLICATE KEY(ps_partkey, ps_suppkey)
100+
DISTRIBUTED BY HASH(ps_partkey) BUCKETS 3
101+
PROPERTIES (
102+
"replication_num" = "1"
103+
)
104+
"""
105+
106+
sql """ insert into lineitem values
107+
(1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-08', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'),
108+
(2, 4, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-09', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'),
109+
(3, 2, 4, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-10', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'),
110+
(4, 3, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-11', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'),
111+
(5, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-12-12', '2023-12-12', '2023-12-13', 'c', 'd', 'xxxxxxxxx');
112+
"""
113+
114+
sql """
115+
insert into orders values
116+
(1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'),
117+
(1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'),
118+
(2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'),
119+
(3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'),
120+
(3, 1, 'o', 33.5, '2023-12-10', 'a', 'b', 1, 'yy'),
121+
(4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'),
122+
(5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'),
123+
(5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi');
124+
"""
125+
126+
sql """
127+
insert into partsupp values
128+
(2, 3, 9, 10.01, 'supply1'),
129+
(2, 3, 10, 11.01, 'supply2');
130+
"""
131+
132+
// Helper: (re)creates a partitioned materialized view and checks that a query is rewritten to use it.
//   mv_sql           - the SELECT statement used as the materialized view definition
//   query_sql        - the query that is explained afterwards
//   mv_name          - name of the materialized view to drop and create
//   partition_column - column placed in the PARTITION BY clause of the materialized view
// Steps: drop the view if it exists, create it, wait for its refresh job via
// getJobName/waitingMTMVTaskFinished, then explain query_sql and look for "<mv_name>(<mv_name>)".
// NOTE(review): presumably `contains` makes the explain action fail when the text is missing - confirm
// against the regression framework.
// NOTE(review): this closure is not called anywhere in the visible part of this suite.
def check_rewrite_with_mv_partition = { mv_sql, query_sql, mv_name, partition_column ->
133+
134+
sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}"""
135+
sql"""
136+
CREATE MATERIALIZED VIEW ${mv_name}
137+
BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL
138+
PARTITION BY (${partition_column})
139+
DISTRIBUTED BY RANDOM BUCKETS 2
140+
PROPERTIES ('replication_num' = '1')
141+
AS ${mv_sql}
142+
"""
143+
144+
def job_name = getJobName(db, mv_name);
145+
waitingMTMVTaskFinished(job_name)
146+
explain {
147+
sql("${query_sql}")
148+
contains("${mv_name}(${mv_name})")
149+
}
150+
}
151+
152+
// Helper: (re)creates an unpartitioned materialized view and inspects the explain output of a query.
//   mv_sql    - the SELECT statement used as the materialized view definition
//   query_sql - the query that is explained afterwards
//   mv_name   - name of the materialized view to drop and create
// Steps: drop the view if it exists, create it, wait for its refresh job via
// getJobName/waitingMTMVTaskFinished, then explain query_sql. The check closure splits the explain
// text on "MaterializedViewRewriteFail" and yields true only when there are exactly two parts and
// the part before the marker contains mv_name; otherwise it yields false.
// NOTE(review): this closure is not called anywhere in the visible part of this suite.
def check_rewrite_but_not_chose = { mv_sql, query_sql, mv_name ->
153+
154+
sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}"""
155+
sql"""
156+
CREATE MATERIALIZED VIEW ${mv_name}
157+
BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL
158+
DISTRIBUTED BY RANDOM BUCKETS 2
159+
PROPERTIES ('replication_num' = '1')
160+
AS ${mv_sql}
161+
"""
162+
163+
def job_name = getJobName(db, mv_name);
164+
waitingMTMVTaskFinished(job_name)
165+
explain {
166+
sql("${query_sql}")
167+
check {result ->
168+
def splitResult = result.split("MaterializedViewRewriteFail")
169+
splitResult.length == 2 ? splitResult[0].contains(mv_name) : false
170+
}
171+
}
172+
}
173+
174+
// multi table
175+
// filter inside + left + use roll up dimension
176+
// Test case 1_0: runs query1_0 before and after check_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0"),
// then drops the materialized view mv1_0.
// NOTE(review): as shown here, both mv1_0 and query1_0 contain no SQL text (only blank lines), so the
// statements below would execute with empty SQL - confirm whether the materialized view definition
// and the query were meant to be filled in before this case is enabled.
// NOTE(review): check_mv_rewrite_success is not defined in the visible part of this file; presumably
// it is provided by the regression framework - confirm.
def mv1_0 = """
177+
178+
"""
179+
def query1_0 =
180+
"""
181+
182+
"""
183+
order_qt_query1_0_before "${query1_0}"
184+
check_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0")
185+
order_qt_query1_0_after "${query1_0}"
186+
sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0"""
187+
}

0 commit comments

Comments
 (0)