17
17
*/
18
18
package org .apache .hadoop .hive .ql .optimizer .calcite .rules ;
19
19
20
+ import com .google .common .collect .ImmutableList ;
21
+ import org .apache .calcite .plan .RelOptCluster ;
22
+ import org .apache .calcite .plan .RelOptPredicateList ;
20
23
import org .apache .calcite .plan .RelOptRule ;
21
24
import org .apache .calcite .plan .RelOptRuleCall ;
22
25
import org .apache .calcite .plan .RelOptUtil ;
26
+ import org .apache .calcite .plan .RexImplicationChecker ;
23
27
import org .apache .calcite .plan .Strong ;
24
28
import org .apache .calcite .rel .RelNode ;
25
29
import org .apache .calcite .rel .core .Filter ;
26
30
import org .apache .calcite .rel .core .Join ;
27
31
import org .apache .calcite .rel .core .JoinRelType ;
28
32
import org .apache .calcite .rel .core .Project ;
33
+ import org .apache .calcite .rel .metadata .RelMetadataQuery ;
34
+ import org .apache .calcite .rel .type .RelDataTypeField ;
35
+ import org .apache .calcite .rex .RexBuilder ;
29
36
import org .apache .calcite .rex .RexCall ;
37
+ import org .apache .calcite .rex .RexExecutor ;
38
+ import org .apache .calcite .rex .RexExecutorImpl ;
30
39
import org .apache .calcite .rex .RexNode ;
40
+ import org .apache .calcite .rex .RexUtil ;
31
41
import org .apache .calcite .rex .RexVisitorImpl ;
32
42
import org .apache .calcite .sql .SqlKind ;
33
43
import org .apache .calcite .sql .fun .SqlStdOperatorTable ;
44
+ import org .apache .calcite .util .ImmutableBitSet ;
45
+ import org .apache .calcite .util .Util ;
34
46
import org .apache .hadoop .hive .ql .optimizer .calcite .HiveCalciteUtil ;
35
47
import org .apache .hadoop .hive .ql .optimizer .calcite .reloperators .HiveAntiJoin ;
36
48
import org .slf4j .Logger ;
37
49
import org .slf4j .LoggerFactory ;
38
50
39
51
import java .util .ArrayList ;
40
- import java .util .Collections ;
41
52
import java .util .List ;
42
- import java .util .concurrent . atomic . AtomicBoolean ;
53
+ import java .util .Optional ;
43
54
44
55
/**
45
56
* Planner rule that converts a join plus filter to anti join.
@@ -86,14 +97,17 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
86
97
87
98
assert (filter != null );
88
99
89
- List <RexNode > filterList = getResidualFilterNodes (filter , join );
90
- if (filterList == null ) {
100
+ ImmutableBitSet rhsFields = HiveCalciteUtil .getRightSideBitset (join );
101
+ Optional <List <RexNode >> optFilterList = getResidualFilterNodes (filter , join , rhsFields );
102
+ if (optFilterList .isEmpty ()) {
91
103
return ;
92
104
}
105
+ List <RexNode > filterList = optFilterList .get ();
93
106
94
107
// If any projection is there from right side, then we can not convert to anti join.
95
- boolean hasProjection = HiveCalciteUtil .hasAnyExpressionFromRightSide (join , project .getProjects ());
96
- if (hasProjection ) {
108
+ ImmutableBitSet projectedFields = RelOptUtil .InputFinder .bits (project .getProjects (), null );
109
+ boolean projectionUsesRHS = projectedFields .intersects (rhsFields );
110
+ if (projectionUsesRHS ) {
97
111
return ;
98
112
}
99
113
@@ -119,13 +133,14 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
119
133
/**
120
134
* Extracts the non-null filter conditions from given filter node.
121
135
*
122
- * @param filter The filter condition to be checked.
123
- * @param join Join node whose right side has to be searched.
136
+ * @param filter The filter condition to be checked.
137
+ * @param join Join node whose right side has to be searched.
138
+ * @param rhsFields
124
139
* @return null : Anti join condition is not matched for filter.
125
- * Empty list : No residual filter conditions present.
126
- * Valid list containing the filter to be applied after join.
140
+ * Empty list : No residual filter conditions present.
141
+ * Valid list containing the filter to be applied after join.
127
142
*/
128
- private List <RexNode > getResidualFilterNodes (Filter filter , Join join ) {
143
+ private Optional < List <RexNode >> getResidualFilterNodes (Filter filter , Join join , ImmutableBitSet rhsFields ) {
129
144
// 1. If null filter is not present from right side then we can not convert to anti join.
130
145
// 2. If any non-null filter is present from right side, we can not convert it to anti join.
131
146
// 3. Keep other filters which needs to be executed after join.
@@ -135,43 +150,123 @@ private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
135
150
List <RexNode > aboveFilters = RelOptUtil .conjunctions (filter .getCondition ());
136
151
boolean hasNullFilterOnRightSide = false ;
137
152
List <RexNode > filterList = new ArrayList <>();
153
+ final ImmutableBitSet notNullColumnsFromRightSide = getNotNullColumnsFromRightSide (join );
154
+
138
155
for (RexNode filterNode : aboveFilters ) {
139
- if (filterNode .getKind () == SqlKind .IS_NULL ) {
140
- // Null filter from right side table can be removed and its a pre-condition for anti join conversion.
141
- if (HiveCalciteUtil .hasAllExpressionsFromRightSide (join , Collections .singletonList (filterNode ))
142
- && isStrong (((RexCall ) filterNode ).getOperands ().get (0 ))) {
143
- hasNullFilterOnRightSide = true ;
144
- } else {
145
- filterList .add (filterNode );
146
- }
147
- } else {
148
- if (HiveCalciteUtil .hasAnyExpressionFromRightSide (join , Collections .singletonList (filterNode ))) {
149
- // If some non null condition is present from right side, we can not convert the join to anti join as
150
- // anti join does not project the fields from right side.
151
- return null ;
152
- } else {
153
- filterList .add (filterNode );
154
- }
156
+ final ImmutableBitSet usedFields = RelOptUtil .InputFinder .bits (filterNode );
157
+ boolean usesFieldFromRHS = usedFields .intersects (rhsFields );
158
+
159
+ if (!usesFieldFromRHS ) {
160
+ // Only LHS fields or constants, so the filterNode is part of the residual filter
161
+ filterList .add (filterNode );
162
+ continue ;
163
+ }
164
+
165
+ // In the following we check for filter nodes that let us deduce that
166
+ // "an (originally) not-null column of RHS IS NULL because the LHS row will not be matched"
167
+
168
+ if (filterNode .getKind () != SqlKind .IS_NULL ) {
169
+ return Optional .empty ();
170
+ }
171
+
172
+ boolean usesRHSFieldsOnly = rhsFields .contains (usedFields );
173
+ if (!usesRHSFieldsOnly ) {
174
+ // If there is a mix between LHS and RHS fields, don't convert to anti-join
175
+ return Optional .empty ();
176
+ }
177
+
178
+ // Null filter from right side table can be removed and it is a pre-condition for anti join conversion.
179
+ RexNode arg = ((RexCall ) filterNode ).getOperands ().get (0 );
180
+ if (isStrong (arg , notNullColumnsFromRightSide )) {
181
+ hasNullFilterOnRightSide = true ;
182
+ } else if (!isStrong (arg , rhsFields )) {
183
+ // if all RHS fields are null and the IS NULL is still not fulfilled, bail out
184
+ return Optional .empty ();
155
185
}
156
186
}
157
187
158
188
if (!hasNullFilterOnRightSide ) {
159
- return null ;
189
+ return Optional . empty () ;
160
190
}
161
- return filterList ;
191
+ return Optional . of ( filterList ) ;
162
192
}
163
193
164
- private boolean isStrong (RexNode rexNode ) {
165
- AtomicBoolean hasCast = new AtomicBoolean (false );
166
- rexNode .accept (new RexVisitorImpl <Void >(true ) {
167
- @ Override
168
- public Void visitCall (RexCall call ) {
169
- if (call .getKind () == SqlKind .CAST ) {
170
- hasCast .set (true );
171
- }
172
- return super .visitCall (call );
194
+ private ImmutableBitSet getNotNullColumnsFromRightSide (RelNode joinRel ) {
195
+ // we need to shift the indices of the second child to the right
196
+ int shift = (joinRel .getInput (0 )).getRowType ().getFieldCount ();
197
+ ImmutableBitSet rhsNotnullColumns = deduceNotNullColumns (joinRel .getInput (1 ));
198
+ return rhsNotnullColumns .shift (shift );
199
+ }
200
+
201
+ /**
202
+ * Deduce which columns of the <code>relNode</code> are definitively NOT NULL.
203
+ */
204
+ private ImmutableBitSet deduceNotNullColumns (RelNode relNode ) {
205
+ // adapted from org.apache.calcite.plan.RelOptUtil.containsNullableFields
206
+ RelOptCluster cluster = relNode .getCluster ();
207
+ final RexBuilder rexBuilder = cluster .getRexBuilder ();
208
+ final RelMetadataQuery mq = cluster .getMetadataQuery ();
209
+ ImmutableBitSet .Builder result = ImmutableBitSet .builder ();
210
+ ImmutableBitSet .Builder candidatesBuilder = ImmutableBitSet .builder ();
211
+ List <RelDataTypeField > fieldList = relNode .getRowType ().getFieldList ();
212
+ for (int i =0 ; i <fieldList .size (); i ++) {
213
+ if (fieldList .get (i ).getType ().isNullable ()) {
214
+ candidatesBuilder .set (i );
215
+ }
216
+ else {
217
+ result .set (i );
218
+ }
219
+ }
220
+ ImmutableBitSet candidates = candidatesBuilder .build ();
221
+ if (candidates .isEmpty ()) {
222
+ // All columns are declared NOT NULL, no need to change
223
+ return result .build ();
224
+ }
225
+ final RexExecutor executor = cluster .getPlanner ().getExecutor ();
226
+ if (!(executor instanceof RexExecutorImpl )) {
227
+ // Cannot proceed without an executor.
228
+ return result .build ();
229
+ }
230
+
231
+ final RexImplicationChecker checker =
232
+ new RexImplicationChecker (rexBuilder , executor , relNode .getRowType ());
233
+ final RelOptPredicateList predicates = mq .getPulledUpPredicates (relNode );
234
+
235
+ ImmutableList <RexNode > preds = predicates .pulledUpPredicates ;
236
+ final List <RexNode > antecedent = new ArrayList <>(preds );
237
+ final RexNode first = RexUtil .composeConjunction (rexBuilder , antecedent );
238
+ for (int c : candidates ) {
239
+ RelDataTypeField field = fieldList .get (c );
240
+ final RexNode second = rexBuilder .makeCall (SqlStdOperatorTable .IS_NOT_NULL ,
241
+ rexBuilder .makeInputRef (field .getType (), field .getIndex ()));
242
+ // Suppose we have EMP(empno INT NOT NULL, mgr INT),
243
+ // and predicates [empno > 0, mgr > 0].
244
+ // We make first: "empno > 0 AND mgr > 0"
245
+ // and second: "mgr IS NOT NULL"
246
+ // and ask whether first implies second.
247
+ // It does, so we have no nullable columns.
248
+ if (checker .implies (first , second )) {
249
+ result .set (c );
173
250
}
174
- });
175
- return !hasCast .get () && Strong .isStrong (rexNode );
251
+ }
252
+ return result .build ();
253
+ }
254
+
255
+ private boolean isStrong (RexNode rexNode , ImmutableBitSet rightSideBitset ) {
256
+ try {
257
+ rexNode .accept (new RexVisitorImpl <Void >(true ) {
258
+ @ Override
259
+ public Void visitCall (RexCall call ) {
260
+ if (call .getKind () == SqlKind .CAST ) {
261
+ throw Util .FoundOne .NULL ;
262
+ }
263
+ return super .visitCall (call );
264
+ }
265
+ });
266
+ } catch (Util .FoundOne e ) {
267
+ // Hive's CAST might introduce NULL for NOT NULL fields
268
+ return false ;
269
+ }
270
+ return Strong .isNull (rexNode , rightSideBitset );
176
271
}
177
272
}
0 commit comments