Skip to content

Commit bc29981

Browse files
committed
HIVE-29176: Wrong result when HiveAntiJoin is replacing an IS NULL filter on a nullable column
1 parent 6f53c7f commit bc29981

File tree

7 files changed

+869
-115
lines changed

7 files changed

+869
-115
lines changed

ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java

Lines changed: 9 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,43 +1233,17 @@ public FixNullabilityShuttle(RexBuilder rexBuilder,
12331233
}
12341234

12351235
/**
1236-
* Checks if any of the expression given as list expressions are from right side of the join.
1237-
* This is used during anti join conversion.
1238-
*
1239-
* @param joinRel Join node whose right side has to be searched.
1240-
* @param expressions The list of expression to search.
1241-
* @return true if any of the expressions is from right side of join.
1236+
* Given a join, creates a bitset of the joined columns originating from the right-hand side.
1237+
* @param joinRel a join that concatenates all columns from its inputs (so no semi-join)
1238+
* @return a bitset
12421239
*/
1243-
public static boolean hasAnyExpressionFromRightSide(RelNode joinRel, List<RexNode> expressions) {
1244-
List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
1245-
int nTotalFields = joinFields.size();
1246-
List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
1247-
int nFieldsLeft = leftFields.size();
1248-
ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);
1249-
1250-
for (RexNode node : expressions) {
1251-
ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
1252-
if (rightBitmap.contains(inputBits)) {
1253-
return true;
1254-
}
1255-
}
1256-
return false;
1257-
}
1258-
1259-
public static boolean hasAllExpressionsFromRightSide(RelNode joinRel, List<RexNode> expressions) {
1260-
List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
1261-
int nTotalFields = joinFields.size();
1262-
List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
1263-
int nFieldsLeft = leftFields.size();
1264-
ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);
1265-
1266-
for (RexNode node : expressions) {
1267-
ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
1268-
if (!rightBitmap.contains(inputBits)) {
1269-
return false;
1270-
}
1240+
public static ImmutableBitSet getRightSideBitset(RelNode joinRel) {
1241+
if(joinRel.getInputs().size() != 2) {
1242+
throw new IllegalArgumentException("The relation must have exactly two children:\n" + RelOptUtil.toString(joinRel));
12711243
}
1272-
return true;
1244+
int nTotalFields = joinRel.getRowType().getFieldCount();
1245+
int nFieldsLeft = (joinRel.getInputs().get(0)).getRowType().getFieldCount();
1246+
return ImmutableBitSet.range(nFieldsLeft, nTotalFields);
12731247
}
12741248

12751249
/**

ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java

Lines changed: 135 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,40 @@
1717
*/
1818
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
1919

20+
import com.google.common.collect.ImmutableList;
21+
import org.apache.calcite.plan.RelOptCluster;
22+
import org.apache.calcite.plan.RelOptPredicateList;
2023
import org.apache.calcite.plan.RelOptRule;
2124
import org.apache.calcite.plan.RelOptRuleCall;
2225
import org.apache.calcite.plan.RelOptUtil;
26+
import org.apache.calcite.plan.RexImplicationChecker;
2327
import org.apache.calcite.plan.Strong;
2428
import org.apache.calcite.rel.RelNode;
2529
import org.apache.calcite.rel.core.Filter;
2630
import org.apache.calcite.rel.core.Join;
2731
import org.apache.calcite.rel.core.JoinRelType;
2832
import org.apache.calcite.rel.core.Project;
33+
import org.apache.calcite.rel.metadata.RelMetadataQuery;
34+
import org.apache.calcite.rel.type.RelDataTypeField;
35+
import org.apache.calcite.rex.RexBuilder;
2936
import org.apache.calcite.rex.RexCall;
37+
import org.apache.calcite.rex.RexExecutor;
38+
import org.apache.calcite.rex.RexExecutorImpl;
3039
import org.apache.calcite.rex.RexNode;
40+
import org.apache.calcite.rex.RexUtil;
3141
import org.apache.calcite.rex.RexVisitorImpl;
3242
import org.apache.calcite.sql.SqlKind;
3343
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
44+
import org.apache.calcite.util.ImmutableBitSet;
45+
import org.apache.calcite.util.Util;
3446
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
3547
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin;
3648
import org.slf4j.Logger;
3749
import org.slf4j.LoggerFactory;
3850

3951
import java.util.ArrayList;
40-
import java.util.Collections;
4152
import java.util.List;
42-
import java.util.concurrent.atomic.AtomicBoolean;
53+
import java.util.Optional;
4354

4455
/**
4556
* Planner rule that converts a join plus filter to anti join.
@@ -86,14 +97,17 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
8697

8798
assert (filter != null);
8899

89-
List<RexNode> filterList = getResidualFilterNodes(filter, join);
90-
if (filterList == null) {
100+
ImmutableBitSet rhsFields = HiveCalciteUtil.getRightSideBitset(join);
101+
Optional<List<RexNode>> optFilterList = getResidualFilterNodes(filter, join, rhsFields);
102+
if (optFilterList.isEmpty()) {
91103
return;
92104
}
105+
List<RexNode> filterList = optFilterList.get();
93106

94107
// If any projection is there from right side, then we can not convert to anti join.
95-
boolean hasProjection = HiveCalciteUtil.hasAnyExpressionFromRightSide(join, project.getProjects());
96-
if (hasProjection) {
108+
ImmutableBitSet projectedFields = RelOptUtil.InputFinder.bits(project.getProjects(), null);
109+
boolean projectionUsesRHS = projectedFields.intersects(rhsFields);
110+
if (projectionUsesRHS) {
97111
return;
98112
}
99113

@@ -119,13 +133,14 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
119133
/**
120134
* Extracts the non-null filter conditions from given filter node.
121135
*
122-
* @param filter The filter condition to be checked.
123-
* @param join Join node whose right side has to be searched.
136+
* @param filter The filter condition to be checked.
137+
* @param join Join node whose right side has to be searched.
138+
* @param rhsFields
124139
* @return null : Anti join condition is not matched for filter.
125-
* Empty list : No residual filter conditions present.
126-
* Valid list containing the filter to be applied after join.
140+
* Empty list : No residual filter conditions present.
141+
* Valid list containing the filter to be applied after join.
127142
*/
128-
private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
143+
private Optional<List<RexNode>> getResidualFilterNodes(Filter filter, Join join, ImmutableBitSet rhsFields) {
129144
// 1. If null filter is not present from right side then we can not convert to anti join.
130145
// 2. If any non-null filter is present from right side, we can not convert it to anti join.
131146
// 3. Keep other filters which needs to be executed after join.
@@ -135,43 +150,123 @@ private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
135150
List<RexNode> aboveFilters = RelOptUtil.conjunctions(filter.getCondition());
136151
boolean hasNullFilterOnRightSide = false;
137152
List<RexNode> filterList = new ArrayList<>();
153+
final ImmutableBitSet notNullColumnsFromRightSide = getNotNullColumnsFromRightSide(join);
154+
138155
for (RexNode filterNode : aboveFilters) {
139-
if (filterNode.getKind() == SqlKind.IS_NULL) {
140-
// Null filter from right side table can be removed and its a pre-condition for anti join conversion.
141-
if (HiveCalciteUtil.hasAllExpressionsFromRightSide(join, Collections.singletonList(filterNode))
142-
&& isStrong(((RexCall) filterNode).getOperands().get(0))) {
143-
hasNullFilterOnRightSide = true;
144-
} else {
145-
filterList.add(filterNode);
146-
}
147-
} else {
148-
if (HiveCalciteUtil.hasAnyExpressionFromRightSide(join, Collections.singletonList(filterNode))) {
149-
// If some non null condition is present from right side, we can not convert the join to anti join as
150-
// anti join does not project the fields from right side.
151-
return null;
152-
} else {
153-
filterList.add(filterNode);
154-
}
156+
final ImmutableBitSet usedFields = RelOptUtil.InputFinder.bits(filterNode);
157+
boolean usesFieldFromRHS = usedFields.intersects(rhsFields);
158+
159+
if(!usesFieldFromRHS) {
160+
// Only LHS fields or constants, so the filterNode is part of the residual filter
161+
filterList.add(filterNode);
162+
continue;
163+
}
164+
165+
// In the following we check for filter nodes that let us deduce that
166+
// "an (originally) not-null column of RHS IS NULL because the LHS row will not be matched"
167+
168+
if(filterNode.getKind() != SqlKind.IS_NULL) {
169+
return Optional.empty();
170+
}
171+
172+
boolean usesRHSFieldsOnly = rhsFields.contains(usedFields);
173+
if (!usesRHSFieldsOnly) {
174+
// If there is a mix between LHS and RHS fields, don't convert to anti-join
175+
return Optional.empty();
176+
}
177+
178+
// Null filter from right side table can be removed and it is a pre-condition for anti join conversion.
179+
RexNode arg = ((RexCall) filterNode).getOperands().get(0);
180+
if (isStrong(arg, notNullColumnsFromRightSide)) {
181+
hasNullFilterOnRightSide = true;
182+
} else if(!isStrong(arg, rhsFields)) {
183+
// if all RHS fields are null and the IS NULL is still not fulfilled, bail out
184+
return Optional.empty();
155185
}
156186
}
157187

158188
if (!hasNullFilterOnRightSide) {
159-
return null;
189+
return Optional.empty();
160190
}
161-
return filterList;
191+
return Optional.of(filterList);
162192
}
163193

164-
private boolean isStrong(RexNode rexNode) {
165-
AtomicBoolean hasCast = new AtomicBoolean(false);
166-
rexNode.accept(new RexVisitorImpl<Void>(true) {
167-
@Override
168-
public Void visitCall(RexCall call) {
169-
if (call.getKind() == SqlKind.CAST) {
170-
hasCast.set(true);
171-
}
172-
return super.visitCall(call);
194+
private ImmutableBitSet getNotNullColumnsFromRightSide(RelNode joinRel) {
195+
// we need to shift the indices of the second child to the right
196+
int shift = (joinRel.getInput(0)).getRowType().getFieldCount();
197+
ImmutableBitSet rhsNotnullColumns = deduceNotNullColumns(joinRel.getInput(1));
198+
return rhsNotnullColumns.shift(shift);
199+
}
200+
201+
/**
202+
* Deduce which columns of the <code>relNode</code> are definitively NOT NULL.
203+
*/
204+
private ImmutableBitSet deduceNotNullColumns(RelNode relNode) {
205+
// adapted from org.apache.calcite.plan.RelOptUtil.containsNullableFields
206+
RelOptCluster cluster = relNode.getCluster();
207+
final RexBuilder rexBuilder = cluster.getRexBuilder();
208+
final RelMetadataQuery mq = cluster.getMetadataQuery();
209+
ImmutableBitSet.Builder result = ImmutableBitSet.builder();
210+
ImmutableBitSet.Builder candidatesBuilder = ImmutableBitSet.builder();
211+
List<RelDataTypeField> fieldList = relNode.getRowType().getFieldList();
212+
for (int i=0; i<fieldList.size(); i++) {
213+
if (fieldList.get(i).getType().isNullable()) {
214+
candidatesBuilder.set(i);
215+
}
216+
else {
217+
result.set(i);
218+
}
219+
}
220+
ImmutableBitSet candidates = candidatesBuilder.build();
221+
if (candidates.isEmpty()) {
222+
// All columns are declared NOT NULL, no need to change
223+
return result.build();
224+
}
225+
final RexExecutor executor = cluster.getPlanner().getExecutor();
226+
if (!(executor instanceof RexExecutorImpl)) {
227+
// Cannot proceed without an executor.
228+
return result.build();
229+
}
230+
231+
final RexImplicationChecker checker =
232+
new RexImplicationChecker(rexBuilder, executor, relNode.getRowType());
233+
final RelOptPredicateList predicates = mq.getPulledUpPredicates(relNode);
234+
235+
ImmutableList<RexNode> preds = predicates.pulledUpPredicates;
236+
final List<RexNode> antecedent = new ArrayList<>(preds);
237+
final RexNode first = RexUtil.composeConjunction(rexBuilder, antecedent);
238+
for (int c : candidates) {
239+
RelDataTypeField field = fieldList.get(c);
240+
final RexNode second = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
241+
rexBuilder.makeInputRef(field.getType(), field.getIndex()));
242+
// Suppose we have EMP(empno INT NOT NULL, mgr INT),
243+
// and predicates [empno > 0, mgr > 0].
244+
// We make first: "empno > 0 AND mgr > 0"
245+
// and second: "mgr IS NOT NULL"
246+
// and ask whether first implies second.
247+
// It does, so we have no nullable columns.
248+
if(checker.implies(first, second)) {
249+
result.set(c);
173250
}
174-
});
175-
return !hasCast.get() && Strong.isStrong(rexNode);
251+
}
252+
return result.build();
253+
}
254+
255+
private boolean isStrong(RexNode rexNode, ImmutableBitSet rightSideBitset) {
256+
try {
257+
rexNode.accept(new RexVisitorImpl<Void>(true) {
258+
@Override
259+
public Void visitCall(RexCall call) {
260+
if (call.getKind() == SqlKind.CAST) {
261+
throw Util.FoundOne.NULL;
262+
}
263+
return super.visitCall(call);
264+
}
265+
});
266+
} catch (Util.FoundOne e) {
267+
// Hive's CAST might introduce NULL for NOT NULL fields
268+
return false;
269+
}
270+
return Strong.isNull(rexNode, rightSideBitset);
176271
}
177272
}

0 commit comments

Comments
 (0)