Skip to content

Commit f82197f

Browse files
time-and-fate and ti-chi-bot
authored and committed
This is an automated cherry-pick of pingcap#48984
Signed-off-by: ti-chi-bot <[email protected]>
1 parent 069631e commit f82197f

File tree

6 files changed

+210
-6
lines changed

6 files changed

+210
-6
lines changed

pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2195,7 +2195,7 @@
21952195
},
21962196
{
21972197
"SQL": "select a from t where c_str like 'abc_'",
2198-
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
2198+
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
21992199
},
22002200
{
22012201
"SQL": "select a from t where c_str like 'abc%af'",
@@ -2223,7 +2223,7 @@
22232223
},
22242224
{
22252225
"SQL": "select a from t where c_str like 'abc\\__'",
2226-
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
2226+
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
22272227
},
22282228
{
22292229
"SQL": "select a from t where c_str like 123",

pkg/util/ranger/checker.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,19 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
166166
if err != nil {
167167
return false, true
168168
}
169+
<<<<<<< HEAD
170+
=======
171+
likeFuncReserve := !c.isFullLengthColumn()
172+
173+
// Different from `=`, trailing spaces are always significant, and can't be ignored in `like`.
174+
// In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
175+
// unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by
176+
// the `like` function. Therefore, a Selection is needed to filter the data.
177+
if isPadSpaceCollation(collation) {
178+
likeFuncReserve = true
179+
}
180+
181+
>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))
169182
if len(patternStr) == 0 {
170183
return true, !c.isFullLengthColumn()
171184
}

pkg/util/ranger/points.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"github.com/pingcap/tidb/pkg/errno"
2525
"github.com/pingcap/tidb/pkg/expression"
2626
"github.com/pingcap/tidb/pkg/parser/ast"
27+
"github.com/pingcap/tidb/pkg/parser/charset"
2728
"github.com/pingcap/tidb/pkg/parser/mysql"
2829
"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
2930
"github.com/pingcap/tidb/pkg/types"
@@ -678,9 +679,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
678679
break
679680
} else if pattern[i] == '_' {
680681
// Take the prefix before '_' as the start point, but exclude the prefix itself.
681-
// e.g., "abc_x", the start point exclude "abc",
682-
// because the string length is more than 3.
683-
exclude = true
682+
// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
683+
//
684+
// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
685+
// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
686+
// 'xxx' from 'xxx ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
687+
// we will actually miss 'xxx ', which will cause wrong results.
688+
if !isPadSpaceCollation(collation) {
689+
exclude = true
690+
}
684691
isExactMatch = false
685692
break
686693
}
@@ -715,7 +722,19 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
715722
return []*point{startPoint, endPoint}
716723
}
717724

725+
<<<<<<< HEAD
718726
func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point {
727+
=======
728+
// isPadSpaceCollation returns whether the collation is a PAD SPACE collation.
729+
// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
730+
// collation != binary check here. We may also move it to collation related packages when NO PAD collations are
731+
// implemented in the future.
732+
func isPadSpaceCollation(collation string) bool {
733+
return collation != charset.CollationBin
734+
}
735+
736+
func (r *builder) buildFromNot(expr *expression.ScalarFunction, prefixLen int) []*point {
737+
>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))
719738
switch n := expr.FuncName.L; n {
720739
case ast.IsTruthWithoutNull:
721740
return r.buildFromIsTrue(expr, 1, false)

pkg/util/ranger/ranger_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1106,7 +1106,7 @@ create table t(
11061106
exprStr: "a LIKE 'abc_'",
11071107
accessConds: "[like(test.t.a, abc_, 92)]",
11081108
filterConds: "[like(test.t.a, abc_, 92)]",
1109-
resultStr: "[(\"abc\",\"abd\")]",
1109+
resultStr: "[[\"abc\",\"abd\")]",
11101110
},
11111111
{
11121112
indexPos: 0,

tests/integrationtest/r/planner/core/issuetest/planner_issue.result

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,139 @@ LEFT JOIN tmp3 c3 ON c3.id = '1';
180180
id id
181181
1 1
182182
1 1
183+
<<<<<<< HEAD
184+
=======
185+
drop table if exists t;
186+
create table t(a int, b int);
187+
set @@tidb_max_chunk_size = 32;
188+
insert into t values(1, 1);
189+
insert into t select a+1, a+1 from t;
190+
insert into t select a+2, a+2 from t;
191+
insert into t select a+4, a+4 from t;
192+
insert into t select a+8, a+8 from t;
193+
insert into t select a+16, a+16 from t;
194+
insert into t select a+32, a+32 from t;
195+
select a from (select 100 as a, 100 as b union all select * from t) t where b != 0;
196+
a
197+
100
198+
1
199+
2
200+
3
201+
4
202+
5
203+
6
204+
7
205+
8
206+
9
207+
10
208+
11
209+
12
210+
13
211+
14
212+
15
213+
16
214+
17
215+
18
216+
19
217+
20
218+
21
219+
22
220+
23
221+
24
222+
25
223+
26
224+
27
225+
28
226+
29
227+
30
228+
31
229+
32
230+
33
231+
34
232+
35
233+
36
234+
37
235+
38
236+
39
237+
40
238+
41
239+
42
240+
43
241+
44
242+
45
243+
46
244+
47
245+
48
246+
49
247+
50
248+
51
249+
52
250+
53
251+
54
252+
55
253+
56
254+
57
255+
58
256+
59
257+
60
258+
61
259+
62
260+
63
261+
64
262+
set @@tidb_max_chunk_size = default;
263+
drop table if exists t1, t2;
264+
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
265+
insert into t1 value('测试'),('测试 '),('xxx ');
266+
explain format = brief select *,length(a) from t1 where a like '测试 %';
267+
id estRows task access object operator info
268+
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
269+
└─UnionScan 250.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
270+
└─IndexReader 250.00 root index:Selection
271+
└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92)
272+
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试 ","测试!"), keep order:false, stats:pseudo
273+
explain format = brief select *,length(a) from t1 where a like '测试';
274+
id estRows task access object operator info
275+
Projection 10.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
276+
└─UnionScan 10.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
277+
└─IndexReader 10.00 root index:Selection
278+
└─Selection 10.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试", 92)
279+
└─IndexRangeScan 10.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo
280+
select *,length(a) from t1 where a like '测试 %';
281+
a length(a)
282+
测试 8
283+
select *,length(a) from t1 where a like '测试';
284+
a length(a)
285+
测试 6
286+
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
287+
id estRows task access object operator info
288+
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a
289+
└─UnionScan 250.00 root like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
290+
└─IndexReader 250.00 root index:Selection
291+
└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
292+
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["xxx","xxy"), keep order:false, stats:pseudo
293+
select * from t1 use index (ia) where a like 'xxx_';
294+
a
295+
xxx
296+
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
297+
insert into t2 value('测试'),('测试 ');
298+
explain format = brief select *,length(a) from t2 where a like '测试 %';
299+
id estRows task access object operator info
300+
Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
301+
└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
302+
└─TableReader 8000.00 root data:Selection
303+
└─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92)
304+
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
305+
explain format = brief select *,length(a) from t2 where a like '测试';
306+
id estRows task access object operator info
307+
Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3
308+
└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
309+
└─TableReader 8000.00 root data:Selection
310+
└─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试", 92)
311+
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
312+
select *,length(a) from t2 where a like '测试 %';
313+
a length(a)
314+
测试 6
315+
select *,length(a) from t2 where a like '测试';
316+
a length(a)
317+
测试 4
318+
>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))

tests/integrationtest/t/planner/core/issuetest/planner_issue.test

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,39 @@ FROM
136136
t2 db
137137
LEFT JOIN tmp3 c2 ON c2.id = '1'
138138
LEFT JOIN tmp3 c3 ON c3.id = '1';
139+
<<<<<<< HEAD
140+
=======
141+
142+
# https://github.com/pingcap/tidb/issues/48755
143+
drop table if exists t;
144+
create table t(a int, b int);
145+
set @@tidb_max_chunk_size = 32;
146+
# Insert more than 32 rows into the table.
147+
insert into t values(1, 1);
148+
insert into t select a+1, a+1 from t;
149+
insert into t select a+2, a+2 from t;
150+
insert into t select a+4, a+4 from t;
151+
insert into t select a+8, a+8 from t;
152+
insert into t select a+16, a+16 from t;
153+
insert into t select a+32, a+32 from t;
154+
select a from (select 100 as a, 100 as b union all select * from t) t where b != 0;
155+
set @@tidb_max_chunk_size = default;
156+
157+
# https://github.com/pingcap/tidb/issues/48821
158+
# https://github.com/pingcap/tidb/issues/48983
159+
drop table if exists t1, t2;
160+
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
161+
insert into t1 value('测试'),('测试 '),('xxx ');
162+
explain format = brief select *,length(a) from t1 where a like '测试 %';
163+
explain format = brief select *,length(a) from t1 where a like '测试';
164+
select *,length(a) from t1 where a like '测试 %';
165+
select *,length(a) from t1 where a like '测试';
166+
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
167+
select * from t1 use index (ia) where a like 'xxx_';
168+
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
169+
insert into t2 value('测试'),('测试 ');
170+
explain format = brief select *,length(a) from t2 where a like '测试 %';
171+
explain format = brief select *,length(a) from t2 where a like '测试';
172+
select *,length(a) from t2 where a like '测试 %';
173+
select *,length(a) from t2 where a like '测试';
174+
>>>>>>> 39df07d44b5 (util/ranger: don't exclude start key for range from `_` in `like` function (#48984))

0 commit comments

Comments
 (0)