Skip to content

Commit b700bf3

Browse files
time-and-fate and ti-chi-bot
authored and committed
This is an automated cherry-pick of pingcap#48522
Signed-off-by: ti-chi-bot <[email protected]>
1 parent 634602f commit b700bf3

File tree

13 files changed

+1664
-46
lines changed

13 files changed

+1664
-46
lines changed

pkg/planner/core/integration_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2221,14 +2221,14 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
22212221
tk.MustExec("drop table if exists t1, t2")
22222222
tk.MustExec("create table t1(a int, b varchar(10), c varchar(10), index idx_a_b(a, b))")
22232223
tk.MustExec("create table t2(d int)")
2224-
tk.MustExec("set @@tidb_opt_range_max_size=1275")
2225-
// 1275 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
2224+
tk.MustExec("set @@tidb_opt_range_max_size=1260")
2225+
// 1260 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc].
22262226
rows := tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('a', 'b', 'c')").Rows()
22272227
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, a, b, c)]"))
22282228
tk.MustQuery("show warnings").Check(testkit.Rows())
22292229
rows = tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('aaaaaa', 'bbbbbb', 'cccccc');").Rows()
2230-
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]"))
2231-
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
2230+
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]")
2231+
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen"))
22322232

22332233
tk.MustExec("prepare stmt1 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?)'")
22342234
tk.MustExec("set @a='a', @b='b', @c='c'")
@@ -2243,13 +2243,13 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) {
22432243
tk.Session().SetSessionManager(&testkit.MockSessionManager{PS: ps})
22442244
rows = tk.MustQuery(fmt.Sprintf("explain for connection %d", tkProcess.ID)).Rows()
22452245
// We don't limit range mem usage when rebuilding index join ranges for the cached plan. So [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc] can be built.
2246-
require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]"))
2246+
require.Contains(t, rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, aaaaaa, bbbbbb, cccccc)]")
22472247

22482248
// Test the plan with range fallback would not be put into cache.
22492249
tk.MustExec("prepare stmt2 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?, ?, ?)'")
22502250
tk.MustExec("set @a='a', @b='b', @c='c', @d='d', @e='e'")
22512251
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
2252-
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
2252+
tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen",
22532253
"Warning 1105 skip prepared plan-cache: in-list is too long"))
22542254
tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e")
22552255
tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("0"))

pkg/planner/core/testdata/index_merge_suite_out.json

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@
131131
"IndexMerge 0.00 root type: intersection",
132132
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is1(s1) range:[\"Abc\",\"Abc\"], keep order:false, stats:pseudo",
133133
"├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t5, index:is2(s2) range:(\"zzz\",+inf], keep order:false, stats:pseudo",
134-
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"B啊a\"), keep order:false, stats:pseudo",
135-
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CcC\",\"CcC\"], keep order:false, stats:pseudo",
134+
"├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"\\x0eJ\\xfb@\\xd5J\\x0e3\"), keep order:false, stats:pseudo",
135+
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CCC\",\"CCC\"], keep order:false, stats:pseudo",
136136
"└─TableRowIDScan(Probe) 0.00 cop[tikv] table:t5 keep order:false, stats:pseudo"
137137
],
138138
"Result": [
@@ -144,7 +144,7 @@
144144
"Plan": [
145145
"IndexMerge 0.03 root type: intersection",
146146
"├─IndexRangeScan(Build) 33.33 cop[tikv] table:t6, index:PRIMARY(s1, s2) range:(\"Abc\" \"zzz\",\"Abc\" +inf], keep order:false, stats:pseudo",
147-
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"A啊a\",\"A啊a\"], keep order:false, stats:pseudo",
147+
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"\\x0e3\\xfb@\\xd5J\\x0e3\",\"\\x0e3\\xfb@\\xd5J\\x0e3\"], keep order:false, stats:pseudo",
148148
"└─Selection(Probe) 0.03 cop[tikv] gt(test.t6.s2, \"zzz\"), not(like(test.t6.s4, \"Cd_\", 92))",
149149
" └─TableRowIDScan 0.03 cop[tikv] table:t6 keep order:false, stats:pseudo"
150150
],
@@ -172,13 +172,21 @@
172172
{
173173
"SQL": "select /*+ use_index_merge(t8, primary,is2,is3,is4,is5) */ * from t8 where s1 like '啊A%' and s2 > 'abc' and s3 > 'cba' and s4 in ('aA', '??') and s5 = 'test,2'",
174174
"Plan": [
175+
<<<<<<< HEAD
175176
"Selection 1.42 root eq(test.t8.s5, \"test,2\")",
176177
"└─IndexMerge 0.59 root type: intersection",
177178
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(0x616263,+inf], keep order:false, stats:pseudo",
178179
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(0x636261,+inf], keep order:false, stats:pseudo",
180+
=======
181+
"Selection 0.04 root eq(test.t8.s5, \"test,2\")",
182+
"└─IndexMerge 0.06 root type: intersection",
183+
" ├─IndexRangeScan(Build) 250.00 cop[tikv] table:t8, index:PRIMARY(s1) range:[\"UJ\\x00A\",\"UJ\\x00B\"), keep order:false, stats:pseudo",
184+
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(\"abc\",+inf], keep order:false, stats:pseudo",
185+
" ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(\"cba\",+inf], keep order:false, stats:pseudo",
186+
>>>>>>> e053c27f068 (util/ranger: support use `like` to build range for new collation columns (#48522))
179187
" ├─IndexRangeScan(Build) 20.00 cop[tikv] table:t8, index:is4(s4) range:[\"aA\",\"aA\"], [\"??\",\"??\"], keep order:false, stats:pseudo",
180-
" └─Selection(Probe) 0.59 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
181-
" └─TableRowIDScan 2.22 cop[tikv] table:t8 keep order:false, stats:pseudo"
188+
" └─Selection(Probe) 0.06 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)",
189+
" └─TableRowIDScan 0.06 cop[tikv] table:t8 keep order:false, stats:pseudo"
182190
],
183191
"Result": [
184192
"啊aabbccdd abcc cccc aA tEsT,2"

pkg/util/ranger/BUILD.bazel

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ go_library(
3030
"//pkg/util/codec",
3131
"//pkg/util/collate",
3232
"//pkg/util/dbterror",
33+
<<<<<<< HEAD
3334
"//pkg/util/mathutil",
35+
=======
36+
"//pkg/util/hack",
37+
>>>>>>> e053c27f068 (util/ranger: support use `like` to build range for new collation columns (#48522))
3438
"@com_github_pingcap_errors//:errors",
3539
],
3640
)

pkg/util/ranger/checker.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -139,16 +139,6 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction
139139

140140
func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isAccessCond, shouldReserve bool) {
141141
_, collation := scalar.CharsetAndCollation()
142-
if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) {
143-
// The algorithm constructs the range in byte-level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte.
144-
// However, this is incorrect for non-binary collation strings because the sort key order is not the same as byte order.
145-
// For example, "`%" is mapped to the range [`, a](where ` is 0x60 and a is 0x61).
146-
// Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key.
147-
// Finally, the range comes to be [`, A], which is actually an empty range.
148-
// See https://github.com/pingcap/tidb/issues/31174 for more details.
149-
// In short, when the column type is non-binary collation string, we cannot use `like` expressions to generate the range.
150-
return false, true
151-
}
152142
if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) {
153143
return false, true
154144
}

pkg/util/ranger/detacher.go

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIs
242242
// e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2))
243243
// ((a,b,c) in (1,1,1),(2,2,2)) would be extracted.
244244
func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column,
245-
lengths []int, rangeMaxSize int64) (*cnfItemRangeResult, []*valueInfo, error) {
245+
lengths []int, rangeMaxSize int64, convertToSortKey bool) (*cnfItemRangeResult, []*valueInfo, error) {
246246
if len(conds) < 2 {
247247
return nil, nil, nil
248248
}
@@ -261,7 +261,7 @@ func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expres
261261
// We build ranges for `(a,b) in ((1,1),(1,2))` and get `[1 1, 1 1] [1 2, 1 2]`, which are point ranges and we can
262262
// append `c = 1` to the point ranges. However, if we choose to merge consecutive ranges here, we get `[1 1, 1 2]`,
263263
// which are not point ranges, and we cannot append `c = 1` anymore.
264-
res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize)
264+
res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize, convertToSortKey)
265265
if err != nil {
266266
return nil, nil, err
267267
}
@@ -376,7 +376,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
376376
optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan,
377377
}
378378
if considerDNF {
379-
bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize)
379+
bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize, d.convertToSortKey)
380380
if err != nil {
381381
return nil, err
382382
}
@@ -627,12 +627,22 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
627627
}
628628
// Multiple Eq/In conditions for one column in CNF, apply intersection on them
629629
// Lazily compute the points for the previously visited Eq/In
630+
newTp := newFieldType(cols[offset].GetType())
630631
collator := collate.GetCollator(cols[offset].GetType().GetCollate())
631632
if mergedAccesses[offset] == nil {
632633
mergedAccesses[offset] = accesses[offset]
634+
<<<<<<< HEAD
633635
points[offset] = rb.build(accesses[offset], collator)
634636
}
635637
points[offset] = rb.intersection(points[offset], rb.build(cond, collator), collator)
638+
=======
639+
// Note that this is a relatively special usage of build(). We will restore the points back to Expression for
640+
// later use and may build the Expression to points again.
641+
// We need to keep the original value here, which means we neither cut prefix nor convert to sort key.
642+
points[offset] = rb.build(accesses[offset], newTp, types.UnspecifiedLength, false)
643+
}
644+
points[offset] = rb.intersection(points[offset], rb.build(cond, newTp, types.UnspecifiedLength, false), collator)
645+
>>>>>>> e053c27f068 (util/ranger: support use `like` to build range for new collation columns (#48522))
636646
if len(points[offset]) == 0 { // Early termination if false expression found
637647
if expression.MaybeOverOptimized4PlanCache(sctx, conditions) {
638648
// `a>@x and a<@y` --> `invalid-range if @x>=@y`
@@ -772,9 +782,14 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
772782
if shouldReserve {
773783
hasResidual = true
774784
}
785+
<<<<<<< HEAD
775786
points := rb.build(item, collate.GetCollator(newTpSlice[0].GetCollate()))
787+
=======
788+
points := rb.build(item, newTpSlice[0], d.lengths[0], d.convertToSortKey)
789+
tmpNewTp := convertStringFTToBinaryCollate(newTpSlice[0])
790+
>>>>>>> e053c27f068 (util/ranger: support use `like` to build range for new collation columns (#48522))
776791
// TODO: restrict the mem usage of ranges
777-
ranges, rangeFallback, err := points2Ranges(d.sctx, points, newTpSlice[0], d.rangeMaxSize)
792+
ranges, rangeFallback, err := points2Ranges(d.sctx, points, tmpNewTp, d.rangeMaxSize)
778793
if err != nil {
779794
return nil, nil, nil, false, errors.Trace(err)
780795
}
@@ -870,6 +885,7 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
870885
cols: cols,
871886
lengths: lengths,
872887
mergeConsecutive: true,
888+
convertToSortKey: true,
873889
rangeMaxSize: rangeMaxSize,
874890
}
875891
return d.detachCondAndBuildRangeForCols()
@@ -878,13 +894,14 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre
878894
// detachCondAndBuildRangeWithoutMerging detaches the index filters from table filters and uses them to build ranges.
879895
// When building ranges, it doesn't merge consecutive ranges.
880896
func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
881-
lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) {
897+
lengths []int, rangeMaxSize int64, convertToSortKey bool) (*DetachRangeResult, error) {
882898
d := &rangeDetacher{
883899
sctx: sctx,
884900
allConds: conditions,
885901
cols: cols,
886902
lengths: lengths,
887903
mergeConsecutive: false,
904+
convertToSortKey: convertToSortKey,
888905
rangeMaxSize: rangeMaxSize,
889906
}
890907
return d.detachCondAndBuildRangeForCols()
@@ -896,7 +913,7 @@ func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions [
896913
// The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation.
897914
func DetachCondAndBuildRangeForPartition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
898915
lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) {
899-
return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize)
916+
return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize, false)
900917
}
901918

902919
type rangeDetacher struct {
@@ -905,6 +922,7 @@ type rangeDetacher struct {
905922
cols []*expression.Column
906923
lengths []int
907924
mergeConsecutive bool
925+
convertToSortKey bool
908926
rangeMaxSize int64
909927
}
910928

@@ -951,6 +969,7 @@ func DetachSimpleCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions [
951969
cols: cols,
952970
lengths: lengths,
953971
mergeConsecutive: true,
972+
convertToSortKey: true,
954973
rangeMaxSize: rangeMaxSize,
955974
}
956975
res, err := d.detachCNFCondAndBuildRangeForIndex(conditions, newTpSlice, false)

0 commit comments

Comments
 (0)