@@ -27,6 +27,7 @@ import (
27
27
"github.com/pingcap/tidb/pkg/expression/contextstatic"
28
28
"github.com/pingcap/tidb/pkg/kv"
29
29
"github.com/pingcap/tidb/pkg/parser/ast"
30
+ "github.com/pingcap/tidb/pkg/parser/charset"
30
31
"github.com/pingcap/tidb/pkg/parser/model"
31
32
"github.com/pingcap/tidb/pkg/parser/mysql"
32
33
"github.com/pingcap/tidb/pkg/parser/terror"
@@ -287,13 +288,31 @@ func (t *PhysicalTable) SplitScanRanges(ctx context.Context, store kv.Storage, s
287
288
switch ft .GetType () {
288
289
case mysql .TypeTiny , mysql .TypeShort , mysql .TypeLong , mysql .TypeLonglong , mysql .TypeInt24 :
289
290
if len (t .KeyColumns ) > 1 {
290
- return t .splitCommonHandleRanges (ctx , tikvStore , splitCnt , true , mysql .HasUnsignedFlag (ft .GetFlag ()), false )
291
+ return t .splitCommonHandleRanges (ctx , tikvStore , splitCnt , true , mysql .HasUnsignedFlag (ft .GetFlag ()), nil )
291
292
}
292
293
return t .splitIntRanges (ctx , tikvStore , splitCnt )
293
294
case mysql .TypeBit :
294
- return t .splitCommonHandleRanges (ctx , tikvStore , splitCnt , false , false , true )
295
+ return t .splitCommonHandleRanges (ctx , tikvStore , splitCnt , false , false , nil )
295
296
case mysql .TypeString , mysql .TypeVarString , mysql .TypeVarchar :
296
- return t .splitCommonHandleRanges (ctx , tikvStore , splitCnt , false , false , mysql .HasBinaryFlag (ft .GetFlag ()))
297
+ var decode func ([]byte ) types.Datum
298
+ if ! mysql .HasBinaryFlag (ft .GetFlag ()) {
299
+ switch ft .GetCharset () {
300
+ case charset .CharsetASCII , charset .CharsetLatin1 :
301
+ // ASCII and Latin1 are 8-bit charsets, so we can use GetASCIIPrefixDatumFromBytes to decode them.
302
+ decode = GetASCIIPrefixDatumFromBytes
303
+ case charset .CharsetUTF8 , charset .CharsetUTF8MB4 :
304
+ switch ft .GetCollate () {
305
+ case charset .CollationUTF8 , charset .CollationUTF8MB4 , "utf8mb4_0900_bin" :
306
+ // We can only use GetASCIIPrefixDatumFromBytes to decode UTF8 and UTF8MB4 when they use one of the
306
+ // binary collations "utf8_bin", "utf8mb4_bin" or "utf8mb4_0900_bin".
308
+ decode = GetASCIIPrefixDatumFromBytes
309
+ }
310
+ }
311
+ if decode == nil {
312
+ return []ScanRange {newFullRange ()}, nil
313
+ }
314
+ }
315
+ return t .splitCommonHandleRanges (ctx , tikvStore , splitCnt , false , false , decode )
297
316
}
298
317
return []ScanRange {newFullRange ()}, nil
299
318
}
@@ -363,7 +382,7 @@ func (t *PhysicalTable) splitIntRanges(ctx context.Context, store tikv.Storage,
363
382
}
364
383
365
384
func (t * PhysicalTable ) splitCommonHandleRanges (
366
- ctx context.Context , store tikv.Storage , splitCnt int , isInt bool , unsigned bool , binary bool ,
385
+ ctx context.Context , store tikv.Storage , splitCnt int , isInt bool , unsigned bool , decode func ([] byte ) types. Datum ,
367
386
) ([]ScanRange , error ) {
368
387
recordPrefix := tablecodec .GenTableRecordPrefix (t .ID )
369
388
startKey , endKey := recordPrefix , recordPrefix .PrefixNext ()
@@ -385,8 +404,8 @@ func (t *PhysicalTable) splitCommonHandleRanges(
385
404
curScanEnd = GetNextIntDatumFromCommonHandle (keyRange .EndKey , recordPrefix , unsigned )
386
405
} else {
387
406
curScanEnd = GetNextBytesHandleDatum (keyRange .EndKey , recordPrefix )
388
- if ! binary {
389
- curScanEnd = GetASCIIPrefixDatumFromBytes (curScanEnd .GetBytes ())
407
+ if decode != nil {
408
+ curScanEnd = decode (curScanEnd .GetBytes ())
390
409
}
391
410
392
411
// "" is the smallest value for string/[]byte, so skip adding it to ranges.
0 commit comments