Skip to content

Commit 7efcb83

Browse files
committed
ttl: support split TTL tasks for non-binary key column
1 parent 4eeeef8 commit 7efcb83

File tree

4 files changed

+230
-66
lines changed

4 files changed

+230
-66
lines changed

pkg/ttl/cache/BUILD.bazel

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,13 @@ go_test(
5050
],
5151
embed = [":cache"],
5252
flaky = True,
53-
shard_count = 17,
53+
shard_count = 18,
5454
deps = [
5555
"//pkg/infoschema",
5656
"//pkg/kv",
5757
"//pkg/parser/ast",
5858
"//pkg/parser/model",
59+
"//pkg/parser/mysql",
5960
"//pkg/server",
6061
"//pkg/session",
6162
"//pkg/store/helper",

pkg/ttl/cache/split_test.go

Lines changed: 181 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/pingcap/tidb/pkg/infoschema"
2727
"github.com/pingcap/tidb/pkg/kv"
2828
"github.com/pingcap/tidb/pkg/parser/model"
29+
"github.com/pingcap/tidb/pkg/parser/mysql"
2930
"github.com/pingcap/tidb/pkg/store/helper"
3031
"github.com/pingcap/tidb/pkg/tablecodec"
3132
"github.com/pingcap/tidb/pkg/testkit"
@@ -250,21 +251,21 @@ func createTTLTableWithSQL(t *testing.T, tk *testkit.TestKit, name string, sql s
250251
return ttlTbl
251252
}
252253

253-
func checkRange(t *testing.T, r cache.ScanRange, start, end types.Datum) {
254+
func checkRange(t *testing.T, r cache.ScanRange, start, end types.Datum, msgAndArgs ...any) {
254255
if start.IsNull() {
255-
require.Nil(t, r.Start)
256+
require.Nil(t, r.Start, msgAndArgs...)
256257
} else {
257-
require.Equal(t, 1, len(r.Start))
258-
require.Equal(t, start.Kind(), r.Start[0].Kind())
259-
require.Equal(t, start.GetValue(), r.Start[0].GetValue())
258+
require.Equal(t, 1, len(r.Start), msgAndArgs...)
259+
require.Equal(t, start.Kind(), r.Start[0].Kind(), msgAndArgs...)
260+
require.Equal(t, start.GetValue(), r.Start[0].GetValue(), msgAndArgs...)
260261
}
261262

262263
if end.IsNull() {
263-
require.Nil(t, r.End)
264+
require.Nil(t, r.End, msgAndArgs...)
264265
} else {
265-
require.Equal(t, 1, len(r.End))
266-
require.Equal(t, end.Kind(), r.End[0].Kind())
267-
require.Equal(t, end.GetValue(), r.End[0].GetValue())
266+
require.Equal(t, 1, len(r.End), msgAndArgs...)
267+
require.Equal(t, end.Kind(), r.End[0].Kind(), msgAndArgs...)
268+
require.Equal(t, end.GetValue(), r.End[0].GetValue(), msgAndArgs...)
268269
}
269270
}
270271

@@ -516,47 +517,133 @@ func TestSplitTTLScanRangesWithBytes(t *testing.T) {
516517
createTTLTable(t, tk, "t3", "varchar(32) CHARACTER SET BINARY"),
517518
createTTLTable(t, tk, "t4", "bit(32)"),
518519
create2PKTTLTable(t, tk, "t5", "binary(32)"),
520+
createTTLTable(t, tk, "t6", "char(32) CHARACTER SET UTF8MB4"),
521+
create2PKTTLTable(t, tk, "t7", "char(32) CHARACTER SET gbk"),
522+
}
523+
524+
cases := []struct {
525+
name string
526+
regionEdges []kv.Handle
527+
splitCnt int
528+
binaryExpected [][]types.Datum
529+
stringExpected [][]types.Datum
530+
}{
531+
{
532+
name: "2 regions with binary split",
533+
regionEdges: []kv.Handle{
534+
bytesHandle(t, []byte{1, 2, 3}),
535+
},
536+
splitCnt: 4,
537+
binaryExpected: [][]types.Datum{
538+
{types.Datum{}, types.NewBytesDatum([]byte{1, 2, 3})},
539+
{types.NewBytesDatum([]byte{1, 2, 3}), types.Datum{}},
540+
},
541+
stringExpected: [][]types.Datum{
542+
{types.Datum{}, types.Datum{}},
543+
},
544+
},
545+
{
546+
name: "6 regions with binary split",
547+
regionEdges: []kv.Handle{
548+
bytesHandle(t, []byte{1, 2, 3}),
549+
bytesHandle(t, []byte{1, 2, 3, 4}),
550+
bytesHandle(t, []byte{1, 2, 3, 4, 5}),
551+
bytesHandle(t, []byte{1, 2, 4}),
552+
bytesHandle(t, []byte{1, 2, 5}),
553+
},
554+
splitCnt: 4,
555+
binaryExpected: [][]types.Datum{
556+
{types.Datum{}, types.NewBytesDatum([]byte{1, 2, 3, 4})},
557+
{types.NewBytesDatum([]byte{1, 2, 3, 4}), types.NewBytesDatum([]byte{1, 2, 4})},
558+
{types.NewBytesDatum([]byte{1, 2, 4}), types.NewBytesDatum([]byte{1, 2, 5})},
559+
{types.NewBytesDatum([]byte{1, 2, 5}), types.Datum{}},
560+
},
561+
stringExpected: [][]types.Datum{
562+
{types.Datum{}, types.Datum{}},
563+
},
564+
},
565+
{
566+
name: "2 regions with utf8 split",
567+
regionEdges: []kv.Handle{
568+
bytesHandle(t, []byte("中文")),
569+
},
570+
splitCnt: 4,
571+
binaryExpected: [][]types.Datum{
572+
{types.Datum{}, types.NewBytesDatum([]byte("中文"))},
573+
{types.NewBytesDatum([]byte("中文")), types.Datum{}},
574+
},
575+
stringExpected: [][]types.Datum{
576+
{types.Datum{}, types.Datum{}},
577+
},
578+
},
579+
{
580+
name: "several regions with mixed split",
581+
regionEdges: []kv.Handle{
582+
bytesHandle(t, []byte("abc")),
583+
bytesHandle(t, []byte("ab\x7f0")),
584+
bytesHandle(t, []byte("ab\xff0")),
585+
bytesHandle(t, []byte("ac\x001")),
586+
bytesHandle(t, []byte("ad\x0a1")),
587+
bytesHandle(t, []byte("ad23")),
588+
bytesHandle(t, []byte("ad230\xff")),
589+
bytesHandle(t, []byte("befh")),
590+
bytesHandle(t, []byte("中文")),
591+
},
592+
splitCnt: 10,
593+
binaryExpected: [][]types.Datum{
594+
{types.Datum{}, types.NewBytesDatum([]byte("abc"))},
595+
{types.NewBytesDatum([]byte("abc")), types.NewBytesDatum([]byte("ab\x7f0"))},
596+
{types.NewBytesDatum([]byte("ab\x7f0")), types.NewBytesDatum([]byte("ab\xff0"))},
597+
{types.NewBytesDatum([]byte("ab\xff0")), types.NewBytesDatum([]byte("ac\x001"))},
598+
{types.NewBytesDatum([]byte("ac\x001")), types.NewBytesDatum([]byte("ad\x0a1"))},
599+
{types.NewBytesDatum([]byte("ad\x0a1")), types.NewBytesDatum([]byte("ad23"))},
600+
{types.NewBytesDatum([]byte("ad23")), types.NewBytesDatum([]byte("ad230\xff"))},
601+
{types.NewBytesDatum([]byte("ad230\xff")), types.NewBytesDatum([]byte("befh"))},
602+
{types.NewBytesDatum([]byte("befh")), types.NewBytesDatum([]byte("中文"))},
603+
{types.NewBytesDatum([]byte("中文")), types.Datum{}},
604+
},
605+
stringExpected: [][]types.Datum{
606+
{types.Datum{}, types.NewStringDatum("abc")},
607+
{types.NewStringDatum("abc"), types.NewStringDatum("ac")},
608+
{types.NewStringDatum("ac"), types.NewStringDatum("ad\n1")},
609+
{types.NewStringDatum("ad\n1"), types.NewStringDatum("ad23")},
610+
{types.NewStringDatum("ad23"), types.NewStringDatum("ad230")},
611+
{types.NewStringDatum("ad230"), types.NewStringDatum("befh")},
612+
{types.NewStringDatum("befh"), types.Datum{}},
613+
},
614+
},
519615
}
520616

521617
tikvStore := newMockTiKVStore(t)
522618
for _, tbl := range tbls {
523-
// test only one region
524-
tikvStore.clearRegions()
525-
ranges, err := tbl.SplitScanRanges(context.TODO(), tikvStore, 4)
526-
require.NoError(t, err)
527-
require.Equal(t, 1, len(ranges))
528-
checkRange(t, ranges[0], types.Datum{}, types.Datum{})
529-
530-
// test share regions with other table
531-
tikvStore.clearRegions()
532-
tikvStore.addRegion(
533-
tablecodec.GenTablePrefix(tbl.ID-1),
534-
tablecodec.GenTablePrefix(tbl.ID+1),
535-
)
536-
ranges, err = tbl.SplitScanRanges(context.TODO(), tikvStore, 4)
537-
require.NoError(t, err)
538-
require.Equal(t, 1, len(ranges))
539-
checkRange(t, ranges[0], types.Datum{}, types.Datum{})
619+
for _, c := range cases {
620+
tikvStore.clearRegions()
621+
require.Greater(t, len(c.regionEdges), 0)
622+
for i, edge := range c.regionEdges {
623+
if i == 0 {
624+
tikvStore.addRegionBeginWithTablePrefix(tbl.ID, edge)
625+
} else {
626+
tikvStore.addRegionWithTablePrefix(tbl.ID, c.regionEdges[i-1], edge)
627+
}
628+
}
629+
tikvStore.addRegionEndWithTablePrefix(c.regionEdges[len(c.regionEdges)-1], tbl.ID)
630+
ranges, err := tbl.SplitScanRanges(context.TODO(), tikvStore, c.splitCnt)
631+
require.NoError(t, err)
632+
633+
keyTp := tbl.KeyColumnTypes[0]
634+
var expected [][]types.Datum
635+
if keyTp.GetType() == mysql.TypeBit || mysql.HasBinaryFlag(keyTp.GetFlag()) {
636+
expected = c.binaryExpected
637+
} else {
638+
expected = c.stringExpected
639+
}
540640

541-
// test one table has multiple regions
542-
tikvStore.clearRegions()
543-
tikvStore.addRegionBeginWithTablePrefix(tbl.ID, bytesHandle(t, []byte{1, 2, 3}))
544-
tikvStore.addRegionWithTablePrefix(
545-
tbl.ID, bytesHandle(t, []byte{1, 2, 3}), bytesHandle(t, []byte{1, 2, 3, 4}))
546-
tikvStore.addRegionWithTablePrefix(
547-
tbl.ID, bytesHandle(t, []byte{1, 2, 3, 4}), bytesHandle(t, []byte{1, 2, 3, 4, 5}))
548-
tikvStore.addRegionWithTablePrefix(
549-
tbl.ID, bytesHandle(t, []byte{1, 2, 3, 4, 5}), bytesHandle(t, []byte{1, 2, 4}))
550-
tikvStore.addRegionWithTablePrefix(
551-
tbl.ID, bytesHandle(t, []byte{1, 2, 4}), bytesHandle(t, []byte{1, 2, 5}))
552-
tikvStore.addRegionEndWithTablePrefix(bytesHandle(t, []byte{1, 2, 5}), tbl.ID)
553-
ranges, err = tbl.SplitScanRanges(context.TODO(), tikvStore, 4)
554-
require.NoError(t, err)
555-
require.Equal(t, 4, len(ranges))
556-
checkRange(t, ranges[0], types.Datum{}, types.NewBytesDatum([]byte{1, 2, 3, 4}))
557-
checkRange(t, ranges[1], types.NewBytesDatum([]byte{1, 2, 3, 4}), types.NewBytesDatum([]byte{1, 2, 4}))
558-
checkRange(t, ranges[2], types.NewBytesDatum([]byte{1, 2, 4}), types.NewBytesDatum([]byte{1, 2, 5}))
559-
checkRange(t, ranges[3], types.NewBytesDatum([]byte{1, 2, 5}), types.Datum{})
641+
require.Equal(t, len(expected), len(ranges), "tbl: %s, case: %s", tbl.Name, c.name)
642+
for i, r := range ranges {
643+
checkRange(t, r, expected[i][0], expected[i][1],
644+
"tbl: %s, case: %s, i: %d", tbl.Name, c.name, i)
645+
}
646+
}
560647
}
561648
}
562649

@@ -565,10 +652,10 @@ func TestNoTTLSplitSupportTables(t *testing.T) {
565652
tk := testkit.NewTestKit(t, store)
566653

567654
tbls := []*cache.PhysicalTable{
568-
createTTLTable(t, tk, "t1", "char(32) CHARACTER SET UTF8MB4"),
569-
createTTLTable(t, tk, "t2", "varchar(32) CHARACTER SET UTF8MB4"),
570-
createTTLTable(t, tk, "t4", "decimal(32, 2)"),
571-
create2PKTTLTable(t, tk, "t5", "char(32) CHARACTER SET UTF8MB4"),
655+
createTTLTable(t, tk, "t1", "decimal(32, 2)"),
656+
createTTLTable(t, tk, "t2", "date"),
657+
createTTLTable(t, tk, "t3", "datetime"),
658+
createTTLTable(t, tk, "t4", "timestamp"),
572659
}
573660

574661
tikvStore := newMockTiKVStore(t)
@@ -827,6 +914,51 @@ func TestGetNextBytesHandleDatum(t *testing.T) {
827914
}
828915
}
829916

917+
func TestGetASCIIPrefixDatumFromBytes(t *testing.T) {
918+
cases := []struct {
919+
bytes []byte
920+
expected string
921+
}{
922+
{bytes: nil, expected: ""},
923+
{bytes: []byte{}, expected: ""},
924+
{bytes: []byte{0}, expected: ""},
925+
{bytes: []byte{1}, expected: ""},
926+
{bytes: []byte{8}, expected: ""},
927+
{bytes: []byte{9}, expected: "\t"},
928+
{bytes: []byte{10}, expected: "\n"},
929+
{bytes: []byte{11}, expected: ""},
930+
{bytes: []byte{12}, expected: ""},
931+
{bytes: []byte{13}, expected: "\r"},
932+
{bytes: []byte{14}, expected: ""},
933+
{bytes: []byte{0x19}, expected: ""},
934+
{bytes: []byte{0x20}, expected: " "},
935+
{bytes: []byte{0x21}, expected: "!"},
936+
{bytes: []byte{0x7D}, expected: "}"},
937+
{bytes: []byte{0x7E}, expected: "~"},
938+
{bytes: []byte{0x7F}, expected: ""},
939+
{bytes: []byte{0xFF}, expected: ""},
940+
{bytes: []byte{0x0, 'a', 'b'}, expected: ""},
941+
{bytes: []byte{0xFF, 'a', 'b'}, expected: ""},
942+
{bytes: []byte{'0', '1', 0x0, 'a', 'b'}, expected: "01"},
943+
{bytes: []byte{'0', '1', 0x15, 'a', 'b'}, expected: "01"},
944+
{bytes: []byte{'0', '1', 0xFF, 'a', 'b'}, expected: "01"},
945+
{bytes: []byte{'a', 'b', 0x0}, expected: "ab"},
946+
{bytes: []byte{'a', 'b', 0x15}, expected: "ab"},
947+
{bytes: []byte{'a', 'b', 0xFF}, expected: "ab"},
948+
{bytes: []byte("ab\rcd\tef\nAB!~GH()tt ;;"), expected: "ab\rcd\tef\nAB!~GH()tt ;;"},
949+
{bytes: []byte("中文"), expected: ""},
950+
{bytes: []byte("cn中文"), expected: "cn"},
951+
{bytes: []byte("😀"), expected: ""},
952+
{bytes: []byte("emoji😀"), expected: "emoji"},
953+
}
954+
955+
for i, c := range cases {
956+
d := cache.GetASCIIPrefixDatumFromBytes(c.bytes)
957+
require.Equalf(t, types.KindString, d.Kind(), "i: %d", i)
958+
require.Equalf(t, c.expected, d.GetString(), "i: %d, bs: %v", i, c.bytes)
959+
}
960+
}
961+
830962
func TestGetNextIntHandle(t *testing.T) {
831963
tblID := int64(7)
832964
cases := []struct {

pkg/ttl/cache/table.go

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -287,15 +287,13 @@ func (t *PhysicalTable) SplitScanRanges(ctx context.Context, store kv.Storage, s
287287
switch ft.GetType() {
288288
case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24:
289289
if len(t.KeyColumns) > 1 {
290-
return t.splitCommonHandleRanges(ctx, tikvStore, splitCnt, true, mysql.HasUnsignedFlag(ft.GetFlag()))
290+
return t.splitCommonHandleRanges(ctx, tikvStore, splitCnt, true, mysql.HasUnsignedFlag(ft.GetFlag()), false)
291291
}
292292
return t.splitIntRanges(ctx, tikvStore, splitCnt)
293293
case mysql.TypeBit:
294-
return t.splitCommonHandleRanges(ctx, tikvStore, splitCnt, false, false)
294+
return t.splitCommonHandleRanges(ctx, tikvStore, splitCnt, false, false, true)
295295
case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar:
296-
if mysql.HasBinaryFlag(ft.GetFlag()) {
297-
return t.splitCommonHandleRanges(ctx, tikvStore, splitCnt, false, false)
298-
}
296+
return t.splitCommonHandleRanges(ctx, tikvStore, splitCnt, false, false, mysql.HasBinaryFlag(ft.GetFlag()))
299297
}
300298
return []ScanRange{newFullRange()}, nil
301299
}
@@ -365,7 +363,7 @@ func (t *PhysicalTable) splitIntRanges(ctx context.Context, store tikv.Storage,
365363
}
366364

367365
func (t *PhysicalTable) splitCommonHandleRanges(
368-
ctx context.Context, store tikv.Storage, splitCnt int, isInt bool, unsigned bool,
366+
ctx context.Context, store tikv.Storage, splitCnt int, isInt bool, unsigned bool, binary bool,
369367
) ([]ScanRange, error) {
370368
recordPrefix := tablecodec.GenTableRecordPrefix(t.ID)
371369
startKey, endKey := recordPrefix, recordPrefix.PrefixNext()
@@ -381,20 +379,26 @@ func (t *PhysicalTable) splitCommonHandleRanges(
381379
scanRanges := make([]ScanRange, 0, len(keyRanges))
382380
curScanStart := nullDatum()
383381
for i, keyRange := range keyRanges {
384-
if i != 0 && curScanStart.IsNull() {
385-
break
386-
}
387-
388382
curScanEnd := nullDatum()
389383
if i != len(keyRanges)-1 {
390384
if isInt {
391385
curScanEnd = GetNextIntDatumFromCommonHandle(keyRange.EndKey, recordPrefix, unsigned)
392386
} else {
393387
curScanEnd = GetNextBytesHandleDatum(keyRange.EndKey, recordPrefix)
388+
if !binary {
389+
curScanEnd = GetASCIIPrefixDatumFromBytes(curScanEnd.GetBytes())
390+
}
391+
392+
// "" is the smallest value for string/[]byte, so skip adding it to the ranges.
393+
if len(curScanEnd.GetBytes()) == 0 {
394+
continue
395+
}
394396
}
395397
}
396398

397399
if !curScanStart.IsNull() && !curScanEnd.IsNull() {
400+
// Sometimes curScanStart >= curScanEnd because the edge datum is an approximate value.
401+
// In that case, we should skip this range to keep the ranges in increasing order.
398402
cmp, err := curScanStart.Compare(types.StrictContext, &curScanEnd, collate.GetBinaryCollator())
399403
intest.AssertNoError(err)
400404
if err != nil {
@@ -407,6 +411,9 @@ func (t *PhysicalTable) splitCommonHandleRanges(
407411
}
408412

409413
scanRanges = append(scanRanges, newDatumRange(curScanStart, curScanEnd))
414+
if curScanEnd.IsNull() {
415+
break
416+
}
410417
curScanStart = curScanEnd
411418
}
412419
return scanRanges, nil
@@ -648,3 +655,27 @@ func GetNextBytesHandleDatum(key kv.Key, recordPrefix []byte) (d types.Datum) {
648655
d.SetBytes(val)
649656
return d
650657
}
658+
659+
// GetASCIIPrefixDatumFromBytes is used to convert bytes to string datum which only contains ASCII prefix string.
660+
// The ASCII prefix string only contains visible characters and `\t`, `\n`, `\r`.
661+
// "abc" -> "abc"
662+
// "\0abc" -> ""
663+
// "ab\x01c" -> "ab"
664+
// "ab\xffc" -> "ab"
665+
// "ab\rc\xff" -> "ab\rc"
666+
func GetASCIIPrefixDatumFromBytes(bs []byte) types.Datum {
667+
for i, c := range bs {
668+
if c >= 0x20 && c <= 0x7E {
669+
// visible characters from ` ` to `~`
670+
continue
671+
}
672+
673+
if c == '\t' || c == '\n' || c == '\r' {
674+
continue
675+
}
676+
677+
bs = bs[:i]
678+
break
679+
}
680+
return types.NewStringDatum(string(bs))
681+
}

0 commit comments

Comments
 (0)