Skip to content

Commit 4ba4bcd

Browse files
authored
lightning: return 0 early on empty parquet files (#52519) (#52521)
close #52518
1 parent 29491d4 commit 4ba4bcd

File tree

2 files changed

+8 −3 lines changed (8 additions, 3 deletions)

pkg/lightning/mydump/loader.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -826,7 +826,7 @@ func SampleFileCompressRatio(ctx context.Context, fileMeta SourceFileMeta, store
826826
// SampleParquetDataSize samples the data size of the parquet file.
827827
func SampleParquetDataSize(ctx context.Context, fileMeta SourceFileMeta, store storage.ExternalStorage) (int64, error) {
828828
totalRowCount, err := ReadParquetFileRowCountByFile(ctx, store, fileMeta)
829-
if err != nil {
829+
if totalRowCount == 0 || err != nil {
830830
return 0, err
831831
}
832832

pkg/lightning/mydump/loader_test.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,7 +1108,7 @@ func TestSampleFileCompressRatio(t *testing.T) {
11081108
require.InDelta(t, ratio, 5000.0/float64(bf.Len()), 1e-5)
11091109
}
11101110

1111-
func TestSampleParquetDataSize(t *testing.T) {
1111+
func testSampleParquetDataSize(t *testing.T, count int) {
11121112
s := newTestMydumpLoaderSuite(t)
11131113
store, err := storage.NewLocalStorage(s.sourceDir)
11141114
require.NoError(t, err)
@@ -1133,7 +1133,7 @@ func TestSampleParquetDataSize(t *testing.T) {
11331133
t.Logf("seed: %d. To reproduce the random behaviour, manually set `rand.New(rand.NewSource(seed))`", seed)
11341134
rnd := rand.New(rand.NewSource(seed))
11351135
totalRowSize := 0
1136-
for i := 0; i < 1000; i++ {
1136+
for i := 0; i < count; i++ {
11371137
kl := rnd.Intn(20) + 1
11381138
key := make([]byte, kl)
11391139
kl, err = rnd.Read(key)
@@ -1167,6 +1167,11 @@ func TestSampleParquetDataSize(t *testing.T) {
11671167
require.InDelta(t, totalRowSize, size, float64(totalRowSize)/10)
11681168
}
11691169

1170+
func TestSampleParquetDataSize(t *testing.T) {
1171+
t.Run("count=1000", func(t *testing.T) { testSampleParquetDataSize(t, 1000) })
1172+
t.Run("count=0", func(t *testing.T) { testSampleParquetDataSize(t, 0) })
1173+
}
1174+
11701175
func TestSetupOptions(t *testing.T) {
11711176
// These functions are only used in other components; reference them here so
11721177
// that they are not deleted by mistake.

0 commit comments

Comments
 (0)