Skip to content

Commit 77d72b6

Browse files
committed
sstable: reimplement lazy loading of the index block in the two-level iterator
This commit reimplements lazy loading for two-level sstable iterators, fixing critical issues from the previous attempt (fe43e7b) which was reverted due to data corruption in stress tests. Benchmark tests are added to validate lazy loading behavior under various scenarios. Implements #3248
1 parent e82fab7 commit 77d72b6

File tree

2 files changed

+456
-18
lines changed

2 files changed

+456
-18
lines changed

sstable/reader_iter_two_lvl.go

Lines changed: 90 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ type twoLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIterat
3232
// false - any filtering happens at the top level.
3333
useFilterBlock bool
3434
lastBloomFilterMatched bool
35+
36+
// topLevelIndexLoaded is set to true if the top-level index block load
37+
// operation completed successfully.
38+
topLevelIndexLoaded bool
3539
}
3640

3741
var _ Iterator = (*twoLevelIteratorRowBlocks)(nil)
@@ -45,6 +49,12 @@ func (i *twoLevelIterator[I, PI, D, PD]) loadSecondLevelIndexBlock(dir int8) loa
4549
// the index fails.
4650
PD(&i.secondLevel.data).Invalidate()
4751
PI(&i.secondLevel.index).Invalidate()
52+
53+
// Ensure the top-level index is loaded before accessing it.
54+
if !i.ensureTopLevelIndexLoaded() {
55+
return loadBlockFailed
56+
}
57+
4858
if !PI(&i.topLevelIndex).Valid() {
4959
return loadBlockFailed
5060
}
@@ -87,6 +97,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) loadSecondLevelIndexBlock(dir int8) loa
8797
// appropriate bound, depending on the iteration direction, and returns either
8898
// `blockIntersects` or `blockExcluded`.
8999
func (i *twoLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
100+
if invariants.Enabled && !i.topLevelIndexLoaded {
101+
panic("pebble: resolveMaybeExcluded called without loaded top-level index")
102+
}
103+
90104
// This iterator is configured with a bound-limited block property filter.
91105
// The bpf determined this entire index block could be excluded from
92106
// iteration based on the property encoded in the block handle. However, we
@@ -162,6 +176,7 @@ func newColumnBlockTwoLevelIterator(
162176
}
163177
i := twoLevelIterColumnBlockPool.Get().(*twoLevelIteratorColumnBlocks)
164178
i.secondLevel.init(ctx, r, opts)
179+
i.secondLevel.indexLoaded = true
165180
// Only check the bloom filter at the top level.
166181
i.useFilterBlock = i.secondLevel.useFilterBlock
167182
i.secondLevel.useFilterBlock = false
@@ -181,14 +196,7 @@ func newColumnBlockTwoLevelIterator(
181196
objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
182197
}
183198
i.secondLevel.data.InitOnce(r.keySchema, r.Comparer, &i.secondLevel.internalValueConstructor)
184-
topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readEnv.Block, i.secondLevel.indexFilterRH)
185-
if err == nil {
186-
err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, opts.Transforms)
187-
}
188-
if err != nil {
189-
_ = i.Close()
190-
return nil, err
191-
}
199+
192200
return i, nil
193201
}
194202

@@ -210,6 +218,7 @@ func newRowBlockTwoLevelIterator(
210218
}
211219
i := twoLevelIterRowBlockPool.Get().(*twoLevelIteratorRowBlocks)
212220
i.secondLevel.init(ctx, r, opts)
221+
i.secondLevel.indexLoaded = true
213222
// Only check the bloom filter at the top level.
214223
i.useFilterBlock = i.secondLevel.useFilterBlock
215224
i.secondLevel.useFilterBlock = false
@@ -235,14 +244,6 @@ func newRowBlockTwoLevelIterator(
235244
i.secondLevel.data.SetHasValuePrefix(true)
236245
}
237246

238-
topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readEnv.Block, i.secondLevel.indexFilterRH)
239-
if err == nil {
240-
err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, opts.Transforms)
241-
}
242-
if err != nil {
243-
_ = i.Close()
244-
return nil, err
245-
}
246247
return i, nil
247248
}
248249

@@ -275,6 +276,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) SeekGE(
275276
err := i.secondLevel.err
276277
i.secondLevel.err = nil // clear cached iteration error
277278

279+
if !i.ensureTopLevelIndexLoaded() {
280+
return nil
281+
}
282+
278283
// The twoLevelIterator could be already exhausted. Utilize that when
279284
// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
280285
// bounds-exhausted near the top of the file.
@@ -417,6 +422,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) SeekPrefixGE(
417422
err := i.secondLevel.err
418423
i.secondLevel.err = nil // clear cached iteration error
419424

425+
if !i.ensureTopLevelIndexLoaded() {
426+
return nil
427+
}
428+
420429
// The twoLevelIterator could be already exhausted. Utilize that when
421430
// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
422431
// bounds-exhausted near the top of the file.
@@ -584,6 +593,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
584593
panic("unexpected virtualLastSeekLE with exclusive upper bounds")
585594
}
586595
key := i.secondLevel.upper
596+
597+
if !i.ensureTopLevelIndexLoaded() {
598+
return nil
599+
}
600+
587601
// Need to position the topLevelIndex.
588602
//
589603
// The previous exhausted state of singleLevelIterator is no longer
@@ -641,6 +655,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) SeekLT(
641655
// Seek optimization only applies until iterator is first positioned after SetBounds.
642656
i.secondLevel.boundsCmp = 0
643657

658+
if !i.ensureTopLevelIndexLoaded() {
659+
return nil
660+
}
661+
644662
var result loadBlockResult
645663
// NB: Unlike SeekGE, we don't have a fast-path here since we don't know
646664
// whether the topLevelIndex is positioned after the position that would
@@ -714,6 +732,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
714732
// Seek optimization only applies until iterator is first positioned after SetBounds.
715733
i.secondLevel.boundsCmp = 0
716734

735+
if !i.ensureTopLevelIndexLoaded() {
736+
return nil
737+
}
738+
717739
if !PI(&i.topLevelIndex).First() {
718740
return nil
719741
}
@@ -763,6 +785,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
763785
// Seek optimization only applies until iterator is first positioned after SetBounds.
764786
i.secondLevel.boundsCmp = 0
765787

788+
if !i.ensureTopLevelIndexLoaded() {
789+
return nil
790+
}
791+
766792
if !PI(&i.topLevelIndex).Last() {
767793
return nil
768794
}
@@ -830,6 +856,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.Intern
830856

831857
// Did not find prefix in the existing second-level index block. This is the
832858
// slow-path where we seek the iterator.
859+
860+
if !i.ensureTopLevelIndexLoaded() {
861+
return nil
862+
}
863+
833864
if !PI(&i.topLevelIndex).SeekGE(succKey) {
834865
PD(&i.secondLevel.data).Invalidate()
835866
PI(&i.secondLevel.index).Invalidate()
@@ -877,6 +908,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
877908
return nil
878909
}
879910

911+
if !i.ensureTopLevelIndexLoaded() {
912+
return nil
913+
}
914+
880915
// It is possible that skipBackward went too far and the virtual table lower
881916
// bound is after the first key in the block we are about to load, in which
882917
// case we must use SeekGE below. The keys in the block we are about to load
@@ -954,6 +989,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
954989
if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds < 0 {
955990
return nil
956991
}
992+
993+
if !i.ensureTopLevelIndexLoaded() {
994+
return nil
995+
}
996+
957997
i.secondLevel.exhaustedBounds = 0
958998
if !PI(&i.topLevelIndex).Prev() {
959999
PD(&i.secondLevel.data).Invalidate()
@@ -1007,8 +1047,39 @@ func (i *twoLevelIterator[I, PI, D, PD]) SetupForCompaction() {
10071047
i.secondLevel.SetupForCompaction()
10081048
}
10091049

1010-
// Close implements internalIterator.Close, as documented in the pebble
1011-
// package.
1050+
// ensureTopLevelIndexLoaded performs the deferred load of the top-level index
// block and reports whether the top-level index is usable.
//
// It returns true immediately when topLevelIndexLoaded is already set and
// secondLevel.err is nil. Otherwise it reads the top-level index block through
// the reader and initializes topLevelIndex from the returned handle. If either
// step fails, the error is stored in secondLevel.err, topLevelIndexLoaded is
// set to false, and false is returned. On success topLevelIndexLoaded is set
// to true and true is returned.
//
// NOTE(review): when topLevelIndexLoaded is true but secondLevel.err is
// non-nil, the fast path is skipped and the block is read and InitHandle is
// called again on topLevelIndex. Confirm that this reload is intended and that
// the previously initialized handle is released -- TODO verify against
// InitHandle's contract.
func (i *twoLevelIterator[I, PI, D, PD]) ensureTopLevelIndexLoaded() bool {
1051+
if i.topLevelIndexLoaded && i.secondLevel.err == nil {
1052+
return true
1053+
}
1054+
1055+
// Perform the deferred top-level index load: read the block first, then
// initialize the index iterator over the returned handle.
1056+
topLevelIndexH, err := i.secondLevel.reader.readTopLevelIndexBlock(
1057+
i.secondLevel.ctx,
1058+
i.secondLevel.readEnv.Block,
1059+
i.secondLevel.indexFilterRH,
1060+
)
1061+
if err != nil {
1062+
// Record the read failure on the second-level iterator and leave the
// index marked as not loaded.
i.secondLevel.err = err
1063+
i.topLevelIndexLoaded = false
1064+
return false
1065+
}
1066+
1067+
err = PI(&i.topLevelIndex).InitHandle(
1068+
i.secondLevel.reader.Comparer,
1069+
topLevelIndexH,
1070+
i.secondLevel.transforms,
1071+
)
1072+
if err != nil {
1073+
// Same failure handling as for the block read above.
i.secondLevel.err = err
1074+
i.topLevelIndexLoaded = false
1075+
return false
1076+
}
1077+
1078+
i.topLevelIndexLoaded = true
1079+
return true
1080+
}
1081+
1082+
// Close implements internalIterator.Close, as documented in the pebble package.
10121083
func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
10131084
if invariants.Enabled && i.secondLevel.pool != nil {
10141085
panic("twoLevelIterator's singleLevelIterator has its own non-nil pool")
@@ -1019,6 +1090,7 @@ func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
10191090
err = firstError(err, PI(&i.topLevelIndex).Close())
10201091
i.useFilterBlock = false
10211092
i.lastBloomFilterMatched = false
1093+
i.topLevelIndexLoaded = false
10221094
if pool != nil {
10231095
pool.Put(i)
10241096
}

0 commit comments

Comments
 (0)