Skip to content

Commit a65aa46

Browse files
committed
sstable: reimplement lazy load the index block in two level iterator
This commit reimplements lazy loading for two-level sstable iterators, fixing critical issues from the previous attempt (fe43e7b) which was reverted due to data corruption in stress tests. The key improvement is the introduction of proper error handling and state management through the `topLevelIndexLoadError` field. Combining with the `topLevelIndexLoaded` flag, the code can now cache the index loading errors, avoid retry loops and inconsistent state when index loading failed. Benchmark tests are added to validate lazy loading behavior under various scenarios. Implements #3248
1 parent 9ed6d14 commit a65aa46

File tree

2 files changed

+468
-16
lines changed

2 files changed

+468
-16
lines changed

sstable/reader_iter_two_lvl.go

Lines changed: 102 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ type twoLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIterat
3232
// false - any filtering happens at the top level.
3333
useFilterBlock bool
3434
lastBloomFilterMatched bool
35+
36+
// topLevelIndexLoaded is set to true if the top-level index block load
37+
// operation completed regardless of success or failure. In case of failure,
38+
// topLevelIndexLoadError is set to avoid retrying the load operation.
39+
topLevelIndexLoaded bool
40+
topLevelIndexLoadError error
3541
}
3642

3743
var _ Iterator = (*twoLevelIteratorRowBlocks)(nil)
@@ -45,6 +51,13 @@ func (i *twoLevelIterator[I, PI, D, PD]) loadSecondLevelIndexBlock(dir int8) loa
4551
// the index fails.
4652
PD(&i.secondLevel.data).Invalidate()
4753
PI(&i.secondLevel.index).Invalidate()
54+
55+
// Ensure top-level index is loaded before accessing it
56+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
57+
i.secondLevel.err = err
58+
return loadBlockFailed
59+
}
60+
4861
if !PI(&i.topLevelIndex).Valid() {
4962
return loadBlockFailed
5063
}
@@ -87,6 +100,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) loadSecondLevelIndexBlock(dir int8) loa
87100
// appropriate bound, depending on the iteration direction, and returns either
88101
// `blockIntersects` or `blockExcluded`.
89102
func (i *twoLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
103+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
104+
i.secondLevel.err = err
105+
return blockExcluded // Conservative choice when we can't load the index
106+
}
107+
90108
// This iterator is configured with a bound-limited block property filter.
91109
// The bpf determined this entire index block could be excluded from
92110
// iteration based on the property encoded in the block handle. However, we
@@ -181,14 +199,7 @@ func newColumnBlockTwoLevelIterator(
181199
objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
182200
}
183201
i.secondLevel.data.InitOnce(r.keySchema, r.Comparer, &i.secondLevel.internalValueConstructor)
184-
topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readEnv.Block, i.secondLevel.indexFilterRH)
185-
if err == nil {
186-
err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, opts.Transforms)
187-
}
188-
if err != nil {
189-
_ = i.Close()
190-
return nil, err
191-
}
202+
192203
return i, nil
193204
}
194205

@@ -235,14 +246,6 @@ func newRowBlockTwoLevelIterator(
235246
i.secondLevel.data.SetHasValuePrefix(true)
236247
}
237248

238-
topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readEnv.Block, i.secondLevel.indexFilterRH)
239-
if err == nil {
240-
err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, opts.Transforms)
241-
}
242-
if err != nil {
243-
_ = i.Close()
244-
return nil, err
245-
}
246249
return i, nil
247250
}
248251

@@ -275,6 +278,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) SeekGE(
275278
err := i.secondLevel.err
276279
i.secondLevel.err = nil // clear cached iteration error
277280

281+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
282+
i.secondLevel.err = err
283+
return nil
284+
}
285+
278286
// The twoLevelIterator could be already exhausted. Utilize that when
279287
// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
280288
// bounds-exhausted near the top of the file.
@@ -417,6 +425,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) SeekPrefixGE(
417425
err := i.secondLevel.err
418426
i.secondLevel.err = nil // clear cached iteration error
419427

428+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
429+
i.secondLevel.err = err
430+
return nil
431+
}
432+
420433
// The twoLevelIterator could be already exhausted. Utilize that when
421434
// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
422435
// bounds-exhausted near the top of the file.
@@ -584,6 +597,12 @@ func (i *twoLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
584597
panic("unexpected virtualLastSeekLE with exclusive upper bounds")
585598
}
586599
key := i.secondLevel.upper
600+
601+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
602+
i.secondLevel.err = err
603+
return nil
604+
}
605+
587606
// Need to position the topLevelIndex.
588607
//
589608
// The previous exhausted state of singleLevelIterator is no longer
@@ -641,6 +660,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) SeekLT(
641660
// Seek optimization only applies until iterator is first positioned after SetBounds.
642661
i.secondLevel.boundsCmp = 0
643662

663+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
664+
i.secondLevel.err = err
665+
return nil
666+
}
667+
644668
var result loadBlockResult
645669
// NB: Unlike SeekGE, we don't have a fast-path here since we don't know
646670
// whether the topLevelIndex is positioned after the position that would
@@ -714,6 +738,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
714738
// Seek optimization only applies until iterator is first positioned after SetBounds.
715739
i.secondLevel.boundsCmp = 0
716740

741+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
742+
i.secondLevel.err = err
743+
return nil
744+
}
745+
717746
if !PI(&i.topLevelIndex).First() {
718747
return nil
719748
}
@@ -763,6 +792,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
763792
// Seek optimization only applies until iterator is first positioned after SetBounds.
764793
i.secondLevel.boundsCmp = 0
765794

795+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
796+
i.secondLevel.err = err
797+
return nil
798+
}
799+
766800
if !PI(&i.topLevelIndex).Last() {
767801
return nil
768802
}
@@ -830,6 +864,12 @@ func (i *twoLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.Intern
830864

831865
// Did not find prefix in the existing second-level index block. This is the
832866
// slow-path where we seek the iterator.
867+
868+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
869+
i.secondLevel.err = err
870+
return nil
871+
}
872+
833873
if !PI(&i.topLevelIndex).SeekGE(succKey) {
834874
PD(&i.secondLevel.data).Invalidate()
835875
PI(&i.secondLevel.index).Invalidate()
@@ -877,6 +917,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
877917
return nil
878918
}
879919

920+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
921+
i.secondLevel.err = err
922+
return nil
923+
}
924+
880925
// It is possible that skipBackward went too far and the virtual table lower
881926
// bound is after the first key in the block we are about to load, in which
882927
// case we must use SeekGE below. The keys in the block we are about to load
@@ -954,6 +999,12 @@ func (i *twoLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
954999
if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds < 0 {
9551000
return nil
9561001
}
1002+
1003+
if err := i.ensureTopLevelIndexLoaded(); err != nil {
1004+
i.secondLevel.err = err
1005+
return nil
1006+
}
1007+
9571008
i.secondLevel.exhaustedBounds = 0
9581009
if !PI(&i.topLevelIndex).Prev() {
9591010
PD(&i.secondLevel.data).Invalidate()
@@ -1009,6 +1060,39 @@ func (i *twoLevelIterator[I, PI, D, PD]) SetupForCompaction() {
10091060

10101061
// Close implements internalIterator.Close, as documented in the pebble
10111062
// package.
1063+
func (i *twoLevelIterator[I, PI, D, PD]) ensureTopLevelIndexLoaded() error {
1064+
// If already loaded, return cached error (nil means success)
1065+
if i.topLevelIndexLoaded {
1066+
return i.topLevelIndexLoadError
1067+
}
1068+
1069+
// Perform the deferred top-level index loading calls
1070+
topLevelIndexH, err := i.secondLevel.reader.readTopLevelIndexBlock(
1071+
i.secondLevel.ctx,
1072+
i.secondLevel.readEnv.Block,
1073+
i.secondLevel.indexFilterRH,
1074+
)
1075+
if err != nil {
1076+
i.topLevelIndexLoadError = err
1077+
i.topLevelIndexLoaded = true // Mark as loaded to avoid retry
1078+
return err
1079+
}
1080+
1081+
err = PI(&i.topLevelIndex).InitHandle(
1082+
i.secondLevel.reader.Comparer,
1083+
topLevelIndexH,
1084+
i.secondLevel.transforms,
1085+
)
1086+
if err != nil {
1087+
i.topLevelIndexLoadError = err
1088+
i.topLevelIndexLoaded = true
1089+
return err
1090+
}
1091+
1092+
i.topLevelIndexLoaded = true
1093+
return nil
1094+
}
1095+
10121096
func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
10131097
if invariants.Enabled && i.secondLevel.pool != nil {
10141098
panic("twoLevelIterator's singleLevelIterator has its own non-nil pool")
@@ -1019,6 +1103,8 @@ func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
10191103
err = firstError(err, PI(&i.topLevelIndex).Close())
10201104
i.useFilterBlock = false
10211105
i.lastBloomFilterMatched = false
1106+
i.topLevelIndexLoaded = false
1107+
i.topLevelIndexLoadError = nil
10221108
if pool != nil {
10231109
pool.Put(i)
10241110
}

0 commit comments

Comments
 (0)