-
Notifications
You must be signed in to change notification settings - Fork 6.6k
Introduction of the PrefetchRateLimiter #13907
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b27217e
f9ff96d
a0cb28f
6d49421
f785f74
b164f7c
fa5df87
677f7c9
7e5e964
0d2f0e2
63cdbed
64e9780
3d5aee2
396b0d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,56 @@ | |
|
||
namespace ROCKSDB_NAMESPACE { | ||
|
||
size_t DefaultPrefetchRateLimiter::acquire(const BlockBasedTable* /*unused*/, | ||
size_t bytes, bool all_or_nothing) { | ||
bool done = false; | ||
size_t amount = 0; | ||
// Quick check if we have nothing. | ||
if (cur_bytes_ == 0) { | ||
return amount; | ||
} | ||
while (!done) { | ||
// Check again here. | ||
size_t current = cur_bytes_.load(); | ||
if (current == 0) { | ||
amount = 0; | ||
return amount; | ||
} | ||
if (all_or_nothing) { | ||
if (current >= bytes) { | ||
done = cur_bytes_.compare_exchange_weak(current, current - bytes); | ||
amount = bytes; | ||
} else { | ||
amount = 0; | ||
return amount; | ||
} | ||
} else { | ||
if (current > bytes) { | ||
done = cur_bytes_.compare_exchange_weak(current, current - bytes); | ||
amount = bytes; | ||
} else { | ||
done = cur_bytes_.compare_exchange_weak(current, 0); | ||
amount = current; | ||
} | ||
} | ||
} | ||
return amount; | ||
} | ||
|
||
bool DefaultPrefetchRateLimiter::release(size_t bytes) { | ||
bool done = false; | ||
while (!done) { | ||
// Check again here. | ||
size_t current = cur_bytes_.load(); | ||
if (current + bytes >= max_bytes_) { | ||
done = cur_bytes_.compare_exchange_weak(current, max_bytes_); | ||
} else { | ||
done = cur_bytes_.compare_exchange_weak(current, current + bytes); | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
// Delegates to SeekImpl with a null target and `false` as the second argument.
// NOTE(review): presumably a null target means "no seek key, start at the
// first entry" -- confirm against SeekImpl, which is not visible here.
void BlockBasedTableIterator::SeekToFirst() { SeekImpl(nullptr, false); } | ||
|
||
void BlockBasedTableIterator::Seek(const Slice& target) { | ||
|
@@ -984,6 +1034,7 @@ void BlockBasedTableIterator::Prepare(const MultiScanArgs* multiscan_opts) { | |
std::vector<BlockHandle> blocks_to_prepare; | ||
Status s; | ||
std::vector<std::tuple<size_t, size_t>> block_ranges_per_scan; | ||
total_acquired_ = 0; | ||
for (const auto& scan_opt : *scan_opts) { | ||
size_t num_blocks = 0; | ||
// Current scan overlap the last block of the previous scan. | ||
|
@@ -1000,6 +1051,16 @@ void BlockBasedTableIterator::Prepare(const MultiScanArgs* multiscan_opts) { | |
index_iter_->user_key(), | ||
/*a_has_ts*/ true, *scan_opt.range.limit, | ||
/*b_has_ts=*/false) <= 0)) { | ||
// Lets make sure we are rate limited on how many blocks to prepare | ||
if (multiscan_opts->prefetch_rate_limiter) { | ||
auto blocks = multiscan_opts->GetMutablePrefetchRateLimiter().acquire( | ||
table_, index_iter_->value().handle.size(), true); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Write the last parameter as `/*all_or_nothing=*/true`. |
||
total_acquired_ += blocks; | ||
if (blocks == 0) { | ||
break; | ||
} | ||
} | ||
|
||
if (check_overlap && | ||
blocks_to_prepare.back() == index_iter_->value().handle) { | ||
// Skip the current block since it's already in the list | ||
|
@@ -1162,6 +1223,10 @@ void BlockBasedTableIterator::Prepare(const MultiScanArgs* multiscan_opts) { | |
} | ||
} | ||
|
||
if (read_reqs.size() == 0) { | ||
return; | ||
} | ||
|
||
AlignedBuf aligned_buf; | ||
s = table_->get_rep()->file.get()->MultiRead( | ||
io_opts, read_reqs.data(), read_reqs.size(), | ||
|
@@ -1345,6 +1410,14 @@ void BlockBasedTableIterator::FindBlockForwardInMultiScan() { | |
} | ||
// Move to the next pinned data block | ||
ResetDataIter(); | ||
if (multi_scan_->prefetch_rate_limiter) { | ||
size_t releasing = | ||
multi_scan_->pinned_data_blocks[multi_scan_->cur_data_block_idx] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. pinned_data_blocks[multi_scan_->cur_data_block_idx] would not be valid at this point I think, since it would've been transferred to the data block iter. Or am I missing something? |
||
.GetValue() | ||
->size(); | ||
multi_scan_->prefetch_rate_limiter->release(releasing); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ideally we'd do this in a cleanup function registered with block_iter_ (which is derived from Cleanable) so that the release happens whenever block_iter_ is reset. |
||
total_acquired_ -= releasing; | ||
} | ||
++multi_scan_->cur_data_block_idx; | ||
table_->NewDataBlockIterator<DataBlockIter>( | ||
read_options_, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This need not be declared in a public header file. RocksDB typically just exposes an allocator, like NewDefaultPrefetchRateLimiter().