Skip to content

Commit f96ac18

Browse files
authored
[Improvement](segment iterator) Optimize column row reservation to reduce overhead #42060 (#42372)
cherry pick from #42060
1 parent b39e2e1 commit f96ac18

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

be/src/olap/rowset/segment_v2/segment_iterator.cpp

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1987,6 +1987,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
19871987
if (UNLIKELY(!_lazy_inited)) {
19881988
RETURN_IF_ERROR(_lazy_init());
19891989
_lazy_inited = true;
1990+
// If the row bitmap size is smaller than block_row_max, there's no need to reserve that many column rows.
1991+
auto nrows_reserve_limit =
1992+
std::min(_row_bitmap.cardinality(), uint64_t(_opts.block_row_max));
19901993
if (_lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval) {
19911994
_block_rowids.resize(_opts.block_row_max);
19921995
}
@@ -2011,7 +2014,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
20112014
storage_column_type->is_nullable(), _opts.io_ctx.reader_type));
20122015
_current_return_columns[cid]->set_rowset_segment_id(
20132016
{_segment->rowset_id(), _segment->id()});
2014-
_current_return_columns[cid]->reserve(_opts.block_row_max);
2017+
_current_return_columns[cid]->reserve(nrows_reserve_limit);
20152018
} else if (i >= block->columns()) {
20162019
// If i >= block->columns() and the column is not a pred_column, then `column i` is
20172020
// a delete condition column that is not effective in this segment, so we just
@@ -2022,7 +2025,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
20222025
// TODO: skip read the not effective delete column to speed up segment read.
20232026
_current_return_columns[cid] =
20242027
Schema::get_data_type_ptr(*column_desc)->create_column();
2025-
_current_return_columns[cid]->reserve(_opts.block_row_max);
2028+
_current_return_columns[cid]->reserve(nrows_reserve_limit);
20262029
}
20272030
}
20282031

@@ -2047,7 +2050,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
20472050
if (_can_opt_topn_reads()) {
20482051
nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit);
20492052
}
2050-
2053+
// If the row bitmap's cardinality is smaller than nrows_read_limit, cap the read limit at that cardinality.
2054+
nrows_read_limit = std::min(_row_bitmap.cardinality(), uint64_t(nrows_read_limit));
20512055
DBUG_EXECUTE_IF("segment_iterator.topn_opt_1", {
20522056
if (nrows_read_limit != 1) {
20532057
return Status::Error<ErrorCode::INTERNAL_ERROR>("topn opt 1 execute failed: {}",

0 commit comments

Comments
 (0)