-
Notifications
You must be signed in to change notification settings - Fork 3.6k
[Improvement](sort) Free sort blocks if this block is exhausted #39306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
00fa574
0d45355
4ecf059
214f478
21602d5
4839df8
4201866
e325d68
68508a6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,20 +58,17 @@ Status PartitionSorter::append_block(Block* input_block) { | |
Block sorted_block = VectorizedUtils::create_empty_columnswithtypename(_row_desc); | ||
DCHECK(input_block->columns() == sorted_block.columns()); | ||
RETURN_IF_ERROR(partial_sort(*input_block, sorted_block)); | ||
RETURN_IF_ERROR(_state->add_sorted_block(sorted_block)); | ||
_state->add_sorted_block(Block::create_shared(std::move(sorted_block))); | ||
return Status::OK(); | ||
} | ||
|
||
Status PartitionSorter::prepare_for_read() { | ||
auto& cursors = _state->get_cursors(); | ||
auto& blocks = _state->get_sorted_block(); | ||
auto& priority_queue = _state->get_priority_queue(); | ||
for (auto& block : blocks) { | ||
cursors.emplace_back(block, _sort_description); | ||
} | ||
for (auto& cursor : cursors) { | ||
priority_queue.push(MergeSortCursor(&cursor)); | ||
priority_queue.push(MergeSortCursorImpl::create_shared(block, _sort_description)); | ||
} | ||
blocks.clear(); | ||
return Status::OK(); | ||
} | ||
|
||
|
@@ -84,29 +81,30 @@ void PartitionSorter::reset_sorter_state(RuntimeState* runtime_state) { | |
} | ||
|
||
Status PartitionSorter::get_next(RuntimeState* state, Block* block, bool* eos) { | ||
if (_state->get_sorted_block().empty()) { | ||
if (_state->get_priority_queue().empty()) { | ||
*eos = true; | ||
} else if (_state->get_priority_queue().size() == 1 && _has_global_limit) { | ||
block->swap(*_state->get_priority_queue().top().impl->block); | ||
block->set_num_rows(_partition_inner_limit); | ||
*eos = true; | ||
} else { | ||
if (_state->get_sorted_block().size() == 1 && _has_global_limit) { | ||
auto& sorted_block = _state->get_sorted_block()[0]; | ||
block->swap(sorted_block); | ||
block->set_num_rows(_partition_inner_limit); | ||
*eos = true; | ||
} else { | ||
RETURN_IF_ERROR(partition_sort_read(block, eos, state->batch_size())); | ||
} | ||
RETURN_IF_ERROR(partition_sort_read(block, eos, state->batch_size())); | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
Status PartitionSorter::partition_sort_read(Block* output_block, bool* eos, int batch_size) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. warning: function 'partition_sort_read' has cognitive complexity of 55 (threshold 50) [readability-function-cognitive-complexity] Status PartitionSorter::partition_sort_read(Block* output_block, bool* eos, int batch_size) {
^ Additional contextbe/src/vec/common/sort/partition_sorter.cpp:97: +1, including nesting penalty of 0, nesting level increased to 1 if (priority_queue.empty()) {
^ be/src/vec/common/sort/partition_sorter.cpp:109: +1, including nesting penalty of 0, nesting level increased to 1 while (!priority_queue.empty()) {
^ be/src/vec/common/sort/partition_sorter.cpp:112: +2, including nesting penalty of 1, nesting level increased to 2 if (UNLIKELY(_previous_row->impl == nullptr)) {
^ be/src/vec/common/sort/partition_sorter.cpp:116: +2, including nesting penalty of 1, nesting level increased to 2 switch (_top_n_algorithm) {
^ be/src/vec/common/sort/partition_sorter.cpp:119: +3, including nesting penalty of 2, nesting level increased to 3 if ((current_output_rows + _output_total_rows) < _partition_inner_limit) {
^ be/src/vec/common/sort/partition_sorter.cpp:120: +4, including nesting penalty of 3, nesting level increased to 4 for (size_t i = 0; i < num_columns; ++i) {
^ be/src/vec/common/sort/partition_sorter.cpp:123: +1, nesting level increased to 3 } else {
^ be/src/vec/common/sort/partition_sorter.cpp:135: +3, including nesting penalty of 2, nesting level increased to 3 if (_has_global_limit &&
^ be/src/vec/common/sort/partition_sorter.cpp:135: +1 if (_has_global_limit &&
^ be/src/vec/common/sort/partition_sorter.cpp:140: +3, including nesting penalty of 2, nesting level increased to 3 if (_has_global_limit) {
^ be/src/vec/common/sort/partition_sorter.cpp:142: +1, nesting level increased to 3 } else {
^ be/src/vec/common/sort/partition_sorter.cpp:145: +4, including nesting penalty of 3, nesting level increased to 4 if (cmp_res == false) {
^ be/src/vec/common/sort/partition_sorter.cpp:147: +5, including nesting penalty of 4, nesting level increased to 5 if (_output_distinct_rows >= _partition_inner_limit) {
^ be/src/vec/common/sort/partition_sorter.cpp:154: +3, including nesting penalty of 2, nesting level increased to 3 for (size_t i = 0; i < num_columns; ++i) {
^ be/src/vec/common/sort/partition_sorter.cpp:164: +3, including nesting penalty of 2, nesting level increased to 3 if (_has_global_limit &&
^ be/src/vec/common/sort/partition_sorter.cpp:164: +1 if (_has_global_limit &&
^ be/src/vec/common/sort/partition_sorter.cpp:171: +3, including nesting penalty of 2, nesting level increased to 3 if (cmp_res == false) {
^ be/src/vec/common/sort/partition_sorter.cpp:173: +4, including nesting penalty of 3, nesting level increased to 4 if ((current_output_rows + _output_total_rows) >= _partition_inner_limit) {
^ be/src/vec/common/sort/partition_sorter.cpp:179: +3, including nesting penalty of 2, nesting level increased to 3 for (size_t i = 0; i < num_columns; ++i) {
^ be/src/vec/common/sort/partition_sorter.cpp:189: +2, including nesting penalty of 1, nesting level increased to 2 if (!current->is_last()) {
^ be/src/vec/common/sort/partition_sorter.cpp:194: +2, including nesting penalty of 1, nesting level increased to 2 if (current_output_rows == batch_size || get_enough_data == true) {
^ be/src/vec/common/sort/partition_sorter.cpp:194: +1 if (current_output_rows == batch_size || get_enough_data == true) {
^ be/src/vec/common/sort/partition_sorter.cpp:200: +1, including nesting penalty of 0, nesting level increased to 1 if (current_output_rows == 0 || get_enough_data == true) {
^ be/src/vec/common/sort/partition_sorter.cpp:200: +1 if (current_output_rows == 0 || get_enough_data == true) {
^ |
||
const auto& sorted_block = _state->get_sorted_block()[0]; | ||
size_t num_columns = sorted_block.columns(); | ||
auto& priority_queue = _state->get_priority_queue(); | ||
if (priority_queue.empty()) { | ||
*eos = true; | ||
return Status::OK(); | ||
} | ||
const auto& sorted_block = priority_queue.top().impl->block; | ||
size_t num_columns = sorted_block->columns(); | ||
MutableBlock m_block = | ||
VectorizedUtils::build_mutable_mem_reuse_block(output_block, sorted_block); | ||
VectorizedUtils::build_mutable_mem_reuse_block(output_block, *sorted_block); | ||
MutableColumns& merged_columns = m_block.mutable_columns(); | ||
size_t current_output_rows = 0; | ||
auto& priority_queue = _state->get_priority_queue(); | ||
|
||
bool get_enough_data = false; | ||
while (!priority_queue.empty()) { | ||
|
@@ -121,7 +119,7 @@ Status PartitionSorter::partition_sort_read(Block* output_block, bool* eos, int | |
// 1. row_number: no need to check for distinct rows, just output partition_inner_limit rows | ||
if ((current_output_rows + _output_total_rows) < _partition_inner_limit) { | ||
for (size_t i = 0; i < num_columns; ++i) { | ||
merged_columns[i]->insert_from(*current->all_columns[i], current->pos); | ||
merged_columns[i]->insert_from(*current->block->get_columns()[i], current->pos); | ||
} | ||
} else { | ||
// enough rows have already been collected | ||
|
@@ -155,7 +153,7 @@ Status PartitionSorter::partition_sort_read(Block* output_block, bool* eos, int | |
} | ||
} | ||
for (size_t i = 0; i < num_columns; ++i) { | ||
merged_columns[i]->insert_from(*current->all_columns[i], current->pos); | ||
merged_columns[i]->insert_from(*current->block->get_columns()[i], current->pos); | ||
} | ||
break; | ||
} | ||
|
@@ -180,7 +178,7 @@ Status PartitionSorter::partition_sort_read(Block* output_block, bool* eos, int | |
*_previous_row = current; | ||
} | ||
for (size_t i = 0; i < num_columns; ++i) { | ||
merged_columns[i]->insert_from(*current->all_columns[i], current->pos); | ||
merged_columns[i]->insert_from(*current->block->get_columns()[i], current->pos); | ||
} | ||
current_output_rows++; | ||
break; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: function 'partition_sort_read' has cognitive complexity of 55 (threshold 50) [readability-function-cognitive-complexity]
Additional context
be/src/vec/common/sort/partition_sorter.cpp:102: +1, including nesting penalty of 0, nesting level increased to 1
if (priority_queue.empty()) { ^
be/src/vec/common/sort/partition_sorter.cpp:114: +1, including nesting penalty of 0, nesting level increased to 1
while (!priority_queue.empty()) { ^
be/src/vec/common/sort/partition_sorter.cpp:117: +2, including nesting penalty of 1, nesting level increased to 2
be/src/vec/common/sort/partition_sorter.cpp:121: +2, including nesting penalty of 1, nesting level increased to 2
switch (_top_n_algorithm) { ^
be/src/vec/common/sort/partition_sorter.cpp:124: +3, including nesting penalty of 2, nesting level increased to 3
if ((current_output_rows + _output_total_rows) < _partition_inner_limit) { ^
be/src/vec/common/sort/partition_sorter.cpp:125: +4, including nesting penalty of 3, nesting level increased to 4
be/src/vec/common/sort/partition_sorter.cpp:128: +1, nesting level increased to 3
} else { ^
be/src/vec/common/sort/partition_sorter.cpp:140: +3, including nesting penalty of 2, nesting level increased to 3
if (_has_global_limit && ^
be/src/vec/common/sort/partition_sorter.cpp:140: +1
if (_has_global_limit && ^
be/src/vec/common/sort/partition_sorter.cpp:145: +3, including nesting penalty of 2, nesting level increased to 3
if (_has_global_limit) { ^
be/src/vec/common/sort/partition_sorter.cpp:147: +1, nesting level increased to 3
} else { ^
be/src/vec/common/sort/partition_sorter.cpp:150: +4, including nesting penalty of 3, nesting level increased to 4
be/src/vec/common/sort/partition_sorter.cpp:152: +5, including nesting penalty of 4, nesting level increased to 5
if (_output_distinct_rows >= _partition_inner_limit) { ^
be/src/vec/common/sort/partition_sorter.cpp:159: +3, including nesting penalty of 2, nesting level increased to 3
be/src/vec/common/sort/partition_sorter.cpp:169: +3, including nesting penalty of 2, nesting level increased to 3
if (_has_global_limit && ^
be/src/vec/common/sort/partition_sorter.cpp:169: +1
if (_has_global_limit && ^
be/src/vec/common/sort/partition_sorter.cpp:176: +3, including nesting penalty of 2, nesting level increased to 3
be/src/vec/common/sort/partition_sorter.cpp:178: +4, including nesting penalty of 3, nesting level increased to 4
if ((current_output_rows + _output_total_rows) >= _partition_inner_limit) { ^
be/src/vec/common/sort/partition_sorter.cpp:184: +3, including nesting penalty of 2, nesting level increased to 3
be/src/vec/common/sort/partition_sorter.cpp:194: +2, including nesting penalty of 1, nesting level increased to 2
be/src/vec/common/sort/partition_sorter.cpp:199: +2, including nesting penalty of 1, nesting level increased to 2
be/src/vec/common/sort/partition_sorter.cpp:199: +1
be/src/vec/common/sort/partition_sorter.cpp:205: +1, including nesting penalty of 0, nesting level increased to 1
be/src/vec/common/sort/partition_sorter.cpp:205: +1