23
23
24
24
#include < cstdint>
25
25
#include < memory>
26
- #include < sstream >
26
+ #include < string >
27
27
28
28
#include " common/logging.h"
29
29
#include " common/status.h"
@@ -116,6 +116,10 @@ Status VRowDistribution::automatic_create_partition() {
116
116
if (result.status .status_code == TStatusCode::OK) {
117
117
// add new created partitions
118
118
RETURN_IF_ERROR (_vpartition->add_partitions (result.partitions ));
119
+ for (const auto & part : result.partitions ) {
120
+ _new_partition_ids.insert (part.id );
121
+ VLOG_TRACE << " record new id: " << part.id ;
122
+ }
119
123
RETURN_IF_ERROR (_create_partition_callback (_caller, &result));
120
124
}
121
125
@@ -134,7 +138,7 @@ static TCreatePartitionResult cast_as_create_result(TReplacePartitionResult& arg
134
138
135
139
// use _partitions and replace them
136
140
Status VRowDistribution::_replace_overwriting_partition () {
137
- SCOPED_TIMER (_add_partition_request_timer);
141
+ SCOPED_TIMER (_add_partition_request_timer); // also for replace_partition
138
142
TReplacePartitionRequest request;
139
143
TReplacePartitionResult result;
140
144
request.__set_overwrite_group_id (_vpartition->get_overwrite_group_id ());
@@ -144,16 +148,20 @@ Status VRowDistribution::_replace_overwriting_partition() {
144
148
// only request for partitions not recorded for replacement
145
149
std::set<int64_t > id_deduper;
146
150
for (const auto * part : _partitions) {
147
- if (part == nullptr ) [[unlikely]] {
148
- return Status::InternalError (
149
- " Cannot found origin partitions in auto detect overwriting, stop processing" );
150
- }
151
- if (_new_partition_ids.contains (part->id )) {
152
- // this is a new partition. dont replace again.
153
- } else {
154
- // request for replacement
155
- id_deduper.insert (part->id );
156
- }
151
+ if (part != nullptr ) {
152
+ if (_new_partition_ids.contains (part->id )) {
153
+ // this is a new partition. dont replace again.
154
+ VLOG_TRACE << " skip new partition: " << part->id ;
155
+ } else {
156
+ // request for replacement
157
+ id_deduper.insert (part->id );
158
+ }
159
+ } else if (_missing_map.empty ()) {
160
+ // no origin partition. and not allow to create.
161
+ return Status::InvalidArgument (
162
+ " Cannot found origin partitions in auto detect overwriting, stop "
163
+ " processing" );
164
+ } // else: part is null and _missing_map is not empty. dealed outside using auto-partition way. nothing to do here.
157
165
}
158
166
if (id_deduper.empty ()) {
159
167
return Status::OK (); // no need to request
@@ -182,6 +190,7 @@ Status VRowDistribution::_replace_overwriting_partition() {
182
190
// record new partitions
183
191
for (const auto & part : result.partitions ) {
184
192
_new_partition_ids.insert (part.id );
193
+ VLOG_TRACE << " record new id: " << part.id ;
185
194
}
186
195
// replace data in _partitions
187
196
RETURN_IF_ERROR (_vpartition->replace_partitions (request_part_ids, result.partitions ));
@@ -304,6 +313,52 @@ Status VRowDistribution::_generate_rows_distribution_for_non_auto_partition(
304
313
return Status::OK ();
305
314
}
306
315
316
// Handles the rows listed in _missing_map (rows for which no partition was found).
//
// Steps, as visible in this function:
//   1. For every partition expression, take the matching column of `block`
//      (located through partition_cols_idx), remember its null map (or nullptr
//      when the column is not nullable) and stringify the value of every missing row.
//   2. Hand the strings, the block, _missing_map and the null maps to
//      _save_missing_values().
//   3. Re-read the size of _batching_block and adjust rows_stat_val, the
//      runtime-state load totals and the DorisMetrics load counters by the change
//      relative to the cached _batching_rows / _batching_bytes, then refresh the cache.
//
// @param block              input block holding the partition columns.
// @param partition_cols_idx positions in `block` of the partition columns; indexed
//                           in step with the partition expressions.
// @param rows_stat_val      in-out row counter; decreased by the growth of _batching_block.
// @return Status::OK() on success. NOTE(review): RETURN_IF_ERROR presumably returns
//         the failing Status of _save_missing_values early — macro is defined elsewhere.
+ Status VRowDistribution::_deal_missing_map (vectorized::Block* block,
317
+ const std::vector<uint16_t >& partition_cols_idx,
318
+ int64_t & rows_stat_val) {
319
+ // For the rows in _missing_map, collect their partition-key values so the missing partitions can be created later.
320
+ auto [part_ctxs, part_exprs] = _get_partition_function ();
321
+ auto part_col_num = part_exprs.size ();
322
+ // Both vectors are column-major: one entry per partition column.
323
+ std::vector<std::vector<std::string>> col_strs;
324
+ std::vector<const NullMap*> col_null_maps;
325
+ col_strs.resize (part_col_num); // resized because it is indexed by column below
326
+ col_null_maps.reserve (part_col_num); // reserved only; filled by push_back below
327
+
328
+ for (int i = 0 ; i < part_col_num; ++i) {
329
+ auto return_type = part_exprs[i]->data_type ();
330
+ // Unwrap a possibly-const column to reach the data column; the result may be nullable.
331
+ const auto & [range_left_col, col_const] =
332
+ unpack_if_const (block->get_by_position (partition_cols_idx[i]).column );
333
+ if (range_left_col->is_nullable ()) {
334
+ col_null_maps.push_back (&(
335
+ assert_cast<const ColumnNullable*>(range_left_col.get ())->get_null_map_data ()));
336
+ } else {
337
+ col_null_maps.push_back (nullptr ); // nullptr marks a column without a null map
338
+ }
339
+ for (auto row : _missing_map) {
340
+ col_strs[i].push_back ( // string form of this column's value at the missing row
341
+ return_type->to_string (*range_left_col, index_check_const (row, col_const)));
342
+ }
343
+ }
344
+
345
+ // Save the collected values. Per the original note, partitions for them are created and handled at the end of sending.
346
+ RETURN_IF_ERROR (
347
+ _save_missing_values (col_strs, part_col_num, block, _missing_map, col_null_maps));
348
+
349
+ size_t new_bt_rows = _batching_block->rows ();
350
+ size_t new_bt_bytes = _batching_block->bytes ();
351
+ rows_stat_val -= new_bt_rows - _batching_rows; // subtract the change in _batching_block row count
352
+ _state->update_num_rows_load_total (_batching_rows - new_bt_rows); // argument is old minus new
353
+ _state->update_num_bytes_load_total (_batching_bytes - new_bt_bytes);
354
+ DorisMetrics::instance ()->load_rows ->increment (_batching_rows - new_bt_rows);
355
+ DorisMetrics::instance ()->load_bytes ->increment (_batching_bytes - new_bt_bytes);
356
+ _batching_rows = new_bt_rows; // refresh cached sizes for the next call
357
+ _batching_bytes = new_bt_bytes;
358
+
359
+ return Status::OK ();
360
+ }
361
+
307
362
Status VRowDistribution::_generate_rows_distribution_for_auto_partition (
308
363
vectorized::Block* block, const std::vector<uint16_t >& partition_cols_idx,
309
364
bool has_filtered_rows, std::vector<RowPartTabletIds>& row_part_tablet_ids,
@@ -329,63 +384,64 @@ Status VRowDistribution::_generate_rows_distribution_for_auto_partition(
329
384
RETURN_IF_ERROR (_filter_block (block, row_part_tablet_ids));
330
385
331
386
if (!_missing_map.empty ()) {
332
- // for missing partition keys, calc the missing partition and save in _partitions_need_create
333
- auto [part_ctxs, part_exprs] = _get_partition_function ();
334
- auto part_col_num = part_exprs.size ();
335
- // the two vectors are in column-first-order
336
- std::vector<std::vector<std::string>> col_strs;
337
- std::vector<const NullMap*> col_null_maps;
338
- col_strs.resize (part_col_num);
339
- col_null_maps.reserve (part_col_num);
340
-
341
- for (int i = 0 ; i < part_col_num; ++i) {
342
- auto return_type = part_exprs[i]->data_type ();
343
- // expose the data column. the return type would be nullable
344
- const auto & [range_left_col, col_const] =
345
- unpack_if_const (block->get_by_position (partition_cols_idx[i]).column );
346
- if (range_left_col->is_nullable ()) {
347
- col_null_maps.push_back (&(assert_cast<const ColumnNullable*>(range_left_col.get ())
348
- ->get_null_map_data ()));
349
- } else {
350
- col_null_maps.push_back (nullptr );
351
- }
352
- for (auto row : _missing_map) {
353
- col_strs[i].push_back (
354
- return_type->to_string (*range_left_col, index_check_const (row, col_const)));
355
- }
356
- }
357
-
358
- // calc the end value and save them. in the end of sending, we will create partitions for them and deal them.
359
- RETURN_IF_ERROR (
360
- _save_missing_values (col_strs, part_col_num, block, _missing_map, col_null_maps));
361
-
362
- size_t new_bt_rows = _batching_block->rows ();
363
- size_t new_bt_bytes = _batching_block->bytes ();
364
- rows_stat_val -= new_bt_rows - _batching_rows;
365
- _state->update_num_rows_load_total (_batching_rows - new_bt_rows);
366
- _state->update_num_bytes_load_total (_batching_bytes - new_bt_bytes);
367
- DorisMetrics::instance ()->load_rows ->increment (_batching_rows - new_bt_rows);
368
- DorisMetrics::instance ()->load_bytes ->increment (_batching_bytes - new_bt_bytes);
369
- _batching_rows = new_bt_rows;
370
- _batching_bytes = new_bt_bytes;
387
+ RETURN_IF_ERROR (_deal_missing_map (block, partition_cols_idx, rows_stat_val));
371
388
}
372
389
return Status::OK ();
373
390
}
374
391
375
392
Status VRowDistribution::_generate_rows_distribution_for_auto_overwrite (
376
- vectorized::Block* block, bool has_filtered_rows,
377
- std::vector<RowPartTabletIds>& row_part_tablet_ids) {
393
+ vectorized::Block* block, const std::vector<uint16_t >& partition_cols_idx,
394
+ bool has_filtered_rows, std::vector<RowPartTabletIds>& row_part_tablet_ids,
395
+ int64_t & rows_stat_val) {
378
396
auto num_rows = block->rows ();
379
397
398
+ // for non-auto-partition situation, goes into two 'else' branch. just find the origin partitions, replace them by rpc,
399
+ // and find the new partitions to use.
400
+ // for auto-partition's, find and save origins in _partitions and replace them. at meanwhile save the missing values for auto
401
+ // partition. then we find partition again to get replaced partitions in _partitions. this time _missing_map is ignored cuz
402
+ // we already saved missing values.
380
403
bool stop_processing = false ;
381
- RETURN_IF_ERROR (_tablet_finder->find_tablets (_state, block, num_rows, _partitions,
382
- _tablet_indexes, stop_processing, _skip));
404
+ if (_vpartition->is_auto_partition () &&
405
+ _state->query_options ().enable_auto_create_when_overwrite ) {
406
+ // allow auto create partition for missing rows.
407
+ std::vector<uint16_t > partition_keys = _vpartition->get_partition_keys ();
408
+ auto partition_col = block->get_by_position (partition_keys[0 ]);
409
+ _missing_map.clear ();
410
+ _missing_map.reserve (partition_col.column ->size ());
411
+
412
+ RETURN_IF_ERROR (_tablet_finder->find_tablets (_state, block, num_rows, _partitions,
413
+ _tablet_indexes, stop_processing, _skip,
414
+ &_missing_map));
415
+
416
+ // allow and really need to create during auto-detect-overwriting.
417
+ if (!_missing_map.empty ()) {
418
+ RETURN_IF_ERROR (_deal_missing_map (block, partition_cols_idx, rows_stat_val));
419
+ }
420
+ } else {
421
+ RETURN_IF_ERROR (_tablet_finder->find_tablets (_state, block, num_rows, _partitions,
422
+ _tablet_indexes, stop_processing, _skip));
423
+ }
383
424
RETURN_IF_ERROR (_replace_overwriting_partition ());
384
425
385
426
// regenerate locations for new partitions & tablets
386
427
_reset_find_tablets (num_rows);
387
- RETURN_IF_ERROR (_tablet_finder->find_tablets (_state, block, num_rows, _partitions,
388
- _tablet_indexes, stop_processing, _skip));
428
+ if (_vpartition->is_auto_partition () &&
429
+ _state->query_options ().enable_auto_create_when_overwrite ) {
430
+ // here _missing_map is just a placeholder
431
+ RETURN_IF_ERROR (_tablet_finder->find_tablets (_state, block, num_rows, _partitions,
432
+ _tablet_indexes, stop_processing, _skip,
433
+ &_missing_map));
434
+ if (VLOG_TRACE_IS_ON) {
435
+ std::string tmp;
436
+ for (auto v : _missing_map) {
437
+ tmp += std::to_string (v).append (" , " );
438
+ }
439
+ VLOG_TRACE << " Trace missing map of " << this << ' :' << tmp;
440
+ }
441
+ } else {
442
+ RETURN_IF_ERROR (_tablet_finder->find_tablets (_state, block, num_rows, _partitions,
443
+ _tablet_indexes, stop_processing, _skip));
444
+ }
389
445
if (has_filtered_rows) {
390
446
for (int i = 0 ; i < num_rows; i++) {
391
447
_skip[i] = _skip[i] || _block_convertor->filter_map ()[i];
@@ -456,10 +512,11 @@ Status VRowDistribution::generate_rows_distribution(
456
512
}
457
513
458
514
Status st = Status::OK ();
459
- if (_vpartition->is_auto_detect_overwrite ()) {
515
+ if (_vpartition->is_auto_detect_overwrite () && !_deal_batched ) {
460
516
// when overwrite, no auto create partition allowed.
461
- st = _generate_rows_distribution_for_auto_overwrite (block.get (), has_filtered_rows,
462
- row_part_tablet_ids);
517
+ st = _generate_rows_distribution_for_auto_overwrite (block.get (), partition_cols_idx,
518
+ has_filtered_rows, row_part_tablet_ids,
519
+ rows_stat_val);
463
520
} else if (_vpartition->is_auto_partition () && !_deal_batched) {
464
521
st = _generate_rows_distribution_for_auto_partition (block.get (), partition_cols_idx,
465
522
has_filtered_rows, row_part_tablet_ids,
0 commit comments