Skip to content

Commit 27e0051

Browse files
authored
[fix](variant) fix index in variant (#43375) (#43773)
pick from master (#43375)
1 parent daf4f0a commit 27e0051

File tree

10 files changed

+348
-58
lines changed

10 files changed

+348
-58
lines changed

be/src/olap/rowset/segment_v2/segment_iterator.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,13 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
585585
pre_size = condition_row_ranges->count();
586586
RowRanges::ranges_intersection(*condition_row_ranges, bf_row_ranges, condition_row_ranges);
587587
_opts.stats->rows_bf_filtered += (pre_size - condition_row_ranges->count());
588+
589+
DBUG_EXECUTE_IF("bloom_filter_must_filter_data", {
590+
if (pre_size - condition_row_ranges->count() == 0) {
591+
return Status::Error<ErrorCode::INTERNAL_ERROR>(
592+
"Bloom filter did not filter the data.");
593+
}
594+
})
588595
}
589596

590597
{

be/src/olap/rowset/segment_v2/segment_writer.cpp

Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -219,12 +219,6 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co
219219
skip_inverted_index = true;
220220
}
221221

222-
if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) {
223-
opts.need_zone_map = false;
224-
opts.need_bloom_filter = false;
225-
opts.need_bitmap_index = false;
226-
}
227-
228222
// indexes for this column
229223
if (const auto& index = schema->inverted_index(column);
230224
index != nullptr && !skip_inverted_index) {
@@ -234,27 +228,24 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co
234228
opts.inverted_index_file_writer = _inverted_index_file_writer;
235229
// TODO support multiple inverted index
236230
}
237-
#define CHECK_FIELD_TYPE(TYPE, type_name) \
238-
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \
239-
opts.need_zone_map = false; \
240-
if (opts.need_bloom_filter) { \
241-
return Status::NotSupported("Do not support bloom filter for " type_name " type"); \
242-
} \
243-
if (opts.need_bitmap_index) { \
244-
return Status::NotSupported("Do not support bitmap index for " type_name " type"); \
245-
} \
246-
}
247-
248-
CHECK_FIELD_TYPE(STRUCT, "struct")
249-
CHECK_FIELD_TYPE(ARRAY, "array")
250-
CHECK_FIELD_TYPE(JSONB, "jsonb")
251-
CHECK_FIELD_TYPE(AGG_STATE, "agg_state")
252-
CHECK_FIELD_TYPE(MAP, "map")
253-
CHECK_FIELD_TYPE(OBJECT, "object")
254-
CHECK_FIELD_TYPE(HLL, "hll")
255-
CHECK_FIELD_TYPE(QUANTILE_STATE, "quantile_state")
256-
257-
#undef CHECK_FIELD_TYPE
231+
#define DISABLE_INDEX_IF_FIELD_TYPE(TYPE, type_name) \
232+
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \
233+
opts.need_zone_map = false; \
234+
opts.need_bloom_filter = false; \
235+
opts.need_bitmap_index = false; \
236+
}
237+
238+
DISABLE_INDEX_IF_FIELD_TYPE(STRUCT, "struct")
239+
DISABLE_INDEX_IF_FIELD_TYPE(ARRAY, "array")
240+
DISABLE_INDEX_IF_FIELD_TYPE(JSONB, "jsonb")
241+
DISABLE_INDEX_IF_FIELD_TYPE(AGG_STATE, "agg_state")
242+
DISABLE_INDEX_IF_FIELD_TYPE(MAP, "map")
243+
DISABLE_INDEX_IF_FIELD_TYPE(OBJECT, "object")
244+
DISABLE_INDEX_IF_FIELD_TYPE(HLL, "hll")
245+
DISABLE_INDEX_IF_FIELD_TYPE(QUANTILE_STATE, "quantile_state")
246+
DISABLE_INDEX_IF_FIELD_TYPE(VARIANT, "variant")
247+
248+
#undef DISABLE_INDEX_IF_FIELD_TYPE
258249

259250
if (column.is_row_store_column()) {
260251
// smaller page size for row store column

be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -210,12 +210,6 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
210210
tablet_schema->skip_write_index_on_load()) {
211211
skip_inverted_index = true;
212212
}
213-
214-
if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) {
215-
opts.need_zone_map = false;
216-
opts.need_bloom_filter = false;
217-
opts.need_bitmap_index = false;
218-
}
219213
if (const auto& index = tablet_schema->inverted_index(column);
220214
index != nullptr && !skip_inverted_index) {
221215
opts.inverted_index = index;
@@ -225,25 +219,24 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
225219
// TODO support multiple inverted index
226220
}
227221

228-
#define CHECK_FIELD_TYPE(TYPE, type_name) \
229-
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \
230-
opts.need_zone_map = false; \
231-
if (opts.need_bloom_filter) { \
232-
return Status::NotSupported("Do not support bloom filter for " type_name " type"); \
233-
} \
234-
if (opts.need_bitmap_index) { \
235-
return Status::NotSupported("Do not support bitmap index for " type_name " type"); \
236-
} \
222+
#define DISABLE_INDEX_IF_FIELD_TYPE(TYPE, type_name) \
223+
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \
224+
opts.need_zone_map = false; \
225+
opts.need_bloom_filter = false; \
226+
opts.need_bitmap_index = false; \
237227
}
238228

239-
CHECK_FIELD_TYPE(STRUCT, "struct")
240-
CHECK_FIELD_TYPE(ARRAY, "array")
241-
CHECK_FIELD_TYPE(JSONB, "jsonb")
242-
CHECK_FIELD_TYPE(AGG_STATE, "agg_state")
243-
CHECK_FIELD_TYPE(MAP, "map")
244-
CHECK_FIELD_TYPE(OBJECT, "object")
245-
CHECK_FIELD_TYPE(HLL, "hll")
246-
CHECK_FIELD_TYPE(QUANTILE_STATE, "quantile_state")
229+
DISABLE_INDEX_IF_FIELD_TYPE(STRUCT, "struct")
230+
DISABLE_INDEX_IF_FIELD_TYPE(ARRAY, "array")
231+
DISABLE_INDEX_IF_FIELD_TYPE(JSONB, "jsonb")
232+
DISABLE_INDEX_IF_FIELD_TYPE(AGG_STATE, "agg_state")
233+
DISABLE_INDEX_IF_FIELD_TYPE(MAP, "map")
234+
DISABLE_INDEX_IF_FIELD_TYPE(OBJECT, "object")
235+
DISABLE_INDEX_IF_FIELD_TYPE(HLL, "hll")
236+
DISABLE_INDEX_IF_FIELD_TYPE(QUANTILE_STATE, "quantile_state")
237+
DISABLE_INDEX_IF_FIELD_TYPE(VARIANT, "variant")
238+
239+
#undef DISABLE_INDEX_IF_FIELD_TYPE
247240

248241
#undef CHECK_FIELD_TYPE
249242

be/src/olap/tablet_schema.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -909,13 +909,16 @@ void TabletSchema::append_index(TabletIndex&& index) {
909909
_indexes.push_back(std::move(index));
910910
}
911911

912-
void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) {
913-
int32_t col_unique_id = col.unique_id();
912+
void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type,
913+
TabletIndex&& index) {
914+
int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
914915
const std::string& suffix_path = escape_for_path_name(col.suffix_path());
915916
for (size_t i = 0; i < _indexes.size(); i++) {
916917
for (int32_t id : _indexes[i].col_unique_ids()) {
917-
if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) {
918-
_indexes[i] = index;
918+
if (_indexes[i].index_type() == index_type && id == col_unique_id &&
919+
_indexes[i].get_index_suffix() == suffix_path) {
920+
_indexes[i] = std::move(index);
921+
break;
919922
}
920923
}
921924
}
@@ -1423,7 +1426,6 @@ const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const
14231426
}
14241427
}
14251428
}
1426-
14271429
return nullptr;
14281430
}
14291431

be/src/olap/tablet_schema.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ class TabletSchema {
322322
void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const;
323323
void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL);
324324
void append_index(TabletIndex&& index);
325-
void update_index(const TabletColumn& column, TabletIndex index);
325+
void update_index(const TabletColumn& column, const IndexType& index_type, TabletIndex&& index);
326326
void remove_index(int64_t index_id);
327327
void clear_index();
328328
// Must make sure the row column is always the last column

be/src/vec/common/schema_util.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,14 +361,18 @@ void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
361361
void inherit_column_attributes(const TabletColumn& source, TabletColumn& target,
362362
TabletSchemaSPtr& target_schema) {
363363
DCHECK(target.is_extracted_column());
364+
target.set_aggregation_method(source.aggregation());
365+
366+
// 1. bloom filter
364367
if (target.type() != FieldType::OLAP_FIELD_TYPE_TINYINT &&
365368
target.type() != FieldType::OLAP_FIELD_TYPE_ARRAY &&
366369
target.type() != FieldType::OLAP_FIELD_TYPE_DOUBLE &&
367370
target.type() != FieldType::OLAP_FIELD_TYPE_FLOAT) {
368371
// above types are not supported in bf
369372
target.set_is_bf_column(source.is_bf_column());
370373
}
371-
target.set_aggregation_method(source.aggregation());
374+
375+
// 2. inverted index
372376
const auto* source_index_meta = target_schema->inverted_index(source.unique_id());
373377
if (source_index_meta != nullptr) {
374378
// add index meta
@@ -378,11 +382,13 @@ void inherit_column_attributes(const TabletColumn& source, TabletColumn& target,
378382
target.parent_unique_id(), target.path_info_ptr()->get_path());
379383
if (target_index_meta != nullptr) {
380384
// already exist
381-
target_schema->update_index(target, index_info);
385+
target_schema->update_index(target, IndexType::INVERTED, std::move(index_info));
382386
} else {
383387
target_schema->append_index(std::move(index_info));
384388
}
385389
}
390+
391+
// 3. TODO: ngram bf index
386392
}
387393

388394
void inherit_column_attributes(TabletSchemaSPtr& schema) {

be/src/vec/common/schema_util.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
109109
// inherit attributes like index/agg info from it's parent column
110110
void inherit_column_attributes(TabletSchemaSPtr& schema);
111111

112+
// source: variant column
113+
// target: extracted column from variant column
112114
void inherit_column_attributes(const TabletColumn& source, TabletColumn& target,
113115
TabletSchemaSPtr& target_schema);
114116

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "vec/common/schema_util.h"
19+
20+
#include <gtest/gtest.h>
21+
22+
namespace doris {
23+
24+
class SchemaUtilTest : public testing::Test {};
25+
26+
void construct_column(ColumnPB* column_pb, TabletIndexPB* tablet_index, int64_t index_id,
27+
const std::string& index_name, int32_t col_unique_id,
28+
const std::string& column_type, const std::string& column_name,
29+
const IndexType& index_type) {
30+
column_pb->set_unique_id(col_unique_id);
31+
column_pb->set_name(column_name);
32+
column_pb->set_type(column_type);
33+
column_pb->set_is_nullable(true);
34+
column_pb->set_is_bf_column(true);
35+
tablet_index->set_index_id(index_id);
36+
tablet_index->set_index_name(index_name);
37+
tablet_index->set_index_type(index_type);
38+
tablet_index->add_col_unique_id(col_unique_id);
39+
}
40+
41+
void construct_subcolumn(TabletSchemaSPtr schema, const FieldType& type, int32_t col_unique_id,
42+
std::string_view path, std::vector<TabletColumn>* subcolumns) {
43+
TabletColumn subcol;
44+
subcol.set_type(type);
45+
subcol.set_is_nullable(true);
46+
subcol.set_unique_id(-1);
47+
subcol.set_parent_unique_id(col_unique_id);
48+
vectorized::PathInData col_path(path);
49+
subcol.set_path_info(col_path);
50+
subcol.set_name(col_path.get_path());
51+
schema->append_column(subcol);
52+
subcolumns->emplace_back(std::move(subcol));
53+
}
54+
55+
TEST_F(SchemaUtilTest, inherit_column_attributes) {
56+
TabletSchemaPB schema_pb;
57+
schema_pb.set_keys_type(KeysType::DUP_KEYS);
58+
schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
59+
60+
construct_column(schema_pb.add_column(), schema_pb.add_index(), 10000, "key_index", 0, "INT",
61+
"key", IndexType::INVERTED);
62+
construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001, "v1_index", 1, "VARIANT",
63+
"v1", IndexType::INVERTED);
64+
construct_column(schema_pb.add_column(), schema_pb.add_index(), 10003, "v3_index", 3, "VARIANT",
65+
"v3", IndexType::INVERTED);
66+
67+
TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
68+
tablet_schema->init_from_pb(schema_pb);
69+
std::vector<TabletColumn> subcolumns;
70+
71+
construct_subcolumn(tablet_schema, FieldType::OLAP_FIELD_TYPE_STRING, 1, "v1.b", &subcolumns);
72+
construct_subcolumn(tablet_schema, FieldType::OLAP_FIELD_TYPE_INT, 1, "v1.c", &subcolumns);
73+
74+
construct_subcolumn(tablet_schema, FieldType::OLAP_FIELD_TYPE_ARRAY, 3, "v3.d", &subcolumns);
75+
construct_subcolumn(tablet_schema, FieldType::OLAP_FIELD_TYPE_FLOAT, 3, "v3.a", &subcolumns);
76+
77+
vectorized::schema_util::inherit_column_attributes(tablet_schema);
78+
for (const auto& col : subcolumns) {
79+
switch (col._parent_col_unique_id) {
80+
case 1:
81+
EXPECT_TRUE(tablet_schema->inverted_index(col) != nullptr);
82+
break;
83+
case 3:
84+
EXPECT_TRUE(tablet_schema->inverted_index(col) == nullptr);
85+
break;
86+
default:
87+
EXPECT_TRUE(false);
88+
}
89+
}
90+
EXPECT_EQ(tablet_schema->inverted_indexes().size(), 7);
91+
92+
for (const auto& col : tablet_schema->_cols) {
93+
if (!col->is_extracted_column()) {
94+
continue;
95+
}
96+
switch (col->_parent_col_unique_id) {
97+
case 1:
98+
EXPECT_TRUE(col->is_bf_column());
99+
break;
100+
case 3:
101+
EXPECT_TRUE(!col->is_bf_column());
102+
break;
103+
default:
104+
EXPECT_TRUE(false);
105+
}
106+
}
107+
}
108+
109+
} // namespace doris
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql1 --
3+
20291263
4+
20291263
5+
20291263
6+
20291263
7+
20291263
8+
20291263
9+
20291263
10+
20291263
11+
20291263
12+
20291263
13+
20291263
14+
20291263
15+
20291263
16+
20291263
17+
20291263
18+
20291263
19+
20291263
20+
20291263
21+
20291263
22+
20291263
23+
20291263
24+
20291263
25+
20291263
26+
20291263
27+
20291263
28+
20291263
29+
20291263
30+
20291263
31+
20291263
32+
20291263
33+
34+
-- !sql2 --
35+
ridget/dotfiles
36+
ridget/dotfiles
37+
ridget/dotfiles
38+
ridget/dotfiles
39+
ridget/dotfiles
40+

0 commit comments

Comments
 (0)