Skip to content

Commit cd6025e

Browse files
committed
add regression test
1 parent f41c21b commit cd6025e

File tree

5 files changed

+227
-46
lines changed

5 files changed

+227
-46
lines changed

be/src/olap/rowset/segment_v2/segment_iterator.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,13 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
585585
pre_size = condition_row_ranges->count();
586586
RowRanges::ranges_intersection(*condition_row_ranges, bf_row_ranges, condition_row_ranges);
587587
_opts.stats->rows_bf_filtered += (pre_size - condition_row_ranges->count());
588+
589+
DBUG_EXECUTE_IF("bloom_filter_must_filter_data", {
590+
if (pre_size - condition_row_ranges->count() == 0) {
591+
return Status::Error<ErrorCode::INTERNAL_ERROR>(
592+
"Bloom filter did not filter the data.");
593+
}
594+
})
588595
}
589596

590597
{

be/src/olap/rowset/segment_v2/segment_writer.cpp

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,8 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co
186186

187187
// Now we create a zone map for key columns in AGG_KEYS, or for all columns in UNIQUE_KEYS or DUP_KEYS,
188188
// except for columns whose type doesn't support zone maps.
189-
opts.need_zone_map = (column.is_key() || schema->keys_type() != KeysType::AGG_KEYS);
190-
opts.need_bloom_filter = column.is_bf_column() && !column.is_variant_type();
189+
opts.need_zone_map = column.is_key() || schema->keys_type() != KeysType::AGG_KEYS;
190+
opts.need_bloom_filter = column.is_bf_column();
191191
auto* tablet_index = schema->get_ngram_bf_index(column.unique_id());
192192
if (tablet_index) {
193193
opts.need_bloom_filter = true;
@@ -217,28 +217,24 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co
217217
opts.inverted_index_file_writer = _inverted_index_file_writer;
218218
// TODO support multiple inverted index
219219
}
220-
#define CHECK_FIELD_TYPE(TYPE, type_name) \
221-
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \
222-
opts.need_zone_map = false; \
223-
if (opts.need_bloom_filter) { \
224-
return Status::NotSupported("Do not support bloom filter for " type_name " type"); \
225-
} \
226-
if (opts.need_bitmap_index) { \
227-
return Status::NotSupported("Do not support bitmap index for " type_name " type"); \
228-
} \
229-
}
230-
231-
CHECK_FIELD_TYPE(STRUCT, "struct")
232-
CHECK_FIELD_TYPE(ARRAY, "array")
233-
CHECK_FIELD_TYPE(JSONB, "jsonb")
234-
CHECK_FIELD_TYPE(AGG_STATE, "agg_state")
235-
CHECK_FIELD_TYPE(MAP, "map")
236-
CHECK_FIELD_TYPE(OBJECT, "object")
237-
CHECK_FIELD_TYPE(HLL, "hll")
238-
CHECK_FIELD_TYPE(QUANTILE_STATE, "quantile_state")
239-
CHECK_FIELD_TYPE(VARIANT, "variant")
240-
241-
#undef CHECK_FIELD_TYPE
220+
#define DISABLE_INDEX_IF_FIELD_TYPE(TYPE, type_name) \
221+
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \
222+
opts.need_zone_map = false; \
223+
opts.need_bloom_filter = false; \
224+
opts.need_bitmap_index = false; \
225+
}
226+
227+
DISABLE_INDEX_IF_FIELD_TYPE(STRUCT, "struct")
228+
DISABLE_INDEX_IF_FIELD_TYPE(ARRAY, "array")
229+
DISABLE_INDEX_IF_FIELD_TYPE(JSONB, "jsonb")
230+
DISABLE_INDEX_IF_FIELD_TYPE(AGG_STATE, "agg_state")
231+
DISABLE_INDEX_IF_FIELD_TYPE(MAP, "map")
232+
DISABLE_INDEX_IF_FIELD_TYPE(OBJECT, "object")
233+
DISABLE_INDEX_IF_FIELD_TYPE(HLL, "hll")
234+
DISABLE_INDEX_IF_FIELD_TYPE(QUANTILE_STATE, "quantile_state")
235+
DISABLE_INDEX_IF_FIELD_TYPE(VARIANT, "variant")
236+
237+
#undef DISABLE_INDEX_IF_FIELD_TYPE
242238

243239
if (_opts.rowset_ctx != nullptr) {
244240
int64_t storage_page_size = _opts.rowset_ctx->storage_page_size;

be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,8 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
179179

180180
// Now we create a zone map for key columns in AGG_KEYS, or for all columns in UNIQUE_KEYS or DUP_KEYS,
181181
// except for columns whose type doesn't support zone maps.
182-
opts.need_zone_map = (column.is_key() || tablet_schema->keys_type() != KeysType::AGG_KEYS);
183-
opts.need_bloom_filter = column.is_bf_column() && !column.is_variant_type();
182+
opts.need_zone_map = column.is_key() || tablet_schema->keys_type() != KeysType::AGG_KEYS;
183+
opts.need_bloom_filter = column.is_bf_column();
184184
auto* tablet_index = tablet_schema->get_ngram_bf_index(column.unique_id());
185185
if (tablet_index) {
186186
opts.need_bloom_filter = true;
@@ -210,26 +210,24 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
210210
// TODO support multiple inverted index
211211
}
212212

213-
#define CHECK_FIELD_TYPE(TYPE, type_name) \
214-
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \
215-
opts.need_zone_map = false; \
216-
if (opts.need_bloom_filter) { \
217-
return Status::NotSupported("Do not support bloom filter for " type_name " type"); \
218-
} \
219-
if (opts.need_bitmap_index) { \
220-
return Status::NotSupported("Do not support bitmap index for " type_name " type"); \
221-
} \
222-
}
223-
224-
CHECK_FIELD_TYPE(STRUCT, "struct")
225-
CHECK_FIELD_TYPE(ARRAY, "array")
226-
CHECK_FIELD_TYPE(JSONB, "jsonb")
227-
CHECK_FIELD_TYPE(AGG_STATE, "agg_state")
228-
CHECK_FIELD_TYPE(MAP, "map")
229-
CHECK_FIELD_TYPE(OBJECT, "object")
230-
CHECK_FIELD_TYPE(HLL, "hll")
231-
CHECK_FIELD_TYPE(QUANTILE_STATE, "quantile_state")
232-
CHECK_FIELD_TYPE(VARIANT, "variant")
213+
#define DISABLE_INDEX_IF_FIELD_TYPE(TYPE, type_name) \
214+
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \
215+
opts.need_zone_map = false; \
216+
opts.need_bloom_filter = false; \
217+
opts.need_bitmap_index = false; \
218+
}
219+
220+
DISABLE_INDEX_IF_FIELD_TYPE(STRUCT, "struct")
221+
DISABLE_INDEX_IF_FIELD_TYPE(ARRAY, "array")
222+
DISABLE_INDEX_IF_FIELD_TYPE(JSONB, "jsonb")
223+
DISABLE_INDEX_IF_FIELD_TYPE(AGG_STATE, "agg_state")
224+
DISABLE_INDEX_IF_FIELD_TYPE(MAP, "map")
225+
DISABLE_INDEX_IF_FIELD_TYPE(OBJECT, "object")
226+
DISABLE_INDEX_IF_FIELD_TYPE(HLL, "hll")
227+
DISABLE_INDEX_IF_FIELD_TYPE(QUANTILE_STATE, "quantile_state")
228+
DISABLE_INDEX_IF_FIELD_TYPE(VARIANT, "variant")
229+
230+
#undef DISABLE_INDEX_IF_FIELD_TYPE
233231

234232
#undef CHECK_FIELD_TYPE
235233

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql1 --
3+
20291263
4+
20291263
5+
20291263
6+
20291263
7+
20291263
8+
20291263
9+
20291263
10+
20291263
11+
20291263
12+
20291263
13+
20291263
14+
20291263
15+
20291263
16+
20291263
17+
20291263
18+
20291263
19+
20291263
20+
20291263
21+
20291263
22+
20291263
23+
20291263
24+
20291263
25+
20291263
26+
20291263
27+
20291263
28+
20291263
29+
20291263
30+
20291263
31+
20291263
32+
20291263
33+
34+
-- !sql2 --
35+
ridget/dotfiles
36+
ridget/dotfiles
37+
ridget/dotfiles
38+
ridget/dotfiles
39+
ridget/dotfiles
40+
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
import java.util.concurrent.TimeUnit
19+
import org.awaitility.Awaitility
20+
21+
// Regression suite for a bloom filter declared on a VARIANT column.
// Flow: create a DUPLICATE KEY table with "bloom_filter_columns" = "v", stream-load the same
// JSON file five times, compare the reported segment counts before and after a full compaction,
// then run two equality queries on sub-fields of `v` while the BE debug point
// "bloom_filter_must_filter_data" is enabled.
// NOTE(review): the debug point presumably makes a query fail when the bloom filter prunes
// nothing -- confirm against the BE implementation.
suite("test_variant_bloom_filter", "nonConcurrent") {
22+
23+
def index_table = "test_variant_bloom_filter"
24+
25+
// Stream-loads `file_name` (line-delimited JSON) into `table_name` and asserts that the
// load reported success with at least one loaded row and a non-zero byte count.
def load_json_data = {table_name, file_name ->
26+
// load the JSON data
27+
streamLoad {
28+
table "${table_name}"
29+
30+
// set HTTP request header params
31+
set 'read_json_by_line', 'true'
32+
set 'format', 'json'
33+
set 'max_filter_ratio', '0.1'
34+
set 'memtable_on_sink_node', 'true'
35+
file file_name // JSON file to import
36+
time 10000 // limit inflight 10s
37+
38+
// If a check callback is declared, the default check conditions are ignored,
39+
// so every condition must be checked explicitly here.
40+
41+
check { result, exception, startTime, endTime ->
42+
if (exception != null) {
43+
throw exception
44+
}
45+
logger.info("Stream load ${file_name} result: ${result}".toString())
46+
def json = parseJson(result)
47+
assertEquals("success", json.Status.toLowerCase())
48+
// assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows)
49+
assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
50+
}
51+
}
52+
}
53+
54+
sql """DROP TABLE IF EXISTS ${index_table}"""
55+
sql """
56+
CREATE TABLE IF NOT EXISTS ${index_table} (
57+
k bigint,
58+
v variant
59+
)
60+
DUPLICATE KEY(`k`)
61+
DISTRIBUTED BY HASH(k) BUCKETS 1
62+
properties("replication_num" = "1", "disable_auto_compaction" = "false", "bloom_filter_columns" = "v");
63+
"""
64+
// load the same file five times; the segment-count check below expects 5 afterwards
load_json_data.call(index_table, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")
65+
load_json_data.call(index_table, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")
66+
load_json_data.call(index_table, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")
67+
load_json_data.call(index_table, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")
68+
load_json_data.call(index_table, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")
69+
70+
// maps filled by getBackendIpHttpPort and used below to address each tablet's backend
def backendId_to_backendIP = [:]
71+
def backendId_to_backendHttpPort = [:]
72+
getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
73+
def tablets = sql_return_maparray """ show tablets from ${index_table}; """
74+
75+
// Count segments before compaction: the second space-separated field of every rowset
// entry is parsed as an integer and summed over all tablets.
// NOTE(review): that field is presumably the rowset's segment count -- confirm the format.
int beforeSegmentCount = 0
76+
for (def tablet in tablets) {
77+
String tablet_id = tablet.TabletId
78+
(code, out, err) = curl("GET", tablet.CompactionStatus)
79+
logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
80+
assertEquals(code, 0)
81+
def tabletJson = parseJson(out.trim())
82+
assert tabletJson.rowsets instanceof List
83+
for (String rowset in (List<String>) tabletJson.rowsets) {
84+
beforeSegmentCount += Integer.parseInt(rowset.split(" ")[1])
85+
}
86+
}
87+
assertEquals(beforeSegmentCount, 5)
88+
89+
// trigger a full compaction for every tablet of ${index_table}
90+
for (def tablet in tablets) {
91+
String tablet_id = tablet.TabletId
92+
backend_id = tablet.BackendId
93+
(code, out, err) = be_run_full_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
94+
logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
95+
assertEquals(code, 0)
96+
def compactJson = parseJson(out.trim())
97+
assertEquals("success", compactJson.status.toLowerCase())
98+
}
99+
100+
// wait (at most 3 minutes per tablet) until the backend reports that no compaction
// task is running for the tablet
101+
for (def tablet in tablets) {
102+
Awaitility.await().atMost(3, TimeUnit.MINUTES).untilAsserted(() -> {
103+
String tablet_id = tablet.TabletId
104+
backend_id = tablet.BackendId
105+
(code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
106+
logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
107+
assertEquals(code, 0)
108+
def compactionStatus = parseJson(out.trim())
109+
assertEquals("compaction task for this tablet is not running", compactionStatus.msg.toLowerCase())
110+
return compactionStatus.run_status;
111+
});
112+
}
113+
114+
// Recount segments after compaction using the same parsing as above; the suite expects
// the five loads to have been merged down to a total of 1.
int afterSegmentCount = 0
115+
for (def tablet in tablets) {
116+
String tablet_id = tablet.TabletId
117+
(code, out, err) = curl("GET", tablet.CompactionStatus)
118+
logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
119+
assertEquals(code, 0)
120+
def tabletJson = parseJson(out.trim())
121+
assert tabletJson.rowsets instanceof List
122+
for (String rowset in (List<String>) tabletJson.rowsets) {
123+
logger.info("rowset is: " + rowset)
124+
afterSegmentCount += Integer.parseInt(rowset.split(" ")[1])
125+
}
126+
}
127+
assertEquals(afterSegmentCount, 1)
128+
129+
// Run the queries with the debug point enabled on all BEs; the finally block disables
// it again even if a query fails.
try {
130+
GetDebugPoint().enableDebugPointForAllBEs("bloom_filter_must_filter_data")
131+
132+
// numeric sub-field: equality predicate on v['repo']['id'] cast to int
133+
qt_sql1 """ select cast(v['repo']['id'] as int) from ${index_table} where cast(v['repo']['id'] as int) = 20291263; """
134+
135+
// string sub-field: equality predicate on v['repo']['name'] cast to text
136+
qt_sql2 """ select cast(v['repo']['name'] as text) from ${index_table} where cast(v['repo']['name'] as text) = "ridget/dotfiles"; """
137+
} finally {
138+
GetDebugPoint().disableDebugPointForAllBEs("bloom_filter_must_filter_data")
139+
}
140+
}

0 commit comments

Comments
 (0)