Skip to content

Commit 8369058

Browse files
Storages: load RSResult only once (#9738)
ref #6233 Previously, TiFlash loaded the `RSResult` three times for one query (MVCC/Build Bitmap/Query). This PR introduces `DMFilePackFilterResult`, which loads the `RSResult` only once and passes only that `RSResult` to `DMFileReader`. Signed-off-by: JaySon-Huang <[email protected]> Co-authored-by: JaySon <[email protected]> Co-authored-by: JaySon-Huang <[email protected]>
1 parent 29d4b4f commit 8369058

27 files changed

+663
-424
lines changed

dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
#include <Interpreters/Context.h>
1615
#include <Interpreters/SharedContexts/Disagg.h>
1716
#include <Storages/DeltaMerge/ColumnFile/ColumnFileBig.h>
1817
#include <Storages/DeltaMerge/DMContext.h>
@@ -38,22 +37,13 @@ ColumnFileBig::ColumnFileBig(const DMContext & dm_context, const DMFilePtr & fil
3837

3938
void ColumnFileBig::calculateStat(const DMContext & dm_context)
4039
{
41-
auto index_cache = dm_context.global_context.getMinMaxIndexCache();
42-
43-
auto pack_filter = DMFilePackFilter::loadFrom(
40+
auto m = DMFilePackFilter::loadValidRowsAndBytes(
41+
dm_context,
4442
file,
45-
index_cache,
4643
/*set_cache_if_miss*/ false,
47-
{segment_range},
48-
EMPTY_RS_OPERATOR,
49-
{},
50-
dm_context.global_context.getFileProvider(),
51-
dm_context.getReadLimiter(),
52-
dm_context.scan_context,
53-
/*tracing_id*/ dm_context.tracing_id,
54-
ReadTag::Internal);
55-
56-
std::tie(valid_rows, valid_bytes) = pack_filter.validRowsAndBytes();
44+
{segment_range});
45+
valid_rows = m.match_rows;
46+
valid_bytes = m.match_bytes;
5747
}
5848

5949
void ColumnFileBig::removeData(WriteBatches & wbs) const

dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
#include <IO/FileProvider/ChecksumReadBufferBuilder.h>
1516
#include <Storages/DeltaMerge/File/ColumnStream.h>
1617
#include <Storages/DeltaMerge/File/DMFileReader.h>
1718
#include <Storages/Page/PageUtil.h>
@@ -157,7 +158,7 @@ std::unique_ptr<CompressedSeekableReaderBuffer> ColumnReadStream::buildColDataRe
157158

158159
// Try to get the largest buffer size of reading continuous packs
159160
size_t buffer_size = 0;
160-
const auto & pack_res = reader.pack_filter.getPackResConst();
161+
const auto & pack_res = reader.pack_filter->getPackRes();
161162
for (size_t i = 0; i < n_packs; /*empty*/)
162163
{
163164
if (!pack_res[i].isUse())

dbms/src/Storages/DeltaMerge/File/DMFile.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ size_t DMFile::colIndexSize(ColId id) const
219219
}
220220
else
221221
{
222-
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Index of {} not exist", id);
222+
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Index is not exist, col_id={}", id);
223223
}
224224
}
225225
else

dbms/src/Storages/DeltaMerge/File/DMFile.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ class DMFile : private boost::noncopyable
210210

211211
UInt32 metaVersion() const { return meta->metaVersion(); }
212212

213+
bool isColIndexExist(const ColId & col_id) const;
214+
213215
private:
214216
DMFile(
215217
UInt64 file_id_,
@@ -293,8 +295,6 @@ class DMFile : private boost::noncopyable
293295
String colIndexCacheKey(const FileNameBase & file_name_base) const;
294296
String colMarkCacheKey(const FileNameBase & file_name_base) const;
295297

296-
bool isColIndexExist(const ColId & col_id) const;
297-
298298
String encryptionBasePath() const;
299299
EncryptionPath encryptionDataPath(const FileNameBase & file_name_base) const;
300300
EncryptionPath encryptionIndexPath(const FileNameBase & file_name_base) const;

dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp

Lines changed: 36 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,9 @@
1515
#include <Interpreters/Context.h>
1616
#include <Storages/DeltaMerge/File/DMFileBlockInputStream.h>
1717
#include <Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.h>
18-
#include <Storages/DeltaMerge/Filter/WithANNQueryInfo.h>
1918
#include <Storages/DeltaMerge/Index/VectorIndex.h>
2019
#include <Storages/DeltaMerge/ScanContext.h>
2120

22-
2321
namespace DB::DM
2422
{
2523

@@ -58,19 +56,6 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build(
5856

5957
bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle;
6058

61-
DMFilePackFilter pack_filter = DMFilePackFilter::loadFrom(
62-
dmfile,
63-
index_cache,
64-
/*set_cache_if_miss*/ true,
65-
rowkey_ranges,
66-
rs_filter,
67-
read_packs,
68-
file_provider,
69-
read_limiter,
70-
scan_context,
71-
tracing_id,
72-
read_tag);
73-
7459
bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader();
7560

7661
if (!enable_read_thread || max_sharing_column_bytes_for_all <= 0)
@@ -79,6 +64,22 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build(
7964
max_sharing_column_bytes_for_all = 0;
8065
}
8166

67+
// If pack_filter is not set, load from EMPTY_RS_OPERATOR.
68+
if (!pack_filter)
69+
{
70+
pack_filter = DMFilePackFilter::loadFrom(
71+
index_cache,
72+
file_provider,
73+
read_limiter,
74+
scan_context,
75+
dmfile,
76+
true,
77+
rowkey_ranges,
78+
EMPTY_RS_OPERATOR,
79+
read_packs,
80+
tracing_id);
81+
}
82+
8283
DMFileReader reader(
8384
dmfile,
8485
read_columns,
@@ -87,7 +88,7 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build(
8788
enable_del_clean_read,
8889
is_fast_scan,
8990
max_data_version,
90-
std::move(pack_filter),
91+
pack_filter,
9192
mark_cache,
9293
enable_column_cache,
9394
column_cache,
@@ -140,18 +141,13 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn
140141
return build(dmfile, read_columns, rowkey_ranges, scan_context);
141142
};
142143

143-
if (!rs_filter)
144-
return fallback();
145-
146-
auto filter_with_ann = std::dynamic_pointer_cast<WithANNQueryInfo>(rs_filter);
147-
if (!filter_with_ann)
144+
if (!ann_query_info)
148145
return fallback();
149146

150147
if (!bitmap_filter.has_value())
151148
return fallback();
152149

153150
Block header_layout = toEmptyBlock(read_columns);
154-
auto ann_query_info = filter_with_ann->ann_query_info;
155151

156152
// Copy out the vector column for later use. Copy is intentionally performed after the
157153
// fast check so that in fallback conditions we don't need unnecessary copies.
@@ -181,22 +177,25 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn
181177

182178
// All check passed. Let's read via vector index.
183179

184-
DMFilePackFilter pack_filter = DMFilePackFilter::loadFrom(
185-
dmfile,
186-
index_cache,
187-
/*set_cache_if_miss*/ true,
188-
rowkey_ranges,
189-
rs_filter,
190-
read_packs,
191-
file_provider,
192-
read_limiter,
193-
scan_context,
194-
tracing_id,
195-
ReadTag::Query);
196-
197180
bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader();
198181
bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle;
199182

183+
// If pack_filter is not set, load from EMPTY_RS_OPERATOR.
184+
if (!pack_filter)
185+
{
186+
pack_filter = DMFilePackFilter::loadFrom(
187+
index_cache,
188+
file_provider,
189+
read_limiter,
190+
scan_context,
191+
dmfile,
192+
true,
193+
rowkey_ranges,
194+
EMPTY_RS_OPERATOR,
195+
read_packs,
196+
tracing_id);
197+
}
198+
200199
DMFileReader rest_columns_reader(
201200
dmfile,
202201
rest_columns,
@@ -205,7 +204,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn
205204
enable_del_clean_read,
206205
is_fast_scan,
207206
max_data_version,
208-
std::move(pack_filter),
207+
pack_filter,
209208
mark_cache,
210209
enable_column_cache,
211210
column_cache,
@@ -217,7 +216,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn
217216
tracing_id,
218217
enable_read_thread,
219218
scan_context,
220-
ReadTag::Query);
219+
read_tag);
221220

222221
if (column_cache_long_term && pk_col_id)
223222
// ColumnCacheLongTerm is only filled in Vector Search.

dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,9 @@ class DMFileBlockInputStreamBuilder
138138
return *this;
139139
}
140140

141-
DMFileBlockInputStreamBuilder & setRSOperator(const RSOperatorPtr & filter_)
141+
DMFileBlockInputStreamBuilder & setAnnQureyInfo(const ANNQueryInfoPtr & ann_query_info_)
142142
{
143-
rs_filter = filter_;
143+
ann_query_info = ann_query_info_;
144144
return *this;
145145
}
146146

@@ -162,6 +162,7 @@ class DMFileBlockInputStreamBuilder
162162
read_one_pack_every_time = true;
163163
return *this;
164164
}
165+
165166
DMFileBlockInputStreamBuilder & setRowsThreshold(size_t rows_threshold_per_read_)
166167
{
167168
rows_threshold_per_read = rows_threshold_per_read_;
@@ -180,6 +181,12 @@ class DMFileBlockInputStreamBuilder
180181
return *this;
181182
}
182183

184+
DMFileBlockInputStreamBuilder & setDMFilePackFilterResult(const DMFilePackFilterResultPtr & pack_filter_)
185+
{
186+
pack_filter = pack_filter_;
187+
return *this;
188+
}
189+
183190
/**
184191
* @note To really enable the long term cache, you also need to ensure
185192
* ColumnCacheLongTerm is initialized in the global context.
@@ -217,8 +224,6 @@ class DMFileBlockInputStreamBuilder
217224
bool is_fast_scan = false;
218225
bool enable_del_clean_read = false;
219226
UInt64 max_data_version = std::numeric_limits<UInt64>::max();
220-
// Rough set filter
221-
RSOperatorPtr rs_filter;
222227
// packs filter (filter by pack index)
223228
IdSetPtr read_packs;
224229
MarkCachePtr mark_cache;
@@ -234,6 +239,10 @@ class DMFileBlockInputStreamBuilder
234239
String tracing_id;
235240
ReadTag read_tag = ReadTag::Internal;
236241

242+
DMFilePackFilterResultPtr pack_filter;
243+
244+
ANNQueryInfoPtr ann_query_info = nullptr;
245+
237246
VectorIndexCachePtr vector_index_cache;
238247
// Note: Currently this field is assigned only for Stable streams, not available for ColumnFileBig
239248
std::optional<BitmapFilterView> bitmap_filter;

0 commit comments

Comments
 (0)