Skip to content

Commit d6e10ca

Browse files
Jibing-Li authored and Your Name committed
[improvement](statistics)Skip auto analyze empty table. (#43865)
### What problem does this PR solve? When doing auto analyze, skip the table if its row count is 0 (i.e. the table is empty). This is safer than writing all-zero stats for the empty table, because the row count reported by BE is not real-time. Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None
1 parent f2c3201 commit d6e10ca

File tree

5 files changed

+54
-10
lines changed

5 files changed

+54
-10
lines changed

fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,6 @@ protected void processOneJob(TableIf table, Set<Pair<String, String>> columns,
145145
// appendMvColumn(table, columns);
146146
appendAllColumns(table, columns);
147147
columns = columns.stream().filter(c -> StatisticsUtil.needAnalyzeColumn(table, c)).collect(Collectors.toSet());
148-
if (columns.isEmpty()) {
149-
return;
150-
}
151148
AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns, priority);
152149
if (analyzeJob == null) {
153150
return;
@@ -206,17 +203,30 @@ protected AnalysisInfo createAnalyzeJobForTbl(
206203
if (StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable()) {
207204
analysisMethod = AnalysisMethod.FULL;
208205
}
206+
AnalysisManager manager = Env.getServingEnv().getAnalysisManager();
207+
TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId());
209208
if (table instanceof OlapTable && analysisMethod.equals(AnalysisMethod.SAMPLE)) {
210209
OlapTable ot = (OlapTable) table;
211210
if (ot.getRowCountForIndex(ot.getBaseIndexId(), true) == TableIf.UNKNOWN_ROW_COUNT) {
212211
LOG.info("Table {} row count is not fully reported, skip auto analyzing this time.", ot.getName());
213212
return null;
214213
}
215214
}
216-
AnalysisManager manager = Env.getServingEnv().getAnalysisManager();
217-
TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId());
218-
long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 :
219-
(table.getRowCount() <= 0 ? table.fetchRowCount() : table.getRowCount());
215+
// We don't auto analyze empty table to avoid all 0 stats.
216+
// Because all 0 is more dangerous than unknown stats when row count report is delayed.
217+
long rowCount = table.getRowCount();
218+
if (rowCount <= 0) {
219+
LOG.info("Table {} is empty, remove its old stats and skip auto analyze it.", table.getName());
220+
// Remove the table's old stats if exists.
221+
if (tableStatsStatus != null && !tableStatsStatus.isColumnsStatsEmpty()) {
222+
manager.dropStats(table, null);
223+
}
224+
return null;
225+
}
226+
if (jobColumns == null || jobColumns.isEmpty()) {
227+
return null;
228+
}
229+
LOG.info("Auto analyze table {} row count is {}", table.getName(), rowCount);
220230
StringJoiner stringJoiner = new StringJoiner(",", "[", "]");
221231
for (Pair<String, String> pair : jobColumns) {
222232
stringJoiner.add(pair.toString());

fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,4 +255,8 @@ public long getBaseIndexDeltaRowCount(OlapTable table) {
255255
}
256256
return updatedRows.get() - maxUpdateRows;
257257
}
258+
259+
public boolean isColumnsStatsEmpty() {
260+
return colToColStatsMeta == null || colToColStatsMeta.isEmpty();
261+
}
258262
}

fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,6 @@ public long getRowCountForIndex(long indexId, boolean strict) {
166166
return 100;
167167
}
168168
};
169-
Assertions.assertThrows(NullPointerException.class, () -> collector.createAnalyzeJobForTbl(table, null, null));
169+
Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null, null));
170170
}
171171
}

regression-test/suites/statistics/test_analyze_mv.groovy

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ suite("test_analyze_mv") {
671671
verifyTaskStatus(result_sample, "mva_MIN__`value3`", "mv3")
672672
verifyTaskStatus(result_sample, "mva_SUM__CAST(`value1` AS bigint)", "mv3")
673673

674-
// Test row count report and report for nereids
674+
// * Test row count report and report for nereids
675675
sql """truncate table mvTestDup"""
676676
result_row = sql """show index stats mvTestDup mv3"""
677677
assertEquals(1, result_row.size())
@@ -680,6 +680,18 @@ suite("test_analyze_mv") {
680680
assertEquals("0", result_row[0][3])
681681
assertEquals("-1", result_row[0][4])
682682

683+
// ** Embedded test for skip auto analyze when table is empty
684+
sql """analyze table mvTestDup properties ("use.auto.analyzer" = "true")"""
685+
def empty_test = sql """show auto analyze mvTestDup"""
686+
assertEquals(0, empty_test.size())
687+
empty_test = sql """show column stats mvTestDup"""
688+
assertEquals(0, empty_test.size())
689+
// ** End of embedded test
690+
691+
sql """analyze table mvTestDup with sync"""
692+
empty_test = sql """show column stats mvTestDup"""
693+
assertEquals(12, empty_test.size())
694+
683695
for (int i = 0; i < 120; i++) {
684696
result_row = sql """show index stats mvTestDup mv3"""
685697
logger.info("mv3 stats: " + result_row)
@@ -694,6 +706,23 @@ suite("test_analyze_mv") {
694706
assertEquals("mv3", result_row[0][1])
695707
assertEquals("0", result_row[0][3])
696708
assertEquals("0", result_row[0][4])
709+
710+
// ** Embedded test for skip auto analyze when table is empty again
711+
sql """analyze table mvTestDup properties ("use.auto.analyzer" = "true")"""
712+
empty_test = sql """show auto analyze mvTestDup"""
713+
assertEquals(0, empty_test.size())
714+
empty_test = sql """show column stats mvTestDup"""
715+
for (int i = 0; i < 100; i++) {
716+
empty_test = sql """show column stats mvTestDup"""
717+
if (empty_test.size() != 0) {
718+
logger.info("async delete is not finished yet.")
719+
Thread.sleep(1000)
720+
}
721+
break
722+
}
723+
assertEquals(0, empty_test.size())
724+
// ** End of embedded test
725+
697726
sql """insert into mvTestDup values (1, 2, 3, 4, 5), (1, 2, 3, 4, 5), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);"""
698727
result_row = sql """show index stats mvTestDup mv3"""
699728
assertEquals(1, result_row.size())

regression-test/suites/statistics/test_auto_analyze_black_white_list.groovy

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,9 @@ suite("test_auto_analyze_black_white_list") {
6868
)
6969
"""
7070

71+
sql """insert into test_bw values (1, 1, 1, 1, 1)"""
7172
try {
72-
wait_row_count_reported("test_auto_analyze_black_white_list", "test_bw", 0, 4, "0")
73+
wait_row_count_reported("test_auto_analyze_black_white_list", "test_bw", 0, 4, "1")
7374
} catch (Exception e) {
7475
logger.info(e.getMessage());
7576
return;

0 commit comments

Comments
 (0)