Skip to content

Commit 8503881

Browse files
authored
Merge branch 'master' into pick-table
2 parents 501b3d4 + 8ba68d4 commit 8503881

File tree

2,751 files changed

+102744
-61089
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,751 files changed

+102744
-61089
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ docker/thirdparties/docker-compose/hive/scripts/paimon1
6060
fe_plugins/output
6161
fe_plugins/**/.factorypath
6262

63+
docker/thirdparties/docker-compose/hive/scripts/data/*/*/data
64+
6365
fs_brokers/apache_hdfs_broker/src/main/resources/
6466
fs_brokers/apache_hdfs_broker/src/main/thrift/
6567

@@ -100,7 +102,6 @@ be/tags
100102
be/test/olap/test_data/tablet_meta_test.hdr
101103
be/.devcontainer/
102104
be/src/apache-orc/
103-
zoneinfo/
104105

105106
# Cloud
106107
cloud/build*/

.licenserc.yaml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ header:
4040
- "**/*.sql"
4141
- "**/*.lock"
4242
- "**/*.out"
43+
- "**/*.parquet"
4344
- "docs/.markdownlintignore"
4445
- "fe/fe-core/src/test/resources/data/net_snmp_normal"
4546
- "fe/fe-core/src/main/antlr4/org/apache/doris/nereids/JavaLexer.g4"
@@ -77,12 +78,7 @@ header:
7778
- "docs/package-lock.json"
7879
- "regression-test/script/README"
7980
- "regression-test/suites/load_p0/stream_load/data"
80-
- "docker/thirdparties/docker-compose/hive/scripts/README"
81-
- "docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql"
82-
- "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_orc.hql"
83-
- "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_parquet.hql"
84-
- "docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/"
85-
- "docker/thirdparties/docker-compose/hive/scripts/data/**"
81+
- "docker/thirdparties/docker-compose/hive/scripts/**"
8682
- "docker/thirdparties/docker-compose/iceberg/spark-defaults.conf.tpl"
8783
- "conf/mysql_ssl_default_certificate/*"
8884
- "conf/mysql_ssl_default_certificate/client_certificate/ca.pem"

README.md

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,31 +18,68 @@ under the License.
1818
-->
1919

2020
<div align="center">
21-
<img src="https://doris.apache.org/assets/images/home-banner-7f193353c932af31634eca0a028f03ed.png" align="right" height="240"/>
22-
</div>
2321

2422
# Apache Doris
23+
2524
[![License](https://img.shields.io/badge/license-Apache%202-4EB1BA.svg)](https://www.apache.org/licenses/LICENSE-2.0.html)
2625
[![GitHub release](https://img.shields.io/github/release/apache/doris.svg)](https://github.com/apache/doris/releases)
26+
[![OSSRank](https://shields.io/endpoint?url=https://ossrank.com/shield/516)](https://ossrank.com/p/516)
2727
[![Jenkins Vec](https://img.shields.io/jenkins/tests?compact_message&jobUrl=https://ci-builds.apache.org/job/Doris/job/doris_daily_enable_vectorized&label=VectorizedEngine)](https://ci-builds.apache.org/job/Doris/job/doris_daily_enable_vectorized)
28-
[![Total Lines](https://tokei.rs/b1/github/apache/doris?category=lines)](https://github.com/apache/doris)
29-
[![Join the Doris Community on Slack](https://join.slack.com/t/apachedoriscommunity/shared_invite/zt-2kl08hzc0-SPJe4VWmL_qzrFd2u2XYQA)
28+
[![Total Line](https://img.shields.io/badge/Total_Line-GitHub-blue)](https://github.com/apache/doris)
29+
[![Join the chat at https://gitter.im/apache-doris/Lobby](https://badges.gitter.im/apache-doris/Lobby.svg)](https://gitter.im/apache-doris/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
3030
[![EN doc](https://img.shields.io/badge/Docs-English-blue.svg)](https://doris.apache.org/docs/get-starting/quick-start)
31-
[![CN doc](https://img.shields.io/badge/文档-中文版-blue.svg)]([https://doris.apache.org/zh-CN/docs/dev/get-starting/what-is-apache-doris](https://doris.apache.org/zh-CN/docs/get-starting/what-is-apache-doris))
31+
[![CN doc](https://img.shields.io/badge/文档-中文版-blue.svg)](https://doris.apache.org/zh-CN/docs/get-starting/quick-start/)
32+
33+
34+
35+
<div>
36+
37+
38+
[![Official Website](<https://img.shields.io/badge/-Visit%20the%20Official%20Website%20%E2%86%92-rgb(15,214,106)?style=for-the-badge>)](https://doris.apache.org/)
39+
[![Quick Download](<https://img.shields.io/badge/-Quick%20%20Download%20%E2%86%92-rgb(66,56,255)?style=for-the-badge>)](https://doris.apache.org/download)
40+
41+
42+
</div>
43+
44+
45+
<div>
46+
<a href="https://twitter.com/doris_apache"><img src="https://img.shields.io/badge/- @Doris_Apache -424549?style=social&logo=x" height=25></a>
47+
&nbsp;
48+
<a href="https://github.com/apache/doris/discussions"><img src="https://img.shields.io/badge/- Discussion -red?style=social&logo=discourse" height=25></a>
49+
&nbsp;
50+
<a href="https://apachedoriscommunity.slack.com/join/shared_invite/zt-2kl08hzc0-SPJe4VWmL_qzrFd2u2XYQA"><img src="https://img.shields.io/badge/-Slack-red?style=social&logo=slack" height=25></a>
51+
&nbsp;
52+
<a href="https://medium.com/@ApacheDoris"><img src="https://img.shields.io/badge/-Medium-red?style=social&logo=medium" height=25></a>
53+
54+
</div>
55+
56+
</div>
57+
58+
---
59+
60+
3261

33-
Apache Doris is an MPP-based real-time data warehouse known for its high query speed. For queries on large datasets, it returns results in sub-seconds. It supports both high-concurrency point queries and high-throughput complex analysis. It can be used for report analysis, ad-hoc queries, unified data warehouse building, and data lake query acceleration. Based on Apache Doris, users can build applications for user behavior analysis, A/B testing platform, log analysis, and e-commerce order analysis.
3462

35-
Please visit our [official download page](https://doris.apache.org/download/) to get the latest release version.
63+
Apache Doris is an easy-to-use, high-performance and real-time analytical database based on MPP architecture, known for its extreme speed and ease of use. It only requires a sub-second response time to return query results under massive data and can support not only high-concurrent point query scenarios but also high-throughput complex analysis scenarios.
3664

37-
The current stable version is the 2.0.x series, and the latest version is the 2.1.x series. For production, it is recommended to use the latest version of the 2.0.x series. And if used for POC or testing, it is recommended to use the latest version of the 2.1.x series.
65+
All this makes Apache Doris an ideal tool for scenarios including report analysis, ad-hoc query, unified data warehouse, and data lake query acceleration. On Apache Doris, users can build various applications, such as user behavior analysis, AB test platform, log retrieval analysis, user portrait analysis, and order analysis.
66+
67+
🎉 Version 2.1.0 is now released. Check out the 🔗[Release Notes](https://doris.apache.org/docs/releasenotes/release-2.1.0) here. The 2.1 version delivers exceptional performance with 100% higher out-of-the-box queries proven by TPC-DS 1TB tests, enhanced data lake analytics that are 4-6 times speedier than Trino and Spark, solid support for semi-structured data analysis with new Variant types and a suite of analytical functions, asynchronous materialized views for query acceleration, optimized real-time writing at scale, and better workload management with stability and runtime SQL resource tracking.
68+
69+
70+
🎉 Version 2.0.6 is now released! This fully evolved and stable release is ready for all users to upgrade. Check out the 🔗[Release Notes](https://doris.apache.org/docs/releasenotes/release-2.0.6) here.
3871

3972
👀 Have a look at the 🔗[Official Website](https://doris.apache.org/) for a comprehensive list of Apache Doris's core features, blogs and user cases.
4073

4174
## 📈 Usage Scenarios
4275

4376
As shown in the figure below, after various data integration and processing, the data sources are usually stored in the real-time data warehouse Apache Doris and the offline data lake or data warehouse (in Apache Hive, Apache Iceberg or Apache Hudi).
4477

45-
<img src="https://dev-to-uploads.s3.amazonaws.com/uploads/articles/sekvbs5ih5rb16wz6n9k.png">
78+
<br />
79+
80+
<img src="https://cdn.selectdb.com/static/What_is_Apache_Doris_3_a61692c2ce.png" />
81+
82+
<br />
4683

4784
Apache Doris is widely used in the following scenarios:
4885

@@ -70,7 +107,11 @@ The overall architecture of Apache Doris is shown in the following figure. The D
70107

71108
Both types of processes are horizontally scalable, and a single cluster can support up to hundreds of machines and tens of petabytes of storage capacity. And these two types of processes guarantee high availability of services and high reliability of data through consistency protocols. This highly integrated architecture design greatly reduces the operation and maintenance cost of a distributed system.
72109

73-
![The overall architecture of Apache Doris](https://dev-to-uploads.s3.amazonaws.com/uploads/articles/mnz20ae3s23vv3e9ltmi.png)
110+
<br />
111+
112+
![The overall architecture of Apache Doris](https://cdn.selectdb.com/static/What_is_Apache_Doris_adb26397e2.png)
113+
114+
<br />
74115

75116
In terms of interfaces, Apache Doris adopts MySQL protocol, supports standard SQL, and is highly compatible with MySQL dialect. Users can access Doris through various client tools and it supports seamless connection with BI tools.
76117

@@ -100,11 +141,19 @@ Doris also supports strongly consistent materialized views. Materialized views a
100141

101142
Doris adopts the MPP model in its query engine to realize parallel execution between and within nodes. It also supports distributed shuffle join for multiple large tables so as to handle complex queries.
102143

103-
![](https://dev-to-uploads.s3.amazonaws.com/uploads/articles/vjlmumwyx728uymsgcw0.png)
144+
<br />
145+
146+
![Query Engine](https://cdn.selectdb.com/static/What_is_Apache_Doris_1_c6f5ba2af9.png)
147+
148+
<br />
104149

105150
The Doris query engine is vectorized, with all memory structures laid out in a columnar format. This can largely reduce virtual function calls, improve cache hit rates, and make efficient use of SIMD instructions. Doris delivers a 5–10 times higher performance in wide table aggregation scenarios than non-vectorized engines.
106151

107-
![](https://dev-to-uploads.s3.amazonaws.com/uploads/articles/ck2m3kbnodn28t28vphp.png)
152+
<br />
153+
154+
![Doris query engine](https://cdn.selectdb.com/static/What_is_Apache_Doris_2_29cf58cc6b.png)
155+
156+
<br />
108157

109158
Apache Doris uses Adaptive Query Execution technology to dynamically adjust the execution plan based on runtime statistics. For example, it can generate runtime filter, push it to the probe side, and automatically penetrate it to the Scan node at the bottom, which drastically reduces the amount of data in the probe and increases join performance. The runtime filter in Doris supports In/Min/Max/Bloom filter.
110159

@@ -133,7 +182,7 @@ In terms of optimizers, Doris uses a combination of CBO and RBO. RBO supports co
133182

134183
**Apache Doris has graduated from Apache incubator successfully and become a Top-Level Project in June 2022**.
135184

136-
Currently, the Apache Doris community has gathered more than 600 contributors from over 200 companies in different industries, and the number of monthly active contributors exceeds 100.
185+
Currently, the Apache Doris community has gathered more than 400 contributors from nearly 200 companies in different industries, and the number of active contributors is close to 100 per month.
137186

138187

139188
[![Monthly Active Contributors](https://contributor-overtime-api.apiseven.com/contributors-svg?chart=contributorMonthlyActivity&repo=apache/doris)](https://www.apiseven.com/en/contributor-graph?chart=contributorMonthlyActivity&repo=apache/doris)
@@ -212,7 +261,7 @@ Contact us through the following mailing list.
212261

213262
* Apache Doris Official Website - [Site](https://doris.apache.org)
214263
* Developer Mailing list - <dev@doris.apache.org>. Mail to <dev-subscribe@doris.apache.org>, then follow the reply to subscribe to the mailing list.
215-
* Slack channel - [Join the Slack](https://join.slack.com/t/apachedoriscommunity/shared_invite/zt-2kl08hzc0-SPJe4VWmL_qzrFd2u2XYQA)
264+
* Slack channel - [Join the Slack](https://join.slack.com/t/apachedoriscommunity/shared_invite/zt-28il1o2wk-DD6LsLOz3v4aD92Mu0S0aQ)
216265
* Twitter - [Follow @doris_apache](https://twitter.com/doris_apache)
217266

218267

be/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ set(BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
118118
set(ENV{DORIS_HOME} "${BASE_DIR}/..")
119119
set(BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}")
120120
set(GENSRC_DIR "${BASE_DIR}/../gensrc/build/")
121+
set(COMMON_SRC_DIR "${BASE_DIR}/../common")
121122
set(SRC_DIR "${BASE_DIR}/src/")
122123
set(TEST_DIR "${CMAKE_SOURCE_DIR}/test/")
123124
set(OUTPUT_DIR "${BASE_DIR}/output")
@@ -436,6 +437,7 @@ include_directories(
436437

437438
include_directories(
438439
SYSTEM
440+
${COMMON_SRC_DIR}
439441
${GENSRC_DIR}/
440442
${THIRDPARTY_DIR}/include
441443
${GPERFTOOLS_HOME}/include
@@ -500,6 +502,7 @@ set(DORIS_LINK_LIBS
500502
Pipeline
501503
Cloud
502504
${WL_END_GROUP}
505+
CommonCPP
503506
)
504507

505508
set(absl_DIR ${THIRDPARTY_DIR}/lib/cmake/absl)
@@ -765,6 +768,8 @@ if (MAKE_TEST)
765768
add_subdirectory(${TEST_DIR})
766769
endif ()
767770

771+
add_subdirectory(${COMMON_SRC_DIR}/cpp ${BUILD_DIR}/src/common_cpp)
772+
768773
# Install be
769774
install(DIRECTORY DESTINATION ${OUTPUT_DIR})
770775
install(DIRECTORY DESTINATION ${OUTPUT_DIR}/bin)

be/cmake/thirdparty.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ if (USE_JEMALLOC)
8181
else()
8282
add_thirdparty(tcmalloc WHOLELIBPATH ${GPERFTOOLS_HOME}/lib/libtcmalloc.a NOTADD)
8383
endif()
84-
add_thirdparty(jemalloc_arrow LIBNAME "lib/libjemalloc_arrow.a")
8584

8685
if (WITH_MYSQL)
8786
add_thirdparty(mysql LIBNAME "lib/libmysqlclient.a")

be/src/agent/cgroup_cpu_ctl.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ Status CgroupV1CpuCtl::init() {
130130
return Status::InternalError<false>("invalid cgroup path, not find cpu quota file");
131131
}
132132

133-
if (_tg_id == -1) {
133+
if (_wg_id == -1) {
134134
// means current cgroup cpu ctl is just used to clear dir,
135135
// it does not contain a workload group.
136136
// todo(wb) rethinking whether need to refactor cgroup_cpu_ctl
@@ -140,7 +140,7 @@ Status CgroupV1CpuCtl::init() {
140140
}
141141

142142
// workload group path
143-
_cgroup_v1_cpu_tg_path = _cgroup_v1_cpu_query_path + "/" + std::to_string(_tg_id);
143+
_cgroup_v1_cpu_tg_path = _cgroup_v1_cpu_query_path + "/" + std::to_string(_wg_id);
144144
if (access(_cgroup_v1_cpu_tg_path.c_str(), F_OK) != 0) {
145145
int ret = mkdir(_cgroup_v1_cpu_tg_path.c_str(), S_IRWXU);
146146
if (ret != 0) {
@@ -186,7 +186,8 @@ Status CgroupV1CpuCtl::add_thread_to_cgroup() {
186186
return Status::OK();
187187
#else
188188
int tid = static_cast<int>(syscall(SYS_gettid));
189-
std::string msg = "add thread " + std::to_string(tid) + " to group";
189+
std::string msg =
190+
"add thread " + std::to_string(tid) + " to group" + " " + std::to_string(_wg_id);
190191
std::lock_guard<std::shared_mutex> w_lock(_lock_mutex);
191192
return CgroupCpuCtl::write_cg_sys_file(_cgroup_v1_cpu_tg_task_file, tid, msg, true);
192193
#endif

be/src/agent/cgroup_cpu_ctl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class CgroupCpuCtl {
3535
public:
3636
virtual ~CgroupCpuCtl() = default;
3737
CgroupCpuCtl() = default;
38-
CgroupCpuCtl(uint64_t tg_id) { _tg_id = tg_id; }
38+
CgroupCpuCtl(uint64_t wg_id) { _wg_id = wg_id; }
3939

4040
virtual Status init();
4141

@@ -63,7 +63,7 @@ class CgroupCpuCtl {
6363
int _cpu_hard_limit = 0;
6464
std::shared_mutex _lock_mutex;
6565
bool _init_succ = false;
66-
uint64_t _tg_id = -1; // workload group id
66+
uint64_t _wg_id = -1; // workload group id
6767
uint64_t _cpu_shares = 0;
6868
};
6969

be/src/agent/task_worker_pool.cpp

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -540,26 +540,20 @@ Status TaskWorkerPool::submit_task(const TAgentTaskRequest& task) {
540540
}
541541

542542
PriorTaskWorkerPool::PriorTaskWorkerPool(
543-
std::string_view name, int normal_worker_count, int high_prior_worker_count,
543+
const std::string& name, int normal_worker_count, int high_prior_worker_count,
544544
std::function<void(const TAgentTaskRequest& task)> callback)
545545
: _callback(std::move(callback)) {
546-
auto st = ThreadPoolBuilder(fmt::format("TaskWP_.{}", name))
547-
.set_min_threads(normal_worker_count)
548-
.set_max_threads(normal_worker_count)
549-
.build(&_normal_pool);
550-
CHECK(st.ok()) << name << ": " << st;
551-
552-
st = _normal_pool->submit_func([this] { normal_loop(); });
553-
CHECK(st.ok()) << name << ": " << st;
554-
555-
st = ThreadPoolBuilder(fmt::format("HighPriorPool.{}", name))
556-
.set_min_threads(high_prior_worker_count)
557-
.set_max_threads(high_prior_worker_count)
558-
.build(&_high_prior_pool);
559-
CHECK(st.ok()) << name << ": " << st;
546+
for (int i = 0; i < normal_worker_count; ++i) {
547+
auto st = Thread::create(
548+
"Normal", name, [this] { normal_loop(); }, &_workers.emplace_back());
549+
CHECK(st.ok()) << name << ": " << st;
550+
}
560551

561-
st = _high_prior_pool->submit_func([this] { high_prior_loop(); });
562-
CHECK(st.ok()) << name << ": " << st;
552+
for (int i = 0; i < high_prior_worker_count; ++i) {
553+
auto st = Thread::create(
554+
"HighPrior", name, [this] { high_prior_loop(); }, &_workers.emplace_back());
555+
CHECK(st.ok()) << name << ": " << st;
556+
}
563557
}
564558

565559
PriorTaskWorkerPool::~PriorTaskWorkerPool() {
@@ -578,12 +572,10 @@ void PriorTaskWorkerPool::stop() {
578572
_normal_condv.notify_all();
579573
_high_prior_condv.notify_all();
580574

581-
if (_normal_pool) {
582-
_normal_pool->shutdown();
583-
}
584-
585-
if (_high_prior_pool) {
586-
_high_prior_pool->shutdown();
575+
for (auto&& w : _workers) {
576+
if (w) {
577+
w->join();
578+
}
587579
}
588580
}
589581

be/src/agent/task_worker_pool.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ class PublishVersionWorkerPool final : public TaskWorkerPool {
7979

8080
class PriorTaskWorkerPool final : public TaskWorkerPoolIf {
8181
public:
82-
PriorTaskWorkerPool(std::string_view name, int normal_worker_count, int high_prior_worker_count,
82+
PriorTaskWorkerPool(const std::string& name, int normal_worker_count,
83+
int high_prior_worker_count,
8384
std::function<void(const TAgentTaskRequest& task)> callback);
8485

8586
~PriorTaskWorkerPool() override;
@@ -101,8 +102,7 @@ class PriorTaskWorkerPool final : public TaskWorkerPoolIf {
101102
std::condition_variable _high_prior_condv;
102103
std::deque<std::unique_ptr<TAgentTaskRequest>> _high_prior_queue;
103104

104-
std::unique_ptr<ThreadPool> _normal_pool;
105-
std::unique_ptr<ThreadPool> _high_prior_pool;
105+
std::vector<scoped_refptr<Thread>> _workers;
106106

107107
std::function<void(const TAgentTaskRequest&)> _callback;
108108
};

0 commit comments

Comments
 (0)