Skip to content

Commit 6639d26

Browse files
wu-hanqing and wuhongsong
authored and committed
Fix metaserver deadlock caused by bthread coroutine switching
Signed-off-by: Hanqing Wu <[email protected]>
1 parent d996e02 commit 6639d26

File tree

2 files changed

+109
-46
lines changed

2 files changed

+109
-46
lines changed

curvefs/src/metaserver/partition.cpp

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#include <algorithm>
2828
#include <cstdint>
29+
#include <future>
2930
#include <memory>
3031
#include <string>
3132
#include <utility>
@@ -537,54 +538,68 @@ MetaStatusCode Partition::GetAllBlockGroup(
537538
}
538539

539540
void Partition::StartS3Compact() {
540-
S3CompactManager::GetInstance().Register(
541-
S3Compact{inodeManager_, partitionInfo_});
541+
// register s3 compaction task in a separate thread, since the caller may
542+
// hold a pthread wrlock when calling this function, and creating `S3Compact`
543+
// will acquire a bthread rwlock, which may cause thread switching, thus causing a
544+
// deadlock.
545+
// FIXME(wuhanqing): handle it in a more elegant way
546+
auto handle = std::async(std::launch::async, [this]() {
547+
S3CompactManager::GetInstance().Register(
548+
S3Compact{inodeManager_, partitionInfo_});
549+
});
550+
551+
handle.wait();
542552
}
543553

544554
void Partition::CancelS3Compact() {
545555
S3CompactManager::GetInstance().Cancel(partitionInfo_.partitionid());
546556
}
547557

548558
void Partition::StartVolumeDeallocate() {
549-
FsInfo fsInfo;
550-
bool ok =
551-
FsInfoManager::GetInstance().GetFsInfo(partitionInfo_.fsid(), &fsInfo);
552-
if (!ok) {
553-
LOG(ERROR)
554-
<< "Partition start volume deallocate fail, get fsinfo fail. fsid="
555-
<< partitionInfo_.fsid();
556-
return;
557-
}
558-
559-
if (!fsInfo.detail().has_volume()) {
560-
LOG(INFO) << "Partition not belong to volume, do not need start "
561-
"deallocate. partitionInfo="
562-
<< partitionInfo_.DebugString();
563-
return;
564-
}
565-
566-
VolumeDeallocateCalOption calOpt;
567-
calOpt.kvStorage = kvStorage_;
568-
calOpt.inodeStorage = inodeStorage_;
569-
calOpt.nameGen = nameGen_;
570-
auto copysetNode =
571-
copyset::CopysetNodeManager::GetInstance().GetSharedCopysetNode(
572-
partitionInfo_.poolid(), partitionInfo_.copysetid());
573-
if (copysetNode == nullptr) {
574-
LOG(ERROR) << "Partition get copyset node failed. poolid="
575-
<< partitionInfo_.poolid()
576-
<< ", copysetid=" << partitionInfo_.copysetid();
577-
return;
578-
}
579-
580-
InodeVolumeSpaceDeallocate task(partitionInfo_.fsid(),
581-
partitionInfo_.partitionid(), copysetNode);
582-
task.Init(calOpt);
583-
584-
VolumeDeallocateManager::GetInstance().Register(std::move(task));
585-
586-
VLOG(3) << "Partition start volume deallocate success. partitionInfo="
587-
<< partitionInfo_.DebugString();
559+
// FIXME(wuhanqing): same as `StartS3Compact`
560+
auto handle = std::async(std::launch::async, [this]() {
561+
FsInfo fsInfo;
562+
bool ok = FsInfoManager::GetInstance().GetFsInfo(
563+
partitionInfo_.fsid(), &fsInfo);
564+
if (!ok) {
565+
LOG(ERROR) << "Partition start volume deallocate fail, get fsinfo "
566+
"fail. fsid="
567+
<< partitionInfo_.fsid();
568+
return;
569+
}
570+
571+
if (!fsInfo.detail().has_volume()) {
572+
LOG(INFO) << "Partition not belong to volume, do not need start "
573+
"deallocate. partitionInfo="
574+
<< partitionInfo_.DebugString();
575+
return;
576+
}
577+
578+
VolumeDeallocateCalOption calOpt;
579+
calOpt.kvStorage = kvStorage_;
580+
calOpt.inodeStorage = inodeStorage_;
581+
calOpt.nameGen = nameGen_;
582+
auto copysetNode =
583+
copyset::CopysetNodeManager::GetInstance().GetSharedCopysetNode(
584+
partitionInfo_.poolid(), partitionInfo_.copysetid());
585+
if (copysetNode == nullptr) {
586+
LOG(ERROR) << "Partition get copyset node failed. poolid="
587+
<< partitionInfo_.poolid()
588+
<< ", copysetid=" << partitionInfo_.copysetid();
589+
return;
590+
}
591+
592+
InodeVolumeSpaceDeallocate task(
593+
partitionInfo_.fsid(), partitionInfo_.partitionid(), copysetNode);
594+
task.Init(calOpt);
595+
596+
VolumeDeallocateManager::GetInstance().Register(std::move(task));
597+
598+
VLOG(3) << "Partition start volume deallocate success. partitionInfo="
599+
<< partitionInfo_.DebugString();
600+
});
601+
602+
handle.wait();
588603
}
589604

590605
void Partition::CancelVolumeDeallocate() {

src/common/concurrent/rw_lock.h

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,31 @@
2323
#ifndef SRC_COMMON_CONCURRENT_RW_LOCK_H_
2424
#define SRC_COMMON_CONCURRENT_RW_LOCK_H_
2525

26-
#include <pthread.h>
2726
#include <assert.h>
28-
#include <glog/logging.h>
2927
#include <bthread/bthread.h>
28+
#include <glog/logging.h>
29+
#include <pthread.h>
30+
#include <sys/types.h> // gettid
3031

32+
#include "include/curve_compiler_specific.h"
3133
#include "src/common/uncopyable.h"
3234

35+
// Due to the mixed use of bthread and pthread in some cases, acquiring another
36+
// bthread lock(mutex/rwlock) after acquiring a write lock on a pthread rwlock
37+
// may result in switching the bthread coroutine, and then the operation of
38+
// releasing the previous write lock in the other pthread will not take effect
39+
// (implying that the write lock is still held), thus causing a deadlock.
40+
41+
// Check pthread rwlock tid between wrlock and unlock
42+
#if defined(ENABLE_CHECK_PTHREAD_WRLOCK_TID) && \
43+
(ENABLE_CHECK_PTHREAD_WRLOCK_TID == 1)
44+
#define CURVE_CHECK_PTHREAD_WRLOCK_TID 1
45+
#elif !defined(ENABLE_CHECK_PTHREAD_WRLOCK_TID)
46+
#define CURVE_CHECK_PTHREAD_WRLOCK_TID 1
47+
#else
48+
#define CURVE_CHECK_PTHREAD_WRLOCK_TID 0
49+
#endif
50+
3351
namespace curve {
3452
namespace common {
3553

@@ -51,10 +69,21 @@ class PthreadRWLockBase : public RWLockBase {
5169
void WRLock() override {
5270
int ret = pthread_rwlock_wrlock(&rwlock_);
5371
CHECK(0 == ret) << "wlock failed: " << ret << ", " << strerror(ret);
72+
#if CURVE_CHECK_PTHREAD_WRLOCK_TID
73+
tid_ = gettid();
74+
#endif
5475
}
5576

5677
int TryWRLock() override {
57-
return pthread_rwlock_trywrlock(&rwlock_);
78+
int ret = pthread_rwlock_trywrlock(&rwlock_);
79+
if (CURVE_UNLIKELY(ret != 0)) {
80+
return ret;
81+
}
82+
83+
#if CURVE_CHECK_PTHREAD_WRLOCK_TID
84+
tid_ = gettid();
85+
#endif
86+
return 0;
5887
}
5988

6089
void RDLock() override {
@@ -67,6 +96,19 @@ class PthreadRWLockBase : public RWLockBase {
6796
}
6897

6998
void Unlock() override {
99+
#if CURVE_CHECK_PTHREAD_WRLOCK_TID
100+
if (tid_ != 0) {
101+
const pid_t current = gettid();
102+
// If CHECK here is triggered, please look at the comments at the
103+
// beginning of the file.
104+
// In the meantime, the simplest solution might be to use
105+
// `BthreadRWLock` locks everywhere.
106+
CHECK(tid_ == current)
107+
<< ", tid has changed, previous tid: " << tid_
108+
<< ", current tid: " << current;
109+
tid_ = 0;
110+
}
111+
#endif
70112
pthread_rwlock_unlock(&rwlock_);
71113
}
72114

@@ -76,8 +118,14 @@ class PthreadRWLockBase : public RWLockBase {
76118

77119
pthread_rwlock_t rwlock_;
78120
pthread_rwlockattr_t rwlockAttr_;
121+
122+
#if CURVE_CHECK_PTHREAD_WRLOCK_TID
123+
pid_t tid_ = 0;
124+
#endif
79125
};
80126

127+
#undef CURVE_CHECK_PTHREAD_WRLOCK_TID
128+
81129
class RWLock : public PthreadRWLockBase {
82130
public:
83131
RWLock() {
@@ -122,7 +170,7 @@ class BthreadRWLock : public RWLockBase {
122170
}
123171

124172
int TryWRLock() override {
125-
// not support yet
173+
LOG(WARNING) << "TryWRLock not support yet";
126174
return EINVAL;
127175
}
128176

@@ -132,7 +180,7 @@ class BthreadRWLock : public RWLockBase {
132180
}
133181

134182
int TryRDLock() override {
135-
// not support yet
183+
LOG(WARNING) << "TryRDLock not support yet";
136184
return EINVAL;
137185
}
138186

0 commit comments

Comments
 (0)