Skip to content

Commit dfedaf7

Browse files
authored
refactor: remove FULL-SYNC-CUT cmd #2687 (#2688)
* refactor: remove FULL-SYNC-CUT cmd #2687
1 parent 66b87e1 commit dfedaf7

File tree

9 files changed

+62
-140
lines changed

9 files changed

+62
-140
lines changed

src/server/cluster/cluster_family.cc

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -701,10 +701,6 @@ void ClusterFamily::DflyClusterMigrationFinalize(CmdArgList args, ConnectionCont
701701
if (!migration)
702702
return cntx->SendError(kIdNotFound);
703703

704-
if (migration->GetState() != MigrationState::C_STABLE_SYNC) {
705-
return cntx->SendError("Migration process is not in STABLE_SYNC state");
706-
}
707-
708704
// TODO implement blocking on migrated slots only
709705

710706
bool is_block_active = true;
@@ -742,8 +738,6 @@ void ClusterFamily::DflyMigrate(CmdArgList args, ConnectionContext* cntx) {
742738
MigrationConf(args, cntx);
743739
} else if (sub_cmd == "FLOW") {
744740
DflyMigrateFlow(args, cntx);
745-
} else if (sub_cmd == "FULL-SYNC-CUT") {
746-
DflyMigrateFullSyncCut(args, cntx);
747741
} else if (sub_cmd == "ACK") {
748742
DflyMigrateAck(args, cntx);
749743
} else {
@@ -864,36 +858,6 @@ void ClusterFamily::DflyMigrateFlow(CmdArgList args, ConnectionContext* cntx) {
864858
info->StartFlow(&shard->db_slice(), sync_id, server_family_->journal(), cntx->conn()->socket());
865859
}
866860

867-
void ClusterFamily::DflyMigrateFullSyncCut(CmdArgList args, ConnectionContext* cntx) {
868-
CHECK(cntx->slot_migration_id != 0);
869-
CmdArgParser parser{args};
870-
auto [sync_id, shard_id] = parser.Next<uint32_t, uint32_t>();
871-
872-
if (auto err = parser.Error(); err) {
873-
return cntx->SendError(err->MakeReply());
874-
}
875-
876-
VLOG(1) << "Full sync cut "
877-
<< " sync_id: " << sync_id << " shard_id: " << shard_id << " shard";
878-
879-
std::lock_guard lck(migration_mu_);
880-
auto migration_it = std::find_if(
881-
incoming_migrations_jobs_.begin(), incoming_migrations_jobs_.end(),
882-
[cntx](const auto& el) { return cntx->slot_migration_id == el->GetLocalSyncId(); });
883-
884-
if (migration_it == incoming_migrations_jobs_.end()) {
885-
LOG(WARNING) << "Couldn't find migration id";
886-
return cntx->SendError(kIdNotFound);
887-
}
888-
889-
(*migration_it)->SetStableSyncForFlow(shard_id);
890-
if ((*migration_it)->GetState() == MigrationState::C_STABLE_SYNC) {
891-
LOG(INFO) << "STABLE-SYNC state is set for sync_id " << sync_id;
892-
}
893-
894-
cntx->SendOk();
895-
}
896-
897861
void ClusterFamily::FinalizeIncomingMigration(uint32_t local_sync_id) {
898862
lock_guard lk(migration_mu_);
899863
auto it =

src/server/cluster/cluster_family.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,6 @@ class ClusterFamily {
7171
// source for migration
7272
void DflyMigrateFlow(CmdArgList args, ConnectionContext* cntx);
7373

74-
void DflyMigrateFullSyncCut(CmdArgList args, ConnectionContext* cntx);
75-
7674
void DflyMigrateAck(CmdArgList args, ConnectionContext* cntx);
7775

7876
// create a ClusterSlotMigration entity which will execute migration

src/server/cluster/cluster_slot_migration.cc

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,16 +110,6 @@ ClusterSlotMigration::Info ClusterSlotMigration::GetInfo() const {
110110
return {ctx.host, ctx.port};
111111
}
112112

113-
void ClusterSlotMigration::SetStableSyncForFlow(uint32_t flow) {
114-
DCHECK(shard_flows_.size() > flow);
115-
shard_flows_[flow]->SetStableSync();
116-
117-
if (std::all_of(shard_flows_.begin(), shard_flows_.end(),
118-
[](const auto& el) { return el->IsStableSync(); })) {
119-
state_ = MigrationState::C_STABLE_SYNC;
120-
}
121-
}
122-
123113
bool ClusterSlotMigration::IsFinalized() const {
124114
return std::all_of(shard_flows_.begin(), shard_flows_.end(),
125115
[](const auto& el) { return el->IsFinalized(); });

src/server/cluster/cluster_slot_migration.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@ class ClusterSlotMigration : private ProtocolClient {
4242
return state_;
4343
}
4444

45-
void SetStableSyncForFlow(uint32_t flow);
46-
4745
void Stop();
4846

4947
const SlotRanges& GetSlots() const {

src/server/cluster/outgoing_slot_migration.cc

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include <atomic>
88

9+
#include "base/logging.h"
910
#include "server/db_slice.h"
1011
#include "server/journal/streamer.h"
1112

@@ -17,46 +18,58 @@ class OutgoingMigration::SliceSlotMigration {
1718
SliceSlotMigration(DbSlice* slice, SlotSet slots, uint32_t sync_id, journal::Journal* journal,
1819
Context* cntx, io::Sink* dest)
1920
: streamer_(slice, std::move(slots), sync_id, journal, cntx) {
20-
streamer_.Start(dest);
2121
state_.store(MigrationState::C_FULL_SYNC, memory_order_relaxed);
22+
sync_fb_ = Fiber("slot-snapshot", [this, dest] { streamer_.Start(dest); });
2223
}
2324

2425
void Cancel() {
2526
streamer_.Cancel();
2627
}
2728

29+
void WaitForSnapshotFinished() {
30+
sync_fb_.JoinIfNeeded();
31+
}
32+
2833
void Finalize() {
2934
streamer_.SendFinalize();
3035
state_.store(MigrationState::C_FINISHED, memory_order_relaxed);
3136
}
3237

3338
MigrationState GetState() const {
34-
auto state = state_.load(memory_order_relaxed);
35-
return state == MigrationState::C_FULL_SYNC && streamer_.IsSnapshotFinished()
36-
? MigrationState::C_STABLE_SYNC
37-
: state;
39+
return state_.load(memory_order_relaxed);
3840
}
3941

4042
private:
4143
RestoreStreamer streamer_;
4244
// Atomic only for simple read operation, writes - from the same thread, reads - from any thread
4345
atomic<MigrationState> state_ = MigrationState::C_CONNECTING;
46+
Fiber sync_fb_;
4447
};
4548

4649
OutgoingMigration::OutgoingMigration(std::uint32_t flows_num, std::string ip, uint16_t port,
4750
SlotRanges slots, Context::ErrHandler err_handler)
4851
: host_ip_(ip), port_(port), slots_(slots), cntx_(err_handler), slot_migrations_(flows_num) {
4952
}
5053

51-
OutgoingMigration::~OutgoingMigration() = default;
54+
OutgoingMigration::~OutgoingMigration() {
55+
main_sync_fb_.JoinIfNeeded();
56+
}
5257

5358
void OutgoingMigration::StartFlow(DbSlice* slice, uint32_t sync_id, journal::Journal* journal,
5459
io::Sink* dest) {
5560
const auto shard_id = slice->shard_id();
5661

57-
std::lock_guard lck(flows_mu_);
58-
slot_migrations_[shard_id] =
59-
std::make_unique<SliceSlotMigration>(slice, slots_, sync_id, journal, &cntx_, dest);
62+
MigrationState state = MigrationState::C_NO_STATE;
63+
{
64+
std::lock_guard lck(flows_mu_);
65+
slot_migrations_[shard_id] =
66+
std::make_unique<SliceSlotMigration>(slice, slots_, sync_id, journal, &cntx_, dest);
67+
state = GetStateImpl();
68+
}
69+
70+
if (state == MigrationState::C_FULL_SYNC) {
71+
main_sync_fb_ = Fiber("outgoing_migration", &OutgoingMigration::SyncFb, this);
72+
}
6073
}
6174

6275
void OutgoingMigration::Finalize(uint32_t shard_id) {
@@ -75,10 +88,20 @@ MigrationState OutgoingMigration::GetState() const {
7588
MigrationState OutgoingMigration::GetStateImpl() const {
7689
MigrationState min_state = MigrationState::C_MAX_INVALID;
7790
for (const auto& slot_migration : slot_migrations_) {
78-
if (slot_migration)
91+
if (slot_migration) {
7992
min_state = std::min(min_state, slot_migration->GetState());
93+
} else {
94+
min_state = MigrationState::C_NO_STATE;
95+
}
8096
}
8197
return min_state;
8298
}
8399

100+
void OutgoingMigration::SyncFb() {
101+
for (auto& migration : slot_migrations_) {
102+
migration->WaitForSnapshotFinished();
103+
}
104+
VLOG(1) << "Migrations snapshot is finihed";
105+
}
106+
84107
} // namespace dfly

src/server/cluster/outgoing_slot_migration.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,17 @@ class OutgoingMigration {
4747
// SliceSlotMigration manages state and data transferring for the corresponding shard
4848
class SliceSlotMigration;
4949

50+
void SyncFb();
51+
5052
private:
5153
std::string host_ip_;
5254
uint16_t port_;
5355
SlotRanges slots_;
5456
Context cntx_;
5557
mutable Mutex flows_mu_;
5658
std::vector<std::unique_ptr<SliceSlotMigration>> slot_migrations_ ABSL_GUARDED_BY(flows_mu_);
59+
60+
Fiber main_sync_fb_;
5761
};
5862

5963
} // namespace dfly

src/server/journal/streamer.cc

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -65,39 +65,30 @@ void RestoreStreamer::Start(io::Sink* dest) {
6565

6666
JournalStreamer::Start(dest);
6767

68-
DCHECK(!snapshot_fb_.IsJoinable());
69-
snapshot_fb_ = fb2::Fiber("slot-snapshot", [this] {
70-
PrimeTable::Cursor cursor;
71-
uint64_t last_yield = 0;
72-
PrimeTable* pt = &db_slice_->databases()[0]->prime;
73-
74-
do {
75-
if (fiber_cancellation_.IsCancelled())
76-
return;
77-
78-
bool written = false;
79-
cursor = pt->Traverse(cursor, [&](PrimeTable::bucket_iterator it) {
80-
if (WriteBucket(it)) {
81-
written = true;
82-
}
83-
});
84-
if (written) {
85-
NotifyWritten(true);
86-
}
87-
++last_yield;
68+
PrimeTable::Cursor cursor;
69+
uint64_t last_yield = 0;
70+
PrimeTable* pt = &db_slice_->databases()[0]->prime;
8871

89-
if (last_yield >= 100) {
90-
ThisFiber::Yield();
91-
last_yield = 0;
72+
do {
73+
if (fiber_cancellation_.IsCancelled())
74+
return;
75+
76+
bool written = false;
77+
cursor = pt->Traverse(cursor, [&](PrimeTable::bucket_iterator it) {
78+
if (WriteBucket(it)) {
79+
written = true;
9280
}
93-
} while (cursor);
81+
});
82+
if (written) {
83+
NotifyWritten(true);
84+
}
85+
++last_yield;
9486

95-
VLOG(2) << "FULL-SYNC-CUT for " << sync_id_ << " : " << db_slice_->shard_id();
96-
WriteCommand(make_pair("DFLYMIGRATE", ArgSlice{"FULL-SYNC-CUT", absl::StrCat(sync_id_),
97-
absl::StrCat(db_slice_->shard_id())}));
98-
NotifyWritten(true);
99-
snapshot_finished_ = true;
100-
});
87+
if (last_yield >= 100) {
88+
ThisFiber::Yield();
89+
last_yield = 0;
90+
}
91+
} while (cursor);
10192
}
10293

10394
void RestoreStreamer::SendFinalize() {
@@ -110,12 +101,10 @@ void RestoreStreamer::SendFinalize() {
110101
}
111102

112103
RestoreStreamer::~RestoreStreamer() {
113-
CHECK(!snapshot_fb_.IsJoinable());
114104
}
115105

116106
void RestoreStreamer::Cancel() {
117107
fiber_cancellation_.Cancel();
118-
snapshot_fb_.JoinIfNeeded();
119108
db_slice_->UnregisterOnChange(snapshot_version_);
120109
JournalStreamer::Cancel();
121110
}

src/server/journal/streamer.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ class RestoreStreamer : public JournalStreamer {
8282
uint64_t snapshot_version_ = 0;
8383
SlotSet my_slots_;
8484
uint32_t sync_id_;
85-
Fiber snapshot_fb_;
8685
Cancellation fiber_cancellation_;
8786
bool snapshot_finished_ = false;
8887
};

tests/dragonfly/cluster_test.py

Lines changed: 4 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -816,22 +816,7 @@ async def test_cluster_slot_migration(df_local_factory: DflyInstanceFactory):
816816
c_nodes_admin,
817817
)
818818

819-
while (
820-
await c_nodes_admin[1].execute_command(
821-
"DFLYCLUSTER", "SLOT-MIGRATION-STATUS", "127.0.0.1", str(nodes[0].admin_port)
822-
)
823-
!= "STABLE_SYNC"
824-
):
825-
await asyncio.sleep(0.05)
826-
827-
status = await c_nodes_admin[0].execute_command(
828-
"DFLYCLUSTER", "SLOT-MIGRATION-STATUS", "127.0.0.1", str(nodes[1].port)
829-
)
830-
assert "STABLE_SYNC" == status
831-
832-
status = await c_nodes_admin[0].execute_command("DFLYCLUSTER", "SLOT-MIGRATION-STATUS")
833-
assert ["out 127.0.0.1:30002 STABLE_SYNC"] == status
834-
819+
await asyncio.sleep(0.5)
835820
try:
836821
await c_nodes_admin[1].execute_command(
837822
"DFLYCLUSTER",
@@ -850,12 +835,6 @@ async def test_cluster_slot_migration(df_local_factory: DflyInstanceFactory):
850835
c_nodes_admin,
851836
)
852837

853-
status = await c_nodes_admin[0].execute_command("DFLYCLUSTER SLOT-MIGRATION-STATUS")
854-
assert ["out 127.0.0.1:30002 STABLE_SYNC"] == status
855-
856-
status = await c_nodes_admin[1].execute_command("DFLYCLUSTER SLOT-MIGRATION-STATUS")
857-
assert ["in 127.0.0.1:31001 STABLE_SYNC"] == status
858-
859838
await close_clients(*c_nodes, *c_nodes_admin)
860839

861840

@@ -920,13 +899,7 @@ async def test_cluster_data_migration(df_local_factory: DflyInstanceFactory):
920899
assert await c_nodes[0].set("KEY0", "value")
921900
assert await c_nodes[0].set("KEY1", "value")
922901

923-
while (
924-
await c_nodes_admin[1].execute_command(
925-
"DFLYCLUSTER", "SLOT-MIGRATION-STATUS", "127.0.0.1", str(nodes[0].admin_port)
926-
)
927-
!= "STABLE_SYNC"
928-
):
929-
await asyncio.sleep(0.05)
902+
await asyncio.sleep(0.5)
930903

931904
assert await c_nodes[0].set("KEY4", "value")
932905
assert await c_nodes[0].set("KEY5", "value")
@@ -1090,28 +1063,12 @@ async def generate_config():
10901063
keeping = node.slots[num_outgoing:]
10911064
node.next_slots.extend(keeping)
10921065

1093-
# Busy loop for migrations to finish - all in stable state
1094-
iterations = 0
1095-
while True:
1096-
for node in nodes:
1097-
states = await node.admin_client.execute_command("DFLYCLUSTER", "SLOT-MIGRATION-STATUS")
1098-
print(states)
1099-
if not all(s.endswith("STABLE_SYNC") for s in states) and not states == "NO_STATE":
1100-
break
1101-
else:
1102-
break
1103-
1104-
iterations += 1
1105-
assert iterations < 100
1106-
1107-
await asyncio.sleep(0.1)
1108-
1109-
# Give seeder one more second
1066+
# some more time for the seeder
11101067
await asyncio.sleep(1.0)
11111068

1112-
# Stop seeder
11131069
seeder.stop()
11141070
await fill_task
1071+
await asyncio.sleep(1.0)
11151072

11161073
# Counter that pushes values to a list
11171074
async def list_counter(key, client: aioredis.RedisCluster):

0 commit comments

Comments
 (0)