Skip to content

Commit fd75d1a

Browse files
committed
mmaprototype: fix missing change id bug
Previously, processStoreLeaseholderMsgInternal iterated over the slice rs.pendingChanges while modifying it in-place by swapping elements to the back and shortening the slice. Since the slice header is copied at the start of a range loop, this led to attempts to delete changes that are technically already out of the length had already been removed. This commit fixes the issue by copying the change IDs before the loop to avoid iterating over a mutating slice. Epic: none Release note: none
1 parent ca0d407 commit fd75d1a

File tree

2 files changed

+63
-2
lines changed

2 files changed

+63
-2
lines changed

pkg/kv/kvserver/allocator/mmaprototype/cluster_state.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,8 +1377,12 @@ func (cs *clusterState) processStoreLeaseholderMsgInternal(
13771377
// Since this range is going away, mark all the pending changes as
13781378
// enacted. This will allow the load adjustments to also be garbage
13791379
// collected in the future.
1380-
for _, change := range rs.pendingChanges {
1381-
cs.pendingChangeEnacted(change.ChangeID, now, true)
1380+
changeIDs := make([]ChangeID, len(rs.pendingChanges))
1381+
for i, change := range rs.pendingChanges {
1382+
changeIDs[i] = change.ChangeID
1383+
}
1384+
for _, changeID := range changeIDs {
1385+
cs.pendingChangeEnacted(changeID, now, true)
13821386
}
13831387
// Remove from the storeStates.
13841388
for _, replica := range rs.replicas {
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# This test tests an edge case where the leaseholder is removed from the range.
2+
# Regression test for a bug where processStoreLeaseholderMsgInternal was
3+
# iterating over the pending changes and removing them inside the loop.
4+
set-store
5+
store-id=1 node-id=1 attrs=purple locality-tiers=region=us-west-1,zone=us-west-1a
6+
store-id=2 node-id=2 attrs=yellow locality-tiers=region=us-east-1,zone=us-east-1a
7+
----
8+
node-id=1 failure-summary=ok locality-tiers=region=us-west-1,zone=us-west-1a,node=1
9+
store-id=1 membership=full attrs=purple locality-code=1:2:3:
10+
node-id=2 failure-summary=ok locality-tiers=region=us-east-1,zone=us-east-1a,node=2
11+
store-id=2 membership=full attrs=yellow locality-code=4:5:6:
12+
13+
store-leaseholder-msg
14+
store-id=1
15+
range-id=1 load=[80,80,80] raft-cpu=20 config=(num_replicas=3 constraints={'+region=us-west-1:1'} voter_constraints={'+region=us-west-1:1'})
16+
store-id=1 replica-id=1 type=VOTER_FULL leaseholder=true
17+
----
18+
19+
ranges
20+
----
21+
range-id=1 load=[cpu:80, write-bandwidth:80, byte-size:80] raft-cpu=20
22+
store-id=1 replica-id=1 type=VOTER_FULL leaseholder=true
23+
24+
make-pending-changes range-id=1
25+
rebalance-replica: remove-store-id=1 add-store-id=2
26+
----
27+
pending(2)
28+
change-id=1 store-id=2 node-id=2 range-id=1 load-delta=[cpu:88, write-bandwidth:88, byte-size:88] start=0s
29+
prev=(replica-id=none type=VOTER_FULL)
30+
next=(replica-id=unknown type=VOTER_FULL leaseholder=true)
31+
change-id=2 store-id=1 node-id=1 range-id=1 load-delta=[cpu:-80, write-bandwidth:-80, byte-size:-80] start=0s
32+
prev=(replica-id=1 type=VOTER_FULL leaseholder=true)
33+
next=(replica-id=none type=VOTER_FULL)
34+
35+
# Advance time to 1s so that we can see pending changes for removed ranges are
36+
# properly enacted.
37+
tick seconds=1
38+
----
39+
t=1s
40+
41+
store-leaseholder-msg
42+
store-id=1
43+
----
44+
45+
# The pending changes are enacted with enactedAtTime set to 1s.
46+
get-pending-changes
47+
----
48+
pending(2)
49+
change-id=1 store-id=2 node-id=2 range-id=1 load-delta=[cpu:88, write-bandwidth:88, byte-size:88] start=0s enacted=1s
50+
prev=(replica-id=none type=VOTER_FULL)
51+
next=(replica-id=unknown type=VOTER_FULL leaseholder=true)
52+
change-id=2 store-id=1 node-id=1 range-id=1 load-delta=[cpu:-80, write-bandwidth:-80, byte-size:-80] start=0s enacted=1s
53+
prev=(replica-id=1 type=VOTER_FULL leaseholder=true)
54+
next=(replica-id=none type=VOTER_FULL)
55+
56+
ranges
57+
----

0 commit comments

Comments
 (0)