Skip to content

Commit 80f96ed

Browse files
authored
ebs br: control the snapshots batch size for fsr enable/disable (pingcap#48506) (pingcap#48526)
close pingcap#48505
1 parent 23796a4 commit 80f96ed

File tree

2 files changed

+51
-33
lines changed

2 files changed

+51
-33
lines changed

br/pkg/aws/ebs.go

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
const (
2828
pollingPendingSnapshotInterval = 30 * time.Second
2929
errCodeTooManyPendingSnapshots = "PendingSnapshotLimitExceeded"
30+
FsrApiSnapshotsThreshold = 10
3031
)
3132

3233
type EC2Session struct {
@@ -294,24 +295,32 @@ func (e *EC2Session) EnableDataFSR(meta *config.EBSBasedBRMeta, targetAZ string)
294295

295296
for availableZone := range snapshotsIDsMap {
296297
targetAZ := availableZone
297-
eg.Go(func() error {
298-
log.Info("enable fsr for snapshots", zap.String("available zone", targetAZ))
299-
resp, err := e.ec2.EnableFastSnapshotRestores(&ec2.EnableFastSnapshotRestoresInput{
300-
AvailabilityZones: []*string{&targetAZ},
301-
SourceSnapshotIds: snapshotsIDsMap[targetAZ],
302-
})
303-
304-
if err != nil {
305-
return errors.Trace(err)
298+
// Send the snapshots in batches of at most FsrApiSnapshotsThreshold; a larger request fails with "parameter SourceSnapshotIds must be less than or equal to 10".
299+
for i := 0; i < len(snapshotsIDsMap[targetAZ]); i += FsrApiSnapshotsThreshold {
300+
start := i
301+
end := i + FsrApiSnapshotsThreshold
302+
if end > len(snapshotsIDsMap[targetAZ]) {
303+
end = len(snapshotsIDsMap[targetAZ])
306304
}
305+
eg.Go(func() error {
306+
log.Info("enable fsr for snapshots", zap.String("available zone", targetAZ), zap.Any("snapshots", snapshotsIDsMap[targetAZ][start:end]))
307+
resp, err := e.ec2.EnableFastSnapshotRestores(&ec2.EnableFastSnapshotRestoresInput{
308+
AvailabilityZones: []*string{&targetAZ},
309+
SourceSnapshotIds: snapshotsIDsMap[targetAZ][start:end],
310+
})
307311

308-
if len(resp.Unsuccessful) > 0 {
309-
log.Warn("not all snapshots enabled FSR")
310-
return errors.Errorf("Some snapshot fails to enable FSR for available zone %s, such as %s, error code is %v", targetAZ, *resp.Unsuccessful[0].SnapshotId, resp.Unsuccessful[0].FastSnapshotRestoreStateErrors)
311-
}
312+
if err != nil {
313+
return errors.Trace(err)
314+
}
312315

313-
return e.waitDataFSREnabled(snapshotsIDsMap[targetAZ], targetAZ)
314-
})
316+
if len(resp.Unsuccessful) > 0 {
317+
log.Warn("not all snapshots enabled FSR")
318+
return errors.Errorf("Some snapshot fails to enable FSR for available zone %s, such as %s, error code is %v", targetAZ, *resp.Unsuccessful[0].SnapshotId, resp.Unsuccessful[0].FastSnapshotRestoreStateErrors)
319+
}
320+
321+
return e.waitDataFSREnabled(snapshotsIDsMap[targetAZ][start:end], targetAZ)
322+
})
323+
}
315324
}
316325
return snapshotsIDsMap, eg.Wait()
317326
}
@@ -329,7 +338,7 @@ func (e *EC2Session) waitDataFSREnabled(snapShotIDs []*string, targetAZ string)
329338
log.Info("starts check fsr pending snapshots", zap.Any("snapshots", pendingSnapshots), zap.String("available zone", targetAZ))
330339
for {
331340
if len(pendingSnapshots) == 0 {
332-
log.Info("all snapshots fsr enablement is finished", zap.String("available zone", targetAZ))
341+
log.Info("all snapshots in current batch fsr enablement is finished", zap.String("available zone", targetAZ), zap.Any("snapshots", snapShotIDs))
333342
return nil
334343
}
335344

@@ -380,25 +389,33 @@ func (e *EC2Session) DisableDataFSR(snapshotsIDsMap map[string][]*string) error
380389

381390
for availableZone := range snapshotsIDsMap {
382391
targetAZ := availableZone
383-
eg.Go(func() error {
384-
resp, err := e.ec2.DisableFastSnapshotRestores(&ec2.DisableFastSnapshotRestoresInput{
385-
AvailabilityZones: []*string{&targetAZ},
386-
SourceSnapshotIds: snapshotsIDsMap[targetAZ],
387-
})
388-
389-
if err != nil {
390-
return errors.Trace(err)
392+
// Send the snapshots in batches of at most FsrApiSnapshotsThreshold; a larger request fails with "parameter SourceSnapshotIds must be less than or equal to 10".
393+
for i := 0; i < len(snapshotsIDsMap[targetAZ]); i += FsrApiSnapshotsThreshold {
394+
start := i
395+
end := i + FsrApiSnapshotsThreshold
396+
if end > len(snapshotsIDsMap[targetAZ]) {
397+
end = len(snapshotsIDsMap[targetAZ])
391398
}
399+
eg.Go(func() error {
400+
resp, err := e.ec2.DisableFastSnapshotRestores(&ec2.DisableFastSnapshotRestoresInput{
401+
AvailabilityZones: []*string{&targetAZ},
402+
SourceSnapshotIds: snapshotsIDsMap[targetAZ][start:end],
403+
})
392404

393-
if len(resp.Unsuccessful) > 0 {
394-
log.Warn("not all snapshots disabled FSR", zap.String("available zone", targetAZ))
395-
return errors.Errorf("Some snapshot fails to disable FSR for available zone %s, such as %s, error code is %v", targetAZ, *resp.Unsuccessful[0].SnapshotId, resp.Unsuccessful[0].FastSnapshotRestoreStateErrors)
396-
}
405+
if err != nil {
406+
return errors.Trace(err)
407+
}
397408

398-
log.Info("Disable FSR issued", zap.String("available zone", targetAZ))
409+
if len(resp.Unsuccessful) > 0 {
410+
log.Warn("not all snapshots disabled FSR", zap.String("available zone", targetAZ))
411+
return errors.Errorf("Some snapshot fails to disable FSR for available zone %s, such as %s, error code is %v", targetAZ, *resp.Unsuccessful[0].SnapshotId, resp.Unsuccessful[0].FastSnapshotRestoreStateErrors)
412+
}
399413

400-
return nil
401-
})
414+
log.Info("Disable FSR issued", zap.String("available zone", targetAZ), zap.Any("snapshots", snapshotsIDsMap[targetAZ][start:end]))
415+
416+
return nil
417+
})
418+
}
402419
}
403420
return eg.Wait()
404421
}

br/pkg/task/restore_ebs_meta.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,11 @@ func (h *restoreEBSMetaHelper) restoreVolumes(progress glue.Progress) (map[strin
241241
log.Error("failed to create all volumes, cleaning up created volume")
242242
ec2Session.DeleteVolumes(volumeIDMap)
243243
}
244-
245244
if h.cfg.UseFSR {
246245
err = ec2Session.DisableDataFSR(snapshotsIDsMap)
247-
log.Error("disable fsr failed", zap.Error(err))
246+
if err != nil {
247+
log.Error("disable fsr failed", zap.Error(err))
248+
}
248249
}
249250
}()
250251

0 commit comments

Comments
 (0)