Skip to content

Commit 09ce985

Browse files
authored
ebs br: control the snapshots batch size for fsr enable/disable (#48506) (#48527)
close #48505
1 parent 245dd62 commit 09ce985

File tree

2 files changed

+51
-32
lines changed

2 files changed

+51
-32
lines changed

br/pkg/aws/ebs.go

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
const (
2828
pollingPendingSnapshotInterval = 30 * time.Second
2929
errCodeTooManyPendingSnapshots = "PendingSnapshotLimitExceeded"
30+
FsrApiSnapshotsThreshold = 10
3031
)
3132

3233
type EC2Session struct {
@@ -293,24 +294,32 @@ func (e *EC2Session) EnableDataFSR(meta *config.EBSBasedBRMeta, targetAZ string)
293294

294295
for availableZone := range snapshotsIDsMap {
295296
targetAZ := availableZone
296-
eg.Go(func() error {
297-
log.Info("enable fsr for snapshots", zap.String("available zone", targetAZ))
298-
resp, err := e.ec2.EnableFastSnapshotRestores(&ec2.EnableFastSnapshotRestoresInput{
299-
AvailabilityZones: []*string{&targetAZ},
300-
SourceSnapshotIds: snapshotsIDsMap[targetAZ],
301-
})
302-
303-
if err != nil {
304-
return errors.Trace(err)
297+
// The FSR API rejects a request that carries too many snapshot IDs ("parameter SourceSnapshotIds must be less than or equal to 10"), so send the snapshots in batches of at most FsrApiSnapshotsThreshold.
298+
for i := 0; i < len(snapshotsIDsMap[targetAZ]); i += FsrApiSnapshotsThreshold {
299+
start := i
300+
end := i + FsrApiSnapshotsThreshold
301+
if end > len(snapshotsIDsMap[targetAZ]) {
302+
end = len(snapshotsIDsMap[targetAZ])
305303
}
304+
eg.Go(func() error {
305+
log.Info("enable fsr for snapshots", zap.String("available zone", targetAZ), zap.Any("snapshots", snapshotsIDsMap[targetAZ][start:end]))
306+
resp, err := e.ec2.EnableFastSnapshotRestores(&ec2.EnableFastSnapshotRestoresInput{
307+
AvailabilityZones: []*string{&targetAZ},
308+
SourceSnapshotIds: snapshotsIDsMap[targetAZ][start:end],
309+
})
306310

307-
if len(resp.Unsuccessful) > 0 {
308-
log.Warn("not all snapshots enabled FSR")
309-
return errors.Errorf("Some snapshot fails to enable FSR for available zone %s, such as %s, error code is %v", targetAZ, *resp.Unsuccessful[0].SnapshotId, resp.Unsuccessful[0].FastSnapshotRestoreStateErrors)
310-
}
311+
if err != nil {
312+
return errors.Trace(err)
313+
}
311314

312-
return e.waitDataFSREnabled(snapshotsIDsMap[targetAZ], targetAZ)
313-
})
315+
if len(resp.Unsuccessful) > 0 {
316+
log.Warn("not all snapshots enabled FSR")
317+
return errors.Errorf("Some snapshot fails to enable FSR for available zone %s, such as %s, error code is %v", targetAZ, *resp.Unsuccessful[0].SnapshotId, resp.Unsuccessful[0].FastSnapshotRestoreStateErrors)
318+
}
319+
320+
return e.waitDataFSREnabled(snapshotsIDsMap[targetAZ][start:end], targetAZ)
321+
})
322+
}
314323
}
315324
return snapshotsIDsMap, eg.Wait()
316325
}
@@ -328,7 +337,7 @@ func (e *EC2Session) waitDataFSREnabled(snapShotIDs []*string, targetAZ string)
328337
log.Info("starts check fsr pending snapshots", zap.Any("snapshots", pendingSnapshots), zap.String("available zone", targetAZ))
329338
for {
330339
if len(pendingSnapshots) == 0 {
331-
log.Info("all snapshots fsr enablement is finished", zap.String("available zone", targetAZ))
340+
log.Info("all snapshots in current batch fsr enablement is finished", zap.String("available zone", targetAZ), zap.Any("snapshots", snapShotIDs))
332341
return nil
333342
}
334343

@@ -379,25 +388,33 @@ func (e *EC2Session) DisableDataFSR(snapshotsIDsMap map[string][]*string) error
379388

380389
for availableZone := range snapshotsIDsMap {
381390
targetAZ := availableZone
382-
eg.Go(func() error {
383-
resp, err := e.ec2.DisableFastSnapshotRestores(&ec2.DisableFastSnapshotRestoresInput{
384-
AvailabilityZones: []*string{&targetAZ},
385-
SourceSnapshotIds: snapshotsIDsMap[targetAZ],
386-
})
387-
388-
if err != nil {
389-
return errors.Trace(err)
391+
// The FSR API rejects a request that carries too many snapshot IDs ("parameter SourceSnapshotIds must be less than or equal to 10"), so send the snapshots in batches of at most FsrApiSnapshotsThreshold.
392+
for i := 0; i < len(snapshotsIDsMap[targetAZ]); i += FsrApiSnapshotsThreshold {
393+
start := i
394+
end := i + FsrApiSnapshotsThreshold
395+
if end > len(snapshotsIDsMap[targetAZ]) {
396+
end = len(snapshotsIDsMap[targetAZ])
390397
}
398+
eg.Go(func() error {
399+
resp, err := e.ec2.DisableFastSnapshotRestores(&ec2.DisableFastSnapshotRestoresInput{
400+
AvailabilityZones: []*string{&targetAZ},
401+
SourceSnapshotIds: snapshotsIDsMap[targetAZ][start:end],
402+
})
391403

392-
if len(resp.Unsuccessful) > 0 {
393-
log.Warn("not all snapshots disabled FSR", zap.String("available zone", targetAZ))
394-
return errors.Errorf("Some snapshot fails to disable FSR for available zone %s, such as %s, error code is %v", targetAZ, *resp.Unsuccessful[0].SnapshotId, resp.Unsuccessful[0].FastSnapshotRestoreStateErrors)
395-
}
404+
if err != nil {
405+
return errors.Trace(err)
406+
}
396407

397-
log.Info("Disable FSR issued", zap.String("available zone", targetAZ))
408+
if len(resp.Unsuccessful) > 0 {
409+
log.Warn("not all snapshots disabled FSR", zap.String("available zone", targetAZ))
410+
return errors.Errorf("Some snapshot fails to disable FSR for available zone %s, such as %s, error code is %v", targetAZ, *resp.Unsuccessful[0].SnapshotId, resp.Unsuccessful[0].FastSnapshotRestoreStateErrors)
411+
}
398412

399-
return nil
400-
})
413+
log.Info("Disable FSR issued", zap.String("available zone", targetAZ), zap.Any("snapshots", snapshotsIDsMap[targetAZ][start:end]))
414+
415+
return nil
416+
})
417+
}
401418
}
402419
return eg.Wait()
403420
}

br/pkg/task/restore_ebs_meta.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,9 @@ func (h *restoreEBSMetaHelper) restoreVolumes(progress glue.Progress) (map[strin
244244

245245
if h.cfg.UseFSR {
246246
err = ec2Session.DisableDataFSR(snapshotsIDsMap)
247-
log.Error("disable fsr failed", zap.Error(err))
247+
if err != nil {
248+
log.Error("disable fsr failed", zap.Error(err))
249+
}
248250
}
249251
}()
250252

0 commit comments

Comments
 (0)