Skip to content

Commit

Permalink
Add support for Fast Snapshot Restores
Browse files Browse the repository at this point in the history
Signed-off-by: Eddie Torres <[email protected]>
  • Loading branch information
torredil committed Apr 3, 2023
1 parent ed42e16 commit de20e06
Show file tree
Hide file tree
Showing 11 changed files with 594 additions and 5 deletions.
39 changes: 39 additions & 0 deletions docs/fast-snapshot-restores.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Fast Snapshot Restores

The EBS CSI Driver provides support for [Fast Snapshot Restores (FSR)](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-fast-snapshot-restore.html) via `VolumeSnapshotClass.parameters.fastSnapshotRestoreAvailabilityZones`.

Amazon EBS fast snapshot restore (FSR) enables you to create a volume from a snapshot that is fully initialized at creation. This eliminates the latency of I/O operations on a block when it is accessed for the first time. Volumes that are created using fast snapshot restore instantly deliver all of their provisioned performance.

Availability zones are specified as a comma-separated list. Spaces in the list are ignored.

**Example**
```
apiVersion: snapshot.storage.k8s.io/v1
kind: VolumeSnapshotClass
metadata:
  name: csi-aws-vsc
driver: ebs.csi.aws.com
deletionPolicy: Delete
parameters:
  fastSnapshotRestoreAvailabilityZones: "us-east-1a, us-east-1b"
```

## Prerequisites

- Install the [Kubernetes Volume Snapshot CRDs](https://github.com/kubernetes-csi/external-snapshotter/tree/master/client/config/crd) and external-snapshotter sidecar. For installation instructions, see [CSI Snapshotter Usage](https://github.com/kubernetes-csi/external-snapshotter#usage).

- The EBS CSI Driver must be given permission to access the [`EnableFastSnapshotRestores` EC2 API](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_EnableFastSnapshotRestores.html). This example snippet can be used in an IAM policy to grant access to `EnableFastSnapshotRestores`:

```json
{
"Effect": "Allow",
"Action": [
"ec2:EnableFastSnapshotRestores"
],
"Resource": "*"
}
```

## Failure Mode

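Before creating the snapshot, the driver attempts to verify that each availability zone provided is one of the zones returned by the EC2 `DescribeAvailabilityZones` API. If a zone is not found, the request fails with an `InvalidArgument` error and no snapshot is created. If the zone lookup itself fails, the failure is logged and snapshot creation proceeds. If the `EnableFastSnapshotRestores` API call then fails, the driver will hard-fail the request and delete the snapshot. This is to ensure that the snapshot is not left in an inconsistent state.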
31 changes: 31 additions & 0 deletions pkg/cloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,24 @@ func (c *cloud) ec2SnapshotResponseToStruct(ec2Snapshot *ec2.Snapshot) *Snapshot
return snapshot
}

// EnableFastSnapshotRestores asks EC2 to enable fast snapshot restores for
// snapshotID in every zone listed in availabilityZones.
//
// An error from the EC2 call itself is returned unchanged together with a nil
// output. When the call succeeds but the output lists unsuccessful items, the
// output is returned alongside an error describing those items.
func (c *cloud) EnableFastSnapshotRestores(ctx context.Context, availabilityZones []string, snapshotID string) (*ec2.EnableFastSnapshotRestoresOutput, error) {
	klog.V(4).InfoS("Creating Fast Snapshot Restores", "snapshotID", snapshotID, "availabilityZones", availabilityZones)

	input := &ec2.EnableFastSnapshotRestoresInput{
		AvailabilityZones: aws.StringSlice(availabilityZones),
		SourceSnapshotIds: []*string{aws.String(snapshotID)},
	}

	output, err := c.ec2.EnableFastSnapshotRestoresWithContext(ctx, input)
	switch {
	case err != nil:
		return nil, err
	case len(output.Unsuccessful) > 0:
		// The API call went through, but EC2 reported per-item failures.
		return output, fmt.Errorf("failed to create fast snapshot restores for snapshot %s: %v", snapshotID, output.Unsuccessful)
	default:
		return output, nil
	}
}

func (c *cloud) getVolume(ctx context.Context, request *ec2.DescribeVolumesInput) (*ec2.Volume, error) {
var volumes []*ec2.Volume
var nextToken *string
Expand Down Expand Up @@ -1236,6 +1254,19 @@ func (c *cloud) randomAvailabilityZone(ctx context.Context) (string, error) {
return zones[0], nil
}

// AvailabilityZones returns the set of availability zone names reported by
// DescribeAvailabilityZones through the cloud's EC2 client.
//
// The result is a set keyed by zone name, so callers can test membership with
// a map lookup. Entries that are nil or carry no zone name are skipped rather
// than dereferenced. A failure of the EC2 call is wrapped and returned with a
// nil map.
func (c *cloud) AvailabilityZones(ctx context.Context) (map[string]struct{}, error) {
	response, err := c.ec2.DescribeAvailabilityZonesWithContext(ctx, &ec2.DescribeAvailabilityZonesInput{})
	if err != nil {
		return nil, fmt.Errorf("error describing availability zones: %w", err)
	}

	zones := make(map[string]struct{}, len(response.AvailabilityZones))
	for _, zone := range response.AvailabilityZones {
		// ZoneName is a *string; guard against nil so a malformed response
		// cannot panic the caller.
		if zone == nil || zone.ZoneName == nil {
			continue
		}
		zones[*zone.ZoneName] = struct{}{}
	}
	return zones, nil
}

func volumeModificationDone(state string) bool {
if state == ec2.VolumeModificationStateCompleted || state == ec2.VolumeModificationStateOptimizing {
return true
Expand Down
2 changes: 2 additions & 0 deletions pkg/cloud/cloud_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ type Cloud interface {
GetSnapshotByName(ctx context.Context, name string) (snapshot *Snapshot, err error)
GetSnapshotByID(ctx context.Context, snapshotID string) (snapshot *Snapshot, err error)
ListSnapshots(ctx context.Context, volumeID string, maxResults int64, nextToken string) (listSnapshotsResponse *ListSnapshotsResponse, err error)
EnableFastSnapshotRestores(ctx context.Context, availabilityZones []string, snapshotID string) (*ec2.EnableFastSnapshotRestoresOutput, error)
AvailabilityZones(ctx context.Context) (map[string]struct{}, error)
}
136 changes: 136 additions & 0 deletions pkg/cloud/cloud_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,142 @@ func TestCreateSnapshot(t *testing.T) {
}
}

// TestEnableFastSnapshotRestores drives cloud.EnableFastSnapshotRestores
// against a mocked EC2 client and covers three outcomes: a fully successful
// response, a response that reports unsuccessful items, and an error returned
// by the EC2 call itself.
func TestEnableFastSnapshotRestores(t *testing.T) {
	testCases := []struct {
		name              string
		snapshotID        string
		availabilityZones []string
		// expOutput is the output the mocked EC2 call returns.
		expOutput *ec2.EnableFastSnapshotRestoresOutput
		// mockErr is the error the mocked EC2 call returns. It is kept
		// separate from expErr so that the "unsuccessful response" case
		// reaches the Unsuccessful check instead of short-circuiting on
		// the API error.
		mockErr error
		// expErr, when non-nil, must have its message contained in the
		// message of the error returned by EnableFastSnapshotRestores.
		expErr error
	}{
		{
			name:              "success: normal",
			snapshotID:        "snap-test-id",
			availabilityZones: []string{"us-west-2a", "us-west-2b"},
			expOutput: &ec2.EnableFastSnapshotRestoresOutput{
				Successful: []*ec2.EnableFastSnapshotRestoreSuccessItem{{
					AvailabilityZone: aws.String("us-west-2a,us-west-2b"),
					SnapshotId:       aws.String("snap-test-id")}},
				Unsuccessful: []*ec2.EnableFastSnapshotRestoreErrorItem{},
			},
			mockErr: nil,
			expErr:  nil,
		},
		{
			name:              "fail: unsuccessful response",
			snapshotID:        "snap-test-id",
			availabilityZones: []string{"us-west-2a", "invalid-zone"},
			expOutput: &ec2.EnableFastSnapshotRestoresOutput{
				Unsuccessful: []*ec2.EnableFastSnapshotRestoreErrorItem{{
					SnapshotId: aws.String("snap-test-id"),
					FastSnapshotRestoreStateErrors: []*ec2.EnableFastSnapshotRestoreStateErrorItem{
						{AvailabilityZone: aws.String("us-west-2a,invalid-zone"),
							Error: &ec2.EnableFastSnapshotRestoreStateError{
								Message: aws.String("failed to create fast snapshot restore")}},
					},
				}},
			},
			// The API call itself succeeds; the failure is carried in the output.
			mockErr: nil,
			expErr:  fmt.Errorf("failed to create fast snapshot restores for snapshot"),
		},
		{
			name:              "fail: error",
			snapshotID:        "",
			availabilityZones: nil,
			expOutput:         nil,
			mockErr:           fmt.Errorf("EnableFastSnapshotRestores error"),
			expErr:            fmt.Errorf("EnableFastSnapshotRestores error"),
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			mockCtrl := gomock.NewController(t)
			// Deferred so expectations are still verified when t.Fatalf aborts the subtest.
			defer mockCtrl.Finish()
			mockEC2 := NewMockEC2(mockCtrl)
			c := newCloud(mockEC2)

			ctx := context.Background()
			// Exactly one call is required; AnyTimes would also accept zero calls.
			mockEC2.EXPECT().EnableFastSnapshotRestoresWithContext(gomock.Eq(ctx), gomock.Any()).Return(tc.expOutput, tc.mockErr).Times(1)

			response, err := c.EnableFastSnapshotRestores(ctx, tc.availabilityZones, tc.snapshotID)

			if err != nil {
				if tc.expErr == nil {
					t.Fatalf("EnableFastSnapshotRestores() failed: expected no error, got: %v", err)
				}
				// Substring match: the unsuccessful-response error appends the
				// snapshot ID and the formatted error items to the message.
				if !strings.Contains(err.Error(), tc.expErr.Error()) {
					t.Fatalf("EnableFastSnapshotRestores() failed: expected error containing %q, got %v", tc.expErr.Error(), err)
				}
				return
			}

			if tc.expErr != nil {
				t.Fatalf("EnableFastSnapshotRestores() failed: expected error %v, got nothing", tc.expErr)
			}
			if len(response.Successful) == 0 || len(response.Unsuccessful) > 0 {
				t.Fatalf("EnableFastSnapshotRestores() failed: expected successful response, got %v", response)
			}
			if *response.Successful[0].SnapshotId != tc.snapshotID {
				t.Fatalf("EnableFastSnapshotRestores() failed: expected successful response to have SnapshotId %s, got %s", tc.snapshotID, *response.Successful[0].SnapshotId)
			}
			az := strings.Split(*response.Successful[0].AvailabilityZone, ",")
			if !reflect.DeepEqual(az, tc.availabilityZones) {
				t.Fatalf("EnableFastSnapshotRestores() failed: expected successful response to have AvailabilityZone %v, got %v", tc.availabilityZones, az)
			}
		})
	}
}

// TestAvailabilityZones drives cloud.AvailabilityZones against a mocked EC2
// client. It checks that a returned zone name appears in the resulting set,
// and that an error from DescribeAvailabilityZones is surfaced to the caller.
func TestAvailabilityZones(t *testing.T) {
	testCases := []struct {
		name string
		// availabilityZone is the zone name expected in the returned set.
		availabilityZone string
		// expOutput and expErr are what the mocked EC2 call returns.
		expOutput *ec2.DescribeAvailabilityZonesOutput
		expErr    error
	}{
		{
			name:             "success: normal",
			availabilityZone: expZone,
			expOutput: &ec2.DescribeAvailabilityZonesOutput{
				AvailabilityZones: []*ec2.AvailabilityZone{
					{ZoneName: aws.String(expZone)},
				}},
			expErr: nil,
		},
		{
			name:             "fail: error",
			availabilityZone: "",
			expOutput:        nil,
			expErr:           fmt.Errorf("TestAvailabilityZones error"),
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			mockCtrl := gomock.NewController(t)
			// Deferred so expectations are still verified when t.Fatalf aborts the subtest.
			defer mockCtrl.Finish()
			mockEC2 := NewMockEC2(mockCtrl)
			c := newCloud(mockEC2)

			ctx := context.Background()
			// Exactly one call is required; AnyTimes would also accept zero calls.
			mockEC2.EXPECT().DescribeAvailabilityZonesWithContext(gomock.Eq(ctx), gomock.Any()).Return(tc.expOutput, tc.expErr).Times(1)

			az, err := c.AvailabilityZones(ctx)
			if err != nil {
				if tc.expErr == nil {
					t.Fatalf("AvailabilityZones() failed: expected no error, got: %v", err)
				}
				// The implementation wraps the EC2 error, so match on the
				// underlying message rather than on the full string.
				if !strings.Contains(err.Error(), tc.expErr.Error()) {
					t.Fatalf("AvailabilityZones() failed: expected error containing %q, got %v", tc.expErr.Error(), err)
				}
				return
			}

			if tc.expErr != nil {
				t.Fatalf("AvailabilityZones() failed: expected error, got nothing")
			}
			if _, ok := az[tc.availabilityZone]; !ok {
				// Report the whole returned set; the map value is an empty struct.
				t.Fatalf("AvailabilityZones() failed: expected to find %s, got %v", tc.availabilityZone, az)
			}
		})
	}
}

func TestDeleteSnapshot(t *testing.T) {
testCases := []struct {
name string
Expand Down
1 change: 1 addition & 0 deletions pkg/cloud/ec2_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ type EC2 interface {
DescribeVolumesModificationsWithContext(ctx aws.Context, input *ec2.DescribeVolumesModificationsInput, opts ...request.Option) (*ec2.DescribeVolumesModificationsOutput, error)
DescribeAvailabilityZonesWithContext(ctx aws.Context, input *ec2.DescribeAvailabilityZonesInput, opts ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error)
CreateTagsWithContext(ctx aws.Context, input *ec2.CreateTagsInput, opts ...request.Option) (*ec2.CreateTagsOutput, error)
EnableFastSnapshotRestoresWithContext(ctx aws.Context, input *ec2.EnableFastSnapshotRestoresInput, opts ...request.Option) (*ec2.EnableFastSnapshotRestoresOutput, error)
}
30 changes: 30 additions & 0 deletions pkg/cloud/mock_cloud.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions pkg/cloud/mock_ec2.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions pkg/driver/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ const (
TagKeyPrefix = "tagSpecification"
)

// constants of keys in snapshot parameters
const (
// FastSnapshotRestoreAvailabilityZones represents key for fast snapshot restore availability zones.
// The value is all lowercase because CreateSnapshot lowercases each incoming
// parameter key (strings.ToLower) before comparing it against this constant.
FastSnapshotRestoreAvailabilityZones = "fastsnapshotrestoreavailabilityzones"
)

// constants for volume tags and their values
const (
// ResourceLifecycleTagPrefix is prefix of tag for provisioned EBS volume that
Expand Down
41 changes: 36 additions & 5 deletions pkg/driver/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -605,11 +605,18 @@ func (d *controllerService) CreateSnapshot(ctx context.Context, req *csi.CreateS
}

var vscTags []string
var fsrAvailabilityZones []string
for key, value := range req.GetParameters() {
if strings.HasPrefix(key, TagKeyPrefix) {
vscTags = append(vscTags, value)
} else {
return nil, status.Errorf(codes.InvalidArgument, "Invalid parameter key %s for CreateSnapshot", key)
switch strings.ToLower(key) {
case FastSnapshotRestoreAvailabilityZones:
f := strings.ReplaceAll(value, " ", "")
fsrAvailabilityZones = strings.Split(f, ",")
default:
if strings.HasPrefix(key, TagKeyPrefix) {
vscTags = append(vscTags, value)
} else {
return nil, status.Errorf(codes.InvalidArgument, "Invalid parameter key %s for CreateSnapshot", key)
}
}
}

Expand Down Expand Up @@ -639,11 +646,35 @@ func (d *controllerService) CreateSnapshot(ctx context.Context, req *csi.CreateS
Tags: snapshotTags,
}

snapshot, err = d.cloud.CreateSnapshot(ctx, volumeID, opts)
// Check if the availability zone is supported for fast snapshot restore
if len(fsrAvailabilityZones) > 0 {
zones, error := d.cloud.AvailabilityZones(ctx)
if error != nil {
klog.ErrorS(error, "failed to get availability zones")
} else {
klog.V(4).InfoS("Availability Zones", "zone", zones)
for _, az := range fsrAvailabilityZones {
if _, ok := zones[az]; !ok {
return nil, status.Errorf(codes.InvalidArgument, "Availability zone %s is not supported for fast snapshot restore", az)
}
}
}
}

snapshot, err = d.cloud.CreateSnapshot(ctx, volumeID, opts)
if err != nil {
return nil, status.Errorf(codes.Internal, "Could not create snapshot %q: %v", snapshotName, err)
}

if len(fsrAvailabilityZones) > 0 {
_, err := d.cloud.EnableFastSnapshotRestores(ctx, fsrAvailabilityZones, snapshot.SnapshotID)
if err != nil {
if _, err = d.cloud.DeleteSnapshot(ctx, snapshot.SnapshotID); err != nil {
return nil, status.Errorf(codes.Internal, "Could not delete snapshot ID %q: %v", snapshotName, err)
}
return nil, status.Errorf(codes.Internal, "Failed to create Fast Snapshot Restores for snapshot ID %q: %v", snapshotName, err)
}
}
return newCreateSnapshotResponse(snapshot)
}

Expand Down
Loading

0 comments on commit de20e06

Please sign in to comment.