Skip to content

Commit

Permalink
cloud/aws: add a third secure instance fallback across AZs
Browse files Browse the repository at this point in the history
If the on-demand request fails as well, retry one more time across
availability zones. This significantly increases the pool of available
instances, but it also increases network-related costs, as transferring
data between AZs is not free.
  • Loading branch information
croissanne committed Oct 7, 2024
1 parent 78d3b2f commit 905df41
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
9 changes: 8 additions & 1 deletion internal/cloud/awscloud/secure-instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,15 @@ func (a *AWS) createFleet(input *ec2.CreateFleetInput) (*ec2.CreateFleetOutput,
input.SpotOptions = nil
createFleetOutput, err = a.ec2.CreateFleet(context.Background(), input)
}

if len(createFleetOutput.Errors) > 0 && *createFleetOutput.Errors[0].ErrorCode == "UnfulfillableCapacity" {
logrus.Warn("Received UnfulfillableCapacity from CreateFleet with OnDemand instance option, retrying across availability zones")
input.LaunchTemplateConfigs[0].Overrides = nil
createFleetOutput, err = a.ec2.CreateFleet(context.Background(), input)
}

if err != nil {
return nil, fmt.Errorf("Unable to create on-demand fleet: %w", err)
return nil, fmt.Errorf("Unable to create fleet, tried on-demand and across AZs: %w", err)
}

if len(createFleetOutput.Errors) > 0 {
Expand Down
6 changes: 3 additions & 3 deletions internal/cloud/awscloud/secure-instance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,12 @@ func TestSICreateFleetFailures(t *testing.T) {
aws := awscloud.NewForTest(m, &ec2imdsmock{t, "instance-id", "region1"}, nil, nil, nil)
require.NotNil(t, aws)

// unfillable capacity should call create fleet twice
// unfillable capacity should call create fleet thrice
m.failFn["CreateFleet"] = nil
si, err := aws.RunSecureInstance("iam-profile", "key-name", "cw-group", "hostname")
require.Error(t, err)
require.Nil(t, si)
require.Equal(t, 2, m.calledFn["CreateFleet"])
require.Equal(t, 3, m.calledFn["CreateFleet"])
require.Equal(t, 1, m.calledFn["CreateSecurityGroup"])
require.Equal(t, 1, m.calledFn["CreateLaunchTemplate"])
require.Equal(t, 2, m.calledFn["DeleteSecurityGroup"])
Expand All @@ -158,7 +158,7 @@ func TestSICreateFleetFailures(t *testing.T) {
si, err = aws.RunSecureInstance("iam-profile", "key-name", "cw-group", "hostname")
require.Error(t, err)
require.Nil(t, si)
require.Equal(t, 3, m.calledFn["CreateFleet"])
require.Equal(t, 4, m.calledFn["CreateFleet"])
require.Equal(t, 2, m.calledFn["CreateSecurityGroup"])
require.Equal(t, 2, m.calledFn["CreateLaunchTemplate"])
require.Equal(t, 4, m.calledFn["DeleteSecurityGroup"])
Expand Down

0 comments on commit 905df41

Please sign in to comment.