diff --git a/internal/cloud/awscloud/secure-instance.go b/internal/cloud/awscloud/secure-instance.go index e4c9ce9f1e..98749cd022 100644 --- a/internal/cloud/awscloud/secure-instance.go +++ b/internal/cloud/awscloud/secure-instance.go @@ -552,8 +552,15 @@ func (a *AWS) createFleet(input *ec2.CreateFleetInput) (*ec2.CreateFleetOutput, input.SpotOptions = nil createFleetOutput, err = a.ec2.CreateFleet(context.Background(), input) } + + if len(createFleetOutput.Errors) > 0 && *createFleetOutput.Errors[0].ErrorCode == "UnfulfillableCapacity" { + logrus.Warn("Received UnfulfillableCapacity from CreateFleet with OnDemand instance option, retrying across availability zones") + input.LaunchTemplateConfigs[0].Overrides = nil + createFleetOutput, err = a.ec2.CreateFleet(context.Background(), input) + } + if err != nil { - return nil, fmt.Errorf("Unable to create on-demand fleet: %w", err) + return nil, fmt.Errorf("Unable to create fleet, tried on-demand and across AZs: %w", err) } if len(createFleetOutput.Errors) > 0 { diff --git a/internal/cloud/awscloud/secure-instance_test.go b/internal/cloud/awscloud/secure-instance_test.go index 6d8fc6338e..ad27d342b5 100644 --- a/internal/cloud/awscloud/secure-instance_test.go +++ b/internal/cloud/awscloud/secure-instance_test.go @@ -142,12 +142,12 @@ func TestSICreateFleetFailures(t *testing.T) { aws := awscloud.NewForTest(m, &ec2imdsmock{t, "instance-id", "region1"}, nil, nil, nil) require.NotNil(t, aws) - // unfillable capacity should call create fleet twice + // unfillable capacity should call create fleet thrice m.failFn["CreateFleet"] = nil si, err := aws.RunSecureInstance("iam-profile", "key-name", "cw-group", "hostname") require.Error(t, err) require.Nil(t, si) - require.Equal(t, 2, m.calledFn["CreateFleet"]) + require.Equal(t, 3, m.calledFn["CreateFleet"]) require.Equal(t, 1, m.calledFn["CreateSecurityGroup"]) require.Equal(t, 1, m.calledFn["CreateLaunchTemplate"]) require.Equal(t, 2, m.calledFn["DeleteSecurityGroup"]) @@ -158,7 +158,7 @@ func TestSICreateFleetFailures(t *testing.T) { si, err = aws.RunSecureInstance("iam-profile", "key-name", "cw-group", "hostname") require.Error(t, err) require.Nil(t, si) - require.Equal(t, 3, m.calledFn["CreateFleet"]) + require.Equal(t, 4, m.calledFn["CreateFleet"]) require.Equal(t, 2, m.calledFn["CreateSecurityGroup"]) require.Equal(t, 2, m.calledFn["CreateLaunchTemplate"]) require.Equal(t, 4, m.calledFn["DeleteSecurityGroup"])