From ed5a4ad65512c99e3b08f0612f15b9c89ce7c38f Mon Sep 17 00:00:00 2001 From: Claes Mogren Date: Thu, 13 Aug 2020 08:30:19 -0700 Subject: [PATCH] Wait for ENI and secondary IPs When a newly created ENI gets attached, we should wait for the secondary IPv4 addresses to be assigned to the ENI before continuing. --- pkg/awsutils/awsutils.go | 69 ++++++++++++++++++++++-- pkg/awsutils/awsutils_test.go | 80 ++++++++++++++++++++++++++++ pkg/awsutils/mocks/awsutils_mocks.go | 15 ++++++ pkg/ipamd/ipamd.go | 29 +--------- pkg/ipamd/ipamd_test.go | 10 ++-- 5 files changed, 167 insertions(+), 36 deletions(-) diff --git a/pkg/awsutils/awsutils.go b/pkg/awsutils/awsutils.go index d1a32117c9..bc8e6830d2 100644 --- a/pkg/awsutils/awsutils.go +++ b/pkg/awsutils/awsutils.go @@ -77,8 +77,11 @@ const ( var ( // ErrENINotFound is an error when ENI is not found. ErrENINotFound = errors.New("ENI is not found") - // ErrNoNetworkInterfaces occurs when - // DesribeNetworkInterfaces(eniID) returns no network interfaces + // ErrAllSecondaryIPsNotFound is returned when not all secondary IPs on an ENI have been assigned + ErrAllSecondaryIPsNotFound = errors.New("All secondary IPs not found") + // ErrNoSecondaryIPsFound is returned when not all secondary IPs on an ENI have been assigned + ErrNoSecondaryIPsFound = errors.New("No secondary IPs have been assigned to this ENI") + // ErrNoNetworkInterfaces occurs when DescribeNetworkInterfaces(eniID) returns no network interfaces ErrNoNetworkInterfaces = errors.New("No network interfaces found for ENI") // Custom user agent userAgent = request.WithAppendUserAgent("amazon-vpc-cni-k8s") @@ -160,6 +163,9 @@ type APIs interface { //isUnmanagedENI IsUnmanagedENI(eniID string) bool + + // WaitForENIAndIPsAttached waits until the ENI has been attached and the secondary IPs have been added + WaitForENIAndIPsAttached(eni string, wantedSecondaryIPs int) (ENIMetadata, error) } // EC2InstanceMetadataCache caches instance metadata @@ -1282,12 +1288,13 @@ func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int output, err := cache.ec2SVC.AssignPrivateIpAddressesWithContext(context.Background(), input, userAgent) awsAPILatency.WithLabelValues("AssignPrivateIpAddresses", fmt.Sprint(err != nil)).Observe(msSince(start)) if err != nil { - log.Errorf("Failed to allocate a private IP addresses on ENI %v: %v", eniID, err) - awsAPIErrInc("AssignPrivateIpAddresses", err) if containsPrivateIPAddressLimitExceededError(err) { - log.Debug("AssignPrivateIpAddresses returned PrivateIpAddressLimitExceeded") + log.Debug("AssignPrivateIpAddresses returned PrivateIpAddressLimitExceeded, probably because the data store was out of sync." + + "Returning nil since we will check this by calling EC2 to verify what addresses have already assigned to this ENI.") return nil } + log.Errorf("Failed to allocate a private IP addresses on ENI %v: %v", eniID, err) + awsAPIErrInc("AssignPrivateIpAddresses", err) return errors.Wrap(err, "allocate IP address: failed to allocate a private IP address") } if output != nil { @@ -1296,6 +1303,58 @@ func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int return nil } +func (cache *EC2InstanceMetadataCache) WaitForENIAndIPsAttached(eni string, wantedSecondaryIPs int) (eniMetadata ENIMetadata, err error) { + return cache.waitForENIAndIPsAttached(eni, wantedSecondaryIPs, maxENIBackoffDelay) +} + +func (cache *EC2InstanceMetadataCache) waitForENIAndIPsAttached(eni string, wantedSecondaryIPs int, maxBackoffDelay time.Duration) (eniMetadata ENIMetadata, err error) { + start := time.Now() + attempt := 0 + // Wait until the ENI shows up in the instance metadata service and has at least some secondary IPs + err = retry.RetryNWithBackoff(retry.NewSimpleBackoff(time.Millisecond*100, maxBackoffDelay, 0.15, 2.0), maxENIEC2APIRetries, func() error { + attempt++ + enis, err := cache.GetAttachedENIs() + if err != nil { + log.Warnf("Failed to increase pool, error trying to discover attached ENIs on attempt %d/%d: %v ", attempt, maxENIEC2APIRetries, err) + return ErrNoNetworkInterfaces + } + // Verify that the ENI we are waiting for is in the returned list + for _, returnedENI := range enis { + if eni == returnedENI.ENIID { + // Check how many Secondary IPs have been attached + eniIPCount := len(returnedENI.IPv4Addresses) + if eniIPCount <= 1 { + log.Debugf("No secondary IPv4 addresses available yet on ENI %s", returnedENI.ENIID) + return ErrNoSecondaryIPsFound + } + // At least some are attached + eniMetadata = returnedENI + // ipsToAllocate will be at most 1 less then the IP limit for the ENI because of the primary IP + if eniIPCount > wantedSecondaryIPs { + return nil + } + return ErrAllSecondaryIPsNotFound + } + } + log.Debugf("Not able to find the right ENI yet (attempt %d/%d)", attempt, maxENIEC2APIRetries) + return ErrENINotFound + }) + awsAPILatency.WithLabelValues("waitForENIAndIPsAttached", fmt.Sprint(err != nil)).Observe(msSince(start)) + if err != nil { + // If we have at least 1 Secondary IP, by now return what we have without an error + if err == ErrAllSecondaryIPsNotFound { + if len(eniMetadata.IPv4Addresses) > 1 { + // We have some Secondary IPs, return the ones we have + log.Warnf("This ENI only has %d IP addresses, we wanted %d", len(eniMetadata.IPv4Addresses), wantedSecondaryIPs) + return eniMetadata, nil + } + } + awsAPIErrInc("waitENIAttachedFailedToAssignIPs", err) + return ENIMetadata{}, errors.New("waitForENIAndIPsAttached: giving up trying to retrieve ENIs from metadata service") + } + return eniMetadata, nil +} + // DeallocIPAddresses allocates numIPs of IP address on an ENI func (cache *EC2InstanceMetadataCache) DeallocIPAddresses(eniID string, ips []string) error { log.Infof("Trying to unassign the following IPs %s from ENI %s", ips, eniID) diff --git a/pkg/awsutils/awsutils_test.go b/pkg/awsutils/awsutils_test.go index a9dbf605ce..e27d2c5b43 100644 --- a/pkg/awsutils/awsutils_test.go +++ b/pkg/awsutils/awsutils_test.go @@ -16,8 +16,11 @@ package awsutils import ( "context" "errors" + "fmt" "os" + "reflect" "sort" + "strconv" "testing" "time" @@ -784,3 +787,80 @@ func Test_badENIID(t *testing.T) { }) } } + +func TestEC2InstanceMetadataCache_waitForENIAndIPsAttached(t *testing.T) { + type args struct { + eni string + foundSecondaryIPs int + wantedSecondaryIPs int + maxBackoffDelay time.Duration + times int + } + eni1Metadata := ENIMetadata{ + ENIID: eniID, + IPv4Addresses: nil, + } + isPrimary := true + notPrimary := false + primaryIP := eni2PrivateIP + secondaryIP1 := primaryIP + "0" + secondaryIP2 := primaryIP + "1" + eni2Metadata := ENIMetadata{ + ENIID: eni2ID, + MAC: eni2MAC, + DeviceNumber: 2, + SubnetIPv4CIDR: subnetCIDR, + IPv4Addresses: []*ec2.NetworkInterfacePrivateIpAddress{ + { + Primary: &isPrimary, + PrivateIpAddress: &primaryIP, + }, { + Primary: ¬Primary, + PrivateIpAddress: &secondaryIP1, + }, { + Primary: ¬Primary, + PrivateIpAddress: &secondaryIP2, + }, + }, + } + eniList := []ENIMetadata{eni1Metadata, eni2Metadata} + tests := []struct { + name string + args args + wantEniMetadata ENIMetadata + wantErr bool + }{ + {"Test wait success", args{eni: eni2ID, foundSecondaryIPs: 2, wantedSecondaryIPs: 2, maxBackoffDelay: 5 * time.Millisecond, times: 1}, eniList[1], false}, + {"Test partial success", args{eni: eni2ID, foundSecondaryIPs: 2, wantedSecondaryIPs: 12, maxBackoffDelay: 5 * time.Millisecond, times: maxENIEC2APIRetries}, eniList[1], false}, + {"Test wait fail", args{eni: eni2ID, foundSecondaryIPs: 0, wantedSecondaryIPs: 12, maxBackoffDelay: 5 * time.Millisecond, times: maxENIEC2APIRetries}, ENIMetadata{}, true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctrl, mockMetadata, mockEC2 := setup(t) + defer ctrl.Finish() + eniIPs := eni2PrivateIP + for i := 0; i < tt.args.foundSecondaryIPs; i++ { + eniIPs += " " + eni2PrivateIP + strconv.Itoa(i) + } + fmt.Println("eniips", eniIPs) + mockMetadata.EXPECT().GetMetadata(metadataMACPath).Return(primaryMAC+" "+eni2MAC, nil).Times(tt.args.times) + mockMetadata.EXPECT().GetMetadata(metadataMACPath+primaryMAC+metadataDeviceNum).Return(eni1Device, nil).Times(tt.args.times) + mockMetadata.EXPECT().GetMetadata(metadataMACPath+primaryMAC+metadataInterface).Return(primaryeniID, nil).Times(tt.args.times) + mockMetadata.EXPECT().GetMetadata(metadataMACPath+primaryMAC+metadataSubnetCIDR).Return(subnetCIDR, nil).Times(tt.args.times) + mockMetadata.EXPECT().GetMetadata(metadataMACPath+primaryMAC+metadataIPv4s).Return(eni1PrivateIP, nil).Times(tt.args.times) + mockMetadata.EXPECT().GetMetadata(metadataMACPath+eni2MAC+metadataDeviceNum).Return(eni2Device, nil).Times(tt.args.times) + mockMetadata.EXPECT().GetMetadata(metadataMACPath+eni2MAC+metadataInterface).Return(eni2ID, nil).Times(tt.args.times) + mockMetadata.EXPECT().GetMetadata(metadataMACPath+eni2MAC+metadataSubnetCIDR).Return(subnetCIDR, nil).Times(tt.args.times) + mockMetadata.EXPECT().GetMetadata(metadataMACPath+eni2MAC+metadataIPv4s).Return(eniIPs, nil).Times(tt.args.times) + cache := &EC2InstanceMetadataCache{ec2Metadata: mockMetadata, ec2SVC: mockEC2} + gotEniMetadata, err := cache.waitForENIAndIPsAttached(tt.args.eni, tt.args.wantedSecondaryIPs, tt.args.maxBackoffDelay) + if (err != nil) != tt.wantErr { + t.Errorf("waitForENIAndIPsAttached() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(gotEniMetadata, tt.wantEniMetadata) { + t.Errorf("waitForENIAndIPsAttached() gotEniMetadata = %v, want %v", gotEniMetadata, tt.wantEniMetadata) + } + }) + } +} diff --git a/pkg/awsutils/mocks/awsutils_mocks.go b/pkg/awsutils/mocks/awsutils_mocks.go index bffafab52b..16f1d2caa3 100644 --- a/pkg/awsutils/mocks/awsutils_mocks.go +++ b/pkg/awsutils/mocks/awsutils_mocks.go @@ -280,3 +280,18 @@ func (mr *MockAPIsMockRecorder) SetUnmanagedENIs(arg0 interface{}) *gomock.Call mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetUnmanagedENIs", reflect.TypeOf((*MockAPIs)(nil).SetUnmanagedENIs), arg0) } + +// WaitForENIAndIPsAttached mocks base method +func (m *MockAPIs) WaitForENIAndIPsAttached(arg0 string, arg1 int) (awsutils.ENIMetadata, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "WaitForENIAndIPsAttached", arg0, arg1) + ret0, _ := ret[0].(awsutils.ENIMetadata) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// WaitForENIAndIPsAttached indicates an expected call of WaitForENIAndIPsAttached +func (mr *MockAPIsMockRecorder) WaitForENIAndIPsAttached(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WaitForENIAndIPsAttached", reflect.TypeOf((*MockAPIs)(nil).WaitForENIAndIPsAttached), arg0, arg1) +} diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index 737e95a3d4..992814997a 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -682,12 +682,13 @@ func (c *IPAMContext) tryAllocateENI() error { ipamdErrInc("increaseIPPoolAllocIPAddressesFailed") } - eniMetadata, err := c.waitENIAttached(eni) + eniMetadata, err := c.awsClient.WaitForENIAndIPsAttached(eni, ipsToAllocate) if err != nil { ipamdErrInc("increaseIPPoolwaitENIAttachedFailed") log.Errorf("Failed to increase pool size: Unable to discover attached ENI from metadata service %v", err) return err } + err = c.setupENI(eni, eniMetadata, c.dataStore.GetTrunkENI()) if err != nil { ipamdErrInc("increaseIPPoolsetupENIFailed") @@ -776,32 +777,6 @@ func (c *IPAMContext) addENIaddressesToDataStore(ec2Addrs []*ec2.NetworkInterfac return primaryIP } -func (c *IPAMContext) waitENIAttached(eni string) (awsutils.ENIMetadata, error) { - // Wait until the ENI shows up in the instance metadata service - retry := 0 - for { - enis, err := c.awsClient.GetAttachedENIs() - if err != nil { - log.Warnf("Failed to increase pool, error trying to discover attached ENIs: %v ", err) - } else { - // Verify that the ENI we are waiting for is in the returned list - for _, returnedENI := range enis { - if eni == returnedENI.ENIID { - return returnedENI, nil - } - } - log.Debugf("Not able to find the right ENI yet (attempt %d/%d)", retry, maxRetryCheckENI) - } - retry++ - if retry > maxRetryCheckENI { - ipamdErrInc("waitENIAttachedMaxRetryExceeded") - return awsutils.ENIMetadata{}, errors.New("waitENIAttached: giving up trying to retrieve ENIs from metadata service") - } - log.Debugf("Not able to discover attached ENIs yet (attempt %d/%d)", retry, maxRetryCheckENI) - time.Sleep(eniAttachTime) - } -} - // getMaxENI returns the maximum number of ENIs to attach to this instance. This is calculated as the lesser of // the limit for the instance type and the value configured via the MAX_ENI environment variable. If the value of // the environment variable is 0 or less, it will be ignored and the maximum for the instance is returned. diff --git a/pkg/ipamd/ipamd_test.go b/pkg/ipamd/ipamd_test.go index 615c68b188..bed6afbae6 100644 --- a/pkg/ipamd/ipamd_test.go +++ b/pkg/ipamd/ipamd_test.go @@ -236,7 +236,7 @@ func testIncreaseIPPool(t *testing.T, useENIConfig bool) { m.awsutils.EXPECT().AllocENI(false, nil, "").Return(eni2, nil) } - m.awsutils.EXPECT().GetAttachedENIs().Return([]awsutils.ENIMetadata{ + eniMetadata := []awsutils.ENIMetadata{ { ENIID: primaryENIid, MAC: primaryMAC, @@ -265,9 +265,10 @@ func testIncreaseIPPool(t *testing.T, useENIConfig bool) { }, }, }, - }, nil) + } m.awsutils.EXPECT().GetPrimaryENI().Return(primaryENIid) + m.awsutils.EXPECT().WaitForENIAndIPsAttached(secENIid, 14).Return(eniMetadata[1], nil) m.network.EXPECT().SetupENINetwork(gomock.Any(), secMAC, secDevice, secSubnet) m.awsutils.EXPECT().AllocIPAddresses(eni2, 14) @@ -305,7 +306,7 @@ func TestTryAddIPToENI(t *testing.T) { m.awsutils.EXPECT().AllocENI(false, nil, "").Return(secENIid, nil) m.awsutils.EXPECT().AllocIPAddresses(secENIid, warmIpTarget) - m.awsutils.EXPECT().GetAttachedENIs().Return([]awsutils.ENIMetadata{ + eniMetadata := []awsutils.ENIMetadata{ { ENIID: primaryENIid, MAC: primaryMAC, @@ -334,7 +335,8 @@ func TestTryAddIPToENI(t *testing.T) { }, }, }, - }, nil) + } + m.awsutils.EXPECT().WaitForENIAndIPsAttached(secENIid, 3).Return(eniMetadata[1], nil) m.awsutils.EXPECT().GetPrimaryENI().Return(primaryENIid) m.network.EXPECT().SetupENINetwork(gomock.Any(), secMAC, secDevice, secSubnet) m.awsutils.EXPECT().GetPrimaryENI().Return(primaryENIid)