diff --git a/go.mod b/go.mod index fa10d6f7ca..f0c32aceb1 100644 --- a/go.mod +++ b/go.mod @@ -66,7 +66,7 @@ replace ( k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.20.0 k8s.io/sample-cli-plugin => k8s.io/sample-cli-plugin v0.20.0 k8s.io/sample-controller => k8s.io/sample-controller v0.20.0 - sigs.k8s.io/cloud-provider-azure => sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210416084621-94849ca13e9a + sigs.k8s.io/cloud-provider-azure => sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210423025240-0d2a26c03390 ) replace github.com/niemeyer/pretty => github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e diff --git a/go.sum b/go.sum index 936365c87a..f70a906f2e 100644 --- a/go.sum +++ b/go.sum @@ -1137,8 +1137,8 @@ rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.14 h1:TihvEz9MPj2u0KWds6E2OBUXfwaL4qRJ33c7HGiJpqk= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.14/go.mod h1:LEScyzhFmoF5pso/YSeBstl57mOzx9xlU9n85RGrDQg= -sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210416084621-94849ca13e9a h1:7VLO4aljiKeCND/RyyYFIoOiiAi8NHHXkcbkHHqb7Ic= -sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210416084621-94849ca13e9a/go.mod h1:3qXojekdE+ViHa+utBwLJEuizyxFlt/yrQIEuPIDahQ= +sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210423025240-0d2a26c03390 h1:0/FXjuDIxBHnsjoXp1xyGCN6Ma6JpnpBkbzmPj10qIA= +sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210423025240-0d2a26c03390/go.mod h1:3qXojekdE+ViHa+utBwLJEuizyxFlt/yrQIEuPIDahQ= sigs.k8s.io/kustomize v2.0.3+incompatible/go.mod h1:MkjgH3RdOWrievjo6c9T245dYlB5QeXV4WCbnt/PEpU= sigs.k8s.io/structured-merge-diff/v4 v4.0.2 h1:YHQV7Dajm86OuqnIR6zAelnDWBRjo+YhYV9PmGrh1s8= sigs.k8s.io/structured-merge-diff/v4 v4.0.2/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= diff --git a/vendor/modules.txt b/vendor/modules.txt index 8ec4a9106c..0a4faa27b7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -820,7 +820,7 @@ k8s.io/utils/trace # sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.14 sigs.k8s.io/apiserver-network-proxy/konnectivity-client/pkg/client sigs.k8s.io/apiserver-network-proxy/konnectivity-client/proto/client -# sigs.k8s.io/cloud-provider-azure v0.0.0 => sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210416084621-94849ca13e9a +# sigs.k8s.io/cloud-provider-azure v0.0.0 => sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210423025240-0d2a26c03390 ## explicit sigs.k8s.io/cloud-provider-azure/pkg/auth sigs.k8s.io/cloud-provider-azure/pkg/azureclients @@ -897,5 +897,5 @@ sigs.k8s.io/yaml # k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.20.0 # k8s.io/sample-cli-plugin => k8s.io/sample-cli-plugin v0.20.0 # k8s.io/sample-controller => k8s.io/sample-controller v0.20.0 -# sigs.k8s.io/cloud-provider-azure => sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210416084621-94849ca13e9a +# sigs.k8s.io/cloud-provider-azure => sigs.k8s.io/cloud-provider-azure v0.7.1-0.20210423025240-0d2a26c03390 # github.com/niemeyer/pretty => github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e diff --git a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_backoff.go b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_backoff.go index 382791feff..4edad72424 100644 --- a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_backoff.go +++ b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_backoff.go @@ -18,6 +18,7 @@ package provider import ( "errors" + "fmt" "net/http" "regexp" "strings" @@ -29,6 +30,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" cloudprovider "k8s.io/cloud-provider" "k8s.io/klog/v2" @@ -256,6 +258,41 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer) return rerr.Error() } +// ListAgentPoolLBs invokes az.LoadBalancerClient.List and filter out +// those that are not managed by cloud provider azure or not associated to a managed VMSet. +func (az *Cloud) ListAgentPoolLBs(service *v1.Service, nodes []*v1.Node, clusterName string) ([]network.LoadBalancer, error) { + allLBs, err := az.ListLB(service) + if err != nil { + return nil, err + } + + if allLBs == nil { + klog.Warningf("ListAgentPoolLBs: no LBs found") + return nil, nil + } + + agentPoolLBs := make([]network.LoadBalancer, 0) + agentPoolVMSetNames, err := az.VMSet.GetAgentPoolVMSetNames(nodes) + if err != nil { + return nil, fmt.Errorf("ListAgentPoolLBs: failed to get agent pool vmSet names: %w", err) + } + agentPoolVMSetNamesSet := sets.NewString() + if agentPoolVMSetNames != nil && len(*agentPoolVMSetNames) > 0 { + for _, vmSetName := range *agentPoolVMSetNames { + agentPoolVMSetNamesSet.Insert(strings.ToLower(vmSetName)) + } + } + + for _, lb := range allLBs { + vmSetNameFromLBName := az.mapLoadBalancerNameToVMSet(to.String(lb.Name), clusterName) + if agentPoolVMSetNamesSet.Has(strings.ToLower(vmSetNameFromLBName)) { + agentPoolLBs = append(agentPoolLBs, lb) + } + } + + return agentPoolLBs, nil +} + // ListLB invokes az.LoadBalancerClient.List with exponential backoff retry func (az *Cloud) ListLB(service *v1.Service) ([]network.LoadBalancer, error) { ctx, cancel := getContextWithCancel() diff --git a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_controller_common.go b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_controller_common.go index 12a9e8ef59..3b30437e59 100644 --- a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_controller_common.go +++ b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_controller_common.go @@ -312,6 +312,7 @@ func (c *controllerCommon) DetachDisk(diskName, diskURI string, nodeName types.N } c.lockMap.LockEntry(node) + defer c.lockMap.UnlockEntry(node) diskMap, err := c.cleanDetachDiskRequests(node) if err != nil { return err @@ -320,39 +321,21 @@ func (c *controllerCommon) DetachDisk(diskName, diskURI string, nodeName types.N klog.V(2).Infof("Trying to detach volume %q from node %q, diskMap: %s", diskURI, nodeName, diskMap) if len(diskMap) > 0 { c.diskStateMap.Store(disk, "detaching") - err = vmset.DetachDisk(nodeName, diskMap) - c.diskStateMap.Delete(disk) - } - c.lockMap.UnlockEntry(node) - - if err != nil { - if isInstanceNotFoundError(err) { - // if host doesn't exist, no need to detach - klog.Warningf("azureDisk - got InstanceNotFoundError(%v), DetachDisk(%s) will assume disk is already detached", - err, diskURI) - return nil - } - if retry.IsErrorRetriable(err) && c.cloud.CloudProviderBackoff { - klog.Warningf("azureDisk - update backing off: detach disk(%s, %s), err: %w", diskName, diskURI, err) - retryErr := kwait.ExponentialBackoff(c.cloud.RequestBackoff(), func() (bool, error) { - c.lockMap.LockEntry(node) - c.diskStateMap.Store(disk, "detaching") - err := vmset.DetachDisk(nodeName, diskMap) - c.diskStateMap.Delete(disk) - c.lockMap.UnlockEntry(node) - - retriable := false - if err != nil && retry.IsErrorRetriable(err) { - retriable = true - } - return !retriable, err - }) - if retryErr != nil { - err = retryErr - klog.V(2).Infof("azureDisk - update abort backoff: detach disk(%s, %s), err: %w", diskName, diskURI, err) + defer c.diskStateMap.Delete(disk) + if err = vmset.DetachDisk(nodeName, diskMap); err != nil { + if isInstanceNotFoundError(err) { + // if host doesn't exist, no need to detach + klog.Warningf("azureDisk - got InstanceNotFoundError(%v), DetachDisk(%s) will assume disk is already detached", + err, diskURI) + return nil + } + if retry.IsErrorRetriable(err) && c.cloud.CloudProviderBackoff { + klog.Warningf("azureDisk - update backing off: detach disk(%s, %s), err: %w", diskName, diskURI, err) + err = vmset.DetachDisk(nodeName, diskMap) } } } + if err != nil { klog.Errorf("azureDisk - detach disk(%s, %s) failed, err: %v", diskName, diskURI, err) return err diff --git a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_loadbalancer.go b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_loadbalancer.go index d184d3379b..21512760bc 100644 --- a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_loadbalancer.go +++ b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_loadbalancer.go @@ -141,7 +141,7 @@ func (az *Cloud) EnsureLoadBalancer(ctx context.Context, clusterName string, ser // UpdateLoadBalancer updates hosts under the specified load balancer. func (az *Cloud) UpdateLoadBalancer(ctx context.Context, clusterName string, service *v1.Service, nodes []*v1.Node) error { - if !az.shouldUpdateLoadBalancer(clusterName, service) { + if !az.shouldUpdateLoadBalancer(clusterName, service, nodes) { klog.V(2).Infof("UpdateLoadBalancer: skipping service %s because it is either being deleted or does not exist anymore", service.Name) return nil } @@ -435,15 +435,64 @@ func (az *Cloud) cleanOrphanedLoadBalancer(lb *network.LoadBalancer, service *v1 return nil } +// safeDeleteLoadBalancer deletes the load balancer after decoupling it from the vmSet +func (az *Cloud) safeDeleteLoadBalancer(lb network.LoadBalancer, clusterName, vmSetName string, service *v1.Service) error { + lbBackendPoolID := az.getBackendPoolID(to.String(lb.Name), az.getLoadBalancerResourceGroup(), getBackendPoolName(clusterName, service)) + err := az.VMSet.EnsureBackendPoolDeleted(service, lbBackendPoolID, vmSetName, lb.BackendAddressPools) + if err != nil { + return fmt.Errorf("deleteDedicatedLoadBalancer: failed to EnsureBackendPoolDeleted: %w", err) + } + + klog.V(2).Infof("deleteDedicatedLoadBalancer: deleting LB %s because the corresponding vmSet is supposed to be in the primary SLB", to.String(lb.Name)) + err = az.DeleteLB(service, to.String(lb.Name)) + if err != nil { + return fmt.Errorf("deleteDedicatedLoadBalancer : failed to DeleteLB: %w", err) + } + _ = az.lbCache.Delete(to.String(lb.Name)) + + return nil +} + +func extractBackendIPConfigurationIDsFromLB(lb network.LoadBalancer, lbBackendPoolName string) []string { + result := make([]string, 0) + if lb.LoadBalancerPropertiesFormat != nil && + lb.BackendAddressPools != nil { + for i := 0; i < len(*lb.BackendAddressPools); i++ { + backendPool := (*lb.BackendAddressPools)[i] + if strings.EqualFold(to.String(backendPool.Name), lbBackendPoolName) { + if backendPool.BackendAddressPoolPropertiesFormat != nil && + backendPool.BackendIPConfigurations != nil { + for _, ipConfiguration := range *backendPool.BackendIPConfigurations { + if ipConfiguration.ID != nil { + result = append(result, to.String(ipConfiguration.ID)) + } + } + } + } + } + } + + return result +} + +// reconcileSharedLoadBalancer deletes the dedicated SLBs of the non-primary vmSets. There are +// two scenarios where this operation is needed: +// 1. Using multiple slbs and the vmSet is supposed to share the primary slb. +// 2. When migrating from multiple slbs to single slb mode. +// It also ensures those vmSets are joint the backend pools of the primary SLBs. +// It runs only once everytime the cloud controller manager restarts. func (az *Cloud) reconcileSharedLoadBalancer(service *v1.Service, clusterName string, nodes []*v1.Node) ([]network.LoadBalancer, error) { var ( - primarySLB network.LoadBalancer - changed bool - ipConfigIDsToBeAddedToPrimarySLB []string - existingLBs []network.LoadBalancer - err error + primarySLBs, existingLBs []network.LoadBalancer + changed bool + err error ) + // skip this operation when wantLb=false + if nodes == nil { + return nil, nil + } + // only run once since the controller manager rebooted if az.isSharedLoadBalancerSynced { return nil, nil @@ -454,84 +503,92 @@ func (az *Cloud) reconcileSharedLoadBalancer(service *v1.Service, clusterName st } }() - // skip if the cluster doesn't enable the multiple slbs mode - useMultipleSLBs := az.useStandardLoadBalancer() && az.EnableMultipleStandardLoadBalancers - if !useMultipleSLBs { + // skip if the cluster is using basic LB + if !az.useStandardLoadBalancer() { return nil, nil } - existingLBs, err = az.ListLB(service) + existingLBs, err = az.ListAgentPoolLBs(service, nodes, clusterName) if err != nil { return nil, fmt.Errorf("reconcileSharedLoadBalancer: failed to list LB: %w", err) } lbBackendPoolName := getBackendPoolName(clusterName, service) + lbNamesToBeDeleted := sets.NewString() + // loop 1: delete unwanted LBs + for _, lb := range existingLBs { + lbNamePrefix := strings.TrimSuffix(to.String(lb.Name), consts.InternalLoadBalancerNameSuffix) + + // skip the internal or external primary load balancer + if strings.EqualFold(lbNamePrefix, clusterName) { + primarySLBs = append(primarySLBs, lb) + continue + } - for i := len(existingLBs) - 1; i >= 0; i-- { - lb := existingLBs[i] - - // skip the primary load balancer - if strings.EqualFold(to.String(lb.Name), clusterName) { - primarySLB = lb + // skip if the multiple slbs mode is enabled and + // the vmSet is supposed to have dedicated SLBs + vmSetName := strings.ToLower(az.mapLoadBalancerNameToVMSet(to.String(lb.Name), clusterName)) + if az.EnableMultipleStandardLoadBalancers && !az.getVMSetNamesSharingPrimarySLB().Has(vmSetName) { continue } // For non-primary load balancer, the lb name is the name of the VMSet. // If the VMSet name is in az.NodePoolsWithoutDedicatedSLB, we should - // decouple the VMSet from the lb and delete the lb. Then we should - // add the VMSet to the backend pool of the primary slb. - vmSetName := strings.ToLower(to.String(lb.Name)) - lbBackendPoolID := az.getBackendPoolID(to.String(lb.Name), az.getLoadBalancerResourceGroup(), getBackendPoolName(clusterName, service)) - if az.getVMSetNamesSharingPrimarySLB().Has(vmSetName) { - err = az.VMSet.EnsureBackendPoolDeleted(service, lbBackendPoolID, vmSetName, lb.BackendAddressPools) - if err != nil { - return nil, fmt.Errorf("reconcileSharedLoadBalancer: failed to EnsureBackendPoolDeleted: %w", err) - } + // decouple the VMSet from the lb and delete the lb. Then adding the VMSet + // to the backend pool of the primary slb. + err = az.safeDeleteLoadBalancer(lb, clusterName, vmSetName, service) + if err != nil { + return nil, err + } - klog.V(2).Infof("reconcileSharedLoadBalancer: deleting LB %s because the corresponding vmSet is sharing the primary SLB", to.String(lb.Name)) - err = az.DeleteLB(service, to.String(lb.Name)) - if err != nil { - return nil, fmt.Errorf("reconcileSharedLoadBalancer: failed to DeleteLB: %w", err) - } - _ = az.lbCache.Delete(to.String(lb.Name)) + // remove the deleted lb from the list and construct a new primary + // lb, so that getServiceLoadBalancer doesn't have to call list api again + lbNamesToBeDeleted.Insert(to.String(lb.Name)) + changed = true + } - primaryLBBackendPoolID := az.getBackendPoolID(clusterName, az.getLoadBalancerResourceGroup(), getBackendPoolName(clusterName, service)) - err = az.VMSet.EnsureHostsInPool(service, nodes, primaryLBBackendPoolID, vmSetName, false) - if err != nil { - return nil, fmt.Errorf("reconcileSharedLoadBalancer: failed to EnsureHostsInPool: %w", err) - } - - if lb.LoadBalancerPropertiesFormat != nil && - lb.BackendAddressPools != nil { - for i := 0; i < len(*lb.BackendAddressPools); i++ { - backendPool := (*lb.BackendAddressPools)[i] - if strings.EqualFold(to.String(backendPool.Name), lbBackendPoolName) { - if backendPool.BackendAddressPoolPropertiesFormat != nil && - backendPool.BackendIPConfigurations != nil { - for _, ipConfiguration := range *backendPool.BackendIPConfigurations { - if ipConfiguration.ID != nil { - ipConfigIDsToBeAddedToPrimarySLB = append(ipConfigIDsToBeAddedToPrimarySLB, to.String(ipConfiguration.ID)) - } - } - } - } - } - } + if !changed { + klog.V(4).Infof("reconcileSharedLoadBalancer: no changes made, return now") + return existingLBs, nil + } - // remove the deleted lb from the list and construct a new primary - // lb, so that getServiceLoadBalancer doesn't have to call list api again - existingLBs = append(existingLBs[:i], existingLBs[i+1:]...) - changed = true + ipConfigIDsToBeAddedToPrimarySLB := sets.NewString() + // loop2: add nodes to the backend pool of the primary SLBs + for i := len(existingLBs) - 1; i >= 0; i-- { + lb := existingLBs[i] + if !lbNamesToBeDeleted.Has(to.String(lb.Name)) { + continue } + + vmSetName := strings.ToLower(az.mapLoadBalancerNameToVMSet(to.String(lb.Name), clusterName)) + isInternalLB := strings.HasSuffix(to.String(lb.Name), consts.InternalLoadBalancerNameSuffix) + primarySLBName := clusterName + if isInternalLB { + primarySLBName = fmt.Sprintf("%s%s", clusterName, consts.InternalLoadBalancerNameSuffix) + } + primaryLBBackendPoolID := az.getBackendPoolID(primarySLBName, az.getLoadBalancerResourceGroup(), getBackendPoolName(clusterName, service)) + + klog.V(2).Infof("reconcileSharedLoadBalancer: binding the vmSet %s to the backend pool %s", vmSetName, primaryLBBackendPoolID) + err = az.VMSet.EnsureHostsInPool(service, nodes, primaryLBBackendPoolID, vmSetName, false) + if err != nil { + return nil, fmt.Errorf("reconcileSharedLoadBalancer: failed to EnsureHostsInPool: %w", err) + } + + for _, id := range extractBackendIPConfigurationIDsFromLB(lb, lbBackendPoolName) { + ipConfigIDsToBeAddedToPrimarySLB.Insert(id) + } + + // remove the deleted LB from the list + existingLBs = append(existingLBs[:i], existingLBs[i+1:]...) } - if changed { + for _, primarySLB := range primarySLBs { if primarySLB.LoadBalancerPropertiesFormat != nil && primarySLB.BackendAddressPools != nil { for i := 0; i < len(*primarySLB.BackendAddressPools); i++ { if strings.EqualFold(to.String((*primarySLB.BackendAddressPools)[i].Name), lbBackendPoolName) { backendPoolIPConfigs := (*primarySLB.BackendAddressPools)[i].BackendIPConfigurations - for _, id := range ipConfigIDsToBeAddedToPrimarySLB { + for _, id := range ipConfigIDsToBeAddedToPrimarySLB.List() { *backendPoolIPConfigs = append(*backendPoolIPConfigs, network.InterfaceIPConfiguration{ ID: to.StringPtr(id), }) @@ -540,16 +597,17 @@ func (az *Cloud) reconcileSharedLoadBalancer(service *v1.Service, clusterName st } } } + } - for i, existingLB := range existingLBs { - if strings.EqualFold(to.String(existingLB.Name), clusterName) { + for i, existingLB := range existingLBs { + for _, primarySLB := range primarySLBs { + if strings.EqualFold(to.String(existingLB.Name), to.String(primarySLB.Name)) { // Proactively disable the etag to prevent etag mismatch error when putting lb later. // This could be happen because when we remove the hosts from the lb, the nrp // would put the lb to remove the backend references as well. primarySLB.Etag = nil existingLBs[i] = primarySLB - return existingLBs, nil } } } @@ -580,19 +638,18 @@ func (az *Cloud) getServiceLoadBalancer(service *v1.Service, clusterName string, // check if the service already has a load balancer for i := range existingLBs { existingLB := existingLBs[i] + existingLBNamePrefix := strings.TrimSuffix(to.String(existingLB.Name), consts.InternalLoadBalancerNameSuffix) - // for the primary standard load balancer, when enabled multiple slbs - if strings.EqualFold(to.String(existingLB.Name), clusterName) && useMultipleSLBs { - // there are two conditions we need to remove the vmSet from the - // backend pool of the primary SLB when enabling multiple SLBs: + // for the primary standard load balancer (internal or external), when enabled multiple slbs + if strings.EqualFold(existingLBNamePrefix, clusterName) && useMultipleSLBs { shouldRemoveVMSetFromSLB := func(vmSetName string) bool { - // condition 1: not removing the vmSet from the primary SLB + // not removing the vmSet from the primary SLB // if it is supposed to share the primary SLB. if az.getVMSetNamesSharingPrimarySLB().Has(strings.ToLower(vmSetName)) { return false } - // condition 2: removing the vmSet from the primary SLB if + // removing the vmSet from the primary SLB if // it is not the primary vmSet. There are two situations: // 1. when migrating from single SLB to multiple SLBs, we // need to remove all non-primary vmSets from the primary SLB; @@ -2332,8 +2389,8 @@ func (az *Cloud) getExpectedSecurityRules(wantLb bool, ports []v1.ServicePort, s return expectedSecurityRules, nil } -func (az *Cloud) shouldUpdateLoadBalancer(clusterName string, service *v1.Service) bool { - _, _, existsLb, _ := az.getServiceLoadBalancer(service, clusterName, nil, false, []network.LoadBalancer{}) +func (az *Cloud) shouldUpdateLoadBalancer(clusterName string, service *v1.Service, nodes []*v1.Node) bool { + _, _, existsLb, _ := az.getServiceLoadBalancer(service, clusterName, nodes, false, []network.LoadBalancer{}) return existsLb && service.ObjectMeta.DeletionTimestamp == nil } diff --git a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_mock_vmsets.go b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_mock_vmsets.go index 0950d17c01..db7e760ca6 100644 --- a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_mock_vmsets.go +++ b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_mock_vmsets.go @@ -219,9 +219,9 @@ func (mr *MockVMSetMockRecorder) EnsureBackendPoolDeleted(service, backendPoolID } // EnsureBackendPoolDeletedFromVMSets mocks base method -func (m *MockVMSet) EnsureBackendPoolDeletedFromVMSets(vmSetsNameMap map[string]bool, backendPoolID string) error { +func (m *MockVMSet) EnsureBackendPoolDeletedFromVMSets(vmSetNamesMap map[string]bool, backendPoolID string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "EnsureBackendPoolDeletedFromVMSet", vmSetsNameMap, backendPoolID) + ret := m.ctrl.Call(m, "EnsureBackendPoolDeletedFromVMSets", vmSetNamesMap, backendPoolID) ret0, _ := ret[0].(error) return ret0 } @@ -229,7 +229,7 @@ func (m *MockVMSet) EnsureBackendPoolDeletedFromVMSets(vmSetsNameMap map[string] // EnsureBackendPoolDeletedFromVMSets indicates an expected call of EnsureBackendPoolDeletedFromVMSets func (mr *MockVMSetMockRecorder) EnsureBackendPoolDeletedFromVMSets(vmSetNamesMap, backendPoolID interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EnsureBackendPoolDeletedFromVMSet", reflect.TypeOf((*MockVMSet)(nil).EnsureBackendPoolDeletedFromVMSets), vmSetNamesMap, backendPoolID) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EnsureBackendPoolDeletedFromVMSets", reflect.TypeOf((*MockVMSet)(nil).EnsureBackendPoolDeletedFromVMSets), vmSetNamesMap, backendPoolID) } // AttachDisk mocks base method @@ -351,3 +351,18 @@ func (mr *MockVMSetMockRecorder) GetNodeCIDRMasksByProviderID(providerID interfa mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetNodeCIDRMasksByProviderID", reflect.TypeOf((*MockVMSet)(nil).GetNodeCIDRMasksByProviderID), providerID) } + +// GetAgentPoolVMSetNames mocks base method +func (m *MockVMSet) GetAgentPoolVMSetNames(nodes []*v1.Node) (*[]string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetAgentPoolVMSetNames", nodes) + ret0, _ := ret[0].(*[]string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetAgentPoolVMSetNames indicates an expected call of GetAgentPoolVMSetNames +func (mr *MockVMSetMockRecorder) GetAgentPoolVMSetNames(nodes interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAgentPoolVMSetNames", reflect.TypeOf((*MockVMSet)(nil).GetAgentPoolVMSetNames), nodes) +} diff --git a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_standard.go b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_standard.go index 9992dccdd9..f92318f89d 100644 --- a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_standard.go +++ b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_standard.go @@ -650,12 +650,7 @@ func (as *availabilitySet) GetPrivateIPsByNodeName(name string) ([]string, error // getAgentPoolAvailabilitySets lists the virtual machines for the resource group and then builds // a list of availability sets that match the nodes available to k8s. -func (as *availabilitySet) getAgentPoolAvailabilitySets(nodes []*v1.Node) (agentPoolAvailabilitySets *[]string, err error) { - vms, err := as.ListVirtualMachines(as.ResourceGroup) - if err != nil { - klog.Errorf("as.getNodeAvailabilitySet - ListVirtualMachines failed, err=%v", err) - return nil, err - } +func (as *availabilitySet) getAgentPoolAvailabilitySets(vms []compute.VirtualMachine, nodes []*v1.Node) (agentPoolAvailabilitySets *[]string, err error) { vmNameToAvailabilitySetID := make(map[string]string, len(vms)) for vmx := range vms { vm := vms[vmx] @@ -672,8 +667,8 @@ func (as *availabilitySet) getAgentPoolAvailabilitySets(nodes []*v1.Node) (agent } asID, ok := vmNameToAvailabilitySetID[nodeName] if !ok { - klog.Errorf("as.getNodeAvailabilitySet - Node(%s) has no availability sets", nodeName) - return nil, fmt.Errorf("node (%s) - has no availability sets", nodeName) + klog.Warningf("as.getNodeAvailabilitySet - Node(%s) has no availability sets", nodeName) + continue } if availabilitySetIDs.Has(asID) { // already added in the list @@ -708,7 +703,13 @@ func (as *availabilitySet) GetVMSetNames(service *v1.Service, nodes []*v1.Node) availabilitySetNames = &[]string{as.Config.PrimaryAvailabilitySetName} return availabilitySetNames, nil } - availabilitySetNames, err = as.getAgentPoolAvailabilitySets(nodes) + + vms, err := as.ListVirtualMachines(as.ResourceGroup) + if err != nil { + klog.Errorf("as.getNodeAvailabilitySet - ListVirtualMachines failed, err=%v", err) + return nil, err + } + availabilitySetNames, err = as.getAgentPoolAvailabilitySets(vms, nodes) if err != nil { klog.Errorf("as.GetVMSetNames - getAgentPoolAvailabilitySets failed err=(%v)", err) return nil, err @@ -1208,3 +1209,14 @@ func (as *availabilitySet) GetNodeCIDRMasksByProviderID(providerID string) (int, func (as *availabilitySet) EnsureBackendPoolDeletedFromVMSets(vmasNamesMap map[string]bool, backendPoolID string) error { return nil } + +// GetAgentPoolVMSetNames returns all VMAS names according to the nodes +func (as *availabilitySet) GetAgentPoolVMSetNames(nodes []*v1.Node) (*[]string, error) { + vms, err := as.ListVirtualMachines(as.ResourceGroup) + if err != nil { + klog.Errorf("as.getNodeAvailabilitySet - ListVirtualMachines failed, err=%v", err) + return nil, err + } + + return as.getAgentPoolAvailabilitySets(vms, nodes) +} diff --git a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmsets.go b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmsets.go index 5171c2e6a2..d8238af75e 100644 --- a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmsets.go +++ b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmsets.go @@ -86,6 +86,9 @@ type VMSet interface { // GetNodeNameByIPConfigurationID gets the nodeName and vmSetName by IP configuration ID. GetNodeNameByIPConfigurationID(ipConfigurationID string) (string, string, error) - // GetNodeCIDRMaskByProviderID returns the node CIDR subnet mask by provider ID. + // GetNodeCIDRMasksByProviderID returns the node CIDR subnet mask by provider ID. GetNodeCIDRMasksByProviderID(providerID string) (int, int, error) + + // GetAgentPoolVMSetNames returns all vmSet names according to the nodes + GetAgentPoolVMSetNames(nodes []*v1.Node) (*[]string, error) } diff --git a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmss.go b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmss.go index e0a14effb1..1d884f1e88 100644 --- a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmss.go +++ b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmss.go @@ -921,16 +921,18 @@ func (ss *ScaleSet) getConfigForScaleSetByIPFamily(config *compute.VirtualMachin // EnsureHostInPool ensures the given VM's Primary NIC's Primary IP Configuration is // participating in the specified LoadBalancer Backend Pool, which returns (resourceGroup, vmasName, instanceID, vmssVM, error). -func (ss *ScaleSet) EnsureHostInPool(service *v1.Service, nodeName types.NodeName, backendPoolID string, vmSetName string, isInternal bool) (string, string, string, *compute.VirtualMachineScaleSetVM, error) { - klog.V(3).Infof("ensuring node %q of scaleset %q in LB backendpool %q", nodeName, vmSetName, backendPoolID) +func (ss *ScaleSet) EnsureHostInPool(service *v1.Service, nodeName types.NodeName, backendPoolID string, vmSetNameOfLB string, isInternal bool) (string, string, string, *compute.VirtualMachineScaleSetVM, error) { vmName := mapNodeNameToVMName(nodeName) ssName, instanceID, vm, err := ss.getVmssVM(vmName, azcache.CacheReadTypeDefault) if err != nil { + klog.Errorf("EnsureHostInPool: failed to get VMSS VM %s: %v", vmName, err) return "", "", "", nil, err } + klog.V(2).Infof("ensuring node %q of scaleset %q in LB backendpool %q", nodeName, ssName, backendPoolID) + // Check scale set name: - // - For basic SKU load balancer, return nil if the node's scale set is mismatched with vmSetName. + // - For basic SKU load balancer, return nil if the node's scale set is mismatched with vmSetNameOfLB. // - For single standard SKU load balancer, backend could belong to multiple VMSS, so we // don't check vmSet for it. // - For multiple standard SKU load balancers, the behavior is similar to the basic load balancer @@ -942,8 +944,8 @@ func (ss *ScaleSet) EnsureHostInPool(service *v1.Service, nodeName types.NodeNam // need to check the vmSet name when using multiple standard LBs needCheck = true } - if vmSetName != "" && needCheck && !strings.EqualFold(vmSetName, ssName) { - klog.V(3).Infof("EnsureHostInPool skips node %s because it is not in the ScaleSet %s", vmName, vmSetName) + if vmSetNameOfLB != "" && needCheck && !strings.EqualFold(vmSetNameOfLB, ssName) { + klog.V(3).Infof("EnsureHostInPool skips node %s because it is not in the ScaleSet %s", vmName, vmSetNameOfLB) return "", "", "", nil, nil } @@ -1051,7 +1053,7 @@ func getVmssAndResourceGroupNameByVMProviderID(providerID string) (string, strin return matches[1], matches[2], nil } -func (ss *ScaleSet) ensureVMSSInPool(service *v1.Service, nodes []*v1.Node, backendPoolID string, vmSetName string) error { +func (ss *ScaleSet) ensureVMSSInPool(service *v1.Service, nodes []*v1.Node, backendPoolID string, vmSetNameOfLB string) error { klog.V(2).Infof("ensureVMSSInPool: ensuring VMSS with backendPoolID %s", backendPoolID) vmssNamesMap := make(map[string]bool) @@ -1080,7 +1082,7 @@ func (ss *ScaleSet) ensureVMSSInPool(service *v1.Service, nodes []*v1.Node, back } } } else { - vmssNamesMap[vmSetName] = true + vmssNamesMap[vmSetNameOfLB] = true } klog.V(2).Infof("ensureVMSSInPool begins to update VMSS %v with backendPoolID %s", vmssNamesMap, backendPoolID) @@ -1189,7 +1191,7 @@ func (ss *ScaleSet) ensureVMSSInPool(service *v1.Service, nodes []*v1.Node, back // EnsureHostsInPool ensures the given Node's primary IP configurations are // participating in the specified LoadBalancer Backend Pool. -func (ss *ScaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, backendPoolID string, vmSetName string, isInternal bool) error { +func (ss *ScaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, backendPoolID string, vmSetNameOfLB string, isInternal bool) error { mc := metrics.NewMetricContext("services", "vmss_ensure_hosts_in_pool", ss.ResourceGroup, ss.SubscriptionID, service.Name) isOperationSucceeded := false defer func() { @@ -1224,7 +1226,7 @@ func (ss *ScaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, bac // VMAS nodes should also be added to the SLB backends. if ss.useStandardLoadBalancer() { hostUpdates = append(hostUpdates, func() error { - _, _, _, _, err := ss.availabilitySet.EnsureHostInPool(service, types.NodeName(localNodeName), backendPoolID, vmSetName, isInternal) + _, _, _, _, err := ss.availabilitySet.EnsureHostInPool(service, types.NodeName(localNodeName), backendPoolID, vmSetNameOfLB, isInternal) return err }) continue @@ -1234,7 +1236,7 @@ func (ss *ScaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, bac continue } - nodeResourceGroup, nodeVMSS, nodeInstanceID, nodeVMSSVM, err := ss.EnsureHostInPool(service, types.NodeName(localNodeName), backendPoolID, vmSetName, isInternal) + nodeResourceGroup, nodeVMSS, nodeInstanceID, nodeVMSSVM, err := ss.EnsureHostInPool(service, types.NodeName(localNodeName), backendPoolID, vmSetNameOfLB, isInternal) if err != nil { klog.Errorf("EnsureHostInPool(%s): backendPoolID(%s) - failed to ensure host in pool: %q", getServiceName(service), backendPoolID, err) errors = append(errors, err) @@ -1291,7 +1293,7 @@ func (ss *ScaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, bac // Ensure the backendPoolID is also added on VMSS itself. // Refer to issue kubernetes/kubernetes#80365 for detailed information - err := ss.ensureVMSSInPool(service, nodes, backendPoolID, vmSetName) + err := ss.ensureVMSSInPool(service, nodes, backendPoolID, vmSetNameOfLB) if err != nil { return err } @@ -1669,3 +1671,40 @@ func (ss *ScaleSet) EnsureBackendPoolDeletedFromVMSets(vmssNamesMap map[string]b return nil } + +// GetAgentPoolVMSetNames returns all VMSS/VMAS names according to the nodes. +// We need to include the VMAS here because some of the cluster provisioning tools +// like capz allows mixed instance type. +func (ss *ScaleSet) GetAgentPoolVMSetNames(nodes []*v1.Node) (*[]string, error) { + vmSetNames := make([]string, 0) + as := ss.availabilitySet.(*availabilitySet) + + for _, node := range nodes { + var names *[]string + managedByAS, err := ss.isNodeManagedByAvailabilitySet(node.Name, azcache.CacheReadTypeDefault) + if err != nil { + return nil, fmt.Errorf("GetAgentPoolVMSetNames: failed to check if the node %s is managed by VMAS: %w", node.Name, err) + } + if managedByAS { + cached, err := ss.availabilitySetNodesCache.Get(consts.AvailabilitySetNodesKey, azcache.CacheReadTypeDefault) + if err != nil { + return nil, fmt.Errorf("GetAgentPoolVMSetNames: failed to get availabilitySetNodesCache") + } + vms := cached.(availabilitySetNodeEntry).vms + names, err = as.getAgentPoolAvailabilitySets(vms, []*v1.Node{node}) + if err != nil { + return nil, fmt.Errorf("GetAgentPoolVMSetNames: failed to execute getAgentPoolAvailabilitySets: %w", err) + } + vmSetNames = append(vmSetNames, *names...) + continue + } + + names, err = ss.getAgentPoolScaleSets([]*v1.Node{node}) + if err != nil { + return nil, fmt.Errorf("GetAgentPoolVMSetNames: failed to execute getAgentPoolScaleSets: %w", err) + } + vmSetNames = append(vmSetNames, *names...) + } + + return &vmSetNames, nil +} diff --git a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmss_cache.go b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmss_cache.go index d1d4acc6d8..8666ef83a1 100644 --- a/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmss_cache.go +++ b/vendor/sigs.k8s.io/cloud-provider-azure/pkg/provider/azure_vmss_cache.go @@ -50,6 +50,7 @@ type vmssEntry struct { type availabilitySetNodeEntry struct { vmNames sets.String nodeNames sets.String + vms []compute.VirtualMachine } func (ss *ScaleSet) newVMSSCache() (*azcache.TimedCache, error) { @@ -278,15 +279,16 @@ func (ss *ScaleSet) newAvailabilitySetNodesCache() (*azcache.TimedCache, error) return nil, err } + vmList := make([]compute.VirtualMachine, 0) for _, resourceGroup := range resourceGroups.List() { - vmList, err := ss.Cloud.ListVirtualMachines(resourceGroup) + vms, err := ss.Cloud.ListVirtualMachines(resourceGroup) if err != nil { - return nil, err + return nil, fmt.Errorf("newAvailabilitySetNodesCache: failed to list vms in the resource group %s: %w", resourceGroup, err) } - - for _, vm := range vmList { + for _, vm := range vms { if vm.Name != nil { - vmNames.Insert(*vm.Name) + vmNames.Insert(to.String(vm.Name)) + vmList = append(vmList, vm) } } } @@ -300,6 +302,7 @@ func (ss *ScaleSet) newAvailabilitySetNodesCache() (*azcache.TimedCache, error) localCache := availabilitySetNodeEntry{ vmNames: vmNames, nodeNames: nodeNames, + vms: vmList, } return localCache, nil