From eccf309ce80bf256eaa5bb2523f89c697f6f5b85 Mon Sep 17 00:00:00 2001 From: Cezary Zawadka Date: Tue, 17 May 2022 19:57:46 +0200 Subject: [PATCH] Rewrite L4 healthchecks: Apply review comments EnsureL4HealthCheck: replace lengthy return value list in with named struct Improve firewall rule comparison Added debug logs And many more small ones --- pkg/firewalls/firewalls_l4.go | 24 ++++++--- pkg/healthchecks/healthchecks_l4.go | 62 ++++++++++++++---------- pkg/healthchecks/healthchecks_l4_test.go | 8 +-- pkg/healthchecks/interfaces.go | 10 ++-- pkg/loadbalancers/l4.go | 6 +-- pkg/loadbalancers/l4_test.go | 2 +- pkg/loadbalancers/l4netlb.go | 8 +-- 7 files changed, 70 insertions(+), 50 deletions(-) diff --git a/pkg/firewalls/firewalls_l4.go b/pkg/firewalls/firewalls_l4.go index 238095a250..bd9cbf26c1 100644 --- a/pkg/firewalls/firewalls_l4.go +++ b/pkg/firewalls/firewalls_l4.go @@ -104,18 +104,28 @@ func EnsureL4FirewallRuleDeleted(cloud *gce.Cloud, fwName string) error { } func firewallRuleEqual(a, b *compute.Firewall, skipDescription bool) bool { - fwrEqual := len(a.Allowed) == 1 && - len(a.Allowed) == len(b.Allowed) && - a.Allowed[0].IPProtocol == b.Allowed[0].IPProtocol && - utils.EqualStringSets(a.Allowed[0].Ports, b.Allowed[0].Ports) && - utils.EqualStringSets(a.SourceRanges, b.SourceRanges) && + if len(a.Allowed) != len(b.Allowed) { + return false + } + for i := range a.Allowed { + if !allowRulesEqual(a.Allowed[i], b.Allowed[i]) { + return false + } + } + + srcAndTargetEqual := utils.EqualStringSets(a.SourceRanges, b.SourceRanges) && utils.EqualStringSets(a.TargetTags, b.TargetTags) // Don't compare the "description" field for shared firewall rules if skipDescription { - return fwrEqual + return srcAndTargetEqual } - return fwrEqual && a.Description == b.Description + return srcAndTargetEqual && a.Description == b.Description +} + +func allowRulesEqual(a *compute.FirewallAllowed, b *compute.FirewallAllowed) bool { + return a.IPProtocol == b.IPProtocol && + utils.EqualStringSets(a.Ports, b.Ports) } func ensureFirewall(svc *v1.Service, shared bool, params *FirewallParams, cloud *gce.Cloud, recorder record.EventRecorder) error { diff --git a/pkg/healthchecks/healthchecks_l4.go b/pkg/healthchecks/healthchecks_l4.go index 0a4c4a578f..0cda6e2894 100644 --- a/pkg/healthchecks/healthchecks_l4.go +++ b/pkg/healthchecks/healthchecks_l4.go @@ -98,69 +98,75 @@ func GetL4() *l4HealthChecks { // for the healthcheck. If healthcheck is shared (external traffic policy 'cluster') then firewall rules will be shared // regardless of scope param. // If the healthcheck already exists, it is updated as needed. -func (l4hc *l4HealthChecks) EnsureL4HealthCheck(svc *corev1.Service, namer namer.L4ResourcesNamer, sharedHC bool, scope meta.KeyType, l4Type utils.L4LBType, nodeNames []string) *L4HealthCheckResult { +func (l4hc *l4HealthChecks) EnsureL4HealthCheck(svc *corev1.Service, namer namer.L4ResourcesNamer, sharedHC bool, scope meta.KeyType, l4Type utils.L4LBType, nodeNames []string) *EnsureL4HealthCheckResult { hcName, hcFwName := namer.L4HealthCheck(svc.Namespace, svc.Name, sharedHC) hcPath, hcPort := helpers.GetServiceHealthCheckPathPort(svc) + klog.V(3).Infof("Ensuring L4 healthcheck: %s and firewall rule %s from service %s/%s, shared: %v.", hcName, hcFwName, svc.Name, svc.Namespace, sharedHC) + if sharedHC { hcPath, hcPort = gce.GetNodesHealthCheckPath(), gce.GetNodesHealthCheckPort() - // We need to acquire a controller-wide mutex to ensure that in the case of a healthcheck shared between loadbalancers that the sync of the GCE resources is not performed in parallel. l4hc.sharedResourcesLock.Lock() defer l4hc.sharedResourcesLock.Unlock() } + klog.V(3).Infof("L4 Healthcheck %s, path: %q, port %d", hcName, hcPath, hcPort) namespacedName := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace} _, hcLink, err := l4hc.ensureL4HealthCheckInternal(hcName, namespacedName, sharedHC, hcPath, hcPort, scope, l4Type) if err != nil { - return &L4HealthCheckResult{ + return &EnsureL4HealthCheckResult{ GceResourceInError: annotations.HealthcheckResource, Err: err, } } + + klog.V(3).Infof("Healthcheck created, ensuring firewall rule %s", hcFwName) err = l4hc.ensureFirewall(svc, hcFwName, hcPort, sharedHC, nodeNames) if err != nil { - return &L4HealthCheckResult{ + return &EnsureL4HealthCheckResult{ GceResourceInError: annotations.HealthcheckResource, Err: err, } } - return &L4HealthCheckResult{ - Name: hcName, - Link: hcLink, - FirewallRuleName: hcFwName, + return &EnsureL4HealthCheckResult{ + HCName: hcName, + HCLink: hcLink, + HCFirewallRuleName: hcFwName, } } // DeleteHealthCheck deletes health check (and firewall rule) for l4 service. Checks if shared resources are safe to delete. func (l4hc *l4HealthChecks) DeleteHealthCheck(svc *corev1.Service, namer namer.L4ResourcesNamer, sharedHC bool, scope meta.KeyType, l4Type utils.L4LBType) (string, error) { + + hcName, hcFwName := namer.L4HealthCheck(svc.Namespace, svc.Name, sharedHC) + namespacedName := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace} + + klog.V(3).Infof("Trying to delete L4 healthcheck: %s and firewall rule %s from service %s/%s, shared: %v", hcName, hcFwName, svc.Name, svc.Namespace, sharedHC) if sharedHC { - // lock out entire DeleteHealthCheck process + // We need to acquire a controller-wide mutex to ensure that in the case of a healthcheck shared between loadbalancers that the sync of the GCE resources is not performed in parallel. l4hc.sharedResourcesLock.Lock() defer l4hc.sharedResourcesLock.Unlock() } - hcName, hcFwName := namer.L4HealthCheck(svc.Namespace, svc.Name, sharedHC) - namespacedName := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace} err := utils.IgnoreHTTPNotFound(l4hc.deleteHealthCheck(hcName, scope)) if err != nil { + // Ignore deletion error due to health check in use by another resource. if !utils.IsInUsedByError(err) { klog.Errorf("Failed to delete healthcheck for service %s - %v", namespacedName.String(), err) return annotations.HealthcheckResource, err } - // Ignore deletion error due to health check in use by another resource. - // This will be hit if this is a shared healthcheck. - klog.V(2).Infof("Failed to delete healthcheck %s: health check in use.", hcName) + klog.V(2).Infof("Failed to delete healthcheck %s: shared health check in use.", hcName) return "", nil } // Health check deleted, now delete the firewall rule return l4hc.deleteHealthCheckFirewall(svc, hcName, hcFwName, sharedHC, l4Type) } -func (l4hc *l4HealthChecks) ensureL4HealthCheckInternal(name string, svcName types.NamespacedName, shared bool, path string, port int32, scope meta.KeyType, l4Type utils.L4LBType) (*composite.HealthCheck, string, error) { +func (l4hc *l4HealthChecks) ensureL4HealthCheckInternal(hcName string, svcName types.NamespacedName, shared bool, path string, port int32, scope meta.KeyType, l4Type utils.L4LBType) (*composite.HealthCheck, string, error) { selfLink := "" - key, err := composite.CreateKey(l4hc.cloud, name, scope) + key, err := composite.CreateKey(l4hc.cloud, hcName, scope) if err != nil { - return nil, selfLink, fmt.Errorf("Failed to create key for healthcheck with name %s for service %s", name, svcName.String()) + return nil, selfLink, fmt.Errorf("Failed to create key for healthcheck with name %s for service %s", hcName, svcName.String()) } hc, err := composite.GetHealthCheck(l4hc.cloud, key, meta.VersionGA) if err != nil { @@ -172,10 +178,11 @@ func (l4hc *l4HealthChecks) ensureL4HealthCheckInternal(name string, svcName typ if scope == meta.Regional { region = l4hc.cloud.Region() } - expectedHC := NewL4HealthCheck(name, svcName, shared, path, port, l4Type, scope, region) + expectedHC := newL4HealthCheck(hcName, svcName, shared, path, port, l4Type, scope, region) + if hc == nil { // Create the healthcheck - klog.V(2).Infof("Creating healthcheck %s for service %s, shared = %v", name, svcName, shared) + klog.V(2).Infof("Creating healthcheck %s for service %s, shared = %v. Expected healthcheck: %v", hcName, svcName, shared, expectedHC) err = composite.CreateHealthCheck(l4hc.cloud, key, expectedHC) if err != nil { return nil, selfLink, err @@ -186,10 +193,11 @@ func (l4hc *l4HealthChecks) ensureL4HealthCheckInternal(name string, svcName typ selfLink = hc.SelfLink if !needToUpdateHealthChecks(hc, expectedHC) { // nothing to do + klog.V(3).Infof("Healthcheck %v already exists", hcName) return hc, selfLink, nil } mergeHealthChecks(hc, expectedHC) - klog.V(2).Infof("Updating healthcheck %s for service %s", name, svcName) + klog.V(2).Infof("Updating healthcheck %s for service %s, updated healthcheck: %v", hcName, svcName, expectedHC) err = composite.UpdateHealthCheck(l4hc.cloud, key, expectedHC) if err != nil { return nil, selfLink, err @@ -197,9 +205,10 @@ func (l4hc *l4HealthChecks) ensureL4HealthCheckInternal(name string, svcName typ return expectedHC, selfLink, err } -// ensureFirewall rule for L4 service. -// The firewall rules are the same for ILB and NetLB that use external traffic policy 'local' (sharedHC == true) -// despite healthchecks have different scopes (global vs regional) +// ensureFirewall rule for `svc`. +// +// L4 ILB and L4 NetLB Services with ExternalTrafficPolicy=Cluster use the same firewall +// rule at global scope. func (l4hc *l4HealthChecks) ensureFirewall(svc *corev1.Service, hcFwName string, hcPort int32, sharedHC bool, nodeNames []string) error { // Add firewall rule for healthchecks to nodes hcFWRParams := firewalls.FirewallParams{ @@ -229,9 +238,10 @@ func (l4hc *l4HealthChecks) deleteHealthCheckFirewall(svc *corev1.Service, hcNam return annotations.HealthcheckResource, err } if !safeToDelete { - klog.V(2).Infof("Failed to delete health check firewall rule %s: health check in use.", hcName) + klog.V(3).Infof("Failed to delete health check firewall rule %s: health check in use.", hcName) return "", nil } + klog.V(3).Infof("Deleting healthcheck firewall rule named: %s", hcFwName) // Delete healthcheck firewall rule if no healthcheck uses the firewall rule. err = l4hc.deleteFirewall(hcFwName, svc) if err != nil { @@ -271,7 +281,7 @@ func (l4hc *l4HealthChecks) deleteFirewall(name string, svc *corev1.Service) err return nil } -func NewL4HealthCheck(name string, svcName types.NamespacedName, shared bool, path string, port int32, l4Type utils.L4LBType, scope meta.KeyType, region string) *composite.HealthCheck { +func newL4HealthCheck(name string, svcName types.NamespacedName, shared bool, path string, port int32, l4Type utils.L4LBType, scope meta.KeyType, region string) *composite.HealthCheck { httpSettings := composite.HTTPHealthCheck{ Port: int64(port), RequestPath: path, @@ -298,7 +308,7 @@ func NewL4HealthCheck(name string, svcName types.NamespacedName, shared bool, pa // mergeHealthChecks reconciles HealthCheck config to be no smaller than // the default values. newHC is assumed to have defaults, -// since it is created by the NewL4HealthCheck call. +// since it is created by the newL4HealthCheck call. // E.g. old health check interval is 2s, new has the default of 8. // The HC interval will be reconciled to 8 seconds. // If the existing health check values are larger than the default interval, diff --git a/pkg/healthchecks/healthchecks_l4_test.go b/pkg/healthchecks/healthchecks_l4_test.go index edd6957417..f0fccb53e1 100644 --- a/pkg/healthchecks/healthchecks_l4_test.go +++ b/pkg/healthchecks/healthchecks_l4_test.go @@ -50,7 +50,7 @@ func TestMergeHealthChecks(t *testing.T) { } { t.Run(tc.desc, func(t *testing.T) { // healthcheck intervals and thresholds are common for Global and Regional healthchecks. Hence testing only Global case. - wantHC := NewL4HealthCheck("hc", types.NamespacedName{Name: "svc", Namespace: "default"}, false, "/", 12345, utils.ILB, meta.Global, "") + wantHC := newL4HealthCheck("hc", types.NamespacedName{Name: "svc", Namespace: "default"}, false, "/", 12345, utils.ILB, meta.Global, "") hc := &composite.HealthCheck{ CheckIntervalSec: tc.checkIntervalSec, TimeoutSec: tc.timeoutSec, @@ -97,8 +97,8 @@ func TestCompareHealthChecks(t *testing.T) { } { t.Run(tc.desc, func(t *testing.T) { // healthcheck intervals and thresholds are common for Global and Regional healthchecks. Hence testing only Global case. - hc := NewL4HealthCheck("hc", types.NamespacedName{Name: "svc", Namespace: "default"}, false, "/", 12345, utils.ILB, meta.Global, "") - wantHC := NewL4HealthCheck("hc", types.NamespacedName{Name: "svc", Namespace: "default"}, false, "/", 12345, utils.ILB, meta.Global, "") + hc := newL4HealthCheck("hc", types.NamespacedName{Name: "svc", Namespace: "default"}, false, "/", 12345, utils.ILB, meta.Global, "") + wantHC := newL4HealthCheck("hc", types.NamespacedName{Name: "svc", Namespace: "default"}, false, "/", 12345, utils.ILB, meta.Global, "") if tc.modifier != nil { tc.modifier(hc) } @@ -120,7 +120,7 @@ func TestCreateHealthCheck(t *testing.T) { {meta.Global, ""}, {meta.Regional, "us-central1"}, } { - hc := NewL4HealthCheck("hc", namespaceName, false, "/", 12345, utils.ILB, v.scope, v.region) + hc := newL4HealthCheck("hc", namespaceName, false, "/", 12345, utils.ILB, v.scope, v.region) if hc.Region != v.region { t.Errorf("HealthCheck Region mismatch! %v != %v", hc.Region, v.region) } diff --git a/pkg/healthchecks/interfaces.go b/pkg/healthchecks/interfaces.go index 7ae209a7b4..0b9956be13 100644 --- a/pkg/healthchecks/interfaces.go +++ b/pkg/healthchecks/interfaces.go @@ -62,15 +62,15 @@ type HealthChecker interface { // L4HealthChecks defines methods for creating and deleting health checks (and their firewall rules) for l4 services type L4HealthChecks interface { // EnsureL4HealthCheck creates health check (and firewall rule) for l4 service - EnsureL4HealthCheck(svc *v1.Service, namer namer.L4ResourcesNamer, sharedHC bool, scope meta.KeyType, l4Type utils.L4LBType, nodeNames []string) *L4HealthCheckResult + EnsureL4HealthCheck(svc *v1.Service, namer namer.L4ResourcesNamer, sharedHC bool, scope meta.KeyType, l4Type utils.L4LBType, nodeNames []string) *EnsureL4HealthCheckResult // DeleteHealthCheck deletes health check (and firewall rule) for l4 service DeleteHealthCheck(svc *v1.Service, namer namer.L4ResourcesNamer, sharedHC bool, scope meta.KeyType, l4Type utils.L4LBType) (string, error) } -type L4HealthCheckResult struct { - Name string - Link string - FirewallRuleName string +type EnsureL4HealthCheckResult struct { + HCName string + HCLink string + HCFirewallRuleName string GceResourceInError string Err error } diff --git a/pkg/loadbalancers/l4.go b/pkg/loadbalancers/l4.go index 83b124515c..3311bd518d 100644 --- a/pkg/loadbalancers/l4.go +++ b/pkg/loadbalancers/l4.go @@ -222,7 +222,7 @@ func (l *L4) EnsureInternalLoadBalancer(nodeNames []string, svc *corev1.Service) result.Error = hcResult.Err return result } - result.Annotations[annotations.HealthcheckKey] = hcResult.Name + result.Annotations[annotations.HealthcheckKey] = hcResult.HCName _, portRanges, _, protocol := utils.GetPortsAndProtocol(l.Service.Spec.Ports) @@ -247,7 +247,7 @@ func (l *L4) EnsureInternalLoadBalancer(nodeNames []string, svc *corev1.Service) return result } result.Annotations[annotations.FirewallRuleKey] = name - result.Annotations[annotations.FirewallRuleForHealthcheckKey] = hcResult.FirewallRuleName + result.Annotations[annotations.FirewallRuleForHealthcheckKey] = hcResult.HCFirewallRuleName // Check if protocol has changed for this service. In this case, forwarding rule should be deleted before // the backend service can be updated. @@ -265,7 +265,7 @@ func (l *L4) EnsureInternalLoadBalancer(nodeNames []string, svc *corev1.Service) } // ensure backend service - bs, err := l.backendPool.EnsureL4BackendService(name, hcResult.Link, string(protocol), string(l.Service.Spec.SessionAffinity), + bs, err := l.backendPool.EnsureL4BackendService(name, hcResult.HCLink, string(protocol), string(l.Service.Spec.SessionAffinity), string(cloud.SchemeInternal), l.NamespacedName, meta.VersionGA) if err != nil { result.GCEResourceInError = annotations.BackendServiceResource diff --git a/pkg/loadbalancers/l4_test.go b/pkg/loadbalancers/l4_test.go index a1e0869ee6..efb19b6c1e 100644 --- a/pkg/loadbalancers/l4_test.go +++ b/pkg/loadbalancers/l4_test.go @@ -225,7 +225,7 @@ func TestEnsureInternalLoadBalancerWithExistingResources(t *testing.T) { if hcResult.Err != nil { t.Errorf("Failed to create healthcheck, err %v", hcResult.Err) } - _, err := l.backendPool.EnsureL4BackendService(lbName, hcResult.Link, "TCP", string(l.Service.Spec.SessionAffinity), + _, err := l.backendPool.EnsureL4BackendService(lbName, hcResult.HCLink, "TCP", string(l.Service.Spec.SessionAffinity), string(cloud.SchemeInternal), l.NamespacedName, meta.VersionGA) if err != nil { t.Errorf("Failed to create backendservice, err %v", err) diff --git a/pkg/loadbalancers/l4netlb.go b/pkg/loadbalancers/l4netlb.go index 50f6e1fa4a..6acc8ccf8b 100644 --- a/pkg/loadbalancers/l4netlb.go +++ b/pkg/loadbalancers/l4netlb.go @@ -112,10 +112,10 @@ func (l4netlb *L4NetLB) EnsureFrontend(nodeNames []string, svc *corev1.Service) if hcResult.Err != nil { result.GCEResourceInError = hcResult.GceResourceInError - result.Error = fmt.Errorf("Failed to ensure health check %s - %w", hcResult.Name, hcResult.Err) + result.Error = fmt.Errorf("Failed to ensure health check %s - %w", hcResult.HCName, hcResult.Err) return result } - result.Annotations[annotations.HealthcheckKey] = hcResult.Name + result.Annotations[annotations.HealthcheckKey] = hcResult.HCName name := l4netlb.ServicePort.BackendName() protocol, res := l4netlb.createFirewalls(name, nodeNames) @@ -123,9 +123,9 @@ func (l4netlb *L4NetLB) EnsureFrontend(nodeNames []string, svc *corev1.Service) return res } result.Annotations[annotations.FirewallRuleKey] = name - result.Annotations[annotations.FirewallRuleForHealthcheckKey] = hcResult.FirewallRuleName + result.Annotations[annotations.FirewallRuleForHealthcheckKey] = hcResult.HCFirewallRuleName - bs, err := l4netlb.backendPool.EnsureL4BackendService(name, hcResult.Link, protocol, string(l4netlb.Service.Spec.SessionAffinity), string(cloud.SchemeExternal), l4netlb.NamespacedName, meta.VersionGA) + bs, err := l4netlb.backendPool.EnsureL4BackendService(name, hcResult.HCLink, protocol, string(l4netlb.Service.Spec.SessionAffinity), string(cloud.SchemeExternal), l4netlb.NamespacedName, meta.VersionGA) if err != nil { result.GCEResourceInError = annotations.BackendServiceResource result.Error = fmt.Errorf("Failed to ensure backend service %s - %w", name, err)