From ec6940c4c06ac5c9b0827298e5a62e6ee1634834 Mon Sep 17 00:00:00 2001 From: Kalyan Inampudi Date: Tue, 24 Jan 2023 09:13:58 -0800 Subject: [PATCH 1/4] When Degraded state, canary-service doesn't have any endpoints, resuting in traefik unable to route requests. Signed-off-by: Kalyan Inampudi --- USERS.md | 1 + rollout/canary.go | 8 +++ rollout/canary_test.go | 93 ++++++++++++++++++++++++++++++++++ rollout/service.go | 30 ++++++++--- rollout/trafficrouting_test.go | 2 + 5 files changed, 126 insertions(+), 8 deletions(-) diff --git a/USERS.md b/USERS.md index bc49f539d5..aa59a08ade 100644 --- a/USERS.md +++ b/USERS.md @@ -41,3 +41,4 @@ Organizations below are **officially** using Argo Rollouts. Please send a PR wit 1. [Ubie](https://ubie.life/) 1. [VISITS Technologies](https://visits.world/en) 1. [Yotpo](https://www.yotpo.com/) +1. [Nike](https://nike.com) diff --git a/rollout/canary.go b/rollout/canary.go index 7c31506a82..37cda06841 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -71,6 +71,13 @@ func (c *rolloutContext) rolloutCanary() error { return err } + //Reset the Canary service to have stableRS pod selector in event of failed analysis run or failed experiment + //to expose network endpoints to route traffic without any interruption. + err = c.reconcileStableAndCanaryService() + if err != nil { + return err + } + noScalingOccurred, err := c.reconcileCanaryReplicaSets() if err != nil { return err @@ -396,6 +403,7 @@ func (c *rolloutContext) syncRolloutStatusCanary() error { } } newStatus = c.calculateRolloutConditions(newStatus) + println("Here in aborted") return c.persistRolloutStatus(&newStatus) } diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 56445f3978..844c16d811 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -1206,6 +1206,7 @@ func TestCanarySVCSelectors(t *testing.T) { } { namespace := "namespace" selectorNewRSVal := "new-rs-xxx" + selectorOldRSVal := "old-rs-xxx" stableService := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "stable", @@ -1263,8 +1264,15 @@ func TestCanarySVCSelectors(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "stable", Namespace: namespace, + Labels: map[string]string{ + v1alpha1.DefaultRolloutUniqueLabelKey: selectorOldRSVal, + }, }, }, + pauseContext: &pauseContext{ + removeAbort: true, + addAbort: false, + }, } stopchan := make(chan struct{}) defer close(stopchan) @@ -1286,6 +1294,91 @@ func TestCanarySVCSelectors(t *testing.T) { } } +func TestCanarySVCSelectorsAbortedRun(t *testing.T) { + namespace := "namespace" + selectorNewRSVal := "new-rs-xxx" + selectorOldRSVal := "old-rs-xxx" + stableService := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stable", + Namespace: namespace, + }, + } + canaryService := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "canary", + Namespace: namespace, + }, + } + kubeclient := k8sfake.NewSimpleClientset(stableService, canaryService) + informers := k8sinformers.NewSharedInformerFactory(kubeclient, 0) + servicesLister := informers.Core().V1().Services().Lister() + + rollout := &v1alpha1.Rollout{ + ObjectMeta: metav1.ObjectMeta{ + Name: "selector-labels-test", + Namespace: namespace, + }, + Spec: v1alpha1.RolloutSpec{ + Strategy: v1alpha1.RolloutStrategy{ + Canary: &v1alpha1.CanaryStrategy{ + StableService: stableService.Name, + CanaryService: canaryService.Name, + }, + }, + }, + } + rc := rolloutContext{ + log: logutil.WithRollout(rollout), + reconcilerBase: reconcilerBase{ + servicesLister: servicesLister, + kubeclientset: kubeclient, + recorder: record.NewFakeEventRecorder(), + }, + rollout: rollout, + newRS: &v1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "canary", + Namespace: namespace, + Labels: map[string]string{ + v1alpha1.DefaultRolloutUniqueLabelKey: selectorNewRSVal, + }, + }, + Spec: v1.ReplicaSetSpec{ + Replicas: pointer.Int32Ptr(2), + }, + Status: v1.ReplicaSetStatus{ + AvailableReplicas: 2, + }, + }, + stableRS: &v1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stable", + Namespace: namespace, + Labels: map[string]string{ + v1alpha1.DefaultRolloutUniqueLabelKey: selectorOldRSVal, + }, + }, + }, + pauseContext: &pauseContext{ + removeAbort: false, + addAbort: true, + }, + } + replicaSpecs := int32(2) + rc.stableRS.Spec.Replicas = &replicaSpecs + rc.stableRS.Status.AvailableReplicas = 2 + stopchan := make(chan struct{}) + defer close(stopchan) + informers.Start(stopchan) + informers.WaitForCacheSync(stopchan) + err := rc.reconcileStableAndCanaryService() + assert.NoError(t, err, "unable to reconcileStableAndCanaryService") + updatedCanarySVC, err := servicesLister.Services(rc.rollout.Namespace).Get(canaryService.Name) + assert.NoError(t, err, "unable to get updated canary service") + assert.Equal(t, "old-rs-xxx", updatedCanarySVC.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]) +} + func TestCanaryRolloutWithInvalidCanaryServiceName(t *testing.T) { f := newFixture(t) defer f.Close() diff --git a/rollout/service.go b/rollout/service.go index de63f527b3..a60c935c60 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -3,6 +3,7 @@ package rollout import ( "context" "fmt" + "strconv" "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" "github.com/argoproj/argo-rollouts/rollout/trafficrouting" @@ -53,12 +54,16 @@ func generatePatch(service *corev1.Service, newRolloutUniqueLabelValue string, r // switchSelector switch the selector on an existing service to a new value func (c rolloutContext) switchServiceSelector(service *corev1.Service, newRolloutUniqueLabelValue string, r *v1alpha1.Rollout) error { + print("here in init") ctx := context.TODO() if service.Spec.Selector == nil { service.Spec.Selector = make(map[string]string) } + print("here in init") _, hasManagedRollout := serviceutil.HasManagedByAnnotation(service) + print("hasManagedRollout " + strconv.FormatBool(hasManagedRollout)) oldPodHash, ok := service.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] + print("old " + oldPodHash) if ok && oldPodHash == newRolloutUniqueLabelValue && hasManagedRollout { return nil } @@ -253,15 +258,25 @@ func (c *rolloutContext) reconcileStableAndCanaryService() error { if c.rollout.Spec.Strategy.Canary == nil { return nil } - err := c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.StableService, c.stableRS, true) - if err != nil { - return err - } - err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.newRS, true) - if err != nil { - return err + if c.pauseContext.IsAborted() { + err := c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.stableRS, true) + if err != nil { + return err + } + } else { + err := c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.StableService, c.stableRS, true) + if err != nil { + return err + } else { + err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.newRS, true) + if err != nil { + return err + } + return nil + } } return nil + } // ensureSVCTargets updates the service with the given name to point to the given ReplicaSet, @@ -280,7 +295,6 @@ func (c *rolloutContext) ensureSVCTargets(svcName string, rs *appsv1.ReplicaSet, currSelector := svc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] desiredSelector := rs.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] logCtx := c.log.WithField(logutil.ServiceKey, svc.Name) - if currSelector != desiredSelector { if _, ok := svc.Annotations[v1alpha1.ManagedByRolloutsKey]; !ok { // This block will be entered only when adopting a service that already exists, because the current annotation diff --git a/rollout/trafficrouting_test.go b/rollout/trafficrouting_test.go index 8ac4a08e67..ee510316ec 100644 --- a/rollout/trafficrouting_test.go +++ b/rollout/trafficrouting_test.go @@ -893,6 +893,7 @@ func TestDynamicScalingDontIncreaseWeightWhenAborted(t *testing.T) { f.objects = append(f.objects, r2) f.expectPatchRolloutAction(r2) + f.expectPatchServiceAction(canarySvc, rs1PodHash) f.fakeTrafficRouting = newUnmockedFakeTrafficRoutingReconciler() f.fakeTrafficRouting.On("UpdateHash", mock.Anything, mock.Anything, mock.Anything).Return(nil) @@ -964,6 +965,7 @@ func TestDynamicScalingDecreaseWeightAccordingToStableAvailabilityWhenAborted(t f.objects = append(f.objects, r2) f.expectPatchRolloutAction(r2) + f.expectPatchServiceAction(canarySvc, rs1PodHash) f.fakeTrafficRouting = newUnmockedFakeTrafficRoutingReconciler() f.fakeTrafficRouting.On("UpdateHash", mock.Anything, mock.Anything, mock.Anything).Return(nil) From cca4ee437b02670802f775abc6d2ce09293faaf7 Mon Sep 17 00:00:00 2001 From: Kalyan Inampudi Date: Tue, 24 Jan 2023 14:57:23 -0800 Subject: [PATCH 2/4] When Degraded state, canary-service doesn't have any endpoints, resuting in traefik unable to route requests. Signed-off-by: Kalyan Inampudi --- rollout/canary.go | 1 - rollout/canary_test.go | 110 +++++++---------------------------------- rollout/service.go | 2 - 3 files changed, 18 insertions(+), 95 deletions(-) diff --git a/rollout/canary.go b/rollout/canary.go index 37cda06841..59c7e1186b 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -403,7 +403,6 @@ func (c *rolloutContext) syncRolloutStatusCanary() error { } } newStatus = c.calculateRolloutConditions(newStatus) - println("Here in aborted") return c.persistRolloutStatus(&newStatus) } diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 844c16d811..73857ad3a2 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -1198,11 +1198,12 @@ func TestCanarySVCSelectors(t *testing.T) { canaryAvailReplicas int32 shouldTargetNewRS bool + isAbortedRun bool }{ - {0, 0, false}, - {2, 0, false}, - {2, 1, false}, - {2, 2, true}, + {0, 0, false, false}, + {2, 0, false, false}, + {2, 1, false, true}, + {2, 2, true, true}, } { namespace := "namespace" selectorNewRSVal := "new-rs-xxx" @@ -1268,10 +1269,16 @@ func TestCanarySVCSelectors(t *testing.T) { v1alpha1.DefaultRolloutUniqueLabelKey: selectorOldRSVal, }, }, + Spec: v1.ReplicaSetSpec{ + Replicas: pointer.Int32Ptr(tc.canaryReplicas), + }, + Status: v1.ReplicaSetStatus{ + AvailableReplicas: tc.canaryAvailReplicas, + }, }, pauseContext: &pauseContext{ - removeAbort: true, - addAbort: false, + removeAbort: !tc.isAbortedRun, + addAbort: tc.isAbortedRun, }, } stopchan := make(chan struct{}) @@ -1282,8 +1289,12 @@ func TestCanarySVCSelectors(t *testing.T) { assert.NoError(t, err, "unable to reconcileStableAndCanaryService") updatedCanarySVC, err := servicesLister.Services(rc.rollout.Namespace).Get(canaryService.Name) assert.NoError(t, err, "unable to get updated canary service") + expectedRSVal := selectorNewRSVal + if tc.isAbortedRun { + expectedRSVal = selectorOldRSVal + } if tc.shouldTargetNewRS { - assert.Equal(t, selectorNewRSVal, updatedCanarySVC.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey], + assert.Equal(t, expectedRSVal, updatedCanarySVC.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey], "canary SVC should have newRS selector label when newRS has %d replicas and %d AvailableReplicas", tc.canaryReplicas, tc.canaryAvailReplicas) } else { @@ -1294,91 +1305,6 @@ func TestCanarySVCSelectors(t *testing.T) { } } -func TestCanarySVCSelectorsAbortedRun(t *testing.T) { - namespace := "namespace" - selectorNewRSVal := "new-rs-xxx" - selectorOldRSVal := "old-rs-xxx" - stableService := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "stable", - Namespace: namespace, - }, - } - canaryService := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "canary", - Namespace: namespace, - }, - } - kubeclient := k8sfake.NewSimpleClientset(stableService, canaryService) - informers := k8sinformers.NewSharedInformerFactory(kubeclient, 0) - servicesLister := informers.Core().V1().Services().Lister() - - rollout := &v1alpha1.Rollout{ - ObjectMeta: metav1.ObjectMeta{ - Name: "selector-labels-test", - Namespace: namespace, - }, - Spec: v1alpha1.RolloutSpec{ - Strategy: v1alpha1.RolloutStrategy{ - Canary: &v1alpha1.CanaryStrategy{ - StableService: stableService.Name, - CanaryService: canaryService.Name, - }, - }, - }, - } - rc := rolloutContext{ - log: logutil.WithRollout(rollout), - reconcilerBase: reconcilerBase{ - servicesLister: servicesLister, - kubeclientset: kubeclient, - recorder: record.NewFakeEventRecorder(), - }, - rollout: rollout, - newRS: &v1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "canary", - Namespace: namespace, - Labels: map[string]string{ - v1alpha1.DefaultRolloutUniqueLabelKey: selectorNewRSVal, - }, - }, - Spec: v1.ReplicaSetSpec{ - Replicas: pointer.Int32Ptr(2), - }, - Status: v1.ReplicaSetStatus{ - AvailableReplicas: 2, - }, - }, - stableRS: &v1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "stable", - Namespace: namespace, - Labels: map[string]string{ - v1alpha1.DefaultRolloutUniqueLabelKey: selectorOldRSVal, - }, - }, - }, - pauseContext: &pauseContext{ - removeAbort: false, - addAbort: true, - }, - } - replicaSpecs := int32(2) - rc.stableRS.Spec.Replicas = &replicaSpecs - rc.stableRS.Status.AvailableReplicas = 2 - stopchan := make(chan struct{}) - defer close(stopchan) - informers.Start(stopchan) - informers.WaitForCacheSync(stopchan) - err := rc.reconcileStableAndCanaryService() - assert.NoError(t, err, "unable to reconcileStableAndCanaryService") - updatedCanarySVC, err := servicesLister.Services(rc.rollout.Namespace).Get(canaryService.Name) - assert.NoError(t, err, "unable to get updated canary service") - assert.Equal(t, "old-rs-xxx", updatedCanarySVC.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]) -} - func TestCanaryRolloutWithInvalidCanaryServiceName(t *testing.T) { f := newFixture(t) defer f.Close() diff --git a/rollout/service.go b/rollout/service.go index a60c935c60..b297473721 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -54,12 +54,10 @@ func generatePatch(service *corev1.Service, newRolloutUniqueLabelValue string, r // switchSelector switch the selector on an existing service to a new value func (c rolloutContext) switchServiceSelector(service *corev1.Service, newRolloutUniqueLabelValue string, r *v1alpha1.Rollout) error { - print("here in init") ctx := context.TODO() if service.Spec.Selector == nil { service.Spec.Selector = make(map[string]string) } - print("here in init") _, hasManagedRollout := serviceutil.HasManagedByAnnotation(service) print("hasManagedRollout " + strconv.FormatBool(hasManagedRollout)) oldPodHash, ok := service.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] From e97367ed874ad6d961d07b4812f9e1fec9fd0841 Mon Sep 17 00:00:00 2001 From: Kalyan Inampudi Date: Wed, 25 Jan 2023 09:47:10 -0800 Subject: [PATCH 3/4] When Degraded state, canary-service doesn't have any endpoints, resuting in traefik unable to route requests. Signed-off-by: Kalyan Inampudi --- rollout/service.go | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/rollout/service.go b/rollout/service.go index b297473721..e9a6df30ab 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -3,8 +3,6 @@ package rollout import ( "context" "fmt" - "strconv" - "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" "github.com/argoproj/argo-rollouts/rollout/trafficrouting" "github.com/argoproj/argo-rollouts/utils/annotations" @@ -59,9 +57,7 @@ func (c rolloutContext) switchServiceSelector(service *corev1.Service, newRollou service.Spec.Selector = make(map[string]string) } _, hasManagedRollout := serviceutil.HasManagedByAnnotation(service) - print("hasManagedRollout " + strconv.FormatBool(hasManagedRollout)) oldPodHash, ok := service.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] - print("old " + oldPodHash) if ok && oldPodHash == newRolloutUniqueLabelValue && hasManagedRollout { return nil } @@ -265,16 +261,13 @@ func (c *rolloutContext) reconcileStableAndCanaryService() error { err := c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.StableService, c.stableRS, true) if err != nil { return err - } else { - err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.newRS, true) - if err != nil { - return err - } - return nil + } + err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.newRS, true) + if err != nil { + return err } } return nil - } // ensureSVCTargets updates the service with the given name to point to the given ReplicaSet, From 6aebcf20744ac33440090bff1481cdb33c415f72 Mon Sep 17 00:00:00 2001 From: Kalyan Inampudi Date: Wed, 25 Jan 2023 11:50:53 -0800 Subject: [PATCH 4/4] fix lint issues Signed-off-by: Kalyan Inampudi --- rollout/service.go | 1 + 1 file changed, 1 insertion(+) diff --git a/rollout/service.go b/rollout/service.go index e9a6df30ab..f44fa473e3 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -3,6 +3,7 @@ package rollout import ( "context" "fmt" + "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" "github.com/argoproj/argo-rollouts/rollout/trafficrouting" "github.com/argoproj/argo-rollouts/utils/annotations"