From eec9bb481b11807e4ef0cb31333cdbded3c5eb4d Mon Sep 17 00:00:00 2001 From: Liming Liu Date: Sun, 17 Dec 2023 18:04:31 +0000 Subject: [PATCH 1/3] fix: fix the rollout stuck when pod/replicas changed together or canary strategy. Signed-off-by: Liming Liu --- rollout/canary.go | 7 +++++ rollout/canary_test.go | 43 +++++++++++++++++++++++++ rollout/service.go | 22 +++++++++++++ rollout/service_test.go | 70 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 142 insertions(+) diff --git a/rollout/canary.go b/rollout/canary.go index b443db507e..cd3aa58384 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -435,6 +435,13 @@ func (c *rolloutContext) reconcileCanaryReplicaSets() (bool, error) { return true, nil } + if c.newRS == nil { + canaryRS, err := c.getCanaryReplicaSet() + if err != nil { + return false, err + } + c.newRS = canaryRS + } scaledNewRS, err := c.reconcileNewReplicaSet() if err != nil { return false, err diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 6fc3dda93c..3f598eff6b 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -1,6 +1,7 @@ package rollout import ( + "context" "encoding/json" "fmt" "strconv" @@ -2009,3 +2010,45 @@ func TestIsDynamicallyRollingBackToStable(t *testing.T) { }) } } + +func TestCanaryReplicaAndSpecChangedTogether(t *testing.T) { + f := newFixture(t) + defer f.Close() + + originReplicas := 3 + r1 := newCanaryRollout("foo", originReplicas, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(0)) + canarySVCName := "canary" + stableSVCName := "stable" + r1.Spec.Strategy.Canary.CanaryService = canarySVCName + r1.Spec.Strategy.Canary.StableService = stableSVCName + + stableRS := newReplicaSetWithStatus(r1, originReplicas, originReplicas) + stableSVC := newService(stableSVCName, 80, + map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: stableRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, r1) + + r2 := bumpVersion(r1) + canaryRS := newReplicaSetWithStatus(r2, originReplicas, originReplicas) + canarySVC := newService(canarySVCName, 80, + map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, r2) + + f.replicaSetLister = append(f.replicaSetLister, canaryRS, stableRS) + f.serviceLister = append(f.serviceLister, canarySVC, stableSVC) + + r3 := bumpVersion(r2) + r3.Spec.Replicas = pointer.Int32(int32(originReplicas) + 5) + r3.Status.StableRS = stableRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] + + f.rolloutLister = append(f.rolloutLister, r3) + f.kubeobjects = append(f.kubeobjects, canaryRS, stableRS, canarySVC, stableSVC) + f.objects = append(f.objects, r3) + + ctrl, _, _ := f.newController(noResyncPeriodFunc) + roCtx, err := ctrl.newRolloutContext(r3) + assert.NoError(t, err) + err = roCtx.reconcile() + assert.NoError(t, err) + updated, err := f.kubeclient.AppsV1().ReplicaSets(r3.Namespace).Get(context.Background(), canaryRS.Name, metav1.GetOptions{}) + assert.NoError(t, err) + // check the canary one is updated + assert.NotEqual(t, originReplicas, int(*updated.Spec.Replicas)) +} diff --git a/rollout/service.go b/rollout/service.go index 69739b9315..5846635ef7 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -17,6 +17,7 @@ import ( serviceutil "github.com/argoproj/argo-rollouts/utils/service" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" patchtypes "k8s.io/apimachinery/pkg/types" "k8s.io/utils/pointer" @@ -284,6 +285,27 @@ func (c *rolloutContext) reconcileStableAndCanaryService() error { return nil } +func (c *rolloutContext) getCanaryReplicaSet() (*appsv1.ReplicaSet, error) { + if c.rollout.Spec.Strategy.Canary == nil { + return nil, nil + } + svc, err := c.servicesLister.Services(c.rollout.Namespace).Get(c.rollout.Spec.Strategy.Canary.CanaryService) + if err != nil { + if k8serrors.IsNotFound(err) { + return nil, nil + } + return nil, err + } + if svc != nil { + for _, rs := range c.allRSs { + if serviceutil.GetRolloutSelectorLabel(svc) == rs.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] { + return rs, nil + } + } + } + return nil, nil +} + // ensureSVCTargets updates the service with the given name to point to the given ReplicaSet, // but only if that ReplicaSet has proper availability. There is still an edge case with this function if // in the small window of time between a rollout being completed, and we try to update the service selector, we lose 100% diff --git a/rollout/service_test.go b/rollout/service_test.go index cb15367a3a..e090054558 100644 --- a/rollout/service_test.go +++ b/rollout/service_test.go @@ -879,3 +879,73 @@ func TestDelayCanaryStableServiceDelayOnAdoptedService(t *testing.T) { }) } + +func TestGetCanaryReplicaSet(t *testing.T) { + t.Run("Not Canary Strategy", func(t *testing.T) { + f := newFixture(t) + defer f.Close() + ctrl, _, _ := f.newController(noResyncPeriodFunc) + ro := newRollout("foo", 3, nil, nil) + roCtx, err := ctrl.newRolloutContext(ro) + assert.NoError(t, err) + rs, err := roCtx.getCanaryReplicaSet() + assert.Nil(t, rs) + assert.NoError(t, err) + }, + ) + + t.Run("No Canary SVC", func(t *testing.T) { + f := newFixture(t) + defer f.Close() + ctrl, _, _ := f.newController(noResyncPeriodFunc) + ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) + roCtx, err := ctrl.newRolloutContext(ro) + assert.NoError(t, err) + rs, err := roCtx.getCanaryReplicaSet() + assert.Nil(t, rs) + assert.NoError(t, err) + }, + ) + t.Run("Have Canary SVC", func(t *testing.T) { + f := newFixture(t) + defer f.Close() + ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) + canarySVCName := "canary" + canaryRS := newReplicaSetWithStatus(ro, 3, 3) + canarySVC := newService(canarySVCName, 80, + map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, ro) + ro.Spec.Strategy.Canary.CanaryService = canarySVCName + f.rolloutLister = append(f.rolloutLister, ro) + f.kubeobjects = append(f.kubeobjects, canaryRS, canarySVC) + f.replicaSetLister = append(f.replicaSetLister, canaryRS) + f.serviceLister = append(f.serviceLister, canarySVC) + + ctrl, _, _ := f.newController(noResyncPeriodFunc) + roCtx, err := ctrl.newRolloutContext(ro) + assert.NoError(t, err) + rs, err := roCtx.getCanaryReplicaSet() + assert.NoError(t, err) + assert.NotNil(t, rs) + }) + t.Run("No Matched Replicaset", func(t *testing.T) { + f := newFixture(t) + defer f.Close() + ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) + canarySVCName := "canary" + canaryRS := newReplicaSetWithStatus(ro, 3, 0) + canarySVC := newService(canarySVCName, 80, + map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: "youknowthisisdifferent"}, ro) + ro.Spec.Strategy.Canary.CanaryService = canarySVCName + f.rolloutLister = append(f.rolloutLister, ro) + f.kubeobjects = append(f.kubeobjects, canaryRS, canarySVC) + f.replicaSetLister = append(f.replicaSetLister, canaryRS) + f.serviceLister = append(f.serviceLister, canarySVC) + + ctrl, _, _ := f.newController(noResyncPeriodFunc) + roCtx, err := ctrl.newRolloutContext(ro) + assert.NoError(t, err) + rs, err := roCtx.getCanaryReplicaSet() + assert.NoError(t, err) + assert.Nil(t, rs) + }) +} From 48a109d58eb3ee70b078e5f2e6dad45e3fe5149a Mon Sep 17 00:00:00 2001 From: Liming Liu Date: Wed, 20 Dec 2023 04:37:46 +0000 Subject: [PATCH 2/3] add one unit test case for empty canary service. Signed-off-by: Liming Liu --- rollout/service.go | 2 +- rollout/service_test.go | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/rollout/service.go b/rollout/service.go index 5846635ef7..0517cfc6b9 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -286,7 +286,7 @@ func (c *rolloutContext) reconcileStableAndCanaryService() error { } func (c *rolloutContext) getCanaryReplicaSet() (*appsv1.ReplicaSet, error) { - if c.rollout.Spec.Strategy.Canary == nil { + if c.rollout.Spec.Strategy.Canary == nil || c.rollout.Spec.Strategy.Canary.CanaryService == "" { return nil, nil } svc, err := c.servicesLister.Services(c.rollout.Namespace).Get(c.rollout.Spec.Strategy.Canary.CanaryService) diff --git a/rollout/service_test.go b/rollout/service_test.go index e090054558..a1d8f871e4 100644 --- a/rollout/service_test.go +++ b/rollout/service_test.go @@ -889,8 +889,22 @@ func TestGetCanaryReplicaSet(t *testing.T) { roCtx, err := ctrl.newRolloutContext(ro) assert.NoError(t, err) rs, err := roCtx.getCanaryReplicaSet() + assert.NoError(t, err) assert.Nil(t, rs) + }, + ) + + t.Run("Empty Canary Service", func(t *testing.T) { + f := newFixture(t) + defer f.Close() + ctrl, _, _ := f.newController(noResyncPeriodFunc) + ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) + ro.Spec.Strategy.Canary.CanaryService = "" + roCtx, err := ctrl.newRolloutContext(ro) + assert.NoError(t, err) + rs, err := roCtx.getCanaryReplicaSet() assert.NoError(t, err) + assert.Nil(t, rs) }, ) @@ -899,6 +913,7 @@ func TestGetCanaryReplicaSet(t *testing.T) { defer f.Close() ctrl, _, _ := f.newController(noResyncPeriodFunc) ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) + ro.Spec.Strategy.Canary.CanaryService = "canary" roCtx, err := ctrl.newRolloutContext(ro) assert.NoError(t, err) rs, err := roCtx.getCanaryReplicaSet() @@ -906,11 +921,13 @@ func TestGetCanaryReplicaSet(t *testing.T) { assert.NoError(t, err) }, ) + t.Run("Have Canary SVC", func(t *testing.T) { f := newFixture(t) defer f.Close() ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) canarySVCName := "canary" + ro.Spec.Strategy.Canary.CanaryService = canarySVCName canaryRS := newReplicaSetWithStatus(ro, 3, 3) canarySVC := newService(canarySVCName, 80, map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, ro) @@ -927,11 +944,13 @@ func TestGetCanaryReplicaSet(t *testing.T) { assert.NoError(t, err) assert.NotNil(t, rs) }) + t.Run("No Matched Replicaset", func(t *testing.T) { f := newFixture(t) defer f.Close() ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) canarySVCName := "canary" + ro.Spec.Strategy.Canary.CanaryService = canarySVCName canaryRS := newReplicaSetWithStatus(ro, 3, 0) canarySVC := newService(canarySVCName, 80, map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: "youknowthisisdifferent"}, ro) From 085cb4bbd7792bd7dfa48b5a0121e42d8d13f0ef Mon Sep 17 00:00:00 2001 From: Zach Aller Date: Thu, 21 Dec 2023 11:42:27 -0600 Subject: [PATCH 3/3] use rollout status to get the replicaset hash instead of service Signed-off-by: Zach Aller --- rollout/canary.go | 12 +++--- rollout/canary_test.go | 1 + rollout/service.go | 22 ---------- rollout/service_test.go | 89 ----------------------------------------- 4 files changed, 7 insertions(+), 117 deletions(-) diff --git a/rollout/canary.go b/rollout/canary.go index cd3aa58384..f37e03bab1 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -435,13 +435,13 @@ func (c *rolloutContext) reconcileCanaryReplicaSets() (bool, error) { return true, nil } - if c.newRS == nil { - canaryRS, err := c.getCanaryReplicaSet() - if err != nil { - return false, err - } - c.newRS = canaryRS + // If we have updated both the replica count and the pod template hash c.newRS will be nil we want to reconcile the newRS so we look at the + // rollout status to get the newRS to reconcile it. + if c.newRS == nil && c.rollout.Status.CurrentPodHash != c.rollout.Status.StableRS { + rs, _ := replicasetutil.GetReplicaSetByTemplateHash(c.allRSs, c.rollout.Status.CurrentPodHash) + c.newRS = rs } + scaledNewRS, err := c.reconcileNewReplicaSet() if err != nil { return false, err diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 3f598eff6b..d92cfd2c17 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -2037,6 +2037,7 @@ func TestCanaryReplicaAndSpecChangedTogether(t *testing.T) { r3 := bumpVersion(r2) r3.Spec.Replicas = pointer.Int32(int32(originReplicas) + 5) r3.Status.StableRS = stableRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] + r3.Status.CurrentPodHash = canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] f.rolloutLister = append(f.rolloutLister, r3) f.kubeobjects = append(f.kubeobjects, canaryRS, stableRS, canarySVC, stableSVC) diff --git a/rollout/service.go b/rollout/service.go index 0517cfc6b9..69739b9315 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -17,7 +17,6 @@ import ( serviceutil "github.com/argoproj/argo-rollouts/utils/service" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" patchtypes "k8s.io/apimachinery/pkg/types" "k8s.io/utils/pointer" @@ -285,27 +284,6 @@ func (c *rolloutContext) reconcileStableAndCanaryService() error { return nil } -func (c *rolloutContext) getCanaryReplicaSet() (*appsv1.ReplicaSet, error) { - if c.rollout.Spec.Strategy.Canary == nil || c.rollout.Spec.Strategy.Canary.CanaryService == "" { - return nil, nil - } - svc, err := c.servicesLister.Services(c.rollout.Namespace).Get(c.rollout.Spec.Strategy.Canary.CanaryService) - if err != nil { - if k8serrors.IsNotFound(err) { - return nil, nil - } - return nil, err - } - if svc != nil { - for _, rs := range c.allRSs { - if serviceutil.GetRolloutSelectorLabel(svc) == rs.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] { - return rs, nil - } - } - } - return nil, nil -} - // ensureSVCTargets updates the service with the given name to point to the given ReplicaSet, // but only if that ReplicaSet has proper availability. There is still an edge case with this function if // in the small window of time between a rollout being completed, and we try to update the service selector, we lose 100% diff --git a/rollout/service_test.go b/rollout/service_test.go index a1d8f871e4..cb15367a3a 100644 --- a/rollout/service_test.go +++ b/rollout/service_test.go @@ -879,92 +879,3 @@ func TestDelayCanaryStableServiceDelayOnAdoptedService(t *testing.T) { }) } - -func TestGetCanaryReplicaSet(t *testing.T) { - t.Run("Not Canary Strategy", func(t *testing.T) { - f := newFixture(t) - defer f.Close() - ctrl, _, _ := f.newController(noResyncPeriodFunc) - ro := newRollout("foo", 3, nil, nil) - roCtx, err := ctrl.newRolloutContext(ro) - assert.NoError(t, err) - rs, err := roCtx.getCanaryReplicaSet() - assert.NoError(t, err) - assert.Nil(t, rs) - }, - ) - - t.Run("Empty Canary Service", func(t *testing.T) { - f := newFixture(t) - defer f.Close() - ctrl, _, _ := f.newController(noResyncPeriodFunc) - ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) - ro.Spec.Strategy.Canary.CanaryService = "" - roCtx, err := ctrl.newRolloutContext(ro) - assert.NoError(t, err) - rs, err := roCtx.getCanaryReplicaSet() - assert.NoError(t, err) - assert.Nil(t, rs) - }, - ) - - t.Run("No Canary SVC", func(t *testing.T) { - f := newFixture(t) - defer f.Close() - ctrl, _, _ := f.newController(noResyncPeriodFunc) - ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) - ro.Spec.Strategy.Canary.CanaryService = "canary" - roCtx, err := ctrl.newRolloutContext(ro) - assert.NoError(t, err) - rs, err := roCtx.getCanaryReplicaSet() - assert.Nil(t, rs) - assert.NoError(t, err) - }, - ) - - t.Run("Have Canary SVC", func(t *testing.T) { - f := newFixture(t) - defer f.Close() - ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) - canarySVCName := "canary" - ro.Spec.Strategy.Canary.CanaryService = canarySVCName - canaryRS := newReplicaSetWithStatus(ro, 3, 3) - canarySVC := newService(canarySVCName, 80, - map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, ro) - ro.Spec.Strategy.Canary.CanaryService = canarySVCName - f.rolloutLister = append(f.rolloutLister, ro) - f.kubeobjects = append(f.kubeobjects, canaryRS, canarySVC) - f.replicaSetLister = append(f.replicaSetLister, canaryRS) - f.serviceLister = append(f.serviceLister, canarySVC) - - ctrl, _, _ := f.newController(noResyncPeriodFunc) - roCtx, err := ctrl.newRolloutContext(ro) - assert.NoError(t, err) - rs, err := roCtx.getCanaryReplicaSet() - assert.NoError(t, err) - assert.NotNil(t, rs) - }) - - t.Run("No Matched Replicaset", func(t *testing.T) { - f := newFixture(t) - defer f.Close() - ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1)) - canarySVCName := "canary" - ro.Spec.Strategy.Canary.CanaryService = canarySVCName - canaryRS := newReplicaSetWithStatus(ro, 3, 0) - canarySVC := newService(canarySVCName, 80, - map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: "youknowthisisdifferent"}, ro) - ro.Spec.Strategy.Canary.CanaryService = canarySVCName - f.rolloutLister = append(f.rolloutLister, ro) - f.kubeobjects = append(f.kubeobjects, canaryRS, canarySVC) - f.replicaSetLister = append(f.replicaSetLister, canaryRS) - f.serviceLister = append(f.serviceLister, canarySVC) - - ctrl, _, _ := f.newController(noResyncPeriodFunc) - roCtx, err := ctrl.newRolloutContext(ro) - assert.NoError(t, err) - rs, err := roCtx.getCanaryReplicaSet() - assert.NoError(t, err) - assert.Nil(t, rs) - }) -}