diff --git a/USERS.md b/USERS.md index 944b4ad722..f3c04b35fa 100644 --- a/USERS.md +++ b/USERS.md @@ -38,3 +38,4 @@ Organizations below are **officially** using Argo Rollouts. Please send a PR wit 1. [Twilio SendGrid](https://sendgrid.com) 1. [Ubie](https://ubie.life/) 1. [VISITS Technologies](https://visits.world/en) +1. [Plaid](https://plaid.com) diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 56445f3978..29cdd298bb 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -19,6 +19,8 @@ import ( "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" "github.com/argoproj/argo-rollouts/pkg/client/clientset/versioned/fake" + "github.com/argoproj/argo-rollouts/rollout/mocks" + "github.com/argoproj/argo-rollouts/rollout/trafficrouting" "github.com/argoproj/argo-rollouts/utils/annotations" "github.com/argoproj/argo-rollouts/utils/conditions" "github.com/argoproj/argo-rollouts/utils/hash" @@ -60,6 +62,125 @@ func bumpVersion(rollout *v1alpha1.Rollout) *v1alpha1.Rollout { return newRollout } +func TestCanaryRollout(t *testing.T) { + newrsVal := "new-rs-xxx" + for _, tc := range []struct { + canaryReplicas int32 + canaryAvailReplicas int32 + + shouldRouteTraffic bool + }{ + {0, 0, false}, + {2, 0, false}, + {2, 1, false}, + {2, 2, true}, + } { + namespace := "namespace" + selectorNewRSVal := newrsVal + stableService := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stable", + Namespace: namespace, + }, + } + canaryService := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "canary", + Namespace: namespace, + }, + } + canaryReplicaset := &v1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "canary", + Namespace: namespace, + Labels: map[string]string{ + v1alpha1.DefaultRolloutUniqueLabelKey: selectorNewRSVal, + }, + }, + Spec: v1.ReplicaSetSpec{ + Replicas: pointer.Int32Ptr(tc.canaryReplicas), + }, + Status: v1.ReplicaSetStatus{ + AvailableReplicas: tc.canaryAvailReplicas, + }, + } + + stableReplicaset := &v1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stable", + Namespace: namespace, + }, + Spec: v1.ReplicaSetSpec{ + Replicas: pointer.Int32Ptr(0), + }, + } + + fake := fake.Clientset{} + kubeclient := k8sfake.NewSimpleClientset( + stableService, canaryService, canaryReplicaset, stableReplicaset) + informers := k8sinformers.NewSharedInformerFactory(kubeclient, 0) + servicesLister := informers.Core().V1().Services().Lister() + + rollout := &v1alpha1.Rollout{ + ObjectMeta: metav1.ObjectMeta{ + Name: "selector-labels-test", + Namespace: namespace, + }, + Spec: v1alpha1.RolloutSpec{ + Strategy: v1alpha1.RolloutStrategy{ + Canary: &v1alpha1.CanaryStrategy{ + DynamicStableScale: true, + StableService: stableService.Name, + CanaryService: canaryService.Name, + }, + }, + }, + } + + rollout.Status.CurrentStepHash = conditions.ComputeStepHash(rollout) + trafficRouter := mocks.NewTrafficRoutingReconciler(t) + trafficRouter.On("Type").Return("mock") + trafficRouter.On("RemoveManagedRoutes").Return(nil) + trafficRouter.On("UpdateHash", newrsVal, "").Return(nil) + if tc.shouldRouteTraffic { + trafficRouter.On("SetWeight", int32(0)).Return(nil) + trafficRouter.On("VerifyWeight", int32(0)).Return(nil, nil) + } + mocks.NewTrafficRoutingReconciler(t) + rc := rolloutContext{ + log: logutil.WithRollout(rollout), + reconcilerBase: reconcilerBase{ + argoprojclientset: &fake, + servicesLister: servicesLister, + kubeclientset: kubeclient, + recorder: record.NewFakeEventRecorder(), + + newTrafficRoutingReconciler: func(roCtx *rolloutContext) ([]trafficrouting.TrafficRoutingReconciler, error) { + return []trafficrouting.TrafficRoutingReconciler{ + trafficRouter, + }, nil + }, + }, + + rollout: rollout, + pauseContext: &pauseContext{ + rollout: rollout, + }, + newRS: canaryReplicaset, + stableRS: stableReplicaset, + } + stopchan := make(chan struct{}) + defer close(stopchan) + informers.Start(stopchan) + informers.WaitForCacheSync(stopchan) + err := rc.rolloutCanary() + assert.NoError(t, err) + if !tc.shouldRouteTraffic { + trafficRouter.AssertNotCalled(t, "SetWeight", int32(0)) + } + } +} + // TestCanaryRolloutBumpVersion verifies we correctly bump revision of Rollout and new ReplicaSet func TestCanaryRolloutBumpVersion(t *testing.T) { f := newFixture(t) diff --git a/rollout/context.go b/rollout/context.go index f8fa5b5f03..14283e3a83 100644 --- a/rollout/context.go +++ b/rollout/context.go @@ -49,6 +49,10 @@ type rolloutContext struct { // (e.g. a setWeight step, after a blue-green active switch, after stable service switch), // since we do not want to continually verify weight in case it could incur rate-limiting or other expenses. targetsVerified *bool + + // skippedSelectorSwap indicates that we have skipped swapping selectors + // and are not ready to perform any final actions of moving to 100% + skippedSelectorSwap *bool } func (c *rolloutContext) reconcile() error { diff --git a/rollout/service.go b/rollout/service.go index a2861be0c2..1f929049be 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -282,8 +282,10 @@ func (c *rolloutContext) ensureSVCTargets(svcName string, rs *appsv1.ReplicaSet, if checkRsAvailability && !replicasetutil.IsReplicaSetAvailable(rs) { logCtx := c.log.WithField(logutil.ServiceKey, svc.Name) logCtx.Infof("delaying service switch from %s to %s: ReplicaSet not fully available", currSelector, desiredSelector) + c.skippedSelectorSwap = pointer.BoolPtr(true) return nil } + c.skippedSelectorSwap = pointer.BoolPtr(false) err = c.switchServiceSelector(svc, desiredSelector, c.rollout) if err != nil { return err diff --git a/rollout/service_test.go b/rollout/service_test.go index 8a402d001c..c14506ab3c 100644 --- a/rollout/service_test.go +++ b/rollout/service_test.go @@ -786,6 +786,11 @@ func TestDelayCanaryStableServiceLabelInjection(t *testing.T) { assert.False(t, canaryInjected) _, stableInjected := stableSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] assert.False(t, stableInjected) + + assert.NotNil(t, roCtx.skippedSelectorSwap) + if roCtx.skippedSelectorSwap != nil { + assert.True(t, *roCtx.skippedSelectorSwap) + } } { // next ensure we do update service because new/stable are now available diff --git a/rollout/trafficrouting.go b/rollout/trafficrouting.go index ebc22b9704..163d41767c 100644 --- a/rollout/trafficrouting.go +++ b/rollout/trafficrouting.go @@ -220,6 +220,14 @@ func (c *rolloutContext) reconcileTrafficRouting() error { return err } + // if we haven't swapped selectors and dynamic stable scale is set then + // we might switch weights to a set of pods that are not ready (importantly this is the case + // where canary: 100, stable: 0 -> canary: 0, stable: 100). + if c.skippedSelectorSwap != nil && *c.skippedSelectorSwap && c.rollout.Spec.Strategy.Canary.DynamicStableScale { + c.log.Infof("Skipping weight changes since replica set is not ready and dynamicStableScale is set") + return nil + } + err = reconciler.SetWeight(desiredWeight, weightDestinations...) if err != nil { c.recorder.Warnf(c.rollout, record.EventOptions{EventReason: "TrafficRoutingError"}, err.Error())