Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: return an error when we cannot swap the replicaset hashes fixes #2050 #2187

1 change: 1 addition & 0 deletions USERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ Organizations below are **officially** using Argo Rollouts. Please send a PR wit
1. [Twilio SendGrid](https://sendgrid.com)
1. [Ubie](https://ubie.life/)
1. [VISITS Technologies](https://visits.world/en)
1. [Plaid](https://plaid.com)
121 changes: 121 additions & 0 deletions rollout/canary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (

"github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1"
"github.com/argoproj/argo-rollouts/pkg/client/clientset/versioned/fake"
"github.com/argoproj/argo-rollouts/rollout/mocks"
"github.com/argoproj/argo-rollouts/rollout/trafficrouting"
"github.com/argoproj/argo-rollouts/utils/annotations"
"github.com/argoproj/argo-rollouts/utils/conditions"
"github.com/argoproj/argo-rollouts/utils/hash"
Expand Down Expand Up @@ -60,6 +62,125 @@ func bumpVersion(rollout *v1alpha1.Rollout) *v1alpha1.Rollout {
return newRollout
}

func TestCanaryRollout(t *testing.T) {
newrsVal := "new-rs-xxx"
for _, tc := range []struct {
canaryReplicas int32
canaryAvailReplicas int32

shouldRouteTraffic bool
}{
{0, 0, false},
{2, 0, false},
{2, 1, false},
{2, 2, true},
} {
namespace := "namespace"
selectorNewRSVal := newrsVal
stableService := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "stable",
Namespace: namespace,
},
}
canaryService := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "canary",
Namespace: namespace,
},
}
canaryReplicaset := &v1.ReplicaSet{
ObjectMeta: metav1.ObjectMeta{
Name: "canary",
Namespace: namespace,
Labels: map[string]string{
v1alpha1.DefaultRolloutUniqueLabelKey: selectorNewRSVal,
},
},
Spec: v1.ReplicaSetSpec{
Replicas: pointer.Int32Ptr(tc.canaryReplicas),
},
Status: v1.ReplicaSetStatus{
AvailableReplicas: tc.canaryAvailReplicas,
},
}

stableReplicaset := &v1.ReplicaSet{
ObjectMeta: metav1.ObjectMeta{
Name: "stable",
Namespace: namespace,
},
Spec: v1.ReplicaSetSpec{
Replicas: pointer.Int32Ptr(0),
},
}

fake := fake.Clientset{}
kubeclient := k8sfake.NewSimpleClientset(
stableService, canaryService, canaryReplicaset, stableReplicaset)
informers := k8sinformers.NewSharedInformerFactory(kubeclient, 0)
servicesLister := informers.Core().V1().Services().Lister()

rollout := &v1alpha1.Rollout{
ObjectMeta: metav1.ObjectMeta{
Name: "selector-labels-test",
Namespace: namespace,
},
Spec: v1alpha1.RolloutSpec{
Strategy: v1alpha1.RolloutStrategy{
Canary: &v1alpha1.CanaryStrategy{
DynamicStableScale: true,
StableService: stableService.Name,
CanaryService: canaryService.Name,
},
},
},
}

rollout.Status.CurrentStepHash = conditions.ComputeStepHash(rollout)
trafficRouter := mocks.NewTrafficRoutingReconciler(t)
trafficRouter.On("Type").Return("mock")
trafficRouter.On("RemoveManagedRoutes").Return(nil)
trafficRouter.On("UpdateHash", newrsVal, "").Return(nil)
if tc.shouldRouteTraffic {
trafficRouter.On("SetWeight", int32(0)).Return(nil)
trafficRouter.On("VerifyWeight", int32(0)).Return(nil, nil)
}
mocks.NewTrafficRoutingReconciler(t)
rc := rolloutContext{
log: logutil.WithRollout(rollout),
reconcilerBase: reconcilerBase{
argoprojclientset: &fake,
servicesLister: servicesLister,
kubeclientset: kubeclient,
recorder: record.NewFakeEventRecorder(),

newTrafficRoutingReconciler: func(roCtx *rolloutContext) ([]trafficrouting.TrafficRoutingReconciler, error) {
return []trafficrouting.TrafficRoutingReconciler{
trafficRouter,
}, nil
},
},

rollout: rollout,
pauseContext: &pauseContext{
rollout: rollout,
},
newRS: canaryReplicaset,
stableRS: stableReplicaset,
}
stopchan := make(chan struct{})
defer close(stopchan)
informers.Start(stopchan)
informers.WaitForCacheSync(stopchan)
err := rc.rolloutCanary()
assert.NoError(t, err)
if !tc.shouldRouteTraffic {
trafficRouter.AssertNotCalled(t, "SetWeight", int32(0))
}
}
}

// TestCanaryRolloutBumpVersion verifies we correctly bump revision of Rollout and new ReplicaSet
func TestCanaryRolloutBumpVersion(t *testing.T) {
f := newFixture(t)
Expand Down
4 changes: 4 additions & 0 deletions rollout/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ type rolloutContext struct {
// (e.g. a setWeight step, after a blue-green active switch, after stable service switch),
// since we do not want to continually verify weight in case it could incur rate-limiting or other expenses.
targetsVerified *bool

// skippedSelectorSwap indicates that we have skipped swapping selectors
// and are not ready to perform any final actions of moving to 100%
skippedSelectorSwap *bool
}

func (c *rolloutContext) reconcile() error {
Expand Down
2 changes: 2 additions & 0 deletions rollout/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,10 @@ func (c *rolloutContext) ensureSVCTargets(svcName string, rs *appsv1.ReplicaSet,
if checkRsAvailability && !replicasetutil.IsReplicaSetAvailable(rs) {
logCtx := c.log.WithField(logutil.ServiceKey, svc.Name)
logCtx.Infof("delaying service switch from %s to %s: ReplicaSet not fully available", currSelector, desiredSelector)
c.skippedSelectorSwap = pointer.BoolPtr(true)
return nil
}
c.skippedSelectorSwap = pointer.BoolPtr(false)
err = c.switchServiceSelector(svc, desiredSelector, c.rollout)
if err != nil {
return err
Expand Down
5 changes: 5 additions & 0 deletions rollout/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,11 @@ func TestDelayCanaryStableServiceLabelInjection(t *testing.T) {
assert.False(t, canaryInjected)
_, stableInjected := stableSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]
assert.False(t, stableInjected)

assert.NotNil(t, roCtx.skippedSelectorSwap)
if roCtx.skippedSelectorSwap != nil {
assert.True(t, *roCtx.skippedSelectorSwap)
}
}
{
// next ensure we do update service because new/stable are now available
Expand Down
8 changes: 8 additions & 0 deletions rollout/trafficrouting.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,14 @@ func (c *rolloutContext) reconcileTrafficRouting() error {
return err
}

// if we haven't swapped selectors and dynamic stable scale is set then
// we might switch weights to a set of pods that are not ready (importantly this is the case
// where canary: 100, stable: 0 -> canary: 0, stable: 100).
if c.skippedSelectorSwap != nil && *c.skippedSelectorSwap && c.rollout.Spec.Strategy.Canary.DynamicStableScale {
c.log.Infof("Skipping weight changes since replica set is not ready and dynamicStableScale is set")
return nil
}

err = reconciler.SetWeight(desiredWeight, weightDestinations...)
if err != nil {
c.recorder.Warnf(c.rollout, record.EventOptions{EventReason: "TrafficRoutingError"}, err.Error())
Expand Down