diff --git a/rollout/canary.go b/rollout/canary.go index 7c31506a82..c2300fc02d 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -1,6 +1,7 @@ package rollout import ( + "errors" "sort" appsv1 "k8s.io/api/apps/v1" @@ -50,6 +51,11 @@ func (c *rolloutContext) rolloutCanary() error { } if err := c.reconcileStableAndCanaryService(); err != nil { + // if we cannot reconcile the stable and canary services then + // we should not continue to adjust traffic routing + if errors.Is(err, DelayServiceSelectorSwapError) { + return nil + } return err } diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 56445f3978..d59ab1bff4 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -1271,7 +1271,13 @@ func TestCanarySVCSelectors(t *testing.T) { informers.Start(stopchan) informers.WaitForCacheSync(stopchan) err := rc.reconcileStableAndCanaryService() - assert.NoError(t, err, "unable to reconcileStableAndCanaryService") + // There is an error returned here because we could not reconcile + // unhealthy services. + if tc.shouldTargetNewRS { + assert.NoError(t, err, "unable to reconcileStableAndCanaryService") + } else { + assert.Error(t, err, "able to reconcileStableAndCanaryService for unhealthy replicas") + } updatedCanarySVC, err := servicesLister.Services(rc.rollout.Namespace).Get(canaryService.Name) assert.NoError(t, err, "unable to get updated canary service") if tc.shouldTargetNewRS { diff --git a/rollout/service.go b/rollout/service.go index a2861be0c2..8a657efb8d 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -45,6 +45,14 @@ const ( }` ) +type delayServiceSelectorSwapError struct{} + +func (e delayServiceSelectorSwapError) Error() string { + return "Selectors cannot be swapped yet because not all pods are ready" +} + +var DelayServiceSelectorSwapError = delayServiceSelectorSwapError{} + func generatePatch(service *corev1.Service, newRolloutUniqueLabelValue string, r *v1alpha1.Rollout) string { if _, ok := service.Annotations[v1alpha1.ManagedByRolloutsKey]; !ok { return fmt.Sprintf(switchSelectorAndAddManagedByPatch, r.Name, newRolloutUniqueLabelValue) @@ -282,7 +290,7 @@ func (c *rolloutContext) ensureSVCTargets(svcName string, rs *appsv1.ReplicaSet, if checkRsAvailability && !replicasetutil.IsReplicaSetAvailable(rs) { logCtx := c.log.WithField(logutil.ServiceKey, svc.Name) logCtx.Infof("delaying service switch from %s to %s: ReplicaSet not fully available", currSelector, desiredSelector) - return nil + return DelayServiceSelectorSwapError } err = c.switchServiceSelector(svc, desiredSelector, c.rollout) if err != nil { diff --git a/rollout/service_test.go b/rollout/service_test.go index 8a402d001c..9ea2a411a2 100644 --- a/rollout/service_test.go +++ b/rollout/service_test.go @@ -781,7 +781,8 @@ func TestDelayCanaryStableServiceLabelInjection(t *testing.T) { roCtx.stableRS = newReplicaSetWithStatus(ro2, 3, 0) err = roCtx.reconcileStableAndCanaryService() - assert.NoError(t, err) + // an error is returned because we are delaying + assert.Equal(t, err, DelayServiceSelectorSwapError) _, canaryInjected := canarySvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey] assert.False(t, canaryInjected) _, stableInjected := stableSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]