Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: When Degraded state, canary-service doesn't have any endpoints #2536

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions USERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ Organizations below are **officially** using Argo Rollouts. Please send a PR wit
1. [Ubie](https://ubie.life/)
1. [VISITS Technologies](https://visits.world/en)
1. [Yotpo](https://www.yotpo.com/)
1. [Nike](https://nike.com)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might want to propose this in a separate PR.

7 changes: 7 additions & 0 deletions rollout/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ func (c *rolloutContext) rolloutCanary() error {
return err
}

// Reset the canary service to use the stable ReplicaSet's pod selector in the event of a failed analysis run or failed experiment,
// so that it keeps exposing network endpoints and traffic is routed without interruption.
err = c.reconcileStableAndCanaryService()
if err != nil {
return err
}

noScalingOccurred, err := c.reconcileCanaryReplicaSets()
if err != nil {
return err
Expand Down
29 changes: 24 additions & 5 deletions rollout/canary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1198,14 +1198,16 @@ func TestCanarySVCSelectors(t *testing.T) {
canaryAvailReplicas int32

shouldTargetNewRS bool
isAbortedRun bool
}{
{0, 0, false},
{2, 0, false},
{2, 1, false},
{2, 2, true},
{0, 0, false, false},
{2, 0, false, false},
{2, 1, false, true},
{2, 2, true, true},
} {
namespace := "namespace"
selectorNewRSVal := "new-rs-xxx"
selectorOldRSVal := "old-rs-xxx"
stableService := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "stable",
Expand Down Expand Up @@ -1263,8 +1265,21 @@ func TestCanarySVCSelectors(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Name: "stable",
Namespace: namespace,
Labels: map[string]string{
v1alpha1.DefaultRolloutUniqueLabelKey: selectorOldRSVal,
},
},
Spec: v1.ReplicaSetSpec{
Replicas: pointer.Int32Ptr(tc.canaryReplicas),
},
Status: v1.ReplicaSetStatus{
AvailableReplicas: tc.canaryAvailReplicas,
},
},
pauseContext: &pauseContext{
removeAbort: !tc.isAbortedRun,
addAbort: tc.isAbortedRun,
},
}
stopchan := make(chan struct{})
defer close(stopchan)
Expand All @@ -1274,8 +1289,12 @@ func TestCanarySVCSelectors(t *testing.T) {
assert.NoError(t, err, "unable to reconcileStableAndCanaryService")
updatedCanarySVC, err := servicesLister.Services(rc.rollout.Namespace).Get(canaryService.Name)
assert.NoError(t, err, "unable to get updated canary service")
expectedRSVal := selectorNewRSVal
if tc.isAbortedRun {
expectedRSVal = selectorOldRSVal
}
if tc.shouldTargetNewRS {
assert.Equal(t, selectorNewRSVal, updatedCanarySVC.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey],
assert.Equal(t, expectedRSVal, updatedCanarySVC.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey],
"canary SVC should have newRS selector label when newRS has %d replicas and %d AvailableReplicas",
tc.canaryReplicas, tc.canaryAvailReplicas)
} else {
Expand Down
22 changes: 14 additions & 8 deletions rollout/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,20 @@ func (c *rolloutContext) reconcileStableAndCanaryService() error {
if c.rollout.Spec.Strategy.Canary == nil {
return nil
}
err := c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.StableService, c.stableRS, true)
if err != nil {
return err
}
err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.newRS, true)
if err != nil {
return err
if c.pauseContext.IsAborted() {
err := c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.stableRS, true)
if err != nil {
return err
}
} else {
err := c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.StableService, c.stableRS, true)
if err != nil {
return err
}
err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.newRS, true)
if err != nil {
return err
}
}
return nil
}
Expand All @@ -280,7 +287,6 @@ func (c *rolloutContext) ensureSVCTargets(svcName string, rs *appsv1.ReplicaSet,
currSelector := svc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]
desiredSelector := rs.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
logCtx := c.log.WithField(logutil.ServiceKey, svc.Name)

if currSelector != desiredSelector {
if _, ok := svc.Annotations[v1alpha1.ManagedByRolloutsKey]; !ok {
// This block will be entered only when adopting a service that already exists, because the current annotation
Expand Down
2 changes: 2 additions & 0 deletions rollout/trafficrouting_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,7 @@ func TestDynamicScalingDontIncreaseWeightWhenAborted(t *testing.T) {
f.objects = append(f.objects, r2)

f.expectPatchRolloutAction(r2)
f.expectPatchServiceAction(canarySvc, rs1PodHash)

f.fakeTrafficRouting = newUnmockedFakeTrafficRoutingReconciler()
f.fakeTrafficRouting.On("UpdateHash", mock.Anything, mock.Anything, mock.Anything).Return(nil)
Expand Down Expand Up @@ -964,6 +965,7 @@ func TestDynamicScalingDecreaseWeightAccordingToStableAvailabilityWhenAborted(t
f.objects = append(f.objects, r2)

f.expectPatchRolloutAction(r2)
f.expectPatchServiceAction(canarySvc, rs1PodHash)

f.fakeTrafficRouting = newUnmockedFakeTrafficRoutingReconciler()
f.fakeTrafficRouting.On("UpdateHash", mock.Anything, mock.Anything, mock.Anything).Return(nil)
Expand Down