Skip to content

Commit

Permalink
fix: fix the rollout stuck when pod/replicas changed together or cana…
Browse files Browse the repository at this point in the history
…ry strategy.

Signed-off-by: Liming Liu <[email protected]>
  • Loading branch information
andyliuliming committed Dec 19, 2023
1 parent 0eff316 commit 976a6cb
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 0 deletions.
7 changes: 7 additions & 0 deletions rollout/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,13 @@ func (c *rolloutContext) reconcileCanaryReplicaSets() (bool, error) {
return true, nil
}

if c.newRS == nil {
canaryRS, err := c.getCanaryReplicaSet()
if err != nil {
return false, err
}
c.newRS = canaryRS
}
scaledNewRS, err := c.reconcileNewReplicaSet()
if err != nil {
return false, err
Expand Down
43 changes: 43 additions & 0 deletions rollout/canary_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package rollout

import (
"context"
"encoding/json"
"fmt"
"strconv"
Expand Down Expand Up @@ -2009,3 +2010,45 @@ func TestIsDynamicallyRollingBackToStable(t *testing.T) {
})
}
}

func TestCanaryReplicaAndSpecChangedTogether(t *testing.T) {
f := newFixture(t)
defer f.Close()

originReplicas := 3
r1 := newCanaryRollout("foo", originReplicas, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(0))
canarySVCName := "canary"
stableSVCName := "stable"
r1.Spec.Strategy.Canary.CanaryService = canarySVCName
r1.Spec.Strategy.Canary.StableService = stableSVCName

stableRS := newReplicaSetWithStatus(r1, originReplicas, originReplicas)
stableSVC := newService(stableSVCName, 80,
map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: stableRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, r1)

r2 := bumpVersion(r1)
canaryRS := newReplicaSetWithStatus(r2, originReplicas, originReplicas)
canarySVC := newService(canarySVCName, 80,
map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, r2)

f.replicaSetLister = append(f.replicaSetLister, canaryRS, stableRS)
f.serviceLister = append(f.serviceLister, canarySVC, stableSVC)

r3 := bumpVersion(r2)
r3.Spec.Replicas = pointer.Int32(int32(originReplicas) + 5)
r3.Status.StableRS = stableRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]

f.rolloutLister = append(f.rolloutLister, r3)
f.kubeobjects = append(f.kubeobjects, canaryRS, stableRS, canarySVC, stableSVC)
f.objects = append(f.objects, r3)

ctrl, _, _ := f.newController(noResyncPeriodFunc)
roCtx, err := ctrl.newRolloutContext(r3)
assert.NoError(t, err)
err = roCtx.reconcile()
assert.NoError(t, err)
updated, err := f.kubeclient.AppsV1().ReplicaSets(r3.Namespace).Get(context.Background(), canaryRS.Name, metav1.GetOptions{})
assert.NoError(t, err)
// check the canary one is updated
assert.NotEqual(t, originReplicas, int(*updated.Spec.Replicas))
}
22 changes: 22 additions & 0 deletions rollout/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
serviceutil "github.com/argoproj/argo-rollouts/utils/service"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
patchtypes "k8s.io/apimachinery/pkg/types"
"k8s.io/utils/pointer"
Expand Down Expand Up @@ -284,6 +285,27 @@ func (c *rolloutContext) reconcileStableAndCanaryService() error {
return nil
}

func (c *rolloutContext) getCanaryReplicaSet() (*appsv1.ReplicaSet, error) {
if c.rollout.Spec.Strategy.Canary == nil {
return nil, nil
}
svc, err := c.servicesLister.Services(c.rollout.Namespace).Get(c.rollout.Spec.Strategy.Canary.CanaryService)
if err != nil {
if k8serrors.IsNotFound(err) {
return nil, nil
}
return nil, err
}
if svc != nil {
for _, rs := range c.allRSs {
if serviceutil.GetRolloutSelectorLabel(svc) == rs.Labels[v1alpha1.DefaultRolloutUniqueLabelKey] {
return rs, nil
}
}
}
return nil, nil
}

// ensureSVCTargets updates the service with the given name to point to the given ReplicaSet,
// but only if that ReplicaSet has proper availability. There is still an edge case with this function if
// in the small window of time between a rollout being completed, and we try to update the service selector, we lose 100%
Expand Down
70 changes: 70 additions & 0 deletions rollout/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -879,3 +879,73 @@ func TestDelayCanaryStableServiceDelayOnAdoptedService(t *testing.T) {
})

}

func TestGetCanaryReplicaSet(t *testing.T) {
t.Run("Not Canary Strategy", func(t *testing.T) {
f := newFixture(t)
defer f.Close()
ctrl, _, _ := f.newController(noResyncPeriodFunc)
ro := newRollout("foo", 3, nil, nil)
roCtx, err := ctrl.newRolloutContext(ro)
assert.NoError(t, err)
rs, err := roCtx.getCanaryReplicaSet()
assert.Nil(t, rs)
assert.NoError(t, err)
},
)

t.Run("No Canary SVC", func(t *testing.T) {
f := newFixture(t)
defer f.Close()
ctrl, _, _ := f.newController(noResyncPeriodFunc)
ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1))
roCtx, err := ctrl.newRolloutContext(ro)
assert.NoError(t, err)
rs, err := roCtx.getCanaryReplicaSet()
assert.Nil(t, rs)
assert.NoError(t, err)
},
)
t.Run("Have Canary SVC", func(t *testing.T) {
f := newFixture(t)
defer f.Close()
ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1))
canarySVCName := "canary"
canaryRS := newReplicaSetWithStatus(ro, 3, 3)
canarySVC := newService(canarySVCName, 80,
map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]}, ro)
ro.Spec.Strategy.Canary.CanaryService = canarySVCName
f.rolloutLister = append(f.rolloutLister, ro)
f.kubeobjects = append(f.kubeobjects, canaryRS, canarySVC)
f.replicaSetLister = append(f.replicaSetLister, canaryRS)
f.serviceLister = append(f.serviceLister, canarySVC)

ctrl, _, _ := f.newController(noResyncPeriodFunc)
roCtx, err := ctrl.newRolloutContext(ro)
assert.NoError(t, err)
rs, err := roCtx.getCanaryReplicaSet()
assert.NoError(t, err)
assert.NotNil(t, rs)
})
t.Run("No Matched Replicaset", func(t *testing.T) {
f := newFixture(t)
defer f.Close()
ro := newCanaryRollout("foo", 3, nil, nil, nil, intstr.FromInt(1), intstr.FromInt(1))
canarySVCName := "canary"
canaryRS := newReplicaSetWithStatus(ro, 3, 0)
canarySVC := newService(canarySVCName, 80,
map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: "youknowthisisdifferent"}, ro)
ro.Spec.Strategy.Canary.CanaryService = canarySVCName
f.rolloutLister = append(f.rolloutLister, ro)
f.kubeobjects = append(f.kubeobjects, canaryRS, canarySVC)
f.replicaSetLister = append(f.replicaSetLister, canaryRS)
f.serviceLister = append(f.serviceLister, canarySVC)

ctrl, _, _ := f.newController(noResyncPeriodFunc)
roCtx, err := ctrl.newRolloutContext(ro)
assert.NoError(t, err)
rs, err := roCtx.getCanaryReplicaSet()
assert.NoError(t, err)
assert.Nil(t, rs)
})
}

0 comments on commit 976a6cb

Please sign in to comment.