From 7943d8d5a4334404a8d9246922650331c88ca06a Mon Sep 17 00:00:00 2001
From: Zach Aller
Date: Mon, 29 Apr 2024 15:33:52 -0500
Subject: [PATCH] fix: fallback to patch on scale conflict
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Zach Aller

fix: switch to retry logic

Signed-off-by: Zach Aller

lint

Signed-off-by: Zach Aller

retry experiments

Signed-off-by: Zach Aller

remove TODO

Signed-off-by: Zach Aller

remove accidental add

Signed-off-by: Zach Aller

remove accidental add

Signed-off-by: Zach Aller

add retry to setting revision

Signed-off-by: Zach Aller

chore(deps): bump slsa-framework/slsa-github-generator from 1.10.0 to 2.0.0 (#3537)

chore(deps): bump slsa-framework/slsa-github-generator

Bumps [slsa-framework/slsa-github-generator](https://github.com/slsa-framework/slsa-github-generator) from 1.10.0 to 2.0.0.
- [Release notes](https://github.com/slsa-framework/slsa-github-generator/releases)
- [Changelog](https://github.com/slsa-framework/slsa-github-generator/blob/main/CHANGELOG.md)
- [Commits](https://github.com/slsa-framework/slsa-github-generator/compare/v1.10.0...v2.0.0)

---
updated-dependencies:
- dependency-name: slsa-framework/slsa-github-generator
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

chore(deps): bump sigstore/cosign-installer from 3.4.0 to 3.5.0 (#3522)

Bumps [sigstore/cosign-installer](https://github.com/sigstore/cosign-installer) from 3.4.0 to 3.5.0.
- [Release notes](https://github.com/sigstore/cosign-installer/releases)
- [Commits](https://github.com/sigstore/cosign-installer/compare/e1523de7571e31dbe865fd2e80c5c7c23ae71eb4...59acb6260d9c0ba8f4a2f9d9b48431a222b68e20)

---
updated-dependencies:
- dependency-name: sigstore/cosign-installer
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

chore(deps): bump golangci/golangci-lint-action from 4 to 5 (#3540)

Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 4 to 5.
- [Release notes](https://github.com/golangci/golangci-lint-action/releases)
- [Commits](https://github.com/golangci/golangci-lint-action/compare/v4...v5)

---
updated-dependencies:
- dependency-name: golangci/golangci-lint-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

docs: provide recommendation for strategies (#3531)

* docs: provide recommendation for strategies

Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com>

* docs: traffic manager clarifications

Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com>

* docs: explain canary with/out traffic manager

Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com>

* docs: add 3 columns on the comparison table

Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com>

---------

Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com>

feat(dashboard): change the color of the current rollout step (#3526)

I feel that having the current (running) step in an orange color is misleading, as orange usually means warning.
This commit changes the color to the `$argo-running-color`.

Signed-off-by: Alejandro López Sánchez

chore(deps): bump github.com/aws/aws-sdk-go-v2/service/cloudwatch from 1.37.0 to 1.38.0 (#3525)

chore(deps): bump github.com/aws/aws-sdk-go-v2/service/cloudwatch

Bumps [github.com/aws/aws-sdk-go-v2/service/cloudwatch](https://github.com/aws/aws-sdk-go-v2) from 1.37.0 to 1.38.0.
- [Release notes](https://github.com/aws/aws-sdk-go-v2/releases)
- [Changelog](https://github.com/aws/aws-sdk-go-v2/blob/service/s3/v1.38.0/CHANGELOG.md)
- [Commits](https://github.com/aws/aws-sdk-go-v2/compare/service/s3/v1.37.0...service/s3/v1.38.0)

---
updated-dependencies:
- dependency-name: github.com/aws/aws-sdk-go-v2/service/cloudwatch
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

perform all of set revision actions on retry

Signed-off-by: Zach Aller

fix variable

Signed-off-by: Zach Aller

add retry counts to log

Signed-off-by: Zach Aller

add retry counts to logs

Signed-off-by: Zach Aller

clean logs, always dump controller e2e logs

Signed-off-by: Zach Aller

lower timeout

Signed-off-by: Zach Aller

bump timeout on e2e

Signed-off-by: Zach Aller

retry on rollout conflict

Signed-off-by: Zach Aller

don't requeue on rs changes

Signed-off-by: Zach Aller

requeue rs

Signed-off-by: Zach Aller

bump qps for e2e

Signed-off-by: Zach Aller

fix gen-crd

Signed-off-by: Zach Aller

switch to patch

Signed-off-by: Zach Aller

switch to patch

Signed-off-by: Zach Aller

add log

Signed-off-by: Zach Aller

move log lines

Signed-off-by: Zach Aller

Trigger Build

Signed-off-by: Zach Aller

fix one e2e test

Signed-off-by: Zach Aller

lint

Signed-off-by: Zach Aller

add test

Signed-off-by: Zach Aller

chore(deps): bump actions/setup-go from 5.0.0 to 5.0.1 (#3552)

Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.0.0 to 5.0.1.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/v5.0.0...v5.0.1)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

chore(deps): bump codecov/codecov-action from 4.3.0 to 4.3.1 (#3550)

Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 4.3.0 to 4.3.1.
- [Release notes](https://github.com/codecov/codecov-action/releases)
- [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/codecov/codecov-action/compare/v4.3.0...v4.3.1)

---
updated-dependencies:
- dependency-name: codecov/codecov-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

chore(deps): bump google.golang.org/protobuf from 1.33.0 to 1.34.0 (#3548)

Bumps google.golang.org/protobuf from 1.33.0 to 1.34.0.

---
updated-dependencies:
- dependency-name: google.golang.org/protobuf
  dependency-type: direct:production
  update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> refactor Signed-off-by: Zach Aller add test for updating rs revision Signed-off-by: Zach Aller add retry for ephemeral metadata Signed-off-by: Zach Aller clear some fields Signed-off-by: Zach Aller add logs Signed-off-by: Zach Aller refactor into function Signed-off-by: Zach Aller change log Signed-off-by: Zach Aller switch rollout update to patch fallback Signed-off-by: Zach Aller siwtch ephemeral metadata sync to shared function Signed-off-by: Zach Aller siwtch merge type Signed-off-by: Zach Aller lint Signed-off-by: Zach Aller don't update status Signed-off-by: Zach Aller switch rollout update to not use patch Signed-off-by: Zach Aller change log Signed-off-by: Zach Aller switch to small patch Signed-off-by: Zach Aller some cleanup Signed-off-by: Zach Aller remove not found rollout removal Signed-off-by: Zach Aller working setup Signed-off-by: Zach Aller lint Signed-off-by: Zach Aller fix test Signed-off-by: Zach Aller small cleanup Signed-off-by: Zach Aller --- .github/workflows/e2e.yaml | 2 +- Makefile | 4 +- experiments/replicaset.go | 29 +++++++++- hack/gen-crd-spec/main.go | 11 ++-- rollout/canary.go | 2 +- rollout/canary_test.go | 92 ++++++++++++++++++++++++++++++++ rollout/controller.go | 78 +++++++++++++++++++++++++++ rollout/controller_test.go | 6 +++ rollout/ephemeralmetadata.go | 11 ++-- rollout/sync.go | 29 ++++------ test/e2e/canary_test.go | 1 + test/fixtures/e2e_suite.go | 6 +-- utils/annotations/annotations.go | 9 ++-- 13 files changed, 237 insertions(+), 43 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 7b7b2c5300..0b7aca1509 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -95,4 +95,4 @@ jobs: with: name: e2e-controller-k8s-${{ matrix.kubernetes-minor-version }}.log path: /tmp/e2e-controller.log - if: ${{ failure() }} + if: ${{ always() }} diff --git a/Makefile b/Makefile index fd5736aa15..6279623070 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ DEV_IMAGE ?= false E2E_INSTANCE_ID ?= argo-rollouts-e2e E2E_TEST_OPTIONS ?= E2E_PARALLEL ?= 1 -E2E_WAIT_TIMEOUT ?= 120 +E2E_WAIT_TIMEOUT ?= 90 GOPATH ?= $(shell go env GOPATH) # Global toolchain configuration @@ -239,7 +239,7 @@ start-e2e: ## start e2e test environment .PHONY: test-e2e test-e2e: install-devtools-local - ${DIST_DIR}/gotestsum --rerun-fails-report=rerunreport.txt --junitfile=junit.xml --format=testname --packages="./test/e2e" --rerun-fails=5 -- -timeout 60m -count 1 --tags e2e -p ${E2E_PARALLEL} -parallel ${E2E_PARALLEL} -v --short ./test/e2e ${E2E_TEST_OPTIONS} + ${DIST_DIR}/gotestsum --rerun-fails-report=rerunreport.txt --junitfile=junit.xml --format=testname --packages="./test/e2e" --rerun-fails=5 -- -timeout 90m -count 1 --tags e2e -p ${E2E_PARALLEL} -parallel ${E2E_PARALLEL} -v --short ./test/e2e ${E2E_TEST_OPTIONS} .PHONY: test-unit test-unit: install-devtools-local ## run unit tests diff --git a/experiments/replicaset.go b/experiments/replicaset.go index d91843a03a..1c07581758 100644 --- a/experiments/replicaset.go +++ b/experiments/replicaset.go @@ -6,6 +6,8 @@ import ( "fmt" "time" + "github.com/argoproj/argo-rollouts/utils/diff" + log "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -287,16 +289,39 @@ func (ec *experimentContext) scaleReplicaSet(rs *appsv1.ReplicaSet, newScale int sizeNeedsUpdate := oldScale != newScale scaled := false var err error + var updatedRS 
*appsv1.ReplicaSet if sizeNeedsUpdate { rsCopy := rs.DeepCopy() *(rsCopy.Spec.Replicas) = newScale - rs, err = ec.kubeclientset.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{}) + + updatedRS, err = ec.kubeclientset.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{}) + if err != nil { + if errors.IsConflict(err) { + ec.log.Infof("Conflict when updating replicaset %s, falling back to patch", rs.Name) + + patchRS := appsv1.ReplicaSet{} + patchRS.Spec.Replicas = rsCopy.Spec.Replicas + + patch, changed, err := diff.CreateTwoWayMergePatch(appsv1.ReplicaSet{}, patchRS, appsv1.ReplicaSet{}) + if err != nil { + return scaled, nil, err + } + + if changed { + ec.log.Infof("Patching expirment replicaset with patch: %s", string(patch)) + updatedRS, err = ec.kubeclientset.AppsV1().ReplicaSets(rs.Namespace).Patch(ctx, rs.Name, patchtypes.StrategicMergePatchType, patch, metav1.PatchOptions{}) + if err != nil { + return scaled, nil, err + } + } + } + } if err == nil && sizeNeedsUpdate { scaled = true ec.recorder.Eventf(ec.ex, record.EventOptions{EventReason: conditions.ScalingReplicaSetReason}, "Scaled %s ReplicaSet %s from %d to %d", scalingOperation, rs.Name, oldScale, newScale) } } - return scaled, rs, err + return scaled, updatedRS, err } func newReplicaSetAnnotations(experimentName, templateName string) map[string]string { diff --git a/hack/gen-crd-spec/main.go b/hack/gen-crd-spec/main.go index a3e31d92b8..60c1bde9d7 100644 --- a/hack/gen-crd-spec/main.go +++ b/hack/gen-crd-spec/main.go @@ -96,11 +96,12 @@ func NewCustomResourceDefinition() []*extensionsobj.CustomResourceDefinition { // clean up stuff left by controller-gen deleteFile("config/webhook/manifests.yaml") deleteFile("config/webhook") - deleteFile("config/argoproj.io_analysisruns.yaml") - deleteFile("config/argoproj.io_analysistemplates.yaml") - deleteFile("config/argoproj.io_clusteranalysistemplates.yaml") - deleteFile("config/argoproj.io_experiments.yaml") - deleteFile("config/argoproj.io_rollouts.yaml") + deleteFile("config/crd/argoproj.io_analysisruns.yaml") + deleteFile("config/crd/argoproj.io_analysistemplates.yaml") + deleteFile("config/crd/argoproj.io_clusteranalysistemplates.yaml") + deleteFile("config/crd/argoproj.io_experiments.yaml") + deleteFile("config/crd/argoproj.io_rollouts.yaml") + deleteFile("config/crd") deleteFile("config") crds := []*extensionsobj.CustomResourceDefinition{} diff --git a/rollout/canary.go b/rollout/canary.go index ed27bd0a79..fa33b43616 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -112,7 +112,7 @@ func (c *rolloutContext) reconcileCanaryStableReplicaSet() (bool, error) { } scaled, _, err := c.scaleReplicaSetAndRecordEvent(c.stableRS, desiredStableRSReplicaCount) if err != nil { - return scaled, fmt.Errorf("failed to scaleReplicaSetAndRecordEvent in reconcileCanaryStableReplicaSet:L %w", err) + return scaled, fmt.Errorf("failed to scaleReplicaSetAndRecordEvent in reconcileCanaryStableReplicaSet: %w", err) } return scaled, err } diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 56b05c7177..9191cc4b93 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -8,6 +8,11 @@ import ( "testing" "time" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + k8stesting "k8s.io/client-go/testing" + "github.com/stretchr/testify/assert" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/apps/v1" @@ -2141,3 +2146,90 @@ func 
TestCanaryReplicaAndSpecChangedTogether(t *testing.T) { // check the canary one is updated assert.NotEqual(t, originReplicas, int(*updated.Spec.Replicas)) } + +func TestSyncRolloutWithConflictInScaleReplicaSet(t *testing.T) { + f := newFixture(t) + defer f.Close() + + steps := []v1alpha1.CanaryStep{ + { + SetWeight: int32Ptr(10), + }, { + Pause: &v1alpha1.RolloutPause{ + Duration: v1alpha1.DurationFromInt(10), + }, + }, + } + r1 := newCanaryRollout("foo", 10, nil, steps, int32Ptr(1), intstr.FromInt(1), intstr.FromInt(0)) + r2 := bumpVersion(r1) + + rs1 := newReplicaSetWithStatus(r1, 9, 9) + rs2 := newReplicaSetWithStatus(r2, 1, 1) + f.kubeobjects = append(f.kubeobjects, rs1, rs2) + f.replicaSetLister = append(f.replicaSetLister, rs1, rs2) + + f.rolloutLister = append(f.rolloutLister, r2) + f.objects = append(f.objects, r2) + + f.expectPatchRolloutAction(r2) + f.expectUpdateReplicaSetAction(rs2) // attempt to scale replicaset but conflict + f.expectPatchReplicaSetAction(rs2) // instead of update patch replicaset + + key := fmt.Sprintf("%s/%s", r2.Namespace, r2.Name) + c, i, k8sI := f.newController(func() time.Duration { return 30 * time.Minute }) + + f.kubeclient.PrependReactor("update", "replicasets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, &appsv1.ReplicaSet{}, errors.NewConflict(schema.GroupResource{ + Group: "Apps", + Resource: "ReplicaSet", + }, "", fmt.Errorf("test error")) + }) + + f.runController(key, true, false, c, i, k8sI) +} + +func TestSyncRolloutWithConflictInSyncReplicaSetRevision(t *testing.T) { + f := newFixture(t) + defer f.Close() + + steps := []v1alpha1.CanaryStep{ + { + SetWeight: int32Ptr(10), + }, { + Pause: &v1alpha1.RolloutPause{ + Duration: v1alpha1.DurationFromInt(10), + }, + }, + } + r1 := newCanaryRollout("foo", 3, nil, steps, int32Ptr(1), intstr.FromInt(1), intstr.FromInt(0)) + r2 := bumpVersion(r1) + + rs1 := newReplicaSetWithStatus(r1, 3, 3) + rs2 := newReplicaSetWithStatus(r2, 3, 3) + rs2.Annotations["rollout.argoproj.io/revision"] = "1" + + f.kubeobjects = append(f.kubeobjects, rs1, rs2) + f.replicaSetLister = append(f.replicaSetLister, rs1, rs2) + + f.rolloutLister = append(f.rolloutLister, r2) + f.objects = append(f.objects, r2) + + key := fmt.Sprintf("%s/%s", r1.Namespace, r1.Name) + c, i, k8sI := f.newController(func() time.Duration { return 30 * time.Minute }) + + f.kubeclient.PrependReactor("update", "replicasets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, &appsv1.ReplicaSet{}, errors.NewConflict(schema.GroupResource{ + Group: "Apps", + Resource: "ReplicaSet", + }, "", fmt.Errorf("test error")) + }) + + f.expectPatchRolloutAction(r2) + f.expectUpdateReplicaSetAction(rs1) // attempt to update replicaset revision but conflict + f.expectPatchReplicaSetAction(rs1) // instead of update patch replicaset + + f.expectUpdateReplicaSetAction(rs2) // attempt to scale replicaset but conflict + f.expectPatchReplicaSetAction(rs2) // instead of update patch replicaset + + f.runController(key, true, false, c, i, k8sI) +} diff --git a/rollout/controller.go b/rollout/controller.go index 972cea882c..583509b805 100644 --- a/rollout/controller.go +++ b/rollout/controller.go @@ -9,6 +9,7 @@ import ( "sync" "time" + "github.com/argoproj/argo-rollouts/utils/diff" "k8s.io/apimachinery/pkg/runtime/schema" "github.com/argoproj/argo-rollouts/pkg/apis/rollouts" @@ -16,11 +17,13 @@ import ( log "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" corev1 
"k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + patchtypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/dynamic" @@ -937,3 +940,78 @@ func remarshalRollout(r *v1alpha1.Rollout) *v1alpha1.Rollout { } return &remarshalled } + +// updateReplicaSetWithPatch updates the replicaset using patch and on +func (c *rolloutContext) updateReplicaSetFallbackToPatch(ctx context.Context, rs *appsv1.ReplicaSet) (*appsv1.ReplicaSet, error) { + rsCopy := rs.DeepCopy() + updatedRS, err := c.kubeclientset.AppsV1().ReplicaSets(rs.Namespace).Update(ctx, rs, metav1.UpdateOptions{}) + if err != nil { + if errors.IsConflict(err) { + c.log.Infof("Conflict when updating replicaset %s, falling back to patch", rs.Name) + + patchRS := appsv1.ReplicaSet{} + patchRS.Spec.Replicas = rsCopy.Spec.Replicas + patchRS.Annotations = rsCopy.Annotations + patchRS.Labels = rsCopy.Labels + patchRS.Spec.Template.Labels = rsCopy.Spec.Template.Labels + patchRS.Spec.Template.Annotations = rsCopy.Spec.Template.Annotations + patchRS.Spec.Selector = rsCopy.Spec.Selector + + patch, changed, err := diff.CreateTwoWayMergePatch(appsv1.ReplicaSet{}, patchRS, appsv1.ReplicaSet{}) + if err != nil { + return nil, err + } + + if changed { + c.log.Infof("Patching replicaset with patch: %s", string(patch)) + updatedRS, err = c.kubeclientset.AppsV1().ReplicaSets(rs.Namespace).Patch(ctx, rs.Name, patchtypes.StrategicMergePatchType, patch, metav1.PatchOptions{}) + if err != nil { + return nil, err + } + } + + err = c.replicaSetInformer.GetIndexer().Update(updatedRS) + if err != nil { + err = fmt.Errorf("error updating replicaset informer in scaleReplicaSet: %w", err) + return nil, err + } + + return updatedRS, err + } + } + + return updatedRS, err +} + +// updateRolloutWithRetry updates the rollout with a retry if there is a conflict from an update operation, it runs the modifyRollout function to update a fresh rollout from the cluster. 
+//func (c *rolloutContext) updateRolloutWithRetry(ctx context.Context, ro *v1alpha1.Rollout, modifyRollout func(ro *v1alpha1.Rollout) *v1alpha1.Rollout) (*v1alpha1.Rollout, error) { +// updatedRollout, err := c.argoprojclientset.ArgoprojV1alpha1().Rollouts(c.rollout.Namespace).Update(context.TODO(), c.rollout, metav1.UpdateOptions{}) +// if err != nil { +// if errors.IsConflict(err) { +// retryCount := 0 +// errRetry := retry.RetryOnConflict(retry.DefaultBackoff, func() error { +// retryCount++ +// c.log.Infof("conflict when updating rollout %s, retrying the update operation with new rollout from cluster, attempt: %d", c.rollout.Name, retryCount) +// roGet, err := c.argoprojclientset.ArgoprojV1alpha1().Rollouts(c.rollout.Namespace).Get(context.TODO(), c.rollout.Name, metav1.GetOptions{}) +// if err != nil { +// return fmt.Errorf("error getting rollout %s: %w", c.rollout.Name, err) +// } +// +// roCopy := modifyRollout(roGet) +// updatedRollout, err = c.argoprojclientset.ArgoprojV1alpha1().Rollouts(c.rollout.Namespace).Update(context.TODO(), roCopy, metav1.UpdateOptions{}) +// if err != nil { +// return err +// } +// +// return nil +// }) +// if errRetry != nil { +// return nil, errRetry +// } +// } else { +// c.log.WithError(err).Error("Error: updating rollout revision") +// return nil, err +// } +// } +// return updatedRollout, nil +//} diff --git a/rollout/controller_test.go b/rollout/controller_test.go index 892a2be64f..5d80287f30 100644 --- a/rollout/controller_test.go +++ b/rollout/controller_test.go @@ -792,6 +792,12 @@ func (f *fixture) expectPatchServiceAction(s *corev1.Service, newLabel string) i return len } +func (f *fixture) expectGetReplicaSetAction(r *appsv1.ReplicaSet) int { //nolint:unused + len := len(f.kubeactions) + f.kubeactions = append(f.kubeactions, core.NewGetAction(schema.GroupVersionResource{Resource: "replicasets"}, r.Namespace, r.Name)) + return len +} + func (f *fixture) expectCreateReplicaSetAction(r *appsv1.ReplicaSet) int { len := len(f.kubeactions) f.kubeactions = append(f.kubeactions, core.NewCreateAction(schema.GroupVersionResource{Resource: "replicasets"}, r.Namespace, r)) diff --git a/rollout/ephemeralmetadata.go b/rollout/ephemeralmetadata.go index 60b745ed29..17d6a60d9f 100644 --- a/rollout/ephemeralmetadata.go +++ b/rollout/ephemeralmetadata.go @@ -2,7 +2,6 @@ package rollout import ( "context" - "fmt" appsv1 "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -82,14 +81,12 @@ func (c *rolloutContext) syncEphemeralMetadata(ctx context.Context, rs *appsv1.R } // 2. 
Update ReplicaSet so that any new pods it creates will have the metadata - rs, err = c.kubeclientset.AppsV1().ReplicaSets(modifiedRS.Namespace).Update(ctx, modifiedRS, metav1.UpdateOptions{}) + rs, err = c.updateReplicaSetFallbackToPatch(ctx, modifiedRS) if err != nil { - return fmt.Errorf("error updating replicaset in syncEphemeralMetadata: %w", err) - } - err = c.replicaSetInformer.GetIndexer().Update(rs) - if err != nil { - return fmt.Errorf("error updating replicaset informer in syncEphemeralMetadata: %w", err) + c.log.Infof("failed to sync ephemeral metadata %v to ReplicaSet %s: %v", podMetadata, rs.Name, err) + return err } + c.log.Infof("synced ephemeral metadata %v to ReplicaSet %s", podMetadata, rs.Name) return nil } diff --git a/rollout/sync.go b/rollout/sync.go index 875949ee55..7654cc58e5 100644 --- a/rollout/sync.go +++ b/rollout/sync.go @@ -83,17 +83,14 @@ func (c *rolloutContext) syncReplicaSetRevision() (*appsv1.ReplicaSet, error) { affinityNeedsUpdate := replicasetutil.IfInjectedAntiAffinityRuleNeedsUpdate(rsCopy.Spec.Template.Spec.Affinity, *c.rollout) if annotationsUpdated || minReadySecondsNeedsUpdate || affinityNeedsUpdate { + rsCopy.Spec.MinReadySeconds = c.rollout.Spec.MinReadySeconds rsCopy.Spec.Template.Spec.Affinity = replicasetutil.GenerateReplicaSetAffinity(*c.rollout) - rs, err := c.kubeclientset.AppsV1().ReplicaSets(rsCopy.ObjectMeta.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{}) - if err != nil { - c.log.WithError(err).Error("Error: updating replicaset revision") - return nil, fmt.Errorf("error updating replicaset revision: %v", err) - } - c.log.Infof("Synced revision on ReplicaSet '%s' to '%s'", rs.Name, newRevision) - err = c.replicaSetInformer.GetIndexer().Update(rs) + + rs, err := c.updateReplicaSetFallbackToPatch(ctx, rsCopy) if err != nil { - return nil, fmt.Errorf("error updating replicaset informer in syncReplicaSetRevision: %w", err) + c.log.WithError(err).Errorf("Error: syncing replicaset revision on %s", rsCopy.Name) + return nil, err } return rs, nil } @@ -245,7 +242,7 @@ func (c *rolloutContext) createDesiredReplicaSet() (*appsv1.ReplicaSet, error) { cond := conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionFalse, conditions.FailedRSCreateReason, msg) patchErr := c.patchCondition(c.rollout, newStatus, cond) if patchErr != nil { - c.log.Warnf("Error Patching Rollout: %s", patchErr.Error()) + c.log.Warnf("Error Patching Rollout Conditions: %s", patchErr.Error()) } return nil, err default: @@ -370,8 +367,8 @@ func (c *rolloutContext) scaleReplicaSet(rs *appsv1.ReplicaSet, newScale int32, rolloutReplicas := defaults.GetReplicasOrDefault(rollout.Spec.Replicas) annotationsNeedUpdate := annotations.ReplicasAnnotationsNeedUpdate(rs, rolloutReplicas) - scaled := false var err error + scaled := false if sizeNeedsUpdate || annotationsNeedUpdate { rsCopy := rs.DeepCopy() oldScale := defaults.GetReplicasOrDefault(rs.Spec.Replicas) @@ -381,14 +378,10 @@ func (c *rolloutContext) scaleReplicaSet(rs *appsv1.ReplicaSet, newScale int32, delete(rsCopy.Annotations, v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey) } - rs, err = c.kubeclientset.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{}) - if err != nil { - return scaled, rs, fmt.Errorf("error updating replicaset %s: %w", rsCopy.Name, err) - } - err = c.replicaSetInformer.GetIndexer().Update(rs) + rs, err = c.updateReplicaSetFallbackToPatch(ctx, rsCopy) if err != nil { - err = fmt.Errorf("error updating replicaset informer in 
scaleReplicaSet: %w", err) - return scaled, rs, err + c.log.Infof("Error scaling replicasets %s: %v", rsCopy.Name, err) + return scaled, nil, err } if sizeNeedsUpdate { @@ -397,7 +390,7 @@ func (c *rolloutContext) scaleReplicaSet(rs *appsv1.ReplicaSet, newScale int32, c.recorder.Eventf(rollout, record.EventOptions{EventReason: conditions.ScalingReplicaSetReason}, conditions.ScalingReplicaSetMessage, scalingOperation, rs.Name, revision, oldScale, newScale) } } - return scaled, rs, err + return scaled, rs, nil } // calculateStatus calculates the common fields for all rollouts by looking into the provided replica sets. diff --git a/test/e2e/canary_test.go b/test/e2e/canary_test.go index 2c68ec8bd7..4f32090d16 100644 --- a/test/e2e/canary_test.go +++ b/test/e2e/canary_test.go @@ -658,6 +658,7 @@ func (s *CanarySuite) TestCanaryDynamicStableScale() { When(). MarkPodsReady("1", 1). // mark last remaining stable pod as ready (4/4 stable are ready) WaitForRevisionPodCount("2", 0). + Sleep(2*time.Second). //WaitForRevisionPodCount does not wait for terminating pods and so ExpectServiceSelector fails sleep a bit for the terminating pods to be deleted Then(). // Expect that the canary service selector is now set to stable because of dynamic stable scale is over and we have all pods up on stable rs ExpectServiceSelector("dynamic-stable-scale-canary", map[string]string{"app": "dynamic-stable-scale", "rollouts-pod-template-hash": "868d98995b"}, false). diff --git a/test/fixtures/e2e_suite.go b/test/fixtures/e2e_suite.go index a774afcf94..e78ae1aa68 100644 --- a/test/fixtures/e2e_suite.go +++ b/test/fixtures/e2e_suite.go @@ -54,7 +54,7 @@ const ( ) var ( - E2EWaitTimeout time.Duration = time.Second * 120 + E2EWaitTimeout time.Duration = time.Second * 90 E2EPodDelay = 0 E2EALBIngressAnnotations map[string]string @@ -143,8 +143,8 @@ func (s *E2ESuite) SetupSuite() { restConfig, err := config.ClientConfig() s.CheckError(err) s.Common.kubernetesHost = restConfig.Host - restConfig.Burst = defaults.DefaultBurst * 2 - restConfig.QPS = defaults.DefaultQPS * 2 + restConfig.Burst = defaults.DefaultBurst * 10 + restConfig.QPS = defaults.DefaultQPS * 10 s.namespace, _, err = config.Namespace() s.CheckError(err) s.kubeClient, err = kubernetes.NewForConfig(restConfig) diff --git a/utils/annotations/annotations.go b/utils/annotations/annotations.go index 121c6c0803..8c00cbe897 100644 --- a/utils/annotations/annotations.go +++ b/utils/annotations/annotations.go @@ -215,10 +215,11 @@ func SetNewReplicaSetAnnotations(rollout *v1alpha1.Rollout, newRS *appsv1.Replic } var annotationsToSkip = map[string]bool{ - corev1.LastAppliedConfigAnnotation: true, - RevisionAnnotation: true, - RevisionHistoryAnnotation: true, - DesiredReplicasAnnotation: true, + corev1.LastAppliedConfigAnnotation: true, + RevisionAnnotation: true, + RevisionHistoryAnnotation: true, + DesiredReplicasAnnotation: true, + "notified.notifications.argoproj.io": true, } // skipCopyAnnotation returns true if we should skip copying the annotation with the given annotation key
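
Below is a minimal, self-contained sketch of the pattern this patch introduces in `updateReplicaSetFallbackToPatch` and in `experiments/replicaset.go`: attempt a normal Update of the ReplicaSet first and, only on a 409 Conflict, fall back to a narrowly scoped strategic merge patch so the scale change does not clobber fields another writer just modified. It is illustrative only: the function name `scaleWithPatchFallback` and the hand-built JSON patch are assumptions for the sketch, whereas the actual change builds its patch with `diff.CreateTwoWayMergePatch` and also carries annotations, labels, and selector fields.

```go
package example

import (
	"context"
	"encoding/json"
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// scaleWithPatchFallback (hypothetical helper) tries a full Update of the
// ReplicaSet first. If the API server rejects it with a 409 Conflict, it
// retries as a strategic merge patch that carries only spec.replicas, so
// concurrent writers' changes to other fields are left untouched.
func scaleWithPatchFallback(ctx context.Context, c kubernetes.Interface, rs *appsv1.ReplicaSet, replicas int32) (*appsv1.ReplicaSet, error) {
	rsCopy := rs.DeepCopy()
	rsCopy.Spec.Replicas = &replicas

	updated, err := c.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{})
	if err == nil || !errors.IsConflict(err) {
		return updated, err
	}

	// Conflict: send a minimal patch instead of replaying the whole object.
	patch, marshalErr := json.Marshal(map[string]interface{}{
		"spec": map[string]interface{}{"replicas": replicas},
	})
	if marshalErr != nil {
		return nil, fmt.Errorf("building scale patch: %w", marshalErr)
	}
	return c.AppsV1().ReplicaSets(rs.Namespace).Patch(ctx, rs.Name, types.StrategicMergePatchType, patch, metav1.PatchOptions{})
}
```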