From 16e67cef29adce8f7ee1c0b3c45c377c929daef8 Mon Sep 17 00:00:00 2001 From: Zach Aller Date: Mon, 29 Apr 2024 15:33:52 -0500 Subject: [PATCH] fix: fallback to patch on scale conflict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Zach Aller fix: switch to retry logic Signed-off-by: Zach Aller lint Signed-off-by: Zach Aller retry experiments Signed-off-by: Zach Aller remove TODO Signed-off-by: Zach Aller remove accidental add Signed-off-by: Zach Aller remove accidental add Signed-off-by: Zach Aller add retry to setting revision Signed-off-by: Zach Aller chore(deps): bump slsa-framework/slsa-github-generator from 1.10.0 to 2.0.0 (#3537) chore(deps): bump slsa-framework/slsa-github-generator Bumps [slsa-framework/slsa-github-generator](https://github.com/slsa-framework/slsa-github-generator) from 1.10.0 to 2.0.0. - [Release notes](https://github.com/slsa-framework/slsa-github-generator/releases) - [Changelog](https://github.com/slsa-framework/slsa-github-generator/blob/main/CHANGELOG.md) - [Commits](https://github.com/slsa-framework/slsa-github-generator/compare/v1.10.0...v2.0.0) --- updated-dependencies: - dependency-name: slsa-framework/slsa-github-generator dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> chore(deps): bump sigstore/cosign-installer from 3.4.0 to 3.5.0 (#3522) Bumps [sigstore/cosign-installer](https://github.com/sigstore/cosign-installer) from 3.4.0 to 3.5.0. - [Release notes](https://github.com/sigstore/cosign-installer/releases) - [Commits](https://github.com/sigstore/cosign-installer/compare/e1523de7571e31dbe865fd2e80c5c7c23ae71eb4...59acb6260d9c0ba8f4a2f9d9b48431a222b68e20) --- updated-dependencies: - dependency-name: sigstore/cosign-installer dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> chore(deps): bump golangci/golangci-lint-action from 4 to 5 (#3540) Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 4 to 5. - [Release notes](https://github.com/golangci/golangci-lint-action/releases) - [Commits](https://github.com/golangci/golangci-lint-action/compare/v4...v5) --- updated-dependencies: - dependency-name: golangci/golangci-lint-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> docs: provide recommendation for strategies (#3531) * docs: provide recommendation for strategies Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com> * docs: traffic manager clarifications Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com> * docs: explain canary with/out traffic manager Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com> * docs: add 3 columns on the comparison table Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com> --------- Signed-off-by: Kostis (Codefresh) <39800303+kostis-codefresh@users.noreply.github.com> feat(dashboard): change the color of the current rollout step (#3526) I feel that having the current (running) step in a orange color is misleading, as orange usually means warning. This commit changes the color to the `$argo-running-color`. Signed-off-by: Alejandro López Sánchez chore(deps): bump github.com/aws/aws-sdk-go-v2/service/cloudwatch from 1.37.0 to 1.38.0 (#3525) chore(deps): bump github.com/aws/aws-sdk-go-v2/service/cloudwatch Bumps [github.com/aws/aws-sdk-go-v2/service/cloudwatch](https://github.com/aws/aws-sdk-go-v2) from 1.37.0 to 1.38.0. - [Release notes](https://github.com/aws/aws-sdk-go-v2/releases) - [Changelog](https://github.com/aws/aws-sdk-go-v2/blob/service/s3/v1.38.0/CHANGELOG.md) - [Commits](https://github.com/aws/aws-sdk-go-v2/compare/service/s3/v1.37.0...service/s3/v1.38.0) --- updated-dependencies: - dependency-name: github.com/aws/aws-sdk-go-v2/service/cloudwatch dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> perform all of set revision actions on retry Signed-off-by: Zach Aller fix variable Signed-off-by: Zach Aller add retry counts to log Signed-off-by: Zach Aller add retry counts to logs Signed-off-by: Zach Aller clean logs, always dump controller e2e logs Signed-off-by: Zach Aller lower timeout Signed-off-by: Zach Aller bump timeout on e2e Signed-off-by: Zach Aller retry on rollout conflict Signed-off-by: Zach Aller don't reque on rs changes Signed-off-by: Zach Aller reque rs Signed-off-by: Zach Aller bump qps for e2e Signed-off-by: Zach Aller fix gen-crd Signed-off-by: Zach Aller switch to patch Signed-off-by: Zach Aller switch to patch Signed-off-by: Zach Aller add log Signed-off-by: Zach Aller move log lines Signed-off-by: Zach Aller Trigger Build Signed-off-by: Zach Aller fix one e2e test Signed-off-by: Zach Aller lint Signed-off-by: Zach Aller add test Signed-off-by: Zach Aller chore(deps): bump actions/setup-go from 5.0.0 to 5.0.1 (#3552) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5.0.0 to 5.0.1. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/v5.0.0...v5.0.1) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> chore(deps): bump codecov/codecov-action from 4.3.0 to 4.3.1 (#3550) Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 4.3.0 to 4.3.1. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v4.3.0...v4.3.1) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> chore(deps): bump google.golang.org/protobuf from 1.33.0 to 1.34.0 (#3548) Bumps google.golang.org/protobuf from 1.33.0 to 1.34.0. --- updated-dependencies: - dependency-name: google.golang.org/protobuf dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> refactor Signed-off-by: Zach Aller add test for updating rs revision Signed-off-by: Zach Aller add retry for ephemeral metadata Signed-off-by: Zach Aller clear some fields Signed-off-by: Zach Aller add logs Signed-off-by: Zach Aller refactor into function Signed-off-by: Zach Aller change log Signed-off-by: Zach Aller switch rollout update to patch fallback Signed-off-by: Zach Aller siwtch ephemeral metadata sync to shared function Signed-off-by: Zach Aller siwtch merge type Signed-off-by: Zach Aller lint Signed-off-by: Zach Aller don't update status Signed-off-by: Zach Aller switch rollout update to not use patch Signed-off-by: Zach Aller change log Signed-off-by: Zach Aller switch to small patch Signed-off-by: Zach Aller some cleanup Signed-off-by: Zach Aller remove not found rollout removal Signed-off-by: Zach Aller working setup Signed-off-by: Zach Aller lint Signed-off-by: Zach Aller fix test Signed-off-by: Zach Aller small cleanup Signed-off-by: Zach Aller --- .github/workflows/e2e.yaml | 4 +- .github/workflows/gh-pages.yaml | 2 +- .github/workflows/go.yml | 10 +-- .github/workflows/image-reuse.yaml | 4 +- .github/workflows/release.yaml | 12 +-- Makefile | 4 +- docs/concepts.md | 31 +++++++- experiments/replicaset.go | 29 ++++++- go.mod | 4 +- go.sum | 8 +- hack/gen-crd-spec/main.go | 11 +-- rollout/canary.go | 2 +- rollout/canary_test.go | 92 ++++++++++++++++++++++ rollout/controller.go | 78 ++++++++++++++++++ rollout/controller_test.go | 6 ++ rollout/ephemeralmetadata.go | 11 +-- rollout/sync.go | 29 +++---- test/e2e/canary_test.go | 1 + test/fixtures/e2e_suite.go | 6 +- ui/src/app/components/rollout/rollout.scss | 6 +- utils/annotations/annotations.go | 9 ++- 21 files changed, 291 insertions(+), 68 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 4bb16670e1..0b7aca1509 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -46,7 +46,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up Go - uses: actions/setup-go@v5.0.0 + uses: actions/setup-go@v5.0.1 with: go-version: '1.22' - uses: actions/checkout@v4 @@ -95,4 +95,4 @@ jobs: with: name: e2e-controller-k8s-${{ matrix.kubernetes-minor-version }}.log path: /tmp/e2e-controller.log - if: ${{ failure() }} + if: ${{ always() }} diff --git a/.github/workflows/gh-pages.yaml b/.github/workflows/gh-pages.yaml index 8d11ebefab..e84c59d7ec 100644 --- a/.github/workflows/gh-pages.yaml +++ b/.github/workflows/gh-pages.yaml @@ -24,7 +24,7 @@ jobs: with: python-version: 3.x - name: Set up Go - uses: actions/setup-go@v5.0.0 + uses: actions/setup-go@v5.0.1 with: go-version: '1.22' - name: build diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 0448d7b519..959ea6e835 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -36,13 +36,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up Go - uses: actions/setup-go@v5.0.0 + uses: actions/setup-go@v5.0.1 with: go-version: ${{ env.GOLANG_VERSION }} - name: Checkout code uses: actions/checkout@v4 - name: Run golangci-lint - uses: golangci/golangci-lint-action@v4 + uses: golangci/golangci-lint-action@v5 with: version: v1.57.2 args: --timeout 6m @@ -51,7 +51,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up Go - uses: actions/setup-go@v5.0.0 + uses: actions/setup-go@v5.0.1 with: go-version: ${{ env.GOLANG_VERSION }} id: go @@ -90,7 +90,7 @@ jobs: path: coverage.out - name: Upload code coverage information to codecov.io - uses: codecov/codecov-action@v4.3.0 + uses: codecov/codecov-action@v4.3.1 with: file: coverage.out @@ -103,7 +103,7 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - name: Setup Golang - uses: actions/setup-go@v5.0.0 + uses: actions/setup-go@v5.0.1 with: go-version: ${{ env.GOLANG_VERSION }} # k8s codegen generates files into GOPATH location instead of the GitHub git checkout location diff --git a/.github/workflows/image-reuse.yaml b/.github/workflows/image-reuse.yaml index 6589d35e78..b78d72a061 100644 --- a/.github/workflows/image-reuse.yaml +++ b/.github/workflows/image-reuse.yaml @@ -69,12 +69,12 @@ jobs: if: ${{ github.ref_type != 'tag'}} - name: Setup Golang - uses: actions/setup-go@v5.0.0 # v3.5.0 + uses: actions/setup-go@v5.0.1 # v3.5.0 with: go-version: ${{ inputs.go-version }} - name: Install cosign - uses: sigstore/cosign-installer@e1523de7571e31dbe865fd2e80c5c7c23ae71eb4 # v3.4.0 + uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 # v3.5.0 with: cosign-release: 'v2.2.0' diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 0f25b77c4d..c9e2ca60b8 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -51,7 +51,7 @@ jobs: id-token: write # for creating OIDC tokens for signing. packages: write # for uploading attestations. (https://github.com/slsa-framework/slsa-github-generator/blob/main/internal/builders/container/README.md#known-issues) # Must be refernced by a tag. https://github.com/slsa-framework/slsa-github-generator/blob/main/internal/builders/container/README.md#referencing-the-slsa-generator - uses: slsa-framework/slsa-github-generator/.github/workflows/generator_container_slsa3.yml@v1.10.0 + uses: slsa-framework/slsa-github-generator/.github/workflows/generator_container_slsa3.yml@v2.0.0 with: image: quay.io/argoproj/argo-rollouts digest: ${{ needs.controller-image.outputs.image-digest }} @@ -67,7 +67,7 @@ jobs: id-token: write # for creating OIDC tokens for signing. packages: write # for uploading attestations. (https://github.com/slsa-framework/slsa-github-generator/blob/main/internal/builders/container/README.md#known-issues) # Must be refernced by a tag. https://github.com/slsa-framework/slsa-github-generator/blob/main/internal/builders/container/README.md#referencing-the-slsa-generator - uses: slsa-framework/slsa-github-generator/.github/workflows/generator_container_slsa3.yml@v1.10.0 + uses: slsa-framework/slsa-github-generator/.github/workflows/generator_container_slsa3.yml@v2.0.0 with: image: quay.io/argoproj/kubectl-argo-rollouts digest: ${{ needs.plugin-image.outputs.image-digest }} @@ -90,7 +90,7 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} - name: Setup Golang - uses: actions/setup-go@v5.0.0 # v4.0.1 + uses: actions/setup-go@v5.0.1 # v4.0.1 with: go-version: ${{ env.GOLANG_VERSION }} @@ -139,7 +139,7 @@ jobs: id-token: write # Needed for provenance signing and ID contents: write # Needed for release uploads # Must be refernced by a tag. https://github.com/slsa-framework/slsa-github-generator/blob/main/internal/builders/container/README.md#referencing-the-slsa-generator - uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v1.10.0 + uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0 with: base64-subjects: '${{ needs.release-artifacts.outputs.hashes }}' provenance-name: 'argo-rollouts.intoto.jsonl' @@ -163,12 +163,12 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} - name: Setup Golang - uses: actions/setup-go@v5.0.0 # v4.0.0 + uses: actions/setup-go@v5.0.1 # v4.0.0 with: go-version: ${{ env.GOLANG_VERSION }} - name: Install cosign - uses: sigstore/cosign-installer@e1523de7571e31dbe865fd2e80c5c7c23ae71eb4 # v3.4.0 + uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 # v3.5.0 with: cosign-release: 'v2.2.0' diff --git a/Makefile b/Makefile index fd5736aa15..6279623070 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ DEV_IMAGE ?= false E2E_INSTANCE_ID ?= argo-rollouts-e2e E2E_TEST_OPTIONS ?= E2E_PARALLEL ?= 1 -E2E_WAIT_TIMEOUT ?= 120 +E2E_WAIT_TIMEOUT ?= 90 GOPATH ?= $(shell go env GOPATH) # Global toolchain configuration @@ -239,7 +239,7 @@ start-e2e: ## start e2e test environment .PHONY: test-e2e test-e2e: install-devtools-local - ${DIST_DIR}/gotestsum --rerun-fails-report=rerunreport.txt --junitfile=junit.xml --format=testname --packages="./test/e2e" --rerun-fails=5 -- -timeout 60m -count 1 --tags e2e -p ${E2E_PARALLEL} -parallel ${E2E_PARALLEL} -v --short ./test/e2e ${E2E_TEST_OPTIONS} + ${DIST_DIR}/gotestsum --rerun-fails-report=rerunreport.txt --junitfile=junit.xml --format=testname --packages="./test/e2e" --rerun-fails=5 -- -timeout 90m -count 1 --tags e2e -p ${E2E_PARALLEL} -parallel ${E2E_PARALLEL} -v --short ./test/e2e ${E2E_TEST_OPTIONS} .PHONY: test-unit test-unit: install-devtools-local ## run unit tests diff --git a/docs/concepts.md b/docs/concepts.md index 26cd7f5cef..af85d7b4e2 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -46,4 +46,33 @@ A Canary deployment exposes a subset of users to the new version of the applicat [![How Canary deployments work](concepts-assets/canary-deployments.png)](concepts-assets/canary-deployments.png) The picture above shows a canary with two stages (10% and 33% of traffic goes to new version) but this is just an example. With Argo Rollouts you can define the exact number of stages -and percentages of traffic according to your use case. \ No newline at end of file +and percentages of traffic according to your use case. + +## Which strategy to choose + +In general Blue/Green is the easier strategy to start with, but also the more limited. We recommend you start with Blue/Green deployments first and as you gain confidence for your metrics and applications switch to Canaries. + +You also need to examine if your application can handle canaries or not. + +* Blue/Green always works because only one application is active at a time. Not all applications can have different versions running in parallel at the same time (which is what canaries are doing). This can be a showstopper for adopting canary deployments especially for legacy applications. +* Blue/Green is simpler because you can get their full value WITHOUT a traffic manager. While canaries can also work without a traffic manager, most of their advanced features assume a fine-grained way to control traffic. If you don't have a traffic manager, then you can easily get the full value +of blue/green deployments but only the basic capabilities of canaries. +* Blue/Green also works with services that use queues and databases (workers that fetch tasks). Argo Rollouts doesn't control traffic flow for +connections it doesn't understand (i.e. binary/queue channels). + +Here is a summary table for the possible approaches. + +| | Blue/Green | Basic Canary | Canary with Traffic manager | +|--------------------------:|:-------------------------:|:--------------------------:| :-----------------------------:| +| Adoption Complexity | Low | Medium | High | +| Flexibility | Low | High | Maximum | +| Needs traffic provider | No | No | Yes | +| Works with queue workers | Yes | No | No | +| Works with shared/locked resources | Yes | No | No | +| Traffic switch | All or nothing | Gradual percentage | Gradual percentage | +| Traffic control | 0% or 100% | coarse grained | fine grained | +| Traffic depends on | deployment state | number of canary pods | Any split option is possible | +| Advanced routing scenarios | No | No | Yes | +| Failure Blast Radius | Massive impact | Low impact | Low impact | + +Note that that traffic manager can be any compatible Service Mesh or Ingress Controller or Gateway API implementation (via a plugin). \ No newline at end of file diff --git a/experiments/replicaset.go b/experiments/replicaset.go index d91843a03a..1c07581758 100644 --- a/experiments/replicaset.go +++ b/experiments/replicaset.go @@ -6,6 +6,8 @@ import ( "fmt" "time" + "github.com/argoproj/argo-rollouts/utils/diff" + log "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -287,16 +289,39 @@ func (ec *experimentContext) scaleReplicaSet(rs *appsv1.ReplicaSet, newScale int sizeNeedsUpdate := oldScale != newScale scaled := false var err error + var updatedRS *appsv1.ReplicaSet if sizeNeedsUpdate { rsCopy := rs.DeepCopy() *(rsCopy.Spec.Replicas) = newScale - rs, err = ec.kubeclientset.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{}) + + updatedRS, err = ec.kubeclientset.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{}) + if err != nil { + if errors.IsConflict(err) { + ec.log.Infof("Conflict when updating replicaset %s, falling back to patch", rs.Name) + + patchRS := appsv1.ReplicaSet{} + patchRS.Spec.Replicas = rsCopy.Spec.Replicas + + patch, changed, err := diff.CreateTwoWayMergePatch(appsv1.ReplicaSet{}, patchRS, appsv1.ReplicaSet{}) + if err != nil { + return scaled, nil, err + } + + if changed { + ec.log.Infof("Patching expirment replicaset with patch: %s", string(patch)) + updatedRS, err = ec.kubeclientset.AppsV1().ReplicaSets(rs.Namespace).Patch(ctx, rs.Name, patchtypes.StrategicMergePatchType, patch, metav1.PatchOptions{}) + if err != nil { + return scaled, nil, err + } + } + } + } if err == nil && sizeNeedsUpdate { scaled = true ec.recorder.Eventf(ec.ex, record.EventOptions{EventReason: conditions.ScalingReplicaSetReason}, "Scaled %s ReplicaSet %s from %d to %d", scalingOperation, rs.Name, oldScale, newScale) } } - return scaled, rs, err + return scaled, updatedRS, err } func newReplicaSetAnnotations(experimentName, templateName string) map[string]string { diff --git a/go.mod b/go.mod index 936c58110d..20585676c7 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/argoproj/pkg v0.13.6 github.com/aws/aws-sdk-go-v2 v1.26.1 github.com/aws/aws-sdk-go-v2/config v1.27.11 - github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.37.0 + github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.38.0 github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.30.5 github.com/aws/smithy-go v1.20.2 github.com/blang/semver v3.5.1+incompatible @@ -42,7 +42,7 @@ require ( golang.org/x/oauth2 v0.19.0 google.golang.org/genproto/googleapis/api v0.0.0-20240227224415-6ceb2ff114de google.golang.org/grpc v1.63.2 - google.golang.org/protobuf v1.33.0 + google.golang.org/protobuf v1.34.0 gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.29.3 k8s.io/apiextensions-apiserver v0.29.3 diff --git a/go.sum b/go.sum index 735664be8f..818c7d0638 100644 --- a/go.sum +++ b/go.sum @@ -112,8 +112,8 @@ github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.5 h1:PG1F3OD1szkuQPzDw3C github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.5/go.mod h1:jU1li6RFryMz+so64PpKtudI+QzbKoIEivqdf6LNpOc= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY= -github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.37.0 h1:sGGUnU/pUSzjrcCvQgN2pEc3aTQILyK2rRsWVY5CSt0= -github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.37.0/go.mod h1:U12sr6Lt14X96f16t+rR52+2BdqtydwN7DjEEHRMjO0= +github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.38.0 h1:vAfGwYFCcPDS9Bg7ckfMBer6olJLOHsOAVoKWpPIirs= +github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.38.0/go.mod h1:U12sr6Lt14X96f16t+rR52+2BdqtydwN7DjEEHRMjO0= github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.30.5 h1:/x2u/TOx+n17U+gz98TOw1HKJom0EOqrhL4SjrHr0cQ= github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.30.5/go.mod h1:e1McVqsud0JOERidvppLEHnuCdh/X6MRyL5L0LseAUk= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2 h1:Ji0DY1xUsUr3I8cHps0G+XM3WWU16lP6yG8qu1GAZAs= @@ -1147,8 +1147,8 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.0 h1:Qo/qEd2RZPCf2nKuorzksSknv0d3ERwp1vFG38gSmH4= +google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc h1:2gGKlE2+asNV9m7xrywl36YYNnBG5ZQ0r/BOOxqPpmk= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk= diff --git a/hack/gen-crd-spec/main.go b/hack/gen-crd-spec/main.go index a3e31d92b8..60c1bde9d7 100644 --- a/hack/gen-crd-spec/main.go +++ b/hack/gen-crd-spec/main.go @@ -96,11 +96,12 @@ func NewCustomResourceDefinition() []*extensionsobj.CustomResourceDefinition { // clean up stuff left by controller-gen deleteFile("config/webhook/manifests.yaml") deleteFile("config/webhook") - deleteFile("config/argoproj.io_analysisruns.yaml") - deleteFile("config/argoproj.io_analysistemplates.yaml") - deleteFile("config/argoproj.io_clusteranalysistemplates.yaml") - deleteFile("config/argoproj.io_experiments.yaml") - deleteFile("config/argoproj.io_rollouts.yaml") + deleteFile("config/crd/argoproj.io_analysisruns.yaml") + deleteFile("config/crd/argoproj.io_analysistemplates.yaml") + deleteFile("config/crd/argoproj.io_clusteranalysistemplates.yaml") + deleteFile("config/crd/argoproj.io_experiments.yaml") + deleteFile("config/crd/argoproj.io_rollouts.yaml") + deleteFile("config/crd") deleteFile("config") crds := []*extensionsobj.CustomResourceDefinition{} diff --git a/rollout/canary.go b/rollout/canary.go index ed27bd0a79..fa33b43616 100644 --- a/rollout/canary.go +++ b/rollout/canary.go @@ -112,7 +112,7 @@ func (c *rolloutContext) reconcileCanaryStableReplicaSet() (bool, error) { } scaled, _, err := c.scaleReplicaSetAndRecordEvent(c.stableRS, desiredStableRSReplicaCount) if err != nil { - return scaled, fmt.Errorf("failed to scaleReplicaSetAndRecordEvent in reconcileCanaryStableReplicaSet:L %w", err) + return scaled, fmt.Errorf("failed to scaleReplicaSetAndRecordEvent in reconcileCanaryStableReplicaSet: %w", err) } return scaled, err } diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 56b05c7177..9191cc4b93 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -8,6 +8,11 @@ import ( "testing" "time" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + k8stesting "k8s.io/client-go/testing" + "github.com/stretchr/testify/assert" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/apps/v1" @@ -2141,3 +2146,90 @@ func TestCanaryReplicaAndSpecChangedTogether(t *testing.T) { // check the canary one is updated assert.NotEqual(t, originReplicas, int(*updated.Spec.Replicas)) } + +func TestSyncRolloutWithConflictInScaleReplicaSet(t *testing.T) { + f := newFixture(t) + defer f.Close() + + steps := []v1alpha1.CanaryStep{ + { + SetWeight: int32Ptr(10), + }, { + Pause: &v1alpha1.RolloutPause{ + Duration: v1alpha1.DurationFromInt(10), + }, + }, + } + r1 := newCanaryRollout("foo", 10, nil, steps, int32Ptr(1), intstr.FromInt(1), intstr.FromInt(0)) + r2 := bumpVersion(r1) + + rs1 := newReplicaSetWithStatus(r1, 9, 9) + rs2 := newReplicaSetWithStatus(r2, 1, 1) + f.kubeobjects = append(f.kubeobjects, rs1, rs2) + f.replicaSetLister = append(f.replicaSetLister, rs1, rs2) + + f.rolloutLister = append(f.rolloutLister, r2) + f.objects = append(f.objects, r2) + + f.expectPatchRolloutAction(r2) + f.expectUpdateReplicaSetAction(rs2) // attempt to scale replicaset but conflict + f.expectPatchReplicaSetAction(rs2) // instead of update patch replicaset + + key := fmt.Sprintf("%s/%s", r2.Namespace, r2.Name) + c, i, k8sI := f.newController(func() time.Duration { return 30 * time.Minute }) + + f.kubeclient.PrependReactor("update", "replicasets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, &appsv1.ReplicaSet{}, errors.NewConflict(schema.GroupResource{ + Group: "Apps", + Resource: "ReplicaSet", + }, "", fmt.Errorf("test error")) + }) + + f.runController(key, true, false, c, i, k8sI) +} + +func TestSyncRolloutWithConflictInSyncReplicaSetRevision(t *testing.T) { + f := newFixture(t) + defer f.Close() + + steps := []v1alpha1.CanaryStep{ + { + SetWeight: int32Ptr(10), + }, { + Pause: &v1alpha1.RolloutPause{ + Duration: v1alpha1.DurationFromInt(10), + }, + }, + } + r1 := newCanaryRollout("foo", 3, nil, steps, int32Ptr(1), intstr.FromInt(1), intstr.FromInt(0)) + r2 := bumpVersion(r1) + + rs1 := newReplicaSetWithStatus(r1, 3, 3) + rs2 := newReplicaSetWithStatus(r2, 3, 3) + rs2.Annotations["rollout.argoproj.io/revision"] = "1" + + f.kubeobjects = append(f.kubeobjects, rs1, rs2) + f.replicaSetLister = append(f.replicaSetLister, rs1, rs2) + + f.rolloutLister = append(f.rolloutLister, r2) + f.objects = append(f.objects, r2) + + key := fmt.Sprintf("%s/%s", r1.Namespace, r1.Name) + c, i, k8sI := f.newController(func() time.Duration { return 30 * time.Minute }) + + f.kubeclient.PrependReactor("update", "replicasets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, &appsv1.ReplicaSet{}, errors.NewConflict(schema.GroupResource{ + Group: "Apps", + Resource: "ReplicaSet", + }, "", fmt.Errorf("test error")) + }) + + f.expectPatchRolloutAction(r2) + f.expectUpdateReplicaSetAction(rs1) // attempt to update replicaset revision but conflict + f.expectPatchReplicaSetAction(rs1) // instead of update patch replicaset + + f.expectUpdateReplicaSetAction(rs2) // attempt to scale replicaset but conflict + f.expectPatchReplicaSetAction(rs2) // instead of update patch replicaset + + f.runController(key, true, false, c, i, k8sI) +} diff --git a/rollout/controller.go b/rollout/controller.go index 972cea882c..583509b805 100644 --- a/rollout/controller.go +++ b/rollout/controller.go @@ -9,6 +9,7 @@ import ( "sync" "time" + "github.com/argoproj/argo-rollouts/utils/diff" "k8s.io/apimachinery/pkg/runtime/schema" "github.com/argoproj/argo-rollouts/pkg/apis/rollouts" @@ -16,11 +17,13 @@ import ( log "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + patchtypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/dynamic" @@ -937,3 +940,78 @@ func remarshalRollout(r *v1alpha1.Rollout) *v1alpha1.Rollout { } return &remarshalled } + +// updateReplicaSetWithPatch updates the replicaset using patch and on +func (c *rolloutContext) updateReplicaSetFallbackToPatch(ctx context.Context, rs *appsv1.ReplicaSet) (*appsv1.ReplicaSet, error) { + rsCopy := rs.DeepCopy() + updatedRS, err := c.kubeclientset.AppsV1().ReplicaSets(rs.Namespace).Update(ctx, rs, metav1.UpdateOptions{}) + if err != nil { + if errors.IsConflict(err) { + c.log.Infof("Conflict when updating replicaset %s, falling back to patch", rs.Name) + + patchRS := appsv1.ReplicaSet{} + patchRS.Spec.Replicas = rsCopy.Spec.Replicas + patchRS.Annotations = rsCopy.Annotations + patchRS.Labels = rsCopy.Labels + patchRS.Spec.Template.Labels = rsCopy.Spec.Template.Labels + patchRS.Spec.Template.Annotations = rsCopy.Spec.Template.Annotations + patchRS.Spec.Selector = rsCopy.Spec.Selector + + patch, changed, err := diff.CreateTwoWayMergePatch(appsv1.ReplicaSet{}, patchRS, appsv1.ReplicaSet{}) + if err != nil { + return nil, err + } + + if changed { + c.log.Infof("Patching replicaset with patch: %s", string(patch)) + updatedRS, err = c.kubeclientset.AppsV1().ReplicaSets(rs.Namespace).Patch(ctx, rs.Name, patchtypes.StrategicMergePatchType, patch, metav1.PatchOptions{}) + if err != nil { + return nil, err + } + } + + err = c.replicaSetInformer.GetIndexer().Update(updatedRS) + if err != nil { + err = fmt.Errorf("error updating replicaset informer in scaleReplicaSet: %w", err) + return nil, err + } + + return updatedRS, err + } + } + + return updatedRS, err +} + +// updateRolloutWithRetry updates the rollout with a retry if there is a conflict from an update operation, it runs the modifyRollout function to update a fresh rollout from the cluster. +//func (c *rolloutContext) updateRolloutWithRetry(ctx context.Context, ro *v1alpha1.Rollout, modifyRollout func(ro *v1alpha1.Rollout) *v1alpha1.Rollout) (*v1alpha1.Rollout, error) { +// updatedRollout, err := c.argoprojclientset.ArgoprojV1alpha1().Rollouts(c.rollout.Namespace).Update(context.TODO(), c.rollout, metav1.UpdateOptions{}) +// if err != nil { +// if errors.IsConflict(err) { +// retryCount := 0 +// errRetry := retry.RetryOnConflict(retry.DefaultBackoff, func() error { +// retryCount++ +// c.log.Infof("conflict when updating rollout %s, retrying the update operation with new rollout from cluster, attempt: %d", c.rollout.Name, retryCount) +// roGet, err := c.argoprojclientset.ArgoprojV1alpha1().Rollouts(c.rollout.Namespace).Get(context.TODO(), c.rollout.Name, metav1.GetOptions{}) +// if err != nil { +// return fmt.Errorf("error getting rollout %s: %w", c.rollout.Name, err) +// } +// +// roCopy := modifyRollout(roGet) +// updatedRollout, err = c.argoprojclientset.ArgoprojV1alpha1().Rollouts(c.rollout.Namespace).Update(context.TODO(), roCopy, metav1.UpdateOptions{}) +// if err != nil { +// return err +// } +// +// return nil +// }) +// if errRetry != nil { +// return nil, errRetry +// } +// } else { +// c.log.WithError(err).Error("Error: updating rollout revision") +// return nil, err +// } +// } +// return updatedRollout, nil +//} diff --git a/rollout/controller_test.go b/rollout/controller_test.go index 892a2be64f..5d80287f30 100644 --- a/rollout/controller_test.go +++ b/rollout/controller_test.go @@ -792,6 +792,12 @@ func (f *fixture) expectPatchServiceAction(s *corev1.Service, newLabel string) i return len } +func (f *fixture) expectGetReplicaSetAction(r *appsv1.ReplicaSet) int { //nolint:unused + len := len(f.kubeactions) + f.kubeactions = append(f.kubeactions, core.NewGetAction(schema.GroupVersionResource{Resource: "replicasets"}, r.Namespace, r.Name)) + return len +} + func (f *fixture) expectCreateReplicaSetAction(r *appsv1.ReplicaSet) int { len := len(f.kubeactions) f.kubeactions = append(f.kubeactions, core.NewCreateAction(schema.GroupVersionResource{Resource: "replicasets"}, r.Namespace, r)) diff --git a/rollout/ephemeralmetadata.go b/rollout/ephemeralmetadata.go index 60b745ed29..17d6a60d9f 100644 --- a/rollout/ephemeralmetadata.go +++ b/rollout/ephemeralmetadata.go @@ -2,7 +2,6 @@ package rollout import ( "context" - "fmt" appsv1 "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -82,14 +81,12 @@ func (c *rolloutContext) syncEphemeralMetadata(ctx context.Context, rs *appsv1.R } // 2. Update ReplicaSet so that any new pods it creates will have the metadata - rs, err = c.kubeclientset.AppsV1().ReplicaSets(modifiedRS.Namespace).Update(ctx, modifiedRS, metav1.UpdateOptions{}) + rs, err = c.updateReplicaSetFallbackToPatch(ctx, modifiedRS) if err != nil { - return fmt.Errorf("error updating replicaset in syncEphemeralMetadata: %w", err) - } - err = c.replicaSetInformer.GetIndexer().Update(rs) - if err != nil { - return fmt.Errorf("error updating replicaset informer in syncEphemeralMetadata: %w", err) + c.log.Infof("failed to sync ephemeral metadata %v to ReplicaSet %s: %v", podMetadata, rs.Name, err) + return err } + c.log.Infof("synced ephemeral metadata %v to ReplicaSet %s", podMetadata, rs.Name) return nil } diff --git a/rollout/sync.go b/rollout/sync.go index 875949ee55..7654cc58e5 100644 --- a/rollout/sync.go +++ b/rollout/sync.go @@ -83,17 +83,14 @@ func (c *rolloutContext) syncReplicaSetRevision() (*appsv1.ReplicaSet, error) { affinityNeedsUpdate := replicasetutil.IfInjectedAntiAffinityRuleNeedsUpdate(rsCopy.Spec.Template.Spec.Affinity, *c.rollout) if annotationsUpdated || minReadySecondsNeedsUpdate || affinityNeedsUpdate { + rsCopy.Spec.MinReadySeconds = c.rollout.Spec.MinReadySeconds rsCopy.Spec.Template.Spec.Affinity = replicasetutil.GenerateReplicaSetAffinity(*c.rollout) - rs, err := c.kubeclientset.AppsV1().ReplicaSets(rsCopy.ObjectMeta.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{}) - if err != nil { - c.log.WithError(err).Error("Error: updating replicaset revision") - return nil, fmt.Errorf("error updating replicaset revision: %v", err) - } - c.log.Infof("Synced revision on ReplicaSet '%s' to '%s'", rs.Name, newRevision) - err = c.replicaSetInformer.GetIndexer().Update(rs) + + rs, err := c.updateReplicaSetFallbackToPatch(ctx, rsCopy) if err != nil { - return nil, fmt.Errorf("error updating replicaset informer in syncReplicaSetRevision: %w", err) + c.log.WithError(err).Errorf("Error: syncing replicaset revision on %s", rsCopy.Name) + return nil, err } return rs, nil } @@ -245,7 +242,7 @@ func (c *rolloutContext) createDesiredReplicaSet() (*appsv1.ReplicaSet, error) { cond := conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionFalse, conditions.FailedRSCreateReason, msg) patchErr := c.patchCondition(c.rollout, newStatus, cond) if patchErr != nil { - c.log.Warnf("Error Patching Rollout: %s", patchErr.Error()) + c.log.Warnf("Error Patching Rollout Conditions: %s", patchErr.Error()) } return nil, err default: @@ -370,8 +367,8 @@ func (c *rolloutContext) scaleReplicaSet(rs *appsv1.ReplicaSet, newScale int32, rolloutReplicas := defaults.GetReplicasOrDefault(rollout.Spec.Replicas) annotationsNeedUpdate := annotations.ReplicasAnnotationsNeedUpdate(rs, rolloutReplicas) - scaled := false var err error + scaled := false if sizeNeedsUpdate || annotationsNeedUpdate { rsCopy := rs.DeepCopy() oldScale := defaults.GetReplicasOrDefault(rs.Spec.Replicas) @@ -381,14 +378,10 @@ func (c *rolloutContext) scaleReplicaSet(rs *appsv1.ReplicaSet, newScale int32, delete(rsCopy.Annotations, v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey) } - rs, err = c.kubeclientset.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{}) - if err != nil { - return scaled, rs, fmt.Errorf("error updating replicaset %s: %w", rsCopy.Name, err) - } - err = c.replicaSetInformer.GetIndexer().Update(rs) + rs, err = c.updateReplicaSetFallbackToPatch(ctx, rsCopy) if err != nil { - err = fmt.Errorf("error updating replicaset informer in scaleReplicaSet: %w", err) - return scaled, rs, err + c.log.Infof("Error scaling replicasets %s: %v", rsCopy.Name, err) + return scaled, nil, err } if sizeNeedsUpdate { @@ -397,7 +390,7 @@ func (c *rolloutContext) scaleReplicaSet(rs *appsv1.ReplicaSet, newScale int32, c.recorder.Eventf(rollout, record.EventOptions{EventReason: conditions.ScalingReplicaSetReason}, conditions.ScalingReplicaSetMessage, scalingOperation, rs.Name, revision, oldScale, newScale) } } - return scaled, rs, err + return scaled, rs, nil } // calculateStatus calculates the common fields for all rollouts by looking into the provided replica sets. diff --git a/test/e2e/canary_test.go b/test/e2e/canary_test.go index 2c68ec8bd7..4f32090d16 100644 --- a/test/e2e/canary_test.go +++ b/test/e2e/canary_test.go @@ -658,6 +658,7 @@ func (s *CanarySuite) TestCanaryDynamicStableScale() { When(). MarkPodsReady("1", 1). // mark last remaining stable pod as ready (4/4 stable are ready) WaitForRevisionPodCount("2", 0). + Sleep(2*time.Second). //WaitForRevisionPodCount does not wait for terminating pods and so ExpectServiceSelector fails sleep a bit for the terminating pods to be deleted Then(). // Expect that the canary service selector is now set to stable because of dynamic stable scale is over and we have all pods up on stable rs ExpectServiceSelector("dynamic-stable-scale-canary", map[string]string{"app": "dynamic-stable-scale", "rollouts-pod-template-hash": "868d98995b"}, false). diff --git a/test/fixtures/e2e_suite.go b/test/fixtures/e2e_suite.go index a774afcf94..e78ae1aa68 100644 --- a/test/fixtures/e2e_suite.go +++ b/test/fixtures/e2e_suite.go @@ -54,7 +54,7 @@ const ( ) var ( - E2EWaitTimeout time.Duration = time.Second * 120 + E2EWaitTimeout time.Duration = time.Second * 90 E2EPodDelay = 0 E2EALBIngressAnnotations map[string]string @@ -143,8 +143,8 @@ func (s *E2ESuite) SetupSuite() { restConfig, err := config.ClientConfig() s.CheckError(err) s.Common.kubernetesHost = restConfig.Host - restConfig.Burst = defaults.DefaultBurst * 2 - restConfig.QPS = defaults.DefaultQPS * 2 + restConfig.Burst = defaults.DefaultBurst * 10 + restConfig.QPS = defaults.DefaultQPS * 10 s.namespace, _, err = config.Namespace() s.CheckError(err) s.kubeClient, err = kubernetes.NewForConfig(restConfig) diff --git a/ui/src/app/components/rollout/rollout.scss b/ui/src/app/components/rollout/rollout.scss index ad6247c478..6ee3b71099 100644 --- a/ui/src/app/components/rollout/rollout.scss +++ b/ui/src/app/components/rollout/rollout.scss @@ -7,7 +7,7 @@ background: $argo-color-gray-1; border-radius: 5px; - + &__header { display: flex; align-items: center; @@ -53,11 +53,11 @@ } &--current { - border: 2px solid $sherbert; + border: 2px solid $argo-running-color; } &--current:hover { - border: 2px solid $sherbert; + border: 2px solid $argo-running-color; } &-title { diff --git a/utils/annotations/annotations.go b/utils/annotations/annotations.go index 121c6c0803..8c00cbe897 100644 --- a/utils/annotations/annotations.go +++ b/utils/annotations/annotations.go @@ -215,10 +215,11 @@ func SetNewReplicaSetAnnotations(rollout *v1alpha1.Rollout, newRS *appsv1.Replic } var annotationsToSkip = map[string]bool{ - corev1.LastAppliedConfigAnnotation: true, - RevisionAnnotation: true, - RevisionHistoryAnnotation: true, - DesiredReplicasAnnotation: true, + corev1.LastAppliedConfigAnnotation: true, + RevisionAnnotation: true, + RevisionHistoryAnnotation: true, + DesiredReplicasAnnotation: true, + "notified.notifications.argoproj.io": true, } // skipCopyAnnotation returns true if we should skip copying the annotation with the given annotation key