diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b7c9b459b..54f13ec5f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -41,6 +41,8 @@ jobs: context: . file: ./Dockerfile platforms: linux/amd64,linux/arm64,linux/arm/v7 + build-args: | + REVISION=${{ github.sha }} tags: | ghcr.io/fluxcd/flagger:${{ steps.prep.outputs.VERSION }} labels: | diff --git a/.goreleaser.yml b/.goreleaser.yml index 5e2548722..fc2c59a0a 100644 --- a/.goreleaser.yml +++ b/.goreleaser.yml @@ -12,7 +12,3 @@ archives: - name_template: "{{ .Binary }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}" files: - none* -changelog: - filters: - exclude: - - '^CircleCI' diff --git a/Dockerfile b/Dockerfile index a45c9df14..d2d30928b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ FROM golang:1.15-alpine as builder ARG TARGETPLATFORM +ARG REVISION WORKDIR /workspace @@ -16,7 +17,9 @@ COPY cmd/ cmd/ COPY pkg/ pkg/ # build -RUN CGO_ENABLED=0 go build -a -o flagger ./cmd/flagger +RUN CGO_ENABLED=0 go build \ + -ldflags "-s -w -X github.com/fluxcd/flagger/pkg/version.REVISION=${REVISION}" \ + -a -o flagger ./cmd/flagger FROM alpine:3.12 diff --git a/Makefile b/Makefile index 40d503db8..6b7dbeda1 100644 --- a/Makefile +++ b/Makefile @@ -3,14 +3,7 @@ VERSION?=$(shell grep 'VERSION' pkg/version/version.go | awk '{ print $$4 }' | t LT_VERSION?=$(shell grep 'VERSION' cmd/loadtester/main.go | awk '{ print $$4 }' | tr -d '"' | head -n1) build: - GIT_COMMIT=$$(git rev-list -1 HEAD) && CGO_ENABLED=0 GOOS=linux go build \ - -ldflags "-s -w -X github.com/fluxcd/flagger/pkg/version.REVISION=$${GIT_COMMIT}" \ - -a -installsuffix cgo -o ./bin/flagger ./cmd/flagger/* - docker build -t weaveworks/flagger:$(TAG) . 
-f Dockerfile - -push: - docker tag fluxcd/flagger:$(TAG) fluxcd/flagger:$(VERSION) - docker push fluxcd/flagger:$(VERSION) + CGO_ENABLED=0 go build -a -o ./bin/flagger ./cmd/flagger fmt: gofmt -l -s -w ./ @@ -48,13 +41,9 @@ release: git tag "v$(VERSION)" git push origin "v$(VERSION)" -release-notes: - cd /tmp && GH_REL_URL="https://github.com/buchanae/github-release-notes/releases/download/0.2.0/github-release-notes-linux-amd64-0.2.0.tar.gz" && \ - curl -sSL $${GH_REL_URL} | tar xz && sudo mv github-release-notes /usr/local/bin/ - loadtester-build: CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o ./bin/loadtester ./cmd/loadtester/* - docker build -t fluxcd/flagger-loadtester:$(LT_VERSION) . -f Dockerfile.loadtester + docker build -t ghcr.io/fluxcd/flagger-loadtester:$(LT_VERSION) . -f Dockerfile.loadtester loadtester-push: - docker push fluxcd/flagger-loadtester:$(LT_VERSION) + docker push ghcr.io/fluxcd/flagger-loadtester:$(LT_VERSION) diff --git a/artifacts/flagger/deployment.yaml b/artifacts/flagger/deployment.yaml index 03e818d7a..cc654fcbb 100644 --- a/artifacts/flagger/deployment.yaml +++ b/artifacts/flagger/deployment.yaml @@ -22,7 +22,7 @@ spec: serviceAccountName: flagger containers: - name: flagger - image: weaveworks/flagger:1.4.2 + image: ghcr.io/fluxcd/flagger:1.4.2 imagePullPolicy: IfNotPresent ports: - name: http diff --git a/charts/flagger/LICENSE b/charts/flagger/LICENSE index 6e292ed28..261eeb9e9 100644 --- a/charts/flagger/LICENSE +++ b/charts/flagger/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2018 Weaveworks. All rights reserved. + Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/charts/flagger/README.md b/charts/flagger/README.md index e62c7c070..9c297ff2e 100644 --- a/charts/flagger/README.md +++ b/charts/flagger/README.md @@ -110,7 +110,7 @@ The following tables lists the configurable parameters of the Flagger chart and Parameter | Description | Default --- | --- | --- -`image.repository` | Image repository | `weaveworks/flagger` +`image.repository` | Image repository | `ghcr.io/fluxcd/flagger` `image.tag` | Image tag | `` `image.pullPolicy` | Image pull policy | `IfNotPresent` `logLevel` | Log level | `info` diff --git a/charts/flagger/values.yaml b/charts/flagger/values.yaml index d25f85dfc..b752db6cb 100644 --- a/charts/flagger/values.yaml +++ b/charts/flagger/values.yaml @@ -1,7 +1,7 @@ # Default values for flagger. image: - repository: weaveworks/flagger + repository: ghcr.io/fluxcd/flagger tag: 1.4.2 pullPolicy: IfNotPresent pullSecret: diff --git a/charts/loadtester/values.yaml b/charts/loadtester/values.yaml index c089cc6d7..0f93aa876 100644 --- a/charts/loadtester/values.yaml +++ b/charts/loadtester/values.yaml @@ -1,7 +1,7 @@ replicaCount: 1 image: - repository: weaveworks/flagger-loadtester + repository: ghcr.io/fluxcd/flagger-loadtester tag: 0.18.0 pullPolicy: IfNotPresent diff --git a/docs/gitbook/README.md b/docs/gitbook/README.md index d46f730a5..2d1b58cee 100644 --- a/docs/gitbook/README.md +++ b/docs/gitbook/README.md @@ -4,27 +4,19 @@ description: Flagger is a progressive delivery Kubernetes operator # Introduction -[Flagger](https://github.com/fluxcd/flagger) is a **Kubernetes** operator that automates the promotion of -canary deployments using **Istio**, **Linkerd**, **App Mesh**, **NGINX**, **Skipper**, **Contour**, **Gloo** or **Traefik** routing for -traffic shifting and **Prometheus** metrics for canary analysis. The canary analysis can be extended with webhooks for -running system integration/acceptance tests, load tests, or any other custom validation. 
+[Flagger](https://github.com/weaveworks/flagger) is a **Kubernetes** operator that automates the promotion of canary deployments using **Istio**, **Linkerd**, **App Mesh**, **NGINX**, **Skipper**, **Contour**, **Gloo** or **Traefik** routing for traffic shifting and **Prometheus** metrics for canary analysis. The canary analysis can be extended with webhooks for running system integration/acceptance tests, load tests, or any other custom validation. -Flagger implements a control loop that gradually shifts traffic to the canary while measuring key performance indicators -like HTTP requests success rate, requests average duration and pods health. -Based on analysis of the **KPIs** a canary is promoted or aborted, and the analysis result is published to **Slack** or **MS Teams**. +Flagger implements a control loop that gradually shifts traffic to the canary while measuring key performance indicators like HTTP requests success rate, requests average duration and pods health. Based on analysis of the **KPIs** a canary is promoted or aborted, and the analysis result is published to **Slack** or **MS Teams**. -![Flagger overview diagram](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/diagrams/flagger-canary-overview.png) +![Flagger overview diagram](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/diagrams/flagger-canary-overview.png) -Flagger can be configured with Kubernetes custom resources and is compatible with any CI/CD solutions made for Kubernetes. -Since Flagger is declarative and reacts to Kubernetes events, -it can be used in **GitOps** pipelines together with Flux CD or JenkinsX. +Flagger can be configured with Kubernetes custom resources and is compatible with any CI/CD solutions made for Kubernetes. Since Flagger is declarative and reacts to Kubernetes events, it can be used in **GitOps** pipelines together with Flux CD or JenkinsX. 
This project is sponsored by [Weaveworks](https://www.weave.works/) ## Getting started -To get started with Flagger, chose one of the supported routing providers -and [install](install/flagger-install-on-kubernetes.md) Flagger with Helm or Kustomize. +To get started with Flagger, chose one of the supported routing providers and [install](install/flagger-install-on-kubernetes.md) Flagger with Helm or Kustomize. After install Flagger, you can follow one of the tutorials: @@ -47,3 +39,4 @@ After install Flagger, you can follow one of the tutorials: * [Istio](https://github.com/stefanprodan/gitops-istio) * [Linkerd](https://helm.workshop.flagger.dev) * [AWS App Mesh](https://eks.handson.flagger.dev) + diff --git a/docs/gitbook/SUMMARY.md b/docs/gitbook/SUMMARY.md index 0fcd3038e..839ca2b3f 100644 --- a/docs/gitbook/SUMMARY.md +++ b/docs/gitbook/SUMMARY.md @@ -41,3 +41,4 @@ * [Development Guide](dev/dev-guide.md) * [Release Guide](dev/release-guide.md) * [Upgrade Guide](dev/upgrade-guide.md) + diff --git a/docs/gitbook/dev/dev-guide.md b/docs/gitbook/dev/dev-guide.md index 52d8f371f..cc57b5679 100644 --- a/docs/gitbook/dev/dev-guide.md +++ b/docs/gitbook/dev/dev-guide.md @@ -2,36 +2,35 @@ This document describes how to build, test and run Flagger from source. -### Setup dev environment +## Setup dev environment Flagger is written in Go and uses Go modules for dependency management. On your dev machine install the following tools: -* go >= 1.14 -* git >= 2.20 -* bash >= 5.0 -* make >= 3.81 -* kubectl >= 1.16 -* kustomize >= 3.5 -* helm >= 3.0 -* docker >= 19.03 - -You'll also need a Kubernetes cluster for testing Flagger. -You can use Minikube, Kind, Docker desktop or any remote cluster -(AKS/EKS/GKE/etc) Kubernetes version 1.14 or newer. + +* go >= 1.14 +* git >= 2.20 +* bash >= 5.0 +* make >= 3.81 +* kubectl >= 1.16 +* kustomize >= 3.5 +* helm >= 3.0 +* docker >= 19.03 + +You'll also need a Kubernetes cluster for testing Flagger. 
You can use Minikube, Kind, Docker desktop or any remote cluster \(AKS/EKS/GKE/etc\) Kubernetes version 1.14 or newer. To start contributing to Flagger, fork the [repository](https://github.com/fluxcd/flagger) on GitHub. Create a dir inside your `GOPATH`: ```bash -mkdir -p $GOPATH/src/github.com/weaveworks +mkdir -p $GOPATH/src/github.com/fluxcd ``` Clone your fork: ```bash -cd $GOPATH/src/github.com/weaveworks +cd $GOPATH/src/github.com/fluxcd git clone https://github.com/YOUR_USERNAME/flagger cd flagger ``` @@ -46,11 +45,11 @@ Sync your fork regularly to keep it up-to-date with upstream: ```bash git fetch upstream -git checkout master -git merge upstream/master +git checkout main +git merge upstream/main ``` -### Build +## Build Download Go modules: @@ -58,19 +57,30 @@ Download Go modules: go mod download ``` -Build Flagger binary and container image: +Build Flagger binary: ```bash make build ``` -Build load tester binary and container image: +Build load tester binary: ```bash make loadtester-build ``` -### Code changes +## Code changes + +We require all commits to be signed. By signing off with your signature, you +certify that you wrote the patch or otherwise have the right to contribute the +material by the rules of the [DCO](https://raw.githubusercontent.com/fluxcd/flagger/main/DCO). + +If your `user.name` and `user.email` are configured in your Git config, +you can sign your commit automatically with: + +```bash +git commit -s +``` Before submitting a PR, make sure your changes are covered by unit tests. @@ -98,7 +108,7 @@ Run unit tests: make test ``` -### API changes +## API changes If you made changes to `pkg/apis` regenerate the Kubernetes client sets with: @@ -114,10 +124,9 @@ make crd Note that any change to the CRDs must be accompanied by an update to the Open API schema. 
-### Manual testing +## Manual testing -Install a service mesh and/or an ingress controller on your cluster and deploy Flagger -using one of the install options [listed here](https://docs.flagger.app/install/flagger-install-on-kubernetes). +Install a service mesh and/or an ingress controller on your cluster and deploy Flagger using one of the install options [listed here](https://docs.flagger.app/install/flagger-install-on-kubernetes). If you made changes to the CRDs, apply your local copy with: @@ -150,7 +159,7 @@ Another option to manually test your changes is to build and push the image to y ```bash make build -docker tag weaveworks/flagger:latest /flagger: +docker build -t /flagger: . docker push /flagger: ``` @@ -163,7 +172,7 @@ kubectl -n istio-system scale deployment/flagger --replicas=1 Now you can use one of the [tutorials](https://docs.flagger.app/) to manually test your changes. -### Integration testing +## Integration testing Flagger end-to-end tests can be run locally with [Kubernetes Kind](https://github.com/kubernetes-sigs/kind). @@ -173,39 +182,23 @@ Create a Kind cluster: kind create cluster ``` -Install a service mesh and/or an ingress controller in Kind. - -Linkerd example: - -```bash -linkerd install | kubectl apply -f - -linkerd check -``` - Build Flagger container image and load it on the cluster: ```bash make build -docker tag weaveworks/flagger:latest test/flagger:latest +docker build -t test/flagger:latest . 
kind load docker-image test/flagger:latest ``` -Install Flagger on the cluster and set the test image: - -```bash -kubectl apply -k ./kustomize/linkerd -kubectl -n linkerd set image deployment/flagger flagger=test/flagger:latest -kubectl -n linkerd rollout status deployment/flagger -``` -Run the Linkerd e2e tests: +Run the Istio e2e tests: ```bash -./test/e2e-linkerd-tests.sh +./test/istio/run.sh ``` For each service mesh and ingress controller there is a dedicated e2e test suite, -chose one that matches your changes from this [list](https://github.com/fluxcd/flagger/tree/master/test). +chose one that matches your changes from this [list](https://github.com/fluxcd/flagger/tree/main/test). When you open a pull request on Flagger repo, the unit and integration tests will be run in CI. diff --git a/docs/gitbook/dev/release-guide.md b/docs/gitbook/dev/release-guide.md index dc1869985..7d4cef33b 100644 --- a/docs/gitbook/dev/release-guide.md +++ b/docs/gitbook/dev/release-guide.md @@ -2,33 +2,34 @@ This document describes how to release Flagger. -### Release +## Release + +To release a new Flagger version \(e.g. `2.0.0`\) follow these steps: -To release a new Flagger version (e.g. `2.0.0`) follow these steps: * create a branch `git checkout -b prep-2.0.0` * set the version in code and manifests `TAG=2.0.0 make version-set` * commit changes and merge PR * checkout master `git checkout master && git pull` * tag master `make release` -### CI +## CI After the tag has been pushed to GitHub, the CI release pipeline does the following: + * creates a GitHub release * pushes the Flagger binary and change log to GitHub release * pushes the Flagger container image to Docker Hub * pushes the Helm chart to github-pages branch * GitHub pages publishes the new chart version on the Helm repository -### Docs +## Docs The documentation [website](https://docs.flagger.app) is built from the `docs` branch. 
After a Flagger release, publish the docs with: + * `git checkout master && git pull` * `git checkout docs` * `git rebase master` * `git push origin docs` - - diff --git a/docs/gitbook/dev/upgrade-guide.md b/docs/gitbook/dev/upgrade-guide.md index 157fbe233..641f0258e 100644 --- a/docs/gitbook/dev/upgrade-guide.md +++ b/docs/gitbook/dev/upgrade-guide.md @@ -2,9 +2,10 @@ This document describes how to upgrade Flagger. -### Upgrade canaries v1alpha3 to v1beta1 +## Upgrade canaries v1alpha3 to v1beta1 Canary CRD changes in `canaries.flagger.app/v1beta1`: + * the `spec.canaryAnalysis` field has been deprecated and replaced with `spec.analysis` * the `spec.analysis.interval` and `spec.analysis.threshold` fields are required * the `status.lastAppliedSpec` and `status.lastPromotedSpec` hashing algorithm changed to `hash/fnv` @@ -17,17 +18,17 @@ Canary CRD changes in `canaries.flagger.app/v1beta1`: * the `spec.service.meshName` field has been deprecated and no longer used for `provider: appmesh:v1beta2` Upgrade procedure: + * install the `v1beta1` CRDs * update Flagger deployment * replace `apiVersion: flagger.app/v1alpha3` with `apiVersion: flagger.app/v1beta1` in all canary manifests * replace `spec.canaryAnalysis` with `spec.analysis` in all canary manifests * update canary manifests in cluster -**Note** that after upgrading Flagger, all canaries will be triggered as the hash value used for tracking changes -is computed differently. You can set `spec.skipAnalysis: true` in all canary manifests before upgrading Flagger, -do the upgrade, wait for Flagger to finish the no-op promotions and finally set `skipAnalysis` to `false`. +**Note** that after upgrading Flagger, all canaries will be triggered as the hash value used for tracking changes is computed differently. You can set `spec.skipAnalysis: true` in all canary manifests before upgrading Flagger, do the upgrade, wait for Flagger to finish the no-op promotions and finally set `skipAnalysis` to `false`. 
Update builtin metrics: + * replace `threshold` with `thresholdRange.min` for request-success-rate * replace `threshold` with `thresholdRange.max` for request-duration @@ -43,11 +44,9 @@ metrics: interval: 1m ``` -### Istio telemetry v2 +## Istio telemetry v2 -Istio 1.5 comes with a breaking change for Flagger uses. In Istio telemetry v2 the metric -`istio_request_duration_seconds_bucket` has been removed and replaced with `istio_request_duration_milliseconds_bucket` -and this breaks the `request-duration` metric check. +Istio 1.5 comes with a breaking change for Flagger uses. In Istio telemetry v2 the metric `istio_request_duration_seconds_bucket` has been removed and replaced with `istio_request_duration_milliseconds_bucket` and this breaks the `request-duration` metric check. If are using **Istio 1.4**, you can create a metric template using the old duration metric like this: @@ -88,3 +87,4 @@ metrics: max: 0.500 interval: 1m ``` + diff --git a/docs/gitbook/faq.md b/docs/gitbook/faq.md index 01f218571..60dd5df85 100644 --- a/docs/gitbook/faq.md +++ b/docs/gitbook/faq.md @@ -1,10 +1,11 @@ -# Frequently asked questions +# FAQ -### Deployment Strategies +## Deployment Strategies **Which deployment strategies are supported by Flagger?** Flagger implements the following deployment strategies: + * [Canary Release](usage/deployment-strategies.md#canary-release) * [A/B Testing](usage/deployment-strategies.md#a-b-testing) * [Blue/Green](usage/deployment-strategies.md#blue-green-deployments) @@ -12,25 +13,21 @@ Flagger implements the following deployment strategies: **When should I use A/B testing instead of progressive traffic shifting?** -For frontend applications that require session affinity you should use HTTP headers or cookies match conditions -to ensure a set of users will stay on the same version for the whole duration of the canary analysis. 
+For frontend applications that require session affinity you should use HTTP headers or cookies match conditions to ensure a set of users will stay on the same version for the whole duration of the canary analysis. **Can I use Flagger to manage applications that live outside of a service mesh?** -For applications that are not deployed on a service mesh, Flagger can orchestrate Blue/Green style deployments -with Kubernetes L4 networking. +For applications that are not deployed on a service mesh, Flagger can orchestrate Blue/Green style deployments with Kubernetes L4 networking. **When can I use traffic mirroring?** -Traffic mirroring can be used for Blue/Green deployment strategy or a pre-stage in a Canary release. -Traffic mirroring will copy each incoming request, sending one request to the primary and one to the canary service. -Mirroring should be used for requests that are **idempotent** or capable of being processed twice (once by the primary and once by the canary). +Traffic mirroring can be used for Blue/Green deployment strategy or a pre-stage in a Canary release. Traffic mirroring will copy each incoming request, sending one request to the primary and one to the canary service. Mirroring should be used for requests that are **idempotent** or capable of being processed twice \(once by the primary and once by the canary\). 
**How to retry a failed release?** A canary analysis is triggered by changes in any of the following objects: -* Deployment/DaemonSet PodSpec (metadata, container image, command, ports, env, resources, etc) +* Deployment/DaemonSet PodSpec \(metadata, container image, command, ports, env, resources, etc\) * ConfigMaps mounted as volumes or mapped to environment variables * Secrets mounted as volumes or mapped to environment variables @@ -46,7 +43,7 @@ spec: timestamp: "2020-03-10T14:24:48+0000" ``` -### Kubernetes services +## Kubernetes services **How is an application exposed inside the cluster?** @@ -74,20 +71,23 @@ spec: portName: http ``` -If the `service.name` is not specified, then `targetRef.name` is used for the apex domain and canary/primary services name prefix. -You should treat the service name as an immutable field, changing it could result in routing conflicts. +If the `service.name` is not specified, then `targetRef.name` is used for the apex domain and canary/primary services name prefix. You should treat the service name as an immutable field, changing it could result in routing conflicts. Based on the canary spec service, Flagger generates the following Kubernetes ClusterIP service: * `..svc.cluster.local` + selector `app=-primary` + * `-primary..svc.cluster.local` + selector `app=-primary` + * `-canary..svc.cluster.local` + selector `app=` -This ensures that traffic coming from a namespace outside the mesh to `podinfo.test:9898` -will be routed to the latest stable release of your app. +This ensures that traffic coming from a namespace outside the mesh to `podinfo.test:9898` will be routed to the latest stable release of your app. ```yaml apiVersion: v1 @@ -133,16 +133,13 @@ spec: targetPort: http ``` -The `podinfo-canary.test:9898` address is available only during the -canary analysis and can be used for conformance testing or load testing. 
+The `podinfo-canary.test:9898` address is available only during the canary analysis and can be used for conformance testing or load testing. -### Multiple ports +## Multiple ports **My application listens on multiple ports, how can I expose them inside the cluster?** -If port discovery is enabled, Flagger scans the deployment spec and extracts the containers -ports excluding the port specified in the canary service and Envoy sidecar ports. -These ports will be used when generating the ClusterIP services. +If port discovery is enabled, Flagger scans the deployment spec and extracts the containers ports excluding the port specified in the canary service and Envoy sidecar ports. These ports will be used when generating the ClusterIP services. For a deployment that exposes two ports: @@ -184,7 +181,7 @@ spec: Both port `8080` and `9090` will be added to the ClusterIP services. -### Label selectors +## Label selectors **What labels selectors are supported by Flagger?** @@ -205,8 +202,7 @@ spec: app: podinfo ``` -Besides `app` Flagger supports `name` and `app.kubernetes.io/name` selectors. If you use a different -convention you can specify your label with the `-selector-labels` flag. +Besides `app` Flagger supports `name` and `app.kubernetes.io/name` selectors. If you use a different convention you can specify your label with the `-selector-labels` flag. 
**Is pod affinity and anti affinity supported?** @@ -241,7 +237,7 @@ spec: topologyKey: kubernetes.io/hostname ``` -### Metrics +## Metrics **How does Flagger measures the request success rate and duration?** @@ -287,7 +283,7 @@ sum( ) ``` -Envoy query (App Mesh): +Envoy query \(App Mesh\): ```javascript sum( @@ -310,24 +306,24 @@ sum( ) ``` -Envoy query (Contour or Gloo): +Envoy query \(Contour or Gloo\): ```javascript sum( - rate( - envoy_cluster_upstream_rq{ - envoy_cluster_name=~"$namespace-$workload", - envoy_response_code!~"5.*" - }[$interval] - ) + rate( + envoy_cluster_upstream_rq{ + envoy_cluster_name=~"$namespace-$workload", + envoy_response_code!~"5.*" + }[$interval] + ) ) / sum( - rate( - envoy_cluster_upstream_rq{ - envoy_cluster_name=~"$namespace-$workload", - }[$interval] - ) + rate( + envoy_cluster_upstream_rq{ + envoy_cluster_name=~"$namespace-$workload", + }[$interval] + ) ) ``` @@ -362,7 +358,7 @@ histogram_quantile(0.99, ) ``` -Envoy query (App Mesh, Contour or Gloo): +Envoy query \(App Mesh, Contour or Gloo\): ```javascript histogram_quantile(0.99, @@ -381,19 +377,15 @@ histogram_quantile(0.99, **Can I use custom metrics?** -The analysis can be extended with metrics provided by Prometheus, Datadog and AWS CloudWatch. For more details -on how custom metrics can be used please read the [metrics docs](usage/metrics.md). +The analysis can be extended with metrics provided by Prometheus, Datadog and AWS CloudWatch. For more details on how custom metrics can be used please read the [metrics docs](usage/metrics.md). -### Istio routing +## Istio routing **How does Flagger interact with Istio?** -Flagger creates an Istio Virtual Service and Destination Rules based on the Canary service spec. -The service configuration lets you expose an app inside or outside the mesh. -You can also define traffic policies, HTTP match conditions, URI rewrite rules, CORS policies, timeout and retries. 
+Flagger creates an Istio Virtual Service and Destination Rules based on the Canary service spec. The service configuration lets you expose an app inside or outside the mesh. You can also define traffic policies, HTTP match conditions, URI rewrite rules, CORS policies, timeout and retries. -The following spec exposes the `frontend` workload inside the mesh on `frontend.test.svc.cluster.local:9898` -and outside the mesh on `frontend.example.com`. You'll have to specify an Istio ingress gateway for external hosts. +The following spec exposes the `frontend` workload inside the mesh on `frontend.test.svc.cluster.local:9898` and outside the mesh on `frontend.example.com`. You'll have to specify an Istio ingress gateway for external hosts. ```yaml apiVersion: flagger.app/v1beta1 @@ -527,11 +519,9 @@ spec: mode: DISABLE ``` -Flagger keeps in sync the virtual service and destination rules with the canary service spec. -Any direct modification to the virtual service spec will be overwritten. +Flagger keeps in sync the virtual service and destination rules with the canary service spec. Any direct modification to the virtual service spec will be overwritten. -To expose a workload inside the mesh on `http://backend.test.svc.cluster.local:9898`, -the service spec can contain only the container port and the traffic policy: +To expose a workload inside the mesh on `http://backend.test.svc.cluster.local:9898`, the service spec can contain only the container port and the traffic policy: ```yaml apiVersion: flagger.app/v1beta1 @@ -572,11 +562,9 @@ spec: app: backend-primary ``` -Flagger works for user facing apps exposed outside the cluster via an ingress gateway -and for backend HTTP APIs that are accessible only from inside the mesh. +Flagger works for user facing apps exposed outside the cluster via an ingress gateway and for backend HTTP APIs that are accessible only from inside the mesh. 
-If `Delegation` is enabled, Flagger would generate Istio VirtualService without hosts and gateway, -making the service compatible with Istio delegation. +If `Delegation` is enabled, Flagger would generate Istio VirtualService without hosts and gateway, making the service compatible with Istio delegation. ```yaml apiVersion: flagger.app/v1beta1 @@ -651,15 +639,13 @@ spec: namespace: test ``` -Note that pilot env `PILOT_ENABLE_VIRTUAL_SERVICE_DELEGATE` must also be set. -(For the use of Istio Delegation, you can refer to the documentation of [Virtual Service](https://istio.io/latest/docs/reference/config/networking/virtual-service/#Delegate) and [pilot environment variables](https://istio.io/latest/docs/reference/commands/pilot-discovery/#envvars).) +Note that pilot env `PILOT_ENABLE_VIRTUAL_SERVICE_DELEGATE` must also be set. \(For the use of Istio Delegation, you can refer to the documentation of [Virtual Service](https://istio.io/latest/docs/reference/config/networking/virtual-service/#Delegate) and [pilot environment variables](https://istio.io/latest/docs/reference/commands/pilot-discovery/#envvars).\) -### Istio Ingress Gateway +## Istio Ingress Gateway **How can I expose multiple canaries on the same external domain?** -Assuming you have two apps, one that servers the main website and one that serves the REST API. -For each app you can define a canary object as: +Assuming you have two apps, one that servers the main website and one that serves the REST API. For each app you can define a canary object as: ```yaml apiVersion: flagger.app/v1beta1 @@ -697,13 +683,11 @@ spec: uri: / ``` -Based on the above configuration, Flagger will create two virtual services bounded to the same ingress gateway and external host. 
-Istio Pilot will [merge](https://istio.io/help/ops/traffic-management/deploy-guidelines/#multiple-virtual-services-and-destination-rules-for-the-same-host) -the two services and the website rule will be moved to the end of the list in the merged configuration. +Based on the above configuration, Flagger will create two virtual services bounded to the same ingress gateway and external host. Istio Pilot will [merge](https://istio.io/help/ops/traffic-management/deploy-guidelines/#multiple-virtual-services-and-destination-rules-for-the-same-host) the two services and the website rule will be moved to the end of the list in the merged configuration. Note that host merging only works if the canaries are bounded to a ingress gateway other than the `mesh` gateway. -### Istio Mutual TLS +## Istio Mutual TLS **How can I enable mTLS for a canary?** @@ -758,3 +742,4 @@ spec: ports: - number: 80 ``` + diff --git a/docs/gitbook/install/flagger-install-on-eks-appmesh.md b/docs/gitbook/install/flagger-install-on-eks-appmesh.md index 00a921e33..685bec7fa 100644 --- a/docs/gitbook/install/flagger-install-on-eks-appmesh.md +++ b/docs/gitbook/install/flagger-install-on-eks-appmesh.md @@ -16,8 +16,7 @@ The App Mesh integration with EKS is made out of the following components: ## Create a Kubernetes cluster -In order to create an EKS cluster you can use [eksctl](https://eksctl.io). -Eksctl is an open source command-line utility made by Weaveworks in collaboration with Amazon. +In order to create an EKS cluster you can use [eksctl](https://eksctl.io). Eksctl is an open source command-line utility made by Weaveworks in collaboration with Amazon. On MacOS you can install eksctl with Homebrew: @@ -36,9 +35,7 @@ eksctl create cluster --name=appmesh \ --appmesh-access ``` -The above command will create a two nodes cluster with -App Mesh [IAM policy](https://docs.aws.amazon.com/app-mesh/latest/userguide/MESH_IAM_user_policies.html) -attached to the EKS node instance role. 
+The above command will create a two nodes cluster with App Mesh [IAM policy](https://docs.aws.amazon.com/app-mesh/latest/userguide/MESH_IAM_user_policies.html) attached to the EKS node instance role. Verify the install with: @@ -97,8 +94,7 @@ helm upgrade -i appmesh-controller eks/appmesh-controller \ --wait --namespace appmesh-system ``` -In order to collect the App Mesh metrics that Flagger needs to run the canary analysis, -you'll need to setup a Prometheus instance to scrape the Envoy sidecars. +In order to collect the App Mesh metrics that Flagger needs to run the canary analysis, you'll need to setup a Prometheus instance to scrape the Envoy sidecars. Install the App Mesh Prometheus: @@ -118,7 +114,7 @@ helm repo add flagger https://flagger.app Install Flagger's Canary CRD: ```yaml -kubectl apply -f https://raw.githubusercontent.com/fluxcd/flagger/main/artifacts/flagger/crd.yaml +kubectl apply -f https://raw.githubusercontent.com/weaveworks/flagger/master/artifacts/flagger/crd.yaml ``` Deploy Flagger in the _**appmesh-system**_ namespace: @@ -146,6 +142,5 @@ You can access Grafana using port forwarding: kubectl -n appmesh-system port-forward svc/appmesh-grafana 3000:3000 ``` -Now that you have Flagger running, -you can try the [App Mesh canary deployments tutorial](https://docs.flagger.app/usage/appmesh-progressive-delivery). +Now that you have Flagger running, you can try the [App Mesh canary deployments tutorial](https://docs.flagger.app/usage/appmesh-progressive-delivery). diff --git a/docs/gitbook/install/flagger-install-on-google-cloud.md b/docs/gitbook/install/flagger-install-on-google-cloud.md index a1dc342e7..a3fab0efe 100644 --- a/docs/gitbook/install/flagger-install-on-google-cloud.md +++ b/docs/gitbook/install/flagger-install-on-google-cloud.md @@ -2,7 +2,7 @@ This guide walks you through setting up Flagger and Istio on Google Kubernetes Engine. 
-![GKE Cluster Overview](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/diagrams/flagger-gke-istio.png) +![GKE Cluster Overview](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/diagrams/flagger-gke-istio.png) ## Prerequisites @@ -205,12 +205,12 @@ jetstack/cert-manager ## Istio Gateway TLS setup -![Istio Let's Encrypt](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/diagrams/istio-cert-manager-gke.png) +![Istio Let's Encrypt](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/diagrams/istio-cert-manager-gke.png) Create a generic Istio Gateway to expose services outside the mesh on HTTPS: ```bash -REPO=https://raw.githubusercontent.com/fluxcd/flagger/main +REPO=https://raw.githubusercontent.com/weaveworks/flagger/master kubectl apply -f ${REPO}/artifacts/gke/istio-gateway.yaml ``` @@ -346,7 +346,7 @@ helm repo add flagger https://flagger.app Install Flagger's Canary CRD: ```yaml -kubectl apply -f https://raw.githubusercontent.com/fluxcd/flagger/main/artifacts/flagger/crd.yaml +kubectl apply -f https://raw.githubusercontent.com/weaveworks/flagger/master/artifacts/flagger/crd.yaml ``` Deploy Flagger in the `istio-system` namespace with Slack notifications enabled: diff --git a/docs/gitbook/install/flagger-install-on-kubernetes.md b/docs/gitbook/install/flagger-install-on-kubernetes.md index 09c5c576b..614377169 100644 --- a/docs/gitbook/install/flagger-install-on-kubernetes.md +++ b/docs/gitbook/install/flagger-install-on-kubernetes.md @@ -17,7 +17,7 @@ helm repo add flagger https://flagger.app Install Flagger's Canary CRD: ```yaml -kubectl apply -f https://raw.githubusercontent.com/fluxcd/flagger/main/artifacts/flagger/crd.yaml +kubectl apply -f https://raw.githubusercontent.com/weaveworks/flagger/master/artifacts/flagger/crd.yaml ``` Deploy Flagger for Istio: @@ -30,11 +30,9 @@ helm upgrade -i flagger flagger/flagger \ --set metricsServer=http://prometheus:9090 ``` -Note that Flagger depends on Istio telemetry 
and Prometheus, if you're installing Istio with istioctl -then you should be using the [default profile](https://istio.io/docs/setup/additional-setup/config-profiles/). +Note that Flagger depends on Istio telemetry and Prometheus, if you're installing Istio with istioctl then you should be using the [default profile](https://istio.io/docs/setup/additional-setup/config-profiles/). -For Istio multi-cluster shared control plane you can install Flagger -on each remote cluster and set the Istio control plane host cluster kubeconfig: +For Istio multi-cluster shared control plane you can install Flagger on each remote cluster and set the Istio control plane host cluster kubeconfig: ```bash helm upgrade -i flagger flagger/flagger \ @@ -46,9 +44,7 @@ helm upgrade -i flagger flagger/flagger \ --set istio.kubeconfig.key=kubeconfig ``` -Note that the Istio kubeconfig must be stored in a Kubernetes secret with a data key named `kubeconfig`. -For more details on how to configure Istio multi-cluster credentials -read the [Istio docs](https://istio.io/docs/setup/install/multicluster/shared-vpn/#credentials). +Note that the Istio kubeconfig must be stored in a Kubernetes secret with a data key named `kubeconfig`. For more details on how to configure Istio multi-cluster credentials read the [Istio docs](https://istio.io/docs/setup/install/multicluster/shared-vpn/#credentials). Deploy Flagger for Linkerd: @@ -122,8 +118,7 @@ helm delete flagger The command removes all the Kubernetes components associated with the chart and deletes the release. -> **Note** that on uninstall the Canary CRD will not be removed. Deleting the CRD will make Kubernetes ->remove all the objects owned by Flagger like Istio virtual services, Kubernetes deployments and ClusterIP services. +> **Note** that on uninstall the Canary CRD will not be removed. Deleting the CRD will make Kubernetes remove all the objects owned by Flagger like Istio virtual services, Kubernetes deployments and ClusterIP services. 
If you want to remove all the objects created by Flagger you have delete the Canary CRD with kubectl: @@ -173,13 +168,13 @@ As an alternative to Helm, Flagger can be installed with Kustomize **3.5.0** or Install Flagger for Istio: ```bash -kustomize build https://github.com/fluxcd/flagger/kustomize/istio?ref=main | kubectl apply -f - +kustomize build https://github.com/weaveworks/flagger/kustomize/istio | kubectl apply -f - ``` Install Flagger for AWS App Mesh: ```bash -kustomize build https://github.com/fluxcd/flagger/kustomize/appmesh?ref=main | kubectl apply -f - +kustomize build https://github.com/weaveworks/flagger/kustomize/appmesh | kubectl apply -f - ``` This deploys Flagger and sets the metrics server URL to App Mesh's Prometheus instance. @@ -187,7 +182,7 @@ This deploys Flagger and sets the metrics server URL to App Mesh's Prometheus in Install Flagger for Linkerd: ```bash -kustomize build https://github.com/fluxcd/flagger/kustomize/linkerd?ref=main | kubectl apply -f - +kustomize build https://github.com/weaveworks/flagger/kustomize/linkerd | kubectl apply -f - ``` This deploys Flagger in the `linkerd` namespace and sets the metrics server URL to Linkerd's Prometheus instance. 
@@ -195,7 +190,7 @@ This deploys Flagger in the `linkerd` namespace and sets the metrics server URL If you want to install a specific Flagger release, add the version number to the URL: ```bash -kustomize build https://github.com/fluxcd/flagger/kustomize/linkerd?ref=v1.0.0 | kubectl apply -f - +kustomize build https://github.com/weaveworks/flagger/kustomize/linkerd?ref=v1.0.0 | kubectl apply -f - ``` **Generic installer** @@ -203,14 +198,12 @@ kustomize build https://github.com/fluxcd/flagger/kustomize/linkerd?ref=v1.0.0 | Install Flagger and Prometheus for Contour, Gloo, NGINX, Skipper, or Traefik ingress: ```bash -kustomize build https://github.com/fluxcd/flagger/kustomize/kubernetes?ref=main | kubectl apply -f - +kustomize build https://github.com/weaveworks/flagger/kustomize/kubernetes | kubectl apply -f - ``` -This deploys Flagger and Prometheus in the `flagger-system` namespace, sets the metrics server URL -to `http://flagger-prometheus.flagger-system:9090` and the mesh provider to `kubernetes`. +This deploys Flagger and Prometheus in the `flagger-system` namespace, sets the metrics server URL to `http://flagger-prometheus.flagger-system:9090` and the mesh provider to `kubernetes`. -The Prometheus instance has a two hours data retention and is configured to scrape all pods in your cluster -that have the `prometheus.io/scrape: "true"` annotation. +The Prometheus instance has a two hours data retention and is configured to scrape all pods in your cluster that have the `prometheus.io/scrape: "true"` annotation. To target a different provider you can specify it in the canary custom resource: @@ -234,7 +227,7 @@ Create a kustomization file using Flagger as base and patch the container args: cat > kustomization.yaml < ``` -The alert provider **type** can be: `slack`, `msteams`, `rocket` or `discord`. 
When set to `discord`, -Flagger will use [Slack formatting](https://birdie0.github.io/discord-webhooks-guide/other/slack_formatting.html) -and will append `/slack` to the Discord address. +The alert provider **type** can be: `slack`, `msteams`, `rocket` or `discord`. When set to `discord`, Flagger will use [Slack formatting](https://birdie0.github.io/discord-webhooks-guide/other/slack_formatting.html) and will append `/slack` to the Discord address. When not specified, **channel** defaults to `general` and **username** defaults to `flagger`. -When **secretRef** is specified, the Kubernetes secret must contain a data field named `address`, -the address in the secret will take precedence over the **address** field in the provider spec. +When **secretRef** is specified, the Kubernetes secret must contain a data field named `address`, the address in the secret will take precedence over the **address** field in the provider spec. The canary analysis can have a list of alerts, each alert referencing an alert provider: @@ -103,15 +92,15 @@ The canary analysis can have a list of alerts, each alert referencing an alert p ``` Alert fields: -* **name** (required) -* **severity** levels: `info`, `warn`, `error` (default info) -* **providerRef.name** alert provider name (required) -* **providerRef.namespace** alert provider namespace (defaults to the canary namespace) -When the severity is set to `warn`, Flagger will alert when waiting on manual confirmation or if the analysis fails. -When the severity is set to `error`, Flagger will alert only if the canary analysis fails. +* **name** \(required\) +* **severity** levels: `info`, `warn`, `error` \(default info\) +* **providerRef.name** alert provider name \(required\) +* **providerRef.namespace** alert provider namespace \(defaults to the canary namespace\) -### Prometheus Alert Manager +When the severity is set to `warn`, Flagger will alert when waiting on manual confirmation or if the analysis fails. 
When the severity is set to `error`, Flagger will alert only if the canary analysis fails. + +## Prometheus Alert Manager You can use Alertmanager to trigger alerts when a canary deployment failed: diff --git a/docs/gitbook/usage/deployment-strategies.md b/docs/gitbook/usage/deployment-strategies.md index 059145343..5835cf903 100644 --- a/docs/gitbook/usage/deployment-strategies.md +++ b/docs/gitbook/usage/deployment-strategies.md @@ -1,33 +1,31 @@ # Deployment Strategies Flagger can run automated application analysis, promotion and rollback for the following deployment strategies: -* **Canary Release** (progressive traffic shifting) - * Istio, Linkerd, App Mesh, NGINX, Skipper, Contour, Gloo, Traefik -* **A/B Testing** (HTTP headers and cookies traffic routing) - * Istio, App Mesh, NGINX, Contour -* **Blue/Green** (traffic switching) - * Kubernetes CNI, Istio, Linkerd, App Mesh, NGINX, Contour, Gloo -* **Blue/Green Mirroring** (traffic shadowing) - * Istio - -For Canary releases and A/B testing you'll need a Layer 7 traffic management solution like a service mesh or an ingress controller. -For Blue/Green deployments no service mesh or ingress controller is required. + +* **Canary Release** \(progressive traffic shifting\) + * Istio, Linkerd, App Mesh, NGINX, Skipper, Contour, Gloo, Traefik +* **A/B Testing** \(HTTP headers and cookies traffic routing\) + * Istio, App Mesh, NGINX, Contour +* **Blue/Green** \(traffic switching\) + * Kubernetes CNI, Istio, Linkerd, App Mesh, NGINX, Contour, Gloo +* **Blue/Green Mirroring** \(traffic shadowing\) + * Istio + +For Canary releases and A/B testing you'll need a Layer 7 traffic management solution like a service mesh or an ingress controller. For Blue/Green deployments no service mesh or ingress controller is required. 
A canary analysis is triggered by changes in any of the following objects: -* Deployment PodSpec (container image, command, ports, env, resources, etc) +* Deployment PodSpec \(container image, command, ports, env, resources, etc\) * ConfigMaps mounted as volumes or mapped to environment variables * Secrets mounted as volumes or mapped to environment variables -### Canary Release +## Canary Release -Flagger implements a control loop that gradually shifts traffic to the canary while measuring key performance -indicators like HTTP requests success rate, requests average duration and pod health. -Based on analysis of the KPIs a canary is promoted or aborted. +Flagger implements a control loop that gradually shifts traffic to the canary while measuring key performance indicators like HTTP requests success rate, requests average duration and pod health. Based on analysis of the KPIs a canary is promoted or aborted. -![Flagger Canary Stages](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/diagrams/flagger-canary-steps.png) +![Flagger Canary Stages](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/diagrams/flagger-canary-steps.png) -The canary analysis runs periodically until it reaches the maximum traffic weight or the failed checks threshold. +The canary analysis runs periodically until it reaches the maximum traffic weight or the failed checks threshold. Spec: @@ -51,64 +49,58 @@ Spec: skipAnalysis: false ``` -The above analysis, if it succeeds, will run for 25 minutes while validating the HTTP metrics and webhooks every minute. -You can determine the minimum time it takes to validate and promote a canary deployment using this formula: +The above analysis, if it succeeds, will run for 25 minutes while validating the HTTP metrics and webhooks every minute. 
You can determine the minimum time it takes to validate and promote a canary deployment using this formula: -``` +```text interval * (maxWeight / stepWeight) ``` And the time it takes for a canary to be rollback when the metrics or webhook checks are failing: -``` -interval * threshold +```text +interval * threshold ``` -When `stepWeightPromotion` is specified, the promotion phase happens in stages, -the traffic is routed back to the primary pods in a progressive manner, -the primary weight is increased until it reaches 100%. +When `stepWeightPromotion` is specified, the promotion phase happens in stages, the traffic is routed back to the primary pods in a progressive manner, the primary weight is increased until it reaches 100%. -In emergency cases, you may want to skip the analysis phase and ship changes directly to production. -At any time you can set the `spec.skipAnalysis: true`. -When skip analysis is enabled, Flagger checks if the canary deployment is healthy and -promotes it without analysing it. If an analysis is underway, Flagger cancels it and runs the promotion. +In emergency cases, you may want to skip the analysis phase and ship changes directly to production. At any time you can set the `spec.skipAnalysis: true`. When skip analysis is enabled, Flagger checks if the canary deployment is healthy and promotes it without analysing it. If an analysis is underway, Flagger cancels it and runs the promotion. 
Gated canary promotion stages: * scan for canary deployments * check primary and canary deployment status - * halt advancement if a rolling update is underway - * halt advancement if pods are unhealthy + * halt advancement if a rolling update is underway + * halt advancement if pods are unhealthy * call confirm-rollout webhooks and check results - * halt advancement if any hook returns a non HTTP 2xx result + * halt advancement if any hook returns a non HTTP 2xx result * call pre-rollout webhooks and check results - * halt advancement if any hook returns a non HTTP 2xx result - * increment the failed checks counter -* increase canary traffic weight percentage from 0% to 2% (step weight) + * halt advancement if any hook returns a non HTTP 2xx result + * increment the failed checks counter +* increase canary traffic weight percentage from 0% to 2% \(step weight\) * call rollout webhooks and check results * check canary HTTP request success rate and latency - * halt advancement if any metric is under the specified threshold - * increment the failed checks counter + * halt advancement if any metric is under the specified threshold + * increment the failed checks counter * check if the number of failed checks reached the threshold - * route all traffic to primary - * scale to zero the canary deployment and mark it as failed - * call post-rollout webhooks - * post the analysis result to Slack - * wait for the canary deployment to be updated and start over -* increase canary traffic weight by 2% (step weight) till it reaches 50% (max weight) - * halt advancement if any webhook call fails - * halt advancement while canary request success rate is under the threshold - * halt advancement while canary request duration P99 is over the threshold - * halt advancement while any custom metric check fails - * halt advancement if the primary or canary deployment becomes unhealthy - * halt advancement while canary deployment is being scaled up/down by HPA + * route all traffic to 
primary + * scale to zero the canary deployment and mark it as failed + * call post-rollout webhooks + * post the analysis result to Slack + * wait for the canary deployment to be updated and start over +* increase canary traffic weight by 2% \(step weight\) till it reaches 50% \(max weight\) + * halt advancement if any webhook call fails + * halt advancement while canary request success rate is under the threshold + * halt advancement while canary request duration P99 is over the threshold + * halt advancement while any custom metric check fails + * halt advancement if the primary or canary deployment becomes unhealthy + * halt advancement while canary deployment is being scaled up/down by HPA * call confirm-promotion webhooks and check results - * halt advancement if any hook returns a non HTTP 2xx result + * halt advancement if any hook returns a non HTTP 2xx result * promote canary to primary - * copy ConfigMaps and Secrets from canary to primary - * copy canary deployment spec template over primary + * copy ConfigMaps and Secrets from canary to primary + * copy canary deployment spec template over primary * wait for primary rolling update to finish - * halt advancement if pods are unhealthy + * halt advancement if pods are unhealthy * route all traffic to primary * scale to zero the canary deployment * mark rollout as finished @@ -116,11 +108,12 @@ Gated canary promotion stages: * send notification with the canary analysis result * wait for the canary deployment to be updated and start over -#### Rollout Weights +### Rollout Weights -By default Flagger uses linear weight values for the promotion, with the start value, the step and the maximum weight value in 0 to 100 range. +By default Flagger uses linear weight values for the promotion, with the start value, the step and the maximum weight value in 0 to 100 range. 
Example: + ```yaml canary: analysis: @@ -128,40 +121,44 @@ canary: maxWeight: 50 stepWeight: 20 ``` + This configuration performs analysis starting from 20, increasing by 20 until weight goes above 50. -We would have steps (canary weight : primary weight): -* 20 (20 : 80) -* 40 (40 : 60) -* 60 (60 : 40) +We would have steps \(canary weight : primary weight\): + +* 20 \(20 : 80\) +* 40 \(40 : 60\) +* 60 \(60 : 40\) * promotion In order to enable non-linear promotion a new parameter was introduced: + * `stepWeights` - determines the ordered array of weights, which shall be used during canary promotion. Example: + ```yaml canary: analysis: promotion: stepWeights: [1, 2, 10, 80] ``` + This configuration performs analysis starting from 1, going through `stepWeights` values till 80. -We would have steps (canary weight : primary weight): -* 1 (1 : 99) -* 2 (2 : 98) -* 10 (10 : 90) -* 80 (20 : 60) +We would have steps \(canary weight : primary weight\): + +* 1 \(1 : 99\) +* 2 \(2 : 98\) +* 10 \(10 : 90\) +* 80 \(20 : 60\) * promotion -### A/B Testing +## A/B Testing -For frontend applications that require session affinity you should use HTTP headers or cookies match conditions -to ensure a set of users will stay on the same version for the whole duration of the canary analysis. +For frontend applications that require session affinity you should use HTTP headers or cookies match conditions to ensure a set of users will stay on the same version for the whole duration of the canary analysis. -![Flagger A/B Testing Stages](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/diagrams/flagger-abtest-steps.png) +![Flagger A/B Testing Stages](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/diagrams/flagger-abtest-steps.png) -You can enable A/B testing by specifying the HTTP match conditions and the number of iterations. -If Flagger finds a HTTP match condition, it will ignore the `maxWeight` and `stepWeight` settings. 
+You can enable A/B testing by specifying the HTTP match conditions and the number of iterations. If Flagger finds a HTTP match condition, it will ignore the `maxWeight` and `stepWeight` settings. Istio example: @@ -183,17 +180,16 @@ Istio example: regex: "^(.*?;)?(canary=always)(;.*)?$" ``` -The above configuration will run an analysis for ten minutes targeting the Safari users and those that have a test cookie. -You can determine the minimum time that it takes to validate and promote a canary deployment using this formula: +The above configuration will run an analysis for ten minutes targeting the Safari users and those that have a test cookie. You can determine the minimum time that it takes to validate and promote a canary deployment using this formula: -``` +```text interval * iterations ``` And the time it takes for a canary to be rollback when the metrics or webhook checks are failing: -``` -interval * threshold +```text +interval * threshold ``` Istio example: @@ -214,15 +210,14 @@ Istio example: app.kubernetes.io/name: "scheduler" ``` -The header keys must be lowercase and use hyphen as the separator. -Header values are case-sensitive and formatted as follows: -- `exact: "value"` for exact string match -- `prefix: "value"` for prefix-based match -- `suffix: "value"` for suffix-based match -- `regex: "value"` for [RE2](https://github.com/google/re2/wiki/Syntax) style regex-based match +The header keys must be lowercase and use hyphen as the separator. Header values are case-sensitive and formatted as follows: -Note that the `sourceLabels` match conditions are applicable only when the `mesh` gateway -is included in the `canary.service.gateways` list. 
+* `exact: "value"` for exact string match +* `prefix: "value"` for prefix-based match +* `suffix: "value"` for suffix-based match +* `regex: "value"` for [RE2](https://github.com/google/re2/wiki/Syntax) style regex-based match + +Note that the `sourceLabels` match conditions are applicable only when the `mesh` gateway is included in the `canary.service.gateways` list. App Mesh example: @@ -270,8 +265,7 @@ NGINX example: exact: "canary" ``` -Note that the NGINX ingress controller supports only exact matching for cookies names where the value must be set to `always`. -Starting with NGINX ingress v0.31, regex matching is supported for header values. +Note that the NGINX ingress controller supports only exact matching for cookies names where the value must be set to `always`. Starting with NGINX ingress v0.31, regex matching is supported for header values. The above configurations will route users with the x-canary header or canary cookie to the canary instance during analysis: @@ -280,12 +274,11 @@ curl -H 'X-Canary: insider' http://app.example.com curl -b 'canary=always' http://app.example.com ``` -### Blue/Green Deployments +## Blue/Green Deployments -For applications that are not deployed on a service mesh, Flagger can orchestrate blue/green style deployments -with Kubernetes L4 networking. When using Istio you have the option to mirror traffic between blue and green. +For applications that are not deployed on a service mesh, Flagger can orchestrate blue/green style deployments with Kubernetes L4 networking. When using Istio you have the option to mirror traffic between blue and green. 
-![Flagger Blue/Green Stages](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/diagrams/flagger-bluegreen-steps.png) +![Flagger Blue/Green Stages](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/diagrams/flagger-bluegreen-steps.png) You can use the blue/green deployment strategy by replacing `stepWeight/maxWeight` with `iterations` in the `analysis` spec: @@ -299,42 +292,30 @@ You can use the blue/green deployment strategy by replacing `stepWeight/maxWeigh threshold: 2 ``` -With the above configuration Flagger will run conformance and load tests on the canary pods for ten minutes. -If the metrics analysis succeeds, live traffic will be switched from the old version to the new one when the -canary is promoted. +With the above configuration Flagger will run conformance and load tests on the canary pods for ten minutes. If the metrics analysis succeeds, live traffic will be switched from the old version to the new one when the canary is promoted. The blue/green deployment strategy is supported for all service mesh providers. Blue/Green rollout steps for service mesh: -* detect new revision (deployment spec, secrets or configmaps changes) -* scale up the canary (green) + +* detect new revision \(deployment spec, secrets or configmaps changes\) +* scale up the canary \(green\) * run conformance tests for the canary pods * run load tests and metric checks for the canary pods every minute * abort the canary release if the failure threshold is reached * route traffic to canary -* promote canary spec over primary (blue) +* promote canary spec over primary \(blue\) * wait for primary rollout * route traffic to primary * scale down canary -After the analysis finishes, the traffic is routed to the canary (green) before triggering the primary (blue) -rolling update, this ensures a smooth transition to the new version avoiding dropping in-flight requests during -the Kubernetes deployment rollout. 
+After the analysis finishes, the traffic is routed to the canary \(green\) before triggering the primary \(blue\) rolling update, this ensures a smooth transition to the new version avoiding dropping in-flight requests during the Kubernetes deployment rollout. -### Blue/Green with Traffic Mirroring +## Blue/Green with Traffic Mirroring -Traffic Mirroring is a pre-stage in a Canary (progressive traffic shifting) or -Blue/Green deployment strategy. Traffic mirroring will copy each incoming -request, sending one request to the primary and one to the canary service. -The response from the primary is sent back to the user. The response from the canary -is discarded. Metrics are collected on both requests so that the deployment will -only proceed if the canary metrics are healthy. +Traffic Mirroring is a pre-stage in a Canary \(progressive traffic shifting\) or Blue/Green deployment strategy. Traffic mirroring will copy each incoming request, sending one request to the primary and one to the canary service. The response from the primary is sent back to the user. The response from the canary is discarded. Metrics are collected on both requests so that the deployment will only proceed if the canary metrics are healthy. -Mirroring should be used for requests that are **idempotent** or capable of -being processed twice (once by the primary and once by the canary). Reads are -idempotent. Before using mirroring on requests that may be writes, you should -consider what will happen if a write is duplicated and handled by the primary -and canary. +Mirroring should be used for requests that are **idempotent** or capable of being processed twice \(once by the primary and once by the canary\). Reads are idempotent. Before using mirroring on requests that may be writes, you should consider what will happen if a write is duplicated and handled by the primary and canary. To use mirroring, set `spec.analysis.mirror` to `true`. 
@@ -355,7 +336,8 @@ Istio example: ``` Mirroring rollout steps for service mesh: -* detect new revision (deployment spec, secrets or configmaps changes) + +* detect new revision \(deployment spec, secrets or configmaps changes\) * scale from zero the canary deployment * wait for the HPA to set the canary minimum replicas * check canary pods health @@ -367,7 +349,7 @@ Mirroring rollout steps for service mesh: * abort the canary release if the failure threshold is reached * stop traffic mirroring after the number of iterations is reached * route live traffic to the canary pods -* promote the canary (update the primary secrets, configmaps and deployment spec) +* promote the canary \(update the primary secrets, configmaps and deployment spec\) * wait for the primary deployment rollout to finish * wait for the HPA to set the primary minimum replicas * check primary pods health @@ -375,6 +357,5 @@ Mirroring rollout steps for service mesh: * scale to zero the canary * send notification with the canary analysis result -After the analysis finishes, the traffic is routed to the canary (green) before triggering the primary (blue) -rolling update, this ensures a smooth transition to the new version avoiding dropping in-flight requests during -the Kubernetes deployment rollout. \ No newline at end of file +After the analysis finishes, the traffic is routed to the canary \(green\) before triggering the primary \(blue\) rolling update, this ensures a smooth transition to the new version avoiding dropping in-flight requests during the Kubernetes deployment rollout. + diff --git a/docs/gitbook/usage/how-it-works.md b/docs/gitbook/usage/how-it-works.md index 319a91071..e58f7ad58 100644 --- a/docs/gitbook/usage/how-it-works.md +++ b/docs/gitbook/usage/how-it-works.md @@ -1,12 +1,10 @@ # How it works -[Flagger](https://github.com/fluxcd/flagger) can be configured to automate the release process -for Kubernetes workloads with a custom resource named canary. 
+[Flagger](https://github.com/weaveworks/flagger) can be configured to automate the release process for Kubernetes workloads with a custom resource named canary. -### Canary resource +## Canary resource -The canary custom resource defines the release process of an application running on Kubernetes -and is portable across clusters, service meshes and ingress providers. +The canary custom resource defines the release process of an application running on Kubernetes and is portable across clusters, service meshes and ingress providers. For a deployment named _podinfo_, a canary release with progressive traffic shifting can be defined as: @@ -43,15 +41,11 @@ spec: cmd: "hey -z 1m -q 10 -c 2 http://podinfo-canary.test:9898/" ``` -When you deploy a new version of an app, Flagger gradually shifts traffic to the canary, -and at the same time, measures the requests success rate as well as the average response duration. -You can extend the canary analysis with custom metrics, acceptance and load testing -to harden the validation process of your app release process. +When you deploy a new version of an app, Flagger gradually shifts traffic to the canary, and at the same time, measures the requests success rate as well as the average response duration. You can extend the canary analysis with custom metrics, acceptance and load testing to harden the validation process of your app release process. -If you are running multiple service meshes or ingress controllers in the same cluster, -you can override the global provider for a specific canary with `spec.provider`. +If you are running multiple service meshes or ingress controllers in the same cluster, you can override the global provider for a specific canary with `spec.provider`. -### Canary target +## Canary target A canary resource can target a Kubernetes Deployment or DaemonSet. 
@@ -75,10 +69,7 @@ Based on the above configuration, Flagger generates the following Kubernetes obj * `deployment/-primary` * `hpa/-primary` -The primary deployment is considered the stable release of your app, by default all traffic is routed to this version -and the target deployment is scaled to zero. -Flagger will detect changes to the target deployment (including secrets and configmaps) and will perform a -canary analysis before promoting the new version as primary. +The primary deployment is considered the stable release of your app, by default all traffic is routed to this version and the target deployment is scaled to zero. Flagger will detect changes to the target deployment \(including secrets and configmaps\) and will perform a canary analysis before promoting the new version as primary. **Note** that the target deployment must have a single label selector in the format `app: `: @@ -97,30 +88,17 @@ spec: app: podinfo ``` -In addition to `app`, Flagger supports `name` and `app.kubernetes.io/name` selectors. -If you use a different convention you can specify your label with -the `-selector-labels=my-app-label` command flag in the Flagger deployment manifest under containers args -or by setting `--set selectorLabels=my-app-label` when installing Flagger with Helm. +In addition to `app`, Flagger supports `name` and `app.kubernetes.io/name` selectors. If you use a different convention you can specify your label with the `-selector-labels=my-app-label` command flag in the Flagger deployment manifest under containers args or by setting `--set selectorLabels=my-app-label` when installing Flagger with Helm. -If the target deployment uses secrets and/or configmaps, Flagger will create a copy of each object using the `-primary` -suffix and will reference these objects in the primary deployment. 
If you annotate your ConfigMap or Secret with -`flagger.app/config-tracking: disabled`, Flagger will use the same object for the primary deployment instead of making -a primary copy. -You can disable the secrets/configmaps tracking globally with the `-enable-config-tracking=false` command flag in -the Flagger deployment manifest under containers args or by setting `--set configTracking.enabled=false` when -installing Flagger with Helm, but disabling config-tracking using the the per Secret/ConfigMap annotation may fit your -use-case better. +If the target deployment uses secrets and/or configmaps, Flagger will create a copy of each object using the `-primary` suffix and will reference these objects in the primary deployment. If you annotate your ConfigMap or Secret with `flagger.app/config-tracking: disabled`, Flagger will use the same object for the primary deployment instead of making a primary copy. You can disable the secrets/configmaps tracking globally with the `-enable-config-tracking=false` command flag in the Flagger deployment manifest under containers args or by setting `--set configTracking.enabled=false` when installing Flagger with Helm, but disabling config-tracking using the the per Secret/ConfigMap annotation may fit your use-case better. -The autoscaler reference is optional, when specified, Flagger will pause the traffic increase while the -target and primary deployments are scaled up or down. HPA can help reduce the resource usage during the canary analysis. +The autoscaler reference is optional, when specified, Flagger will pause the traffic increase while the target and primary deployments are scaled up or down. HPA can help reduce the resource usage during the canary analysis. -The progress deadline represents the maximum time in seconds for the canary deployment to make progress -before it is rolled back, defaults to ten minutes. 
+The progress deadline represents the maximum time in seconds for the canary deployment to make progress before it is rolled back, defaults to ten minutes. -### Canary service +## Canary service -A canary resource dictates how the target workload is exposed inside the cluster. -The canary target should expose a TCP port that will be used by Flagger to create the ClusterIP Services. +A canary resource dictates how the target workload is exposed inside the cluster. The canary target should expose a TCP port that will be used by Flagger to create the ClusterIP Services. ```yaml spec: @@ -132,27 +110,25 @@ spec: portDiscovery: true ``` -The container port from the target workload should match the `service.port` or `service.targetPort`. -The `service.name` is optional, defaults to `spec.targetRef.name`. -The `service.targetPort` can be a container port number or name. -The `service.portName` is optional (defaults to `http`), if your workload uses gRPC then set the port name to `grpc`. +The container port from the target workload should match the `service.port` or `service.targetPort`. The `service.name` is optional, defaults to `spec.targetRef.name`. The `service.targetPort` can be a container port number or name. The `service.portName` is optional \(defaults to `http`\), if your workload uses gRPC then set the port name to `grpc`. -If port discovery is enabled, Flagger scans the target workload and extracts the containers -ports excluding the port specified in the canary service and service mesh sidecar ports. -These ports will be used when generating the ClusterIP services. +If port discovery is enabled, Flagger scans the target workload and extracts the containers ports excluding the port specified in the canary service and service mesh sidecar ports. These ports will be used when generating the ClusterIP services. 
Based on the canary spec service, Flagger creates the following Kubernetes ClusterIP service: * `..svc.cluster.local` + selector `app=-primary` + * `-primary..svc.cluster.local` + selector `app=-primary` + * `-canary..svc.cluster.local` + selector `app=` -This ensures that traffic to `podinfo.test:9898` will be routed to the latest stable release of your app. -The `podinfo-canary.test:9898` address is available only during the -canary analysis and can be used for conformance testing or load testing. +This ensures that traffic to `podinfo.test:9898` will be routed to the latest stable release of your app. The `podinfo-canary.test:9898` address is available only during the canary analysis and can be used for conformance testing or load testing. You can configure Flagger to set annotations and labels for the generated services with: @@ -177,8 +153,7 @@ spec: test: "test" ``` -Besides port mapping and metadata, the service specification can contain URI match and rewrite rules, -timeout and retry polices: +Besides port mapping and metadata, the service specification can contain URI match and rewrite rules, timeout and retry polices: ```yaml spec: @@ -195,13 +170,11 @@ spec: timeout: 5s ``` -When using **Istio** as the mesh provider, you can also specify -HTTP header operations, CORS and traffic policies, Istio gateways and hosts. -The Istio routing configuration can be found [here](../faq.md#istio-routing). - -### Canary status +When using **Istio** as the mesh provider, you can also specify HTTP header operations, CORS and traffic policies, Istio gateways and hosts. The Istio routing configuration can be found [here](../faq.md#istio-routing). 
+ +## Canary status -You can use kubectl to get the current status of canary deployments cluster wide: +You can use kubectl to get the current status of canary deployments cluster wide: ```bash kubectl get canaries --all-namespaces @@ -236,10 +209,7 @@ status: type: Promoted ``` -The `Promoted` status condition can have one of the following reasons: -Initialized, Waiting, Progressing, Promoting, Finalising, Succeeded or Failed. -A failed canary will have the promoted status set to `false`, -the reason to `failed` and the last applied spec will be different to the last promoted one. +The `Promoted` status condition can have one of the following reasons: Initialized, Waiting, Progressing, Promoting, Finalising, Succeeded or Failed. A failed canary will have the promoted status set to `false`, the reason to `failed` and the last applied spec will be different to the last promoted one. Wait for a successful rollout: @@ -267,14 +237,9 @@ kubectl wait canary/podinfo --for=condition=promoted --timeout=5m kubectl get canary/podinfo | grep Succeeded ``` -### Canary finalizers +## Canary finalizers -The default behavior of Flagger on canary deletion is to leave resources that aren't owned by the controller -in their current state. This simplifies the deletion action and avoids possible deadlocks during resource -finalization. In the event the canary was introduced with existing resource(s) (i.e. service, virtual service, etc.), -they would be mutated during the initialization phase and no longer reflect their initial state. If the desired -functionality upon deletion is to revert the resources to their initial state, the `revertOnDeletion` attribute -can be enabled. +The default behavior of Flagger on canary deletion is to leave resources that aren't owned by the controller in their current state. This simplifies the deletion action and avoids possible deadlocks during resource finalization. In the event the canary was introduced with existing resource\(s\) \(i.e. 
service, virtual service, etc.\), they would be mutated during the initialization phase and no longer reflect their initial state. If the desired functionality upon deletion is to revert the resources to their initial state, the `revertOnDeletion` attribute can be enabled. ```yaml spec: @@ -283,19 +248,18 @@ spec: When a deletion action is submitted to the cluster, Flagger will attempt to revert the following resources: -* [Canary target](#canary-target) replicas will be updated to the primary replica count -* [Canary service](#canary-service) selector will be reverted +* [Canary target](how-it-works.md#canary-target) replicas will be updated to the primary replica count +* [Canary service](how-it-works.md#canary-service) selector will be reverted * Mesh/Ingress traffic routed to the target -The recommended approach to disable canary analysis would be utilization of the `skipAnalysis` -attribute, which limits the need for resource reconciliation. Utilizing the `revertOnDeletion` attribute should be -enabled when you no longer plan to rely on Flagger for deployment management. +The recommended approach to disable canary analysis would be utilization of the `skipAnalysis` attribute, which limits the need for resource reconciliation. Utilizing the `revertOnDeletion` attribute should be enabled when you no longer plan to rely on Flagger for deployment management. -**Note** When this feature is enabled expect a delay in the delete action due to the reconciliation. +**Note** When this feature is enabled expect a delay in the delete action due to the reconciliation. 
-### Canary analysis +## Canary analysis The canary analysis defines: + * the type of [deployment strategy](deployment-strategies.md) * the [metrics](metrics.md) used to validate the canary version * the [webhooks](webhooks.md) used for conformance testing, load testing and manual gating @@ -336,6 +300,5 @@ Spec: - # hook ``` -The canary analysis runs periodically until it reaches the maximum traffic weight or the number of iterations. -On each run, Flagger calls the webhooks, checks the metrics and if the failed checks threshold is reached, stops the -analysis and rolls back the canary. If alerting is configured, Flagger will post the analysis result using the alert providers. +The canary analysis runs periodically until it reaches the maximum traffic weight or the number of iterations. On each run, Flagger calls the webhooks, checks the metrics and if the failed checks threshold is reached, stops the analysis and rolls back the canary. If alerting is configured, Flagger will post the analysis result using the alert providers. + diff --git a/docs/gitbook/usage/metrics.md b/docs/gitbook/usage/metrics.md index 8c9e531d9..499695049 100644 --- a/docs/gitbook/usage/metrics.md +++ b/docs/gitbook/usage/metrics.md @@ -1,11 +1,8 @@ # Metrics Analysis -As part of the analysis process, Flagger can validate service level objectives (SLOs) like -availability, error rate percentage, average response time and any other objective based on app specific metrics. -If a drop in performance is noticed during the SLOs analysis, -the release will be automatically rolled back with minimum impact to end-users. +As part of the analysis process, Flagger can validate service level objectives \(SLOs\) like availability, error rate percentage, average response time and any other objective based on app specific metrics. If a drop in performance is noticed during the SLOs analysis, the release will be automatically rolled back with minimum impact to end-users. 
-### Builtin metrics +## Builtin metrics Flagger comes with two builtin metric checks: HTTP request success rate and duration. @@ -26,16 +23,11 @@ Flagger comes with two builtin metric checks: HTTP request success rate and dura max: 500 ``` -For each metric you can specify a range of accepted values with `thresholdRange` -and the window size or the time series with `interval`. -The builtin checks are available for every service mesh / ingress controller -and are implemented with [Prometheus queries](../faq.md#metrics). +For each metric you can specify a range of accepted values with `thresholdRange` and the window size or the time series with `interval`. The builtin checks are available for every service mesh / ingress controller and are implemented with [Prometheus queries](../faq.md#metrics). -### Custom metrics +## Custom metrics -The canary analysis can be extended with custom metric checks. Using a `MetricTemplate` custom resource, you -configure Flagger to connect to a metric provider and run a query that returns a `float64` value. -The query result is used to validate the canary based on the specified threshold range. +The canary analysis can be extended with custom metric checks. Using a `MetricTemplate` custom resource, you configure Flagger to connect to a metric provider and run a query that returns a `float64` value. The query result is used to validate the canary based on the specified threshold range. 
```yaml apiVersion: flagger.app/v1beta1 @@ -53,12 +45,12 @@ spec: The following variables are available in query templates: -- `name` (canary.metadata.name) -- `namespace` (canary.metadata.namespace) -- `target` (canary.spec.targetRef.name) -- `service` (canary.spec.service.name) -- `ingress` (canary.spec.ingresRef.name) -- `interval` (canary.spec.analysis.metrics[].interval) +* `name` \(canary.metadata.name\) +* `namespace` \(canary.metadata.namespace\) +* `target` \(canary.spec.targetRef.name\) +* `service` \(canary.spec.service.name\) +* `ingress` \(canary.spec.ingresRef.name\) +* `interval` \(canary.spec.analysis.metrics\[\].interval\) A canary analysis metric can reference a template with `templateRef`: @@ -79,10 +71,9 @@ A canary analysis metric can reference a template with `templateRef`: interval: 1m ``` -### Prometheus +## Prometheus -You can create custom metric checks targeting a Prometheus server -by setting the provider type to `prometheus` and writing the query in PromQL. +You can create custom metric checks targeting a Prometheus server by setting the provider type to `prometheus` and writing the query in PromQL. Prometheus template example: @@ -133,9 +124,7 @@ Reference the template in the canary analysis: interval: 1m ``` -The above configuration validates the canary by checking -if the HTTP 404 req/sec percentage is below 5 percent of the total traffic. -If the 404s rate reaches the 5% threshold, then the canary fails. +The above configuration validates the canary by checking if the HTTP 404 req/sec percentage is below 5 percent of the total traffic. If the 404s rate reaches the 5% threshold, then the canary fails. Prometheus gRPC error rate example: @@ -172,10 +161,9 @@ spec: The above template is for gRPC services instrumented with [go-grpc-prometheus](https://github.com/grpc-ecosystem/go-grpc-prometheus). 
-### Prometheus authentication +## Prometheus authentication -If your Prometheus API requires basic authentication, you can create a secret -in the same namespace as the `MetricTemplate` with the basic-auth credentials: +If your Prometheus API requires basic authentication, you can create a secret in the same namespace as the `MetricTemplate` with the basic-auth credentials: ```yaml apiVersion: v1 @@ -204,7 +192,7 @@ spec: name: prom-basic-auth ``` -### Datadog +## Datadog You can create custom metric checks using the Datadog provider. @@ -266,8 +254,7 @@ Reference the template in the canary analysis: interval: 1m ``` - -### Amazon CloudWatch +## Amazon CloudWatch You can create custom metric checks using the CloudWatch metrics provider. @@ -347,7 +334,7 @@ Reference the template in the canary analysis: **Note** that Flagger need AWS IAM permission to perform `cloudwatch:GetMetricData` to use this provider. -### New Relic +## New Relic You can create custom metric checks using the New Relic provider. @@ -399,3 +386,4 @@ Reference the template in the canary analysis: max: 5 interval: 1m ``` + diff --git a/docs/gitbook/usage/monitoring.md b/docs/gitbook/usage/monitoring.md index 209003ba4..15dc10e7e 100644 --- a/docs/gitbook/usage/monitoring.md +++ b/docs/gitbook/usage/monitoring.md @@ -12,7 +12,7 @@ helm upgrade -i flagger-grafana flagger/grafana \ The dashboard shows the RED and USE metrics for the primary and canary workloads: -![Canary Dashboard](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/screens/grafana-canary-analysis.png) +![Canary Dashboard](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/screens/grafana-canary-analysis.png) ## Logging diff --git a/docs/gitbook/usage/webhooks.md b/docs/gitbook/usage/webhooks.md index fa0233bcd..24c0939ea 100644 --- a/docs/gitbook/usage/webhooks.md +++ b/docs/gitbook/usage/webhooks.md @@ -1,26 +1,42 @@ # Webhooks -The canary analysis can be extended with webhooks. 
Flagger will call each webhook URL and -determine from the response status code (HTTP 2xx) if the canary is failing or not. +The canary analysis can be extended with webhooks. Flagger will call each webhook URL and determine from the response status code \(HTTP 2xx\) if the canary is failing or not. There are several types of hooks: + * **confirm-rollout** hooks are executed before scaling up the canary deployment and can be used for manual approval. -The rollout is paused until the hook returns a successful HTTP status code. + + The rollout is paused until the hook returns a successful HTTP status code. + * **pre-rollout** hooks are executed before routing traffic to canary. -The canary advancement is paused if a pre-rollout hook fails and if the number of failures reach the -threshold the canary will be rollback. + + The canary advancement is paused if a pre-rollout hook fails and if the number of failures reach the + + threshold the canary will be rollback. + * **rollout** hooks are executed during the analysis on each iteration before the metric checks. -If a rollout hook call fails the canary advancement is paused and eventfully rolled back. + + If a rollout hook call fails the canary advancement is paused and eventfully rolled back. + * **confirm-promotion** hooks are executed before the promotion step. -The canary promotion is paused until the hooks return HTTP 200. -While the promotion is paused, Flagger will continue to run the metrics checks and rollout hooks. + + The canary promotion is paused until the hooks return HTTP 200. + + While the promotion is paused, Flagger will continue to run the metrics checks and rollout hooks. + * **post-rollout** hooks are executed after the canary has been promoted or rolled back. -If a post rollout hook fails the error is logged. + + If a post rollout hook fails the error is logged. + * **rollback** hooks are executed while a canary deployment is in either Progressing or Waiting status. 
-This provides the ability to rollback during analysis or while waiting for a confirmation. If a rollback hook -returns a successful HTTP status code, Flagger will stop the analysis and mark the canary release as failed. + + This provides the ability to rollback during analysis or while waiting for a confirmation. If a rollback hook + + returns a successful HTTP status code, Flagger will stop the analysis and mark the canary release as failed. + * **event** hooks are executed every time Flagger emits a Kubernetes event. When configured, -every action that Flagger takes during a canary deployment will be sent as JSON via an HTTP POST request. + + every action that Flagger takes during a canary deployment will be sent as JSON via an HTTP POST request. Spec: @@ -60,11 +76,11 @@ Spec: url: http://event-recevier.notifications/slack ``` -> **Note** that the sum of all rollout webhooks timeouts should be lower than the analysis interval. +> **Note** that the sum of all rollout webhooks timeouts should be lower than the analysis interval. -Webhook payload (HTTP POST): +Webhook payload \(HTTP POST\): -```json +```javascript { "name": "podinfo", "namespace": "test", @@ -81,11 +97,11 @@ Response status codes: * 200-202 - advance canary by increasing the traffic weight * timeout or non-2xx - halt advancement and increment failed checks -On a non-2xx response Flagger will include the response body (if any) in the failed checks log and Kubernetes events. +On a non-2xx response Flagger will include the response body \(if any\) in the failed checks log and Kubernetes events. -Event payload (HTTP POST): +Event payload \(HTTP POST\): -```json +```javascript { "name": "string (canary name)", "namespace": "string (canary namespace)", @@ -98,25 +114,20 @@ Event payload (HTTP POST): } ``` -The event receiver can create alerts based on the received phase -(possible values: ` Initialized`, `Waiting`, `Progressing`, `Promoting`, `Finalising`, `Succeeded` or `Failed`). 
+The event receiver can create alerts based on the received phase \(possible values: `Initialized`, `Waiting`, `Progressing`, `Promoting`, `Finalising`, `Succeeded` or `Failed`\). -### Load Testing +## Load Testing -For workloads that are not receiving constant traffic Flagger can be configured with a webhook, -that when called, will start a load test for the target workload. -If the target workload doesn't receive any traffic during the canary analysis, -Flagger metric checks will fail with "no values found for metric request-success-rate". +For workloads that are not receiving constant traffic Flagger can be configured with a webhook, that when called, will start a load test for the target workload. If the target workload doesn't receive any traffic during the canary analysis, Flagger metric checks will fail with "no values found for metric request-success-rate". -Flagger comes with a load testing service based on [rakyll/hey](https://github.com/rakyll/hey) -that generates traffic during analysis when configured as a webhook. +Flagger comes with a load testing service based on [rakyll/hey](https://github.com/rakyll/hey) that generates traffic during analysis when configured as a webhook. -![Flagger Load Testing Webhook](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/diagrams/flagger-load-testing.png) +![Flagger Load Testing Webhook](https://raw.githubusercontent.com/weaveworks/flagger/master/docs/diagrams/flagger-load-testing.png) First you need to deploy the load test runner in a namespace with sidecar injection enabled: ```bash -kubectl apply -k github.com/fluxcd/flagger//kustomize/tester +kubectl apply -k github.com/weaveworks/flagger//kustomize/tester ``` Or by using Helm: @@ -129,7 +140,7 @@ helm upgrade -i flagger-loadtester flagger/loadtester \ --set cmd.timeout=1h ``` -When deployed the load tester API will be available at `http://flagger-loadtester.test/`. 
+When deployed the load tester API will be available at `http://flagger-loadtester.test/`. Now you can add webhooks to the canary analysis spec: @@ -149,12 +160,9 @@ webhooks: cmd: "hey -z 1m -q 10 -c 2 -m POST -d '{test: 2}' http://podinfo-canary.test:9898/echo" ``` -When the canary analysis starts, Flagger will call the webhooks and the load tester will run the `hey` commands -in the background, if they are not already running. This will ensure that during the -analysis, the `podinfo-canary.test` service will receive a steady stream of GET and POST requests. +When the canary analysis starts, Flagger will call the webhooks and the load tester will run the `hey` commands in the background, if they are not already running. This will ensure that during the analysis, the `podinfo-canary.test` service will receive a steady stream of GET and POST requests. -If your workload is exposed outside the mesh you can point `hey` to the -public URL and use HTTP2. +If your workload is exposed outside the mesh you can point `hey` to the public URL and use HTTP2. ```yaml webhooks: @@ -190,20 +198,18 @@ webhooks: cmd: "ghz --insecure --proto=/tmp/ghz/health.proto --call=grpc.health.v1.Health/Check podinfo.test:9898" ``` -The load tester can run arbitrary commands as long as the binary is present in the container image. -For example if you you want to replace `hey` with another CLI, you can create your own Docker image: +The load tester can run arbitrary commands as long as the binary is present in the container image. 
For example if you you want to replace `hey` with another CLI, you can create your own Docker image: -```dockerfile +```text FROM weaveworks/flagger-loadtester: RUN curl -Lo /usr/local/bin/my-cli https://github.com/user/repo/releases/download/ver/my-cli \ && chmod +x /usr/local/bin/my-cli ``` -### Load Testing Delegation +## Load Testing Delegation -The load tester can also forward testing tasks to external tools, by now [nGrinder](https://github.com/naver/ngrinder) -is supported. +The load tester can also forward testing tasks to external tools, by now [nGrinder](https://github.com/naver/ngrinder) is supported. To use this feature, add a load test task of type 'ngrinder' to the canary analysis spec: @@ -225,11 +231,10 @@ webhooks: # the interval between between nGrinder test status polling, default to 1s pollInterval: 5s ``` -When the canary analysis starts, the load tester will initiate a [clone_and_start request](https://github.com/naver/ngrinder/wiki/REST-API-PerfTest) -to the nGrinder server and start a new performance test. the load tester will periodically poll the nGrinder server -for the status of the test, and prevent duplicate requests from being sent in subsequent analysis loops. -### Integration Testing +When the canary analysis starts, the load tester will initiate a [clone\_and\_start request](https://github.com/naver/ngrinder/wiki/REST-API-PerfTest) to the nGrinder server and start a new performance test. the load tester will periodically poll the nGrinder server for the status of the test, and prevent duplicate requests from being sent in subsequent analysis loops. + +## Integration Testing Flagger comes with a testing service that can run Helm tests, Bats tests or Concord tests when configured as a webhook. @@ -243,7 +248,7 @@ helm upgrade -i flagger-helmtester flagger/loadtester \ --set serviceAccountName=tiller ``` -When deployed the Helm tester API will be available at `http://flagger-helmtester.kube-system/`. 
+When deployed the Helm tester API will be available at `http://flagger-helmtester.kube-system/`. Now you can add pre-rollout webhooks to the canary analysis spec: @@ -259,8 +264,7 @@ Now you can add pre-rollout webhooks to the canary analysis spec: cmd: "test {{ .Release.Name }} --cleanup" ``` -When the canary analysis starts, Flagger will call the pre-rollout webhooks before routing traffic to the canary. -If the helm test fails, Flagger will retry until the analysis threshold is reached and the canary is rolled back. +When the canary analysis starts, Flagger will call the pre-rollout webhooks before routing traffic to the canary. If the helm test fails, Flagger will retry until the analysis threshold is reached and the canary is rolled back. If you are using Helm v3, you'll have to create a dedicated service account and add the release namespace to the test command: @@ -276,10 +280,9 @@ If you are using Helm v3, you'll have to create a dedicated service account and cmd: "test {{ .Release.Name }} --timeout 3m -n {{ .Release.Namespace }}" ``` -If the test hangs or logs error messages hinting to insufficient permissions it can be related to RBAC, -check the [Troubleshooting](#Troubleshooting) section for an example configuration. +If the test hangs or logs error messages hinting to insufficient permissions it can be related to RBAC, check the [Troubleshooting](webhooks.md#Troubleshooting) section for an example configuration. -As an alternative to Helm you can use the [Bash Automated Testing System](https://github.com/bats-core/bats-core) to run your tests. +As an alternative to Helm you can use the [Bash Automated Testing System](https://github.com/bats-core/bats-core) to run your tests. ```yaml analysis: @@ -316,21 +319,13 @@ You can also configure the test runner to start a [Concord](https://concord.walm pollTimeout: "60" ``` -`org`, `project`, `repo` and `entrypoint` represents where your test process runs in Concord. 
-In order to authenticate to Concord, you need to set `apiKeyPath` to a path of a file containing a valid Concord API key - on the `flagger-helmtester` container. This can be done via mounting a Kubernetes secret in the tester's Deployment. -`pollInterval` represents the interval in seconds the web-hook will call Concord to see if the process has finished (Default is 5s). -`pollTimeout` represents the time in seconds the web-hook will try to call Concord before timing out (Default is 30s). +`org`, `project`, `repo` and `entrypoint` represents where your test process runs in Concord. In order to authenticate to Concord, you need to set `apiKeyPath` to a path of a file containing a valid Concord API key on the `flagger-helmtester` container. This can be done via mounting a Kubernetes secret in the tester's Deployment. `pollInterval` represents the interval in seconds the web-hook will call Concord to see if the process has finished \(Default is 5s\). `pollTimeout` represents the time in seconds the web-hook will try to call Concord before timing out \(Default is 30s\). -### Manual Gating +## Manual Gating -For manual approval of a canary deployment you can use the `confirm-rollout` and `confirm-promotion` webhooks. -The confirmation rollout hooks are executed before the pre-rollout hooks. -Flagger will halt the canary traffic shifting and analysis until the confirm webhook returns HTTP status 200. +For manual approval of a canary deployment you can use the `confirm-rollout` and `confirm-promotion` webhooks. The confirmation rollout hooks are executed before the pre-rollout hooks. Flagger will halt the canary traffic shifting and analysis until the confirm webhook returns HTTP status 200. -For manual rollback of a canary deployment you can use the `rollback` webhook. The rollback hook will be called -during the analysis and confirmation states. 
If a rollback webhook returns a successful HTTP status code, Flagger -will shift all traffic back to the primary instance and fail the canary. +For manual rollback of a canary deployment you can use the `rollback` webhook. The rollback hook will be called during the analysis and confirmation states. If a rollback webhook returns a successful HTTP status code, Flagger will shift all traffic back to the primary instance and fail the canary. Manual gating with Flagger's tester: @@ -342,7 +337,7 @@ Manual gating with Flagger's tester: url: http://flagger-loadtester.test/gate/halt ``` -The `/gate/halt` returns HTTP 403 thus blocking the rollout. +The `/gate/halt` returns HTTP 403 thus blocking the rollout. If you have notifications enabled, Flagger will post a message to Slack or MS Teams if a canary rollout is waiting for approval. @@ -371,13 +366,13 @@ By default the gate is closed, you can start or resume the canary rollout with: ```bash kubectl -n test exec -it flagger-loadtester-xxxx-xxxx sh -curl -d '{"name": "podinfo","namespace":"test"}' http://localhost:8080/gate/open +curl -d '{"name": "podinfo","namespace":"test"}' http://localhost:8080/gate/open ``` You can pause the rollout at any time with: ```bash -curl -d '{"name": "podinfo","namespace":"test"}' http://localhost:8080/gate/close +curl -d '{"name": "podinfo","namespace":"test"}' http://localhost:8080/gate/close ``` If a canary analysis is paused the status will change to waiting: @@ -389,8 +384,7 @@ NAME STATUS WEIGHT podinfo Waiting 0 ``` -The `confirm-promotion` hook type can be used to manually approve the canary promotion. -While the promotion is paused, Flagger will continue to run the metrics checks and load tests. +The `confirm-promotion` hook type can be used to manually approve the canary promotion. While the promotion is paused, Flagger will continue to run the metrics checks and load tests. 
```yaml analysis: @@ -400,8 +394,7 @@ While the promotion is paused, Flagger will continue to run the metrics checks a url: http://flagger-loadtester.test/gate/halt ``` -The `rollback` hook type can be used to manually rollback the canary promotion. As with gating, rollbacks can be driven -with Flagger's tester API by setting the rollback URL to `/rollback/check` +The `rollback` hook type can be used to manually rollback the canary promotion. As with gating, rollbacks can be driven with Flagger's tester API by setting the rollback URL to `/rollback/check` ```yaml analysis: @@ -409,35 +402,36 @@ with Flagger's tester API by setting the rollback URL to `/rollback/check` - name: "rollback" type: rollback url: http://flagger-loadtester.test/rollback/check -``` +``` By default rollback is closed, you can rollback a canary rollout with: ```bash kubectl -n test exec -it flagger-loadtester-xxxx-xxxx sh -curl -d '{"name": "podinfo","namespace":"test"}' http://localhost:8080/rollback/open +curl -d '{"name": "podinfo","namespace":"test"}' http://localhost:8080/rollback/open ``` You can close the rollback with: ```bash -curl -d '{"name": "podinfo","namespace":"test"}' http://localhost:8080/rollback/close +curl -d '{"name": "podinfo","namespace":"test"}' http://localhost:8080/rollback/close ``` If you have notifications enabled, Flagger will post a message to Slack or MS Teams if a canary has been rolled back. -### Troubleshooting +## Troubleshooting -#### Manually check if helm test is running +### Manually check if helm test is running To debug in depth any issues with helm tests, you can execute commands on the flagger-loadtester pod. + ```bash kubectl exec -it deploy/flagger-loadtester -- bash helmv3 test -n --debug ``` -#### Helm tests hang during canary deployment +### Helm tests hang during canary deployment If test execution hangs or displays insufficient permissions, check your RBAC settings. 
@@ -474,3 +468,4 @@ roleRef: name: helm-smoke-tester apiGroup: rbac.authorization.k8s.io ``` + diff --git a/kustomize/base/flagger/deployment.yaml b/kustomize/base/flagger/deployment.yaml index f88be5875..f0bd8b133 100644 --- a/kustomize/base/flagger/deployment.yaml +++ b/kustomize/base/flagger/deployment.yaml @@ -20,7 +20,7 @@ spec: serviceAccountName: flagger containers: - name: flagger - image: weaveworks/flagger:1.0.0 + image: ghcr.io/fluxcd/flagger:1.0.0 imagePullPolicy: IfNotPresent ports: - name: http diff --git a/kustomize/tester/deployment.yaml b/kustomize/tester/deployment.yaml index b25e8150b..18e276e0e 100644 --- a/kustomize/tester/deployment.yaml +++ b/kustomize/tester/deployment.yaml @@ -18,7 +18,7 @@ spec: spec: containers: - name: loadtester - image: weaveworks/flagger-loadtester:0.18.0 + image: ghcr.io/fluxcd/flagger-loadtester:0.18.0 imagePullPolicy: IfNotPresent ports: - name: http