fluxcd · stefanprodan · Mar 11, 2019 · Mar 8, 2019 · Mar 8, 2019 · Mar 8, 2019
diff --git a/README.md b/README.md
@@ -29,6 +29,7 @@ Flagger documentation can be found at [docs.flagger.app](https://docs.flagger.ap
     * [Routing](https://docs.flagger.app/how-it-works#istio-routing)
     * [Canary deployment stages](https://docs.flagger.app/how-it-works#canary-deployment)
     * [Canary analysis](https://docs.flagger.app/how-it-works#canary-analysis)
+    * [A/B testing](https://docs.flagger.app/how-it-works#ab-testing)
     * [HTTP metrics](https://docs.flagger.app/how-it-works#http-metrics)
     * [Custom metrics](https://docs.flagger.app/how-it-works#custom-metrics)
     * [Webhooks](https://docs.flagger.app/how-it-works#webhooks)
@@ -167,7 +168,6 @@ For more details on how the canary analysis and promotion works please [read the
 
 ### Roadmap
 
-* Add A/B testing capabilities using fixed routing based on HTTP headers and cookies match conditions
 * Integrate with other service mesh technologies like AWS AppMesh and Linkerd v2
 * Add support for comparing the canary metrics to the primary ones and do the validation based on the derivation between the two
 

diff --git a/artifacts/ab-testing/canary.yaml b/artifacts/ab-testing/canary.yaml
@@ -0,0 +1,61 @@
+apiVersion: flagger.app/v1alpha3
+kind: Canary
+metadata:
+  name: abtest
+  namespace: test
+spec:
+  # deployment reference
+  targetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: abtest
+  # the maximum time in seconds for the canary deployment
+  # to make progress before it is rolled back (default 600s)
+  progressDeadlineSeconds: 60
+  # HPA reference (optional)
+  autoscalerRef:
+    apiVersion: autoscaling/v2beta1
+    kind: HorizontalPodAutoscaler
+    name: abtest
+  service:
+    # container port
+    port: 9898
+    # Istio gateways (optional)
+    gateways:
+    - public-gateway.istio-system.svc.cluster.local
+    # Istio virtual service host names (optional)
+    hosts:
+    - abtest.istio.weavedx.com
+  canaryAnalysis:
+    # schedule interval (default 60s)
+    interval: 10s
+    # max number of failed metric checks before rollback
+    threshold: 10
+    # total number of iterations
+    iterations: 10
+    # canary match conditions (single-quoted so YAML keeps regex escapes such as \b literal)
+    match:
+      - headers:
+          user-agent:
+            regex: '^(?!.*Chrome)(?=.*\bSafari\b).*$'
+      - headers:
+          cookie:
+            regex: '^(.*?;)?(user=test)(;.*)?$'
+    metrics:
+    - name: istio_requests_total
+      # minimum req success rate (non 5xx responses)
+      # percentage (0-100)
+      threshold: 99
+      interval: 1m
+    - name: istio_request_duration_seconds_bucket
+      # maximum req duration P99
+      # milliseconds
+      threshold: 500
+      interval: 30s
+    # external checks (optional); the load test sends the test cookie so its requests match the canary route
+    webhooks:
+      - name: load-test
+        url: http://flagger-loadtester.test/
+        timeout: 5s
+        metadata:
+          cmd: "hey -z 1m -q 10 -c 2 -H 'Cookie: user=test' http://abtest.test:9898/"
diff --git a/artifacts/ab-testing/deployment.yaml b/artifacts/ab-testing/deployment.yaml
@@ -0,0 +1,67 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: abtest  # referenced by targetRef in ab-testing/canary.yaml and scaleTargetRef in ab-testing/hpa.yaml
+  namespace: test
+  labels:
+    app: abtest
+spec:
+  minReadySeconds: 5
+  revisionHistoryLimit: 5
+  progressDeadlineSeconds: 60  # same deadline as progressDeadlineSeconds in the abtest Canary
+  strategy:
+    rollingUpdate:
+      maxUnavailable: 0  # no pod may be unavailable during a rolling update
+    type: RollingUpdate
+  selector:
+    matchLabels:
+      app: abtest  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        prometheus.io/scrape: "true"  # presumably opts the pods into Prometheus scraping; depends on the scrape config
+      labels:
+        app: abtest
+    spec:
+      containers:
+      - name: podinfod
+        image: quay.io/stefanprodan/podinfo:1.4.0
+        imagePullPolicy: IfNotPresent
+        ports:
+        - containerPort: 9898  # same port as spec.service.port in the abtest Canary
+          name: http
+          protocol: TCP
+        command:
+        - ./podinfo
+        - --port=9898
+        - --level=info
+        - --random-delay=false
+        - --random-error=false
+        env:
+        - name: PODINFO_UI_COLOR
+          value: blue
+        livenessProbe:  # exec probe: runs podcli against the local /healthz endpoint
+          exec:
+            command:
+            - podcli
+            - check
+            - http
+            - localhost:9898/healthz
+          initialDelaySeconds: 5
+          timeoutSeconds: 5
+        readinessProbe:  # exec probe: runs podcli against the local /readyz endpoint
+          exec:
+            command:
+            - podcli
+            - check
+            - http
+            - localhost:9898/readyz
+          initialDelaySeconds: 5
+          timeoutSeconds: 5
+        resources:
+          limits:
+            cpu: 2000m
+            memory: 512Mi
+          requests:
+            cpu: 100m  # the abtest HPA utilization target is expressed relative to this request
+            memory: 64Mi
diff --git a/artifacts/ab-testing/hpa.yaml b/artifacts/ab-testing/hpa.yaml
@@ -0,0 +1,19 @@
+apiVersion: autoscaling/v2beta1
+kind: HorizontalPodAutoscaler
+metadata:
+  name: abtest  # referenced by autoscalerRef in ab-testing/canary.yaml
+  namespace: test
+spec:
+  scaleTargetRef:  # the abtest Deployment from ab-testing/deployment.yaml
+    apiVersion: apps/v1
+    kind: Deployment
+    name: abtest
+  minReplicas: 2
+  maxReplicas: 4
+  metrics:
+  - type: Resource
+    resource:
+      name: cpu
+      # scale up if usage is above
+      # 99% of the requested CPU (100m)
+      targetAverageUtilization: 99
diff --git a/artifacts/flagger/crd.yaml b/artifacts/flagger/crd.yaml
@@ -82,6 +82,8 @@ spec:
                 interval:
                   type: string
                   pattern: "^[0-9]+(m|s)"
+                iterations:
+                  type: number
                 threshold:
                   type: number
                 maxWeight:

diff --git a/charts/flagger/templates/crd.yaml b/charts/flagger/templates/crd.yaml
@@ -83,6 +83,8 @@ spec:
                 interval:
                   type: string
                   pattern: "^[0-9]+(m|s)"
+                iterations:
+                  type: number
                 threshold:
                   type: number
                 maxWeight:

diff --git a/cmd/flagger/main.go b/cmd/flagger/main.go
@@ -87,12 +87,6 @@ func main() {
 		logger.Fatalf("Error building example clientset: %s", err.Error())
 	}
 
-	if namespace == "" {
-		logger.Infof("Flagger Canary's Watcher is on all namespace")
-	} else {
-		logger.Infof("Flagger Canary's Watcher is on namespace %s", namespace)
-	}
-
 	flaggerInformerFactory := informers.NewSharedInformerFactoryWithOptions(flaggerClient, time.Second*30, informers.WithNamespace(namespace))
 
 	canaryInformer := flaggerInformerFactory.Flagger().V1alpha3().Canaries()
@@ -105,6 +99,9 @@ func main() {
 	}
 
 	logger.Infof("Connected to Kubernetes API %s", ver)
+	if namespace != "" {
+		logger.Infof("Watching namespace %s", namespace)
+	}
 
 	ok, err := controller.CheckMetricsServer(metricsServer)
 	if ok {

diff --git a/docs/gitbook/how-it-works.md b/docs/gitbook/how-it-works.md
@@ -327,6 +327,47 @@ At any time you can set the `spec.skipAnalysis: true`.
 When skip analysis is enabled, Flagger checks if the canary deployment is healthy and 
 promotes it without analysing it. If an analysis is underway, Flagger cancels it and runs the promotion.
 
+### A/B Testing
+
+Besides weighted routing, Flagger can be configured to route traffic to the canary based on HTTP match conditions.
+In an A/B testing scenario, you'll be using HTTP headers or cookies to target a certain segment of your users.
+This is particularly useful for frontend applications that require session affinity.
+
+Spec:
+
+```yaml
+  canaryAnalysis:
+    # schedule interval (default 60s)
+    interval: 1m
+    # total number of iterations
+    iterations: 10
+    # max number of failed iterations before rollback
+    threshold: 2
+    # canary match condition
+    match:
+      - headers:
+          user-agent:
+            regex: '^(?!.*Chrome)(?=.*\bSafari\b).*$'
+      - headers:
+          cookie:
+            regex: "^(.*?;)?(user=test)(;.*)?$"
+```
+
+The above configuration will run an analysis for ten minutes targeting the Safari users and those that have a test cookie.
+You can determine the minimum time that it takes to validate and promote a canary deployment using this formula:
+
+```
+interval * iterations
+```
+
+And the time it takes for a canary to be rolled back when the metrics or webhook checks are failing:
+
+```
+interval * threshold 
+```
+
+Make sure that the analysis threshold is lower than the number of iterations.
+
 ### HTTP Metrics
 
 The canary analysis is using the following Prometheus queries:

diff --git a/pkg/apis/flagger/v1alpha3/types.go b/pkg/apis/flagger/v1alpha3/types.go
@@ -98,6 +98,7 @@ type CanaryStatus struct {
 	Phase        CanaryPhase `json:"phase"`
 	FailedChecks int         `json:"failedChecks"`
 	CanaryWeight int         `json:"canaryWeight"`
+	Iterations   int         `json:"iterations"`
 	// +optional
 	TrackedConfigs *map[string]string `json:"trackedConfigs,omitempty"`
 	// +optional
@@ -122,12 +123,14 @@ type CanaryService struct {
 
 // CanaryAnalysis is used to describe how the analysis should be done
 type CanaryAnalysis struct {
-	Interval   string          `json:"interval"`
-	Threshold  int             `json:"threshold"`
-	MaxWeight  int             `json:"maxWeight"`
-	StepWeight int             `json:"stepWeight"`
-	Metrics    []CanaryMetric  `json:"metrics"`
-	Webhooks   []CanaryWebhook `json:"webhooks,omitempty"`
+	Interval   string                           `json:"interval"`
+	Threshold  int                              `json:"threshold"`
+	MaxWeight  int                              `json:"maxWeight"`
+	StepWeight int                              `json:"stepWeight"`
+	Metrics    []CanaryMetric                   `json:"metrics"`
+	Webhooks   []CanaryWebhook                  `json:"webhooks,omitempty"`
+	Match      []istiov1alpha3.HTTPMatchRequest `json:"match,omitempty"`
+	Iterations int                              `json:"iterations,omitempty"`
 }
 
 // CanaryMetric holds the reference to Istio metrics used for canary analysis

diff --git a/pkg/apis/flagger/v1alpha3/zz_generated.deepcopy.go b/pkg/apis/flagger/v1alpha3/zz_generated.deepcopy.go
diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go
@@ -2,6 +2,8 @@ package controller
 
 import (
 	"github.com/stefanprodan/flagger/pkg/apis/flagger/v1alpha3"
+	istiov1alpha1 "github.com/stefanprodan/flagger/pkg/apis/istio/common/v1alpha1"
+	istiov1alpha3 "github.com/stefanprodan/flagger/pkg/apis/istio/v1alpha3"
 	clientset "github.com/stefanprodan/flagger/pkg/client/clientset/versioned"
 	fakeFlagger "github.com/stefanprodan/flagger/pkg/client/clientset/versioned/fake"
 	informers "github.com/stefanprodan/flagger/pkg/client/informers/externalversions"
@@ -38,9 +40,12 @@ type Mocks struct {
 	router        router.Interface
 }
 
-func SetupMocks() Mocks {
+func SetupMocks(abtest bool) Mocks {
 	// init canary
 	canary := newTestCanary()
+	if abtest {
+		canary = newTestCanaryAB()
+	}
 	flaggerClient := fakeFlagger.NewSimpleClientset(canary)
 
 	// init kube clientset and register mock objects
@@ -261,6 +266,55 @@ func newTestCanary() *v1alpha3.Canary {
 	return cd
 }
 
+// newTestCanaryAB returns the podinfo Canary fixture that SetupMocks registers
+// when abtest is true: the analysis runs for ten iterations and selects canary
+// traffic with an exact match on the x-user-type header.
+func newTestCanaryAB() *v1alpha3.Canary {
+	// the deployment and its autoscaler share the podinfo name
+	target := hpav1.CrossVersionObjectReference{
+		Name:       "podinfo",
+		APIVersion: "apps/v1",
+		Kind:       "Deployment",
+	}
+	autoscaler := hpav1.CrossVersionObjectReference{
+		Name:       "podinfo",
+		APIVersion: "autoscaling/v2beta1",
+		Kind:       "HorizontalPodAutoscaler",
+	}
+
+	// a single header condition replaces weight based routing
+	match := []istiov1alpha3.HTTPMatchRequest{{
+		Headers: map[string]istiov1alpha1.StringMatch{
+			"x-user-type": {Exact: "test"},
+		},
+	}}
+
+	// the two Istio metrics checked during the analysis, each over a 1m interval
+	metrics := []v1alpha3.CanaryMetric{
+		{Name: "istio_requests_total", Threshold: 99, Interval: "1m"},
+		{Name: "istio_request_duration_seconds_bucket", Threshold: 500, Interval: "1m"},
+	}
+
+	return &v1alpha3.Canary{
+		TypeMeta: metav1.TypeMeta{APIVersion: v1alpha3.SchemeGroupVersion.String()},
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: "default",
+			Name:      "podinfo",
+		},
+		Spec: v1alpha3.CanarySpec{
+			TargetRef:     target,
+			AutoscalerRef: &autoscaler,
+			Service:       v1alpha3.CanaryService{Port: 9898},
+			CanaryAnalysis: v1alpha3.CanaryAnalysis{
+				Threshold:  10,
+				Iterations: 10,
+				Match:      match,
+				Metrics:    metrics,
+			},
+		},
+	}
+}
+
 func newTestDeployment() *appsv1.Deployment {
 	d := &appsv1.Deployment{
 		TypeMeta: metav1.TypeMeta{APIVersion: appsv1.SchemeGroupVersion.String()},