From a9b466535a758375f3b538c8c0e793edc9074441 Mon Sep 17 00:00:00 2001 From: Jerop Date: Wed, 10 Aug 2022 14:26:03 -0400 Subject: [PATCH] TEP-0090: Matrix - Retries [TEP-0090: Matrix][tep-0090] proposed executing a `PipelineTask` in parallel `TaskRuns` and `Runs` with substitutions from combinations of `Parameters` in a `Matrix`. Today, a matrixed `PipelineTask` that has retries will reattempt each matrixed `TaskRun` when one of them fails. In this change, we fix this issue such that each retry is completed for each matrixed `TaskRun`. That is, each retry for each `TaskRun` is completed before it is reattempted; failure in one matrixed `TaskRun` no longer affects retries for other matrixed `TaskRuns` from the same `PipelineTask`. [tep-0090]: https://github.com/tektoncd/community/blob/main/teps/0090-matrix.md --- docs/matrix.md | 44 +- pkg/reconciler/pipelinerun/pipelinerun.go | 3 + .../pipelinerun/pipelinerun_test.go | 444 ++++++++++++++++++ 3 files changed, 490 insertions(+), 1 deletion(-) diff --git a/docs/matrix.md b/docs/matrix.md index cf872615b58..b98382ea4a4 100644 --- a/docs/matrix.md +++ b/docs/matrix.md @@ -19,6 +19,7 @@ weight: 11 - [Fan Out](#fan-out) - [`PipelineTasks` with `Tasks`](#pipelinetasks-with-tasks) - [`PipelineTasks` with `Custom Tasks`](#pipelinetasks-with-custom-tasks) +- [Retries](#retries) ## Overview @@ -521,6 +522,47 @@ status: pipelineTaskName: platforms-and-browsers ``` +## Retries + +The `retries` field is used to specify the number of times a `PipelineTask` should be retried when its `TaskRun` or +`Run` fails; see the [documentation][retries] for further details. When a `PipelineTask` is fanned out using `Matrix`, +each resulting `TaskRun` or `Run` will be retried up to the number of times specified in the `retries` field of the `PipelineTask`. 
+ +For example, the `PipelineTask` in this `PipelineRun` will be fanned out into three `TaskRuns`, each of which will be +retried once: + +```yaml +apiVersion: tekton.dev/v1beta1 +kind: PipelineRun +metadata: + generateName: matrixed-pr-with-retries- +spec: + pipelineSpec: + tasks: + - name: matrix-and-params + matrix: + - name: platform + value: + - linux + - mac + - windows + params: + - name: browser + value: chrome + retries: 1 + taskSpec: + params: + - name: platform + - name: browser + steps: + - name: echo + image: alpine + script: | + echo "$(params.platform) and $(params.browser)" + exit 1 +``` + [cel]: https://github.com/tektoncd/experimental/tree/1609827ea81d05c8d00f8933c5c9d6150cd36989/cel [pr-with-matrix]: ../examples/v1beta1/pipelineruns/alpha/pipelinerun-with-matrix.yaml -[pr-with-matrix-and-results]: ../examples/v1beta1/pipelineruns/alpha/pipelinerun-with-matrix-and-results.yaml \ No newline at end of file +[pr-with-matrix-and-results]: ../examples/v1beta1/pipelineruns/alpha/pipelinerun-with-matrix-and-results.yaml +[retries]: pipelines.md#using-the-retries-field \ No newline at end of file diff --git a/pkg/reconciler/pipelinerun/pipelinerun.go b/pkg/reconciler/pipelinerun/pipelinerun.go index 8586e2d1306..8c8f6251eda 100644 --- a/pkg/reconciler/pipelinerun/pipelinerun.go +++ b/pkg/reconciler/pipelinerun/pipelinerun.go @@ -808,6 +808,9 @@ func (c *Reconciler) createTaskRun(ctx context.Context, taskRunName string, para tr, _ := c.taskRunLister.TaskRuns(pr.Namespace).Get(taskRunName) if tr != nil { + if !tr.Status.GetCondition(apis.ConditionSucceeded).IsFalse() { + return tr, nil + } // Don't modify the lister cache's copy. 
tr = tr.DeepCopy() // is a retry diff --git a/pkg/reconciler/pipelinerun/pipelinerun_test.go b/pkg/reconciler/pipelinerun/pipelinerun_test.go index 16c6d8ca695..84e01278e82 100644 --- a/pkg/reconciler/pipelinerun/pipelinerun_test.go +++ b/pkg/reconciler/pipelinerun/pipelinerun_test.go @@ -9164,6 +9164,450 @@ spec: } } +func TestReconciler_PipelineTaskMatrixWithRetries(t *testing.T) { + names.TestingSeed() + + task := parse.MustParseTask(t, ` +metadata: + name: mytask + namespace: foo +spec: + params: + - name: platform + - name: browser + steps: + - name: echo + image: alpine + script: | + echo "$(params.platform) and $(params.browser)" + exit 1 +`) + + cms := []*corev1.ConfigMap{withEmbeddedStatus(withEnabledAlphaAPIFields(newFeatureFlagsConfigMap()), config.MinimalEmbeddedStatus)} + cms = append(cms, withMaxMatrixCombinationsCount(newDefaultsConfigMap(), 10)) + + tests := []struct { + name string + trs []*v1beta1.TaskRun + prs []*v1beta1.PipelineRun + expectedPipelineRun *v1beta1.PipelineRun + expectedTaskRuns []*v1beta1.TaskRun + }{{ + name: "matrixed pipelinetask with retries, where one taskrun has failed and another one is running", + trs: []*v1beta1.TaskRun{ + mustParseTaskRunWithObjectMeta(t, + taskRunObjectMeta("pr-platforms-and-browsers-0", "foo", + "pr", "p", "platforms-and-browsers", false), + ` +spec: + params: + - name: platform + value: linux + - name: browser + value: chrome + resources: {} + serviceAccountName: test-sa + taskRef: + name: mytask + timeout: 1h0m0s + retries: 1 +status: + conditions: + - type: Succeeded + status: "False" +`), + mustParseTaskRunWithObjectMeta(t, + taskRunObjectMeta("pr-platforms-and-browsers-1", "foo", + "pr", "p", "platforms-and-browsers", false), + ` +spec: + params: + - name: platform + value: mac + - name: browser + value: chrome + resources: {} + serviceAccountName: test-sa + taskRef: + name: mytask + timeout: 1h0m0s + retries: 1 +status: + conditions: + - type: Succeeded + status: "Unknown" +`), + }, + prs: 
[]*v1beta1.PipelineRun{ + parse.MustParsePipelineRun(t, ` +metadata: + name: pr + namespace: foo + annotations: {} + labels: + tekton.dev/pipeline: p +spec: + serviceAccountName: test-sa + pipelineRef: + name: p +status: + pipelineSpec: + tasks: + - name: platforms-and-browsers + retries: 1 + taskRef: + name: mytask + kind: Task + matrix: + - name: platform + value: + - linux + - mac + params: + - name: browser + value: chrome + conditions: + - type: Succeeded + status: "Unknown" + reason: "Running" + message: "Tasks Completed: 0 (Failed: 0, Cancelled 0), Incomplete: 1, Skipped: 0" + childReferences: + - apiVersion: tekton.dev/v1beta1 + kind: TaskRun + name: pr-platforms-and-browsers-0 + pipelineTaskName: platforms-and-browsers + - apiVersion: tekton.dev/v1beta1 + kind: TaskRun + name: pr-platforms-and-browsers-1 + pipelineTaskName: platforms-and-browsers + taskRuns: {} + runs: {} +`), + }, + expectedPipelineRun: parse.MustParsePipelineRun(t, ` +metadata: + name: pr + namespace: foo + annotations: {} + labels: + tekton.dev/pipeline: p +spec: + serviceAccountName: test-sa + pipelineRef: + name: p +status: + pipelineSpec: + tasks: + - name: platforms-and-browsers + retries: 1 + taskRef: + name: mytask + kind: Task + matrix: + - name: platform + value: + - linux + - mac + params: + - name: browser + value: chrome + conditions: + - type: Succeeded + status: "Unknown" + reason: "Running" + message: "Tasks Completed: 0 (Failed: 0, Cancelled 0), Incomplete: 1, Skipped: 0" + childReferences: + - apiVersion: tekton.dev/v1beta1 + kind: TaskRun + name: pr-platforms-and-browsers-0 + pipelineTaskName: platforms-and-browsers + - apiVersion: tekton.dev/v1beta1 + kind: TaskRun + name: pr-platforms-and-browsers-1 + pipelineTaskName: platforms-and-browsers + taskRuns: {} + runs: {} +`), + expectedTaskRuns: []*v1beta1.TaskRun{ + mustParseTaskRunWithObjectMeta(t, + taskRunObjectMeta("pr-platforms-and-browsers-0", "foo", + "pr", "p", "platforms-and-browsers", false), + ` +spec: + 
params: + - name: platform + value: linux + - name: browser + value: chrome + resources: {} + serviceAccountName: test-sa + taskRef: + name: mytask + timeout: 1h0m0s + retries: 1 +status: + conditions: + - type: Succeeded + status: "Unknown" + retriesStatus: + - conditions: + - status: "False" + type: Succeeded +`), + mustParseTaskRunWithObjectMeta(t, + taskRunObjectMeta("pr-platforms-and-browsers-1", "foo", + "pr", "p", "platforms-and-browsers", false), + ` +spec: + params: + - name: platform + value: mac + - name: browser + value: chrome + resources: {} + serviceAccountName: test-sa + taskRef: + name: mytask + timeout: 1h0m0s + retries: 1 +status: + conditions: + - type: Succeeded + status: "Unknown" +`), + }, + }, { + name: "matrixed pipelinetask with retries, where both taskruns have failed", + trs: []*v1beta1.TaskRun{ + mustParseTaskRunWithObjectMeta(t, + taskRunObjectMeta("pr-platforms-and-browsers-0", "foo", + "pr", "p", "platforms-and-browsers", false), + ` +spec: + params: + - name: platform + value: linux + - name: browser + value: chrome + resources: {} + serviceAccountName: test-sa + taskRef: + name: mytask + timeout: 1h0m0s + retries: 1 +status: + conditions: + - type: Succeeded + status: "False" +`), + mustParseTaskRunWithObjectMeta(t, + taskRunObjectMeta("pr-platforms-and-browsers-1", "foo", + "pr", "p", "platforms-and-browsers", false), + ` +spec: + params: + - name: platform + value: mac + - name: browser + value: chrome + resources: {} + serviceAccountName: test-sa + taskRef: + name: mytask + timeout: 1h0m0s + retries: 1 +status: + conditions: + - type: Succeeded + status: "False" +`), + }, + prs: []*v1beta1.PipelineRun{ + parse.MustParsePipelineRun(t, ` +metadata: + name: pr + namespace: foo + annotations: {} + labels: + tekton.dev/pipeline: p +spec: + serviceAccountName: test-sa + pipelineRef: + name: p +status: + pipelineSpec: + tasks: + - name: platforms-and-browsers + retries: 1 + taskRef: + name: mytask + kind: Task + matrix: + - name: 
platform + value: + - linux + - mac + params: + - name: browser + value: chrome + conditions: + - type: Succeeded + status: "Unknown" + reason: "Running" + message: "Tasks Completed: 0 (Failed: 0, Cancelled 0), Incomplete: 1, Skipped: 0" + childReferences: + - apiVersion: tekton.dev/v1beta1 + kind: TaskRun + name: pr-platforms-and-browsers-0 + pipelineTaskName: platforms-and-browsers + - apiVersion: tekton.dev/v1beta1 + kind: TaskRun + name: pr-platforms-and-browsers-1 + pipelineTaskName: platforms-and-browsers + taskRuns: {} + runs: {} +`), + }, + expectedPipelineRun: parse.MustParsePipelineRun(t, ` +metadata: + name: pr + namespace: foo + annotations: {} + labels: + tekton.dev/pipeline: p +spec: + serviceAccountName: test-sa + pipelineRef: + name: p +status: + pipelineSpec: + tasks: + - name: platforms-and-browsers + retries: 1 + taskRef: + name: mytask + kind: Task + matrix: + - name: platform + value: + - linux + - mac + params: + - name: browser + value: chrome + conditions: + - type: Succeeded + status: "Unknown" + reason: "Running" + message: "Tasks Completed: 0 (Failed: 0, Cancelled 0), Incomplete: 1, Skipped: 0" + childReferences: + - apiVersion: tekton.dev/v1beta1 + kind: TaskRun + name: pr-platforms-and-browsers-0 + pipelineTaskName: platforms-and-browsers + - apiVersion: tekton.dev/v1beta1 + kind: TaskRun + name: pr-platforms-and-browsers-1 + pipelineTaskName: platforms-and-browsers + taskRuns: {} + runs: {} +`), + expectedTaskRuns: []*v1beta1.TaskRun{ + mustParseTaskRunWithObjectMeta(t, + taskRunObjectMeta("pr-platforms-and-browsers-0", "foo", + "pr", "p", "platforms-and-browsers", false), + ` +spec: + params: + - name: platform + value: linux + - name: browser + value: chrome + resources: {} + serviceAccountName: test-sa + taskRef: + name: mytask + timeout: 1h0m0s + retries: 1 +status: + conditions: + - type: Succeeded + status: "Unknown" + retriesStatus: + - conditions: + - status: "False" + type: Succeeded +`), + mustParseTaskRunWithObjectMeta(t, + 
taskRunObjectMeta("pr-platforms-and-browsers-1", "foo", + "pr", "p", "platforms-and-browsers", false), + ` +spec: + params: + - name: platform + value: mac + - name: browser + value: chrome + resources: {} + serviceAccountName: test-sa + taskRef: + name: mytask + timeout: 1h0m0s + retries: 1 +status: + conditions: + - type: Succeeded + status: "Unknown" + retriesStatus: + - conditions: + - status: "False" + type: Succeeded +`), + }, + }} + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + d := test.Data{ + Tasks: []*v1beta1.Task{task}, + TaskRuns: tt.trs, + PipelineRuns: tt.prs, + ConfigMaps: cms, + } + prt := newPipelineRunTest(d, t) + defer prt.Cancel() + + _, clients := prt.reconcileRun("foo", "pr", []string{}, false) + taskRuns, err := clients.Pipeline.TektonV1beta1().TaskRuns("foo").List(prt.TestAssets.Ctx, metav1.ListOptions{ + LabelSelector: fmt.Sprintf("tekton.dev/pipelineRun=pr,tekton.dev/pipelineTask=platforms-and-browsers"), + Limit: 1, + }) + if err != nil { + t.Fatalf("Failure to list TaskRun's %s", err) + } + + if len(taskRuns.Items) != 2 { + t.Fatalf("Expected 2 TaskRuns got %d", len(taskRuns.Items)) + } + + for i := range taskRuns.Items { + expectedTaskRun := tt.expectedTaskRuns[i] + if d := cmp.Diff(expectedTaskRun, &taskRuns.Items[i], ignoreResourceVersion, ignoreTypeMeta, ignoreLastTransitionTime, ignoreStartTime); d != "" { + t.Errorf("expected to see TaskRun %v created. Diff %s", tt.expectedTaskRuns[i].Name, diff.PrintWantGot(d)) + } + } + + pipelineRun, err := clients.Pipeline.TektonV1beta1().PipelineRuns("foo").Get(prt.TestAssets.Ctx, "pr", metav1.GetOptions{}) + if err != nil { + t.Fatalf("Got an error getting reconciled run out of fake client: %s", err) + } + if d := cmp.Diff(tt.expectedPipelineRun, pipelineRun, ignoreResourceVersion, ignoreTypeMeta, ignoreLastTransitionTime, ignoreStartTime, cmpopts.SortSlices(lessChildReferences)); d != "" { + t.Errorf("expected PipelineRun was not created. 
Diff %s", diff.PrintWantGot(d)) + } + }) + } +} + func TestReconciler_PipelineTaskMatrixWithCustomTask(t *testing.T) { names.TestingSeed()