feat(retries) Design and implement retries #658

Merged: 1 commit, merged on May 6, 2019
20 changes: 20 additions & 0 deletions docs/pipelines.md
@@ -10,6 +10,7 @@ This document defines `Pipelines` and their capabilities.
- [Pipeline Tasks](#pipeline-tasks)
- [From](#from)
- [RunAfter](#runafter)
- [Retries](#retries)
- [Ordering](#ordering)
- [Examples](#examples)

@@ -41,6 +42,7 @@ following fields:
- [`runAfter`](#runAfter) - Used when the [Pipeline Task](#pipeline-task)
should be executed after another Pipeline Task, but there is no
[output linking](#from) required
- [`retries`](#retries) - Used when the task should be re-executed if it fails, for example because of a network error or a missing dependency. It does not apply to cancellations.

[kubernetes-overview]:
https://kubernetes.io/docs/concepts/overview/working-with-objects/kubernetes-objects/#required-fields
@@ -255,6 +257,24 @@ is no output from `test-app`, so `build-app` uses `runAfter` to indicate that
`test-app` should run before it, regardless of the order they appear in the
spec.

#### retries

Sometimes a retry policy is needed, so that tasks can be retried after failures caused by, for example, network errors, missing dependencies or upload problems.
Any such failure must be reflected as False (corev1.ConditionFalse) in the Succeeded Condition of the TaskRun Status.
For that reason there is an optional attribute called `retries`, which declares how many times the task should be retried in case of failure.

By default, and in its absence, there are no retries; its value is 0.

```yaml
tasks:
- name: build-the-image
retries: 1
taskRef:
name: build-push
```

In this example, the task "build-the-image" is executed, and if the first run fails a second one is triggered. If that one also fails, no further runs are triggered: a maximum of two executions.
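
Under the hood, each failed attempt is archived into the TaskRun's `retriesStatus` before the run is reset and retried. Below is a minimal sketch of the eligibility check, assuming the types introduced in this PR; the helper name `hasRemainingRetries` is hypothetical and not part of this diff:

```go
import (
	"github.com/knative/pkg/apis"
	corev1 "k8s.io/api/core/v1"

	"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1"
)

// hasRemainingRetries reports whether a failed TaskRun is still eligible for
// a retry, given the `retries` value declared on its PipelineTask.
func hasRemainingRetries(tr *v1alpha1.TaskRun, retries int) bool {
	c := tr.Status.GetCondition(apis.ConditionSucceeded)
	if c == nil || c.Status != corev1.ConditionFalse {
		// Only a failed TaskRun is retried; cancellations and runs still
		// in progress are excluded.
		return false
	}
	// Every earlier failure was archived into RetriesStatus; the current
	// failure has not been archived yet, so a retry is allowed while the
	// archive is still shorter than the declared retries budget.
	return len(tr.Status.RetriesStatus) < retries
}
```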

## Ordering

The [Pipeline Tasks](#pipeline-tasks) in a `Pipeline` can be connected and run
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/pipeline/v1alpha1/pipeline_types.go
@@ -73,6 +73,10 @@ type PipelineTask struct {
Name string `json:"name"`
TaskRef TaskRef `json:"taskRef"`

// Retries represents how many times this task should be retried in case of task failure
// (its ConditionSucceeded is set to False).
// +optional
Retries int `json:"retries,omitempty"`

// RunAfter is the list of PipelineTask names that should be executed before
// this Task executes. (Used to force a specific ordering in graph execution.)
// +optional
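For illustration, here is how the new field could be set from the Go API side, mirroring the YAML example in the docs (a hypothetical snippet, not part of this diff):

```go
pt := v1alpha1.PipelineTask{
	Name:    "build-the-image",
	TaskRef: v1alpha1.TaskRef{Name: "build-push"},
	Retries: 1, // retry once on failure, i.e. at most two executions
}
```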
4 changes: 4 additions & 0 deletions pkg/apis/pipeline/v1alpha1/taskrun_types.go
@@ -138,6 +138,10 @@ type TaskRunStatus struct {
// Steps describes the state of each build step container.
// +optional
Steps []StepState `json:"steps,omitempty"`
// RetriesStatus contains the history of TaskRunStatus in case of a retry, in order to keep a record of failures.
// Each TaskRunStatus stored in RetriesStatus has its own RetriesStatus cleared, as nesting it would be redundant.
// +optional
RetriesStatus []TaskRunStatus `json:"retriesStatus,omitempty"`
}

// GetCondition returns the Condition matching the given type.
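Since every archived attempt is one entry in `RetriesStatus`, the total number of executions can be derived from the status alone (a small illustrative helper, not part of this diff):

```go
// attemptCount returns how many times a TaskRun has been executed: the
// current (or most recent) attempt plus every archived one in RetriesStatus.
func attemptCount(status v1alpha1.TaskRunStatus) int {
	return 1 + len(status.RetriesStatus)
}
```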
7 changes: 7 additions & 0 deletions pkg/apis/pipeline/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

45 changes: 39 additions & 6 deletions pkg/reconciler/v1alpha1/pipelinerun/pipelinerun.go
@@ -144,6 +144,9 @@ func NewController(
// converge the two. It then updates the Status block of the Pipeline Run
// resource with the current status of the resource.
func (c *Reconciler) Reconcile(ctx context.Context, key string) error {

c.Logger.Infof("Reconciling %v", time.Now())

// Convert the namespace/name string into a distinct namespace and name
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
@@ -275,6 +278,9 @@ func (c *Reconciler) reconcile(ctx context.Context, pr *v1alpha1.PipelineRun) er
func(name string) (v1alpha1.TaskInterface, error) {
return c.taskLister.Tasks(pr.Namespace).Get(name)
},
func(name string) (*v1alpha1.TaskRun, error) {
return c.taskRunLister.TaskRuns(pr.Namespace).Get(name)
},
Collaborator:
hm, if we're going to resolve TaskRuns in ResolvePipelineRun, then two things:

  1. We should remove the section a few lines up from here that does the same thing:
	err = resources.ResolveTaskRuns(c.taskRunLister.TaskRuns(pr.Namespace).Get, pipelineState)
	if err != nil {
		return fmt.Errorf("Error getting TaskRuns for Pipeline %s: %s", p.Name, err)
	}
  2. We should make sure that the logic in resources.ResolvePipelineRun does everything that resources.ResolveTaskRuns is doing

func(name string) (v1alpha1.TaskInterface, error) {
return c.clusterTaskLister.Get(name)
},
@@ -311,6 +317,13 @@ func (c *Reconciler) reconcile(ctx context.Context, pr *v1alpha1.PipelineRun) er
}
return nil
}

// Both the Pipeline's tasks and the PipelineRun itself are done (no retries
// left to schedule): release the timeout tracker and record the success.
if pipelineState.IsDone() && pr.IsDone() {
c.timeoutHandler.Release(pr)
c.Recorder.Event(pr, corev1.EventTypeNormal, eventReasonSucceeded, "PipelineRun completed successfully.")
return nil
}

if err := resources.ValidateFrom(pipelineState); err != nil {
// This Run has failed, so we need to mark it as failed and stop reconciling it
pr.Status.SetCondition(&apis.Condition{
@@ -338,11 +351,6 @@ func (c *Reconciler) reconcile(ctx context.Context, pr *v1alpha1.PipelineRun) er
}
}

err = resources.ResolveTaskRuns(c.taskRunLister.TaskRuns(pr.Namespace).Get, pipelineState)
if err != nil {
return fmt.Errorf("Error getting TaskRuns for Pipeline %s: %s", p.Name, err)
}

// If the pipelinerun is cancelled, cancel tasks and update status
if pr.IsCancelled() {
return cancelPipelineRun(pr, pipelineState, c.PipelineClientSet)
@@ -438,7 +446,19 @@ func (c *Reconciler) createTaskRun(logger *zap.SugaredLogger, rprt *resources.Re
}
labels[pipeline.GroupName+pipeline.PipelineRunLabelKey] = pr.Name

tr := &v1alpha1.TaskRun{
tr, _ := c.taskRunLister.TaskRuns(pr.Namespace).Get(rprt.TaskRunName)

if tr != nil {
// A TaskRun already exists for this pipeline task, so this is a retry:
// archive the previous attempt's status and reset it before running again.
addRetryHistory(tr)
clearStatus(tr)
tr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionUnknown,
})
return c.PipelineClientSet.TektonV1alpha1().TaskRuns(pr.Namespace).UpdateStatus(tr)
}
tr = &v1alpha1.TaskRun{
ObjectMeta: metav1.ObjectMeta{
Name: rprt.TaskRunName,
Namespace: pr.Namespace,
@@ -464,6 +484,19 @@
return c.PipelineClientSet.TektonV1alpha1().TaskRuns(pr.Namespace).Create(tr)
}

// addRetryHistory archives the TaskRun's current status into RetriesStatus,
// clearing the copy's own RetriesStatus so that history is not nested.
func addRetryHistory(tr *v1alpha1.TaskRun) {
newStatus := *tr.Status.DeepCopy()
newStatus.RetriesStatus = nil
tr.Status.RetriesStatus = append(tr.Status.RetriesStatus, newStatus)
}

// clearStatus resets the per-attempt fields of the TaskRun status so that
// the TaskRun can be executed again from a clean slate.
func clearStatus(tr *v1alpha1.TaskRun) {
tr.Status.StartTime = nil
tr.Status.CompletionTime = nil
tr.Status.Results = nil
tr.Status.PodName = ""
}

func (c *Reconciler) updateStatus(pr *v1alpha1.PipelineRun) (*v1alpha1.PipelineRun, error) {
newPr, err := c.pipelineRunLister.PipelineRuns(pr.Namespace).Get(pr.Name)
if err != nil {
158 changes: 158 additions & 0 deletions pkg/reconciler/v1alpha1/pipelinerun/pipelinerun_test.go
@@ -15,6 +15,7 @@ package pipelinerun

import (
"context"
"fmt"
"strings"
"testing"
"time"
@@ -512,6 +513,15 @@ func TestReconcileOnCompletedPipelineRun(t *testing.T) {
if actual == nil {
t.Errorf("Expected a PipelineRun to be updated, but it wasn't.")
}
actions := clients.Pipeline.Actions()
for _, action := range actions {
if action != nil {
resource := action.GetResource().Resource
if resource != "pipelineruns" {
t.Fatalf("Expected client to not have created a TaskRun for the completed PipelineRun, but it did")
}
}
}

// Check that the PipelineRun was reconciled correctly
reconciledRun, err := clients.Pipeline.Tekton().PipelineRuns("foo").Get("test-pipeline-run-completed", metav1.GetOptions{})
@@ -679,6 +689,55 @@ func TestReconcileWithTimeout(t *testing.T) {
t.Errorf("TaskRun timeout %s should be less than or equal to PipelineRun timeout %s", actual.Spec.Timeout.Duration.String(), prs[0].Spec.Timeout.Duration.String())
}
}
func TestReconcileCancelledPipelineRun(t *testing.T) {
ps := []*v1alpha1.Pipeline{tb.Pipeline("test-pipeline", "foo", tb.PipelineSpec(
tb.PipelineTask("hello-world-1", "hello-world", tb.Retries(1)),
))}
prs := []*v1alpha1.PipelineRun{tb.PipelineRun("test-pipeline-run-with-timeout", "foo",
tb.PipelineRunSpec("test-pipeline",
tb.PipelineRunCancelled,
),
)}
ts := []*v1alpha1.Task{tb.Task("hello-world", "foo")}

d := test.Data{
PipelineRuns: prs,
Pipelines: ps,
Tasks: ts,
}

// create fake recorder for testing
fr := record.NewFakeRecorder(2)

testAssets := getPipelineRunController(d, fr)
c := testAssets.Controller
clients := testAssets.Clients

err := c.Reconciler.Reconcile(context.Background(), "foo/test-pipeline-run-with-timeout")
if err != nil {
t.Errorf("Did not expect to see error when reconciling completed PipelineRun but saw %s", err)
}

// Check that the PipelineRun was reconciled correctly
reconciledRun, err := clients.Pipeline.Tekton().PipelineRuns("foo").Get("test-pipeline-run-with-timeout", metav1.GetOptions{})
if err != nil {
t.Fatalf("Somehow had error getting completed reconciled run out of fake client: %s", err)
}

// The PipelineRun should be still cancelled.
if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != "PipelineRunCancelled" {
t.Errorf("Expected PipelineRun to be cancelled, but condition reason is %s", reconciledRun.Status.GetCondition(apis.ConditionSucceeded))
}

// Check that no TaskRun is created or run
actions := clients.Pipeline.Actions()
for _, action := range actions {
actionType := fmt.Sprintf("%T", action)
if !(actionType == "testing.UpdateActionImpl" || actionType == "testing.GetActionImpl") {
t.Errorf("Expected a TaskRun to be get/updated, but it was %s", actionType)
}
}
}

func TestReconcilePropagateLabels(t *testing.T) {
names.TestingSeed()
@@ -742,3 +801,102 @@ func TestReconcilePropagateLabels(t *testing.T) {
t.Errorf("expected to see TaskRun %v created. Diff %s", expectedTaskRun, d)
}
}

func TestReconcileWithTimeoutAndRetry(t *testing.T) {

tcs := []struct {
name string
retries int
conditionSucceeded corev1.ConditionStatus
}{
{
name: "One try has to be done",
retries: 1,
conditionSucceeded: corev1.ConditionFalse,
},
{
name: "No more retries are needed",
retries: 2,
conditionSucceeded: corev1.ConditionUnknown,
},
}

for _, tc := range tcs {

t.Run(tc.name, func(t *testing.T) {
ps := []*v1alpha1.Pipeline{tb.Pipeline("test-pipeline-retry", "foo", tb.PipelineSpec(
tb.PipelineTask("hello-world-1", "hello-world", tb.Retries(tc.retries)),
))}
prs := []*v1alpha1.PipelineRun{tb.PipelineRun("test-pipeline-retry-run-with-timeout", "foo",
tb.PipelineRunSpec("test-pipeline-retry",
tb.PipelineRunServiceAccount("test-sa"),
tb.PipelineRunTimeout(&metav1.Duration{Duration: 12 * time.Hour}),
),
tb.PipelineRunStatus(
tb.PipelineRunStartTime(time.Now().AddDate(0, 0, -1))),
)}

ts := []*v1alpha1.Task{
tb.Task("hello-world", "foo"),
}
trs := []*v1alpha1.TaskRun{
tb.TaskRun("hello-world-1", "foo",
tb.TaskRunStatus(
tb.PodName("my-pod-name"),
tb.Condition(apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
}),
tb.Retry(v1alpha1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: []apis.Condition{{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
}},
},
}),
)),
}

prtrs := &v1alpha1.PipelineRunTaskRunStatus{
PipelineTaskName: "hello-world-1",
Status: &trs[0].Status,
}
prs[0].Status.TaskRuns = make(map[string]*v1alpha1.PipelineRunTaskRunStatus)
prs[0].Status.TaskRuns["hello-world-1"] = prtrs

d := test.Data{
PipelineRuns: prs,
Pipelines: ps,
Tasks: ts,
TaskRuns: trs,
}

fr := record.NewFakeRecorder(2)

testAssets := getPipelineRunController(d, fr)
c := testAssets.Controller
clients := testAssets.Clients

err := c.Reconciler.Reconcile(context.Background(), "foo/test-pipeline-retry-run-with-timeout")
if err != nil {
t.Errorf("Did not expect to see error when reconciling completed PipelineRun but saw %s", err)
}

// Check that the PipelineRun was reconciled correctly
reconciledRun, err := clients.Pipeline.TektonV1alpha1().PipelineRuns("foo").Get("test-pipeline-retry-run-with-timeout", metav1.GetOptions{})
if err != nil {
t.Fatalf("Somehow had error getting completed reconciled run out of fake client: %s", err)
}

if len(reconciledRun.Status.TaskRuns["hello-world-1"].Status.RetriesStatus) != tc.retries {
t.Fatalf(" %d retry expected but %d ", tc.retries, len(reconciledRun.Status.TaskRuns["hello-world-1"].Status.RetriesStatus))
}

if status := reconciledRun.Status.TaskRuns["hello-world-1"].Status.GetCondition(apis.ConditionSucceeded).Status; status != tc.conditionSucceeded {
t.Fatalf("Succedded expected to be %s but is %s", tc.conditionSucceeded, status)
}

})
}
}