From 028ae9eb46dd0be85036a448839222e084f3b07d Mon Sep 17 00:00:00 2001 From: Andrea Frittoli Date: Thu, 7 May 2020 15:44:23 +0100 Subject: [PATCH] Sync the pipelinerun status from the informers When we reconcile a pipelinerun, we should ensure that the pipelinerun status is always in sync with the actual list of taskruns that can be provided by the taskrun informer. The only way to filter taskruns is by labels tekton.dev/pipelinerun. In case an orphaned taskrun is found, we can use the other labels on the taskrun to reconstruct the missing entry in the pipelinerun status. --- pkg/reconciler/pipelinerun/pipelinerun.go | 89 ++++++ .../pipelinerun/pipelinerun_test.go | 292 +++++++++++++++++- .../resources/pipelinerunresolution.go | 6 +- 3 files changed, 380 insertions(+), 7 deletions(-) diff --git a/pkg/reconciler/pipelinerun/pipelinerun.go b/pkg/reconciler/pipelinerun/pipelinerun.go index e297cf8d100..bf5bf4776c2 100644 --- a/pkg/reconciler/pipelinerun/pipelinerun.go +++ b/pkg/reconciler/pipelinerun/pipelinerun.go @@ -21,6 +21,7 @@ import ( "fmt" "path/filepath" "reflect" + "strconv" "strings" "time" @@ -44,6 +45,7 @@ import ( "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/tools/cache" "knative.dev/pkg/apis" "knative.dev/pkg/configmap" @@ -197,6 +199,14 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error { return err } + // Make sure that the PipelineRun status is in sync with the actual TaskRuns + err = c.updatePipelineRunStatusFromInformer(pr) + if err != nil { + // This should not fail. Return the error so we can re-try later. + c.Logger.Errorf("Error while syncing the pipelinerun status: %v", err.Error()) + return err + } + // Reconcile this copy of the pipelinerun and then write back any status or label // updates regardless of whether the reconciliation errored out. if err = c.reconcile(ctx, pr); err != nil { @@ -943,3 +953,82 @@ func storePipelineSpec(ctx context.Context, pr *v1alpha1.PipelineRun, ps *v1alph } return nil } + +func (c *Reconciler) updatePipelineRunStatusFromInformer(pr *v1alpha1.PipelineRun) error { + pipelineRunLabels := getTaskrunLabels(pr, "") + taskRuns, err := c.taskRunLister.TaskRuns(pr.Namespace).List(labels.SelectorFromSet(pipelineRunLabels)) + if err != nil { + c.Logger.Errorf("Could not list TaskRuns %#v", err) + return err + } + // Store a list of Condition TaskRuns for each PipelineTask (by name) + conditionTaskRuns := make(map[string][]*v1alpha1.TaskRun) + // Map PipelineTask names to TaskRun names that were already in the status + taskRunByPipelineTask := make(map[string]string) + // First loop over all the TaskRuns associated to Tasks + for _, taskrun := range taskRuns { + lbls := taskrun.GetLabels() + pipelineTaskName := lbls[pipeline.GroupName+pipeline.PipelineTaskLabelKey] + if _, ok := lbls[pipeline.GroupName+pipeline.ConditionCheckKey]; ok { + // Save condition for looping over them after this + if _, ok := conditionTaskRuns[pipelineTaskName]; !ok { + // If it's the first condition taskrun, initialise the slice + conditionTaskRuns[pipelineTaskName] = []*v1alpha1.TaskRun{} + } + conditionTaskRuns[pipelineTaskName] = append(conditionTaskRuns[pipelineTaskName], taskrun) + continue + } + // Map pipeline task to taskrun name + taskRunByPipelineTask[pipelineTaskName] = taskrun.Name + if _, ok := pr.Status.TaskRuns[taskrun.Name]; !ok { + // This taskrun was missing from the status. + // Add it without conditions, which are handled in the next loop + pr.Status.TaskRuns[taskrun.Name] = &v1alpha1.PipelineRunTaskRunStatus{ + PipelineTaskName: pipelineTaskName, + Status: &taskrun.Status, + ConditionChecks: nil, + } + } + } + // Then loop by pipelinetask name over all the TaskRuns associated to Conditions + for pipelineTaskName, actualConditionTaskRuns := range conditionTaskRuns { + taskRunName, ok := taskRunByPipelineTask[pipelineTaskName] + if !ok { + // The pipelineTask associated to the conditions was not found in the pipelinerun + // status. This means that the conditions were orphaned, and never added to the + // status. In this case we need to generate a new TaskRun name, that will be used + // to run the TaskRun if the conditions are passed. + taskRunName = resources.GetTaskRunName(pr.Status.TaskRuns, pipelineTaskName, pr.Name) + pr.Status.TaskRuns[taskRunName] = &v1alpha1.PipelineRunTaskRunStatus{ + PipelineTaskName: pipelineTaskName, + Status: nil, + ConditionChecks: nil, + } + } + // Build the map of condition checks for the taskrun + // If there were no other condition, initialise the map + conditionChecks := pr.Status.TaskRuns[taskRunName].ConditionChecks + if conditionChecks == nil { + conditionChecks = make(map[string]*v1alpha1.PipelineRunConditionCheckStatus) + } + for i, foundTaskRun := range actualConditionTaskRuns { + lbls := foundTaskRun.GetLabels() + if _, ok := conditionChecks[foundTaskRun.Name]; !ok { + // The condition check was not found, so we need to add it + // We only add the condition name, the status can now be gathered by the + // normal reconcile process + if conditionName, ok := lbls[pipeline.GroupName+pipeline.ConditionNameKey]; ok { + conditionChecks[foundTaskRun.Name] = &v1alpha1.PipelineRunConditionCheckStatus{ + ConditionName: fmt.Sprintf("%s-%s", conditionName, strconv.Itoa(i)), + } + } else { + // The condition name label is missing, so we cannot recover this + c.Logger.Warnf("found an orphaned condition taskrun %#v with missing %s label", + foundTaskRun, pipeline.ConditionNameKey) + } + } + } + pr.Status.TaskRuns[taskRunName].ConditionChecks = conditionChecks + } + return nil +} diff --git a/pkg/reconciler/pipelinerun/pipelinerun_test.go b/pkg/reconciler/pipelinerun/pipelinerun_test.go index 302a3ed35c5..ad1e7ec4baa 100644 --- a/pkg/reconciler/pipelinerun/pipelinerun_test.go +++ b/pkg/reconciler/pipelinerun/pipelinerun_test.go @@ -817,8 +817,8 @@ func TestReconcileOnCompletedPipelineRun(t *testing.T) { t.Fatalf("Expected client to have updated the TaskRun status for a completed PipelineRun, but it did not") } - actual := clients.Pipeline.Actions()[1].(ktesting.UpdateAction).GetObject().(*v1alpha1.PipelineRun) - if actual == nil { + _, ok := clients.Pipeline.Actions()[1].(ktesting.UpdateAction).GetObject().(*v1alpha1.PipelineRun) + if !ok { t.Errorf("Expected a PipelineRun to be updated, but it wasn't.") } t.Log(clients.Pipeline.Actions()) @@ -1932,7 +1932,7 @@ func TestReconcileWithTaskResults(t *testing.T) { ), } trs := []*v1alpha1.TaskRun{ - tb.TaskRun("test-pipeline-run-different-service-accs-a-task-9l9zj", + tb.TaskRun("test-pipeline-run-different-service-accs-a-task-xxyyy", tb.TaskRunNamespace("foo"), tb.TaskRunOwnerReference("PipelineRun", "test-pipeline-run-different-service-accs", tb.OwnerReferenceAPIVersion("tekton.dev/v1alpha1"), @@ -1975,7 +1975,7 @@ func TestReconcileWithTaskResults(t *testing.T) { if err != nil { t.Fatalf("Somehow had error getting completed reconciled run out of fake client: %s", err) } - expectedTaskRunName := "test-pipeline-run-different-service-accs-b-task-mz4c7" + expectedTaskRunName := "test-pipeline-run-different-service-accs-b-task-9l9zj" expectedTaskRun := tb.TaskRun(expectedTaskRunName, tb.TaskRunNamespace("foo"), tb.TaskRunOwnerReference("PipelineRun", "test-pipeline-run-different-service-accs", @@ -2188,3 +2188,287 @@ func Test_storePipelineSpec(t *testing.T) { t.Fatalf(diff.PrintWantGot(d)) } } + +func TestReconcileOutOfSyncPipelineRun(t *testing.T) { + // It may happen that a PipelineRun creates one or more TaskRuns during reconcile + // but it fails to sync the update on the status back. This test verifies that + // the reconciler is able to coverge back to a consistent state with the orphaned + // TaskRuns back in the PipelineRun status. + // For more details, see https://github.com/tektoncd/pipeline/issues/2558 + prOutOfSyncName := "test-pipeline-run-out-of-sync" + helloWorldTask := tb.Task("hello-world", tb.TaskNamespace("foo")) + + // Condition checks for the third task + prccs3 := make(map[string]*v1alpha1.PipelineRunConditionCheckStatus) + conditionCheckName3 := prOutOfSyncName + "hello-world-3-always-true-xxxyyy" + prccs3[conditionCheckName3] = &v1alpha1.PipelineRunConditionCheckStatus{ + ConditionName: "always-true-0", + Status: &v1alpha1.ConditionCheckStatus{ + Status: duckv1beta1.Status{ + Conditions: duckv1beta1.Conditions{ + apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }, + }, + }, + }, + } + // Condition checks for the fourth task + prccs4 := make(map[string]*v1alpha1.PipelineRunConditionCheckStatus) + conditionCheckName4 := prOutOfSyncName + "hello-world-4-always-true-xxxyyy" + prccs4[conditionCheckName4] = &v1alpha1.PipelineRunConditionCheckStatus{ + ConditionName: "always-true-0", + Status: &v1alpha1.ConditionCheckStatus{ + Status: duckv1beta1.Status{ + Conditions: duckv1beta1.Conditions{ + apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }, + }, + }, + }, + } + testPipeline := tb.Pipeline("test-pipeline", tb.PipelineNamespace("foo"), tb.PipelineSpec( + tb.PipelineTask("hello-world-1", helloWorldTask.Name), + tb.PipelineTask("hello-world-2", helloWorldTask.Name), + tb.PipelineTask("hello-world-3", helloWorldTask.Name, tb.PipelineTaskCondition("always-true")), + tb.PipelineTask("hello-world-4", helloWorldTask.Name, tb.PipelineTaskCondition("always-true")))) + + // This taskrun is in the pipelinerun status. It completed successfully. + taskRunDone := tb.TaskRun("test-pipeline-run-out-of-sync-hello-world-1", + tb.TaskRunNamespace("foo"), + tb.TaskRunOwnerReference("PipelineRun", prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineLabelKey, testPipeline.Name), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineRunLabelKey, prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineTaskLabelKey, "hello-world-1"), + tb.TaskRunSpec(tb.TaskRunTaskRef("hello-world")), + tb.TaskRunStatus( + tb.StatusCondition(apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionTrue, + }), + ), + ) + + // This taskrun is *not* in the pipelinerun status. It's still running. + taskRunOrphaned := tb.TaskRun("test-pipeline-run-out-of-sync-hello-world-2", + tb.TaskRunNamespace("foo"), + tb.TaskRunOwnerReference("PipelineRun", prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineLabelKey, testPipeline.Name), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineRunLabelKey, prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineTaskLabelKey, "hello-world-2"), + tb.TaskRunSpec(tb.TaskRunTaskRef("hello-world")), + tb.TaskRunStatus( + tb.StatusCondition(apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }), + ), + ) + + // This taskrun has a condition attached. The condition is in the pipelinerun, but the taskrun + // itself is *not* in the pipelinerun status. It's still running. + taskRunWithCondition := tb.TaskRun("test-pipeline-run-out-of-sync-hello-world-3", + tb.TaskRunNamespace("foo"), + tb.TaskRunOwnerReference("PipelineRun", prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineLabelKey, testPipeline.Name), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineRunLabelKey, prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineTaskLabelKey, "hello-world-3"), + tb.TaskRunSpec(tb.TaskRunTaskRef("hello-world")), + tb.TaskRunStatus( + tb.StatusCondition(apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }), + ), + ) + + taskRunForConditionOfOrphanedTaskRun := tb.TaskRun(conditionCheckName3, + tb.TaskRunNamespace("foo"), + tb.TaskRunOwnerReference("PipelineRun", prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineLabelKey, testPipeline.Name), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineRunLabelKey, prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineTaskLabelKey, "hello-world-3"), + tb.TaskRunLabel(pipeline.GroupName+pipeline.ConditionCheckKey, conditionCheckName3), + tb.TaskRunLabel(pipeline.GroupName+pipeline.ConditionNameKey, "always-true"), + tb.TaskRunSpec(tb.TaskRunTaskRef("always-true-0")), + tb.TaskRunStatus( + tb.StatusCondition(apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }), + ), + ) + + // This taskrun has a condition attached. The condition is *not* the in pipelinerun, and it's still + // running. The taskrun itself was not created yet. + taskRunWithOrphanedConditionName := "test-pipeline-run-out-of-sync-hello-world-4" + + taskRunForOrphanedCondition := tb.TaskRun(conditionCheckName4, + tb.TaskRunNamespace("foo"), + tb.TaskRunOwnerReference("PipelineRun", prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineLabelKey, testPipeline.Name), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineRunLabelKey, prOutOfSyncName), + tb.TaskRunLabel(pipeline.GroupName+pipeline.PipelineTaskLabelKey, "hello-world-4"), + tb.TaskRunLabel(pipeline.GroupName+pipeline.ConditionCheckKey, conditionCheckName4), + tb.TaskRunLabel(pipeline.GroupName+pipeline.ConditionNameKey, "always-true"), + tb.TaskRunSpec(tb.TaskRunTaskRef("always-true-0")), + tb.TaskRunStatus( + tb.StatusCondition(apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }), + ), + ) + + prOutOfSync := tb.PipelineRun(prOutOfSyncName, + tb.PipelineRunNamespace("foo"), + tb.PipelineRunSpec(testPipeline.Name, tb.PipelineRunServiceAccountName("test-sa")), + tb.PipelineRunStatus(tb.PipelineRunStatusCondition(apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + Reason: "", + Message: "", + }), + tb.PipelineRunTaskRunsStatus(taskRunDone.Name, &v1alpha1.PipelineRunTaskRunStatus{ + PipelineTaskName: "hello-world-1", + Status: &v1alpha1.TaskRunStatus{}, + }), + tb.PipelineRunTaskRunsStatus(taskRunWithCondition.Name, &v1alpha1.PipelineRunTaskRunStatus{ + PipelineTaskName: "hello-world-3", + Status: nil, + ConditionChecks: prccs3, + }), + ), + ) + prs := []*v1alpha1.PipelineRun{prOutOfSync} + ps := []*v1alpha1.Pipeline{testPipeline} + ts := []*v1alpha1.Task{helloWorldTask} + trs := []*v1alpha1.TaskRun{taskRunDone, taskRunOrphaned, taskRunWithCondition, + taskRunForOrphanedCondition, taskRunForConditionOfOrphanedTaskRun} + cs := []*v1alpha1.Condition{ + tb.Condition("always-true", tb.ConditionNamespace("foo"), tb.ConditionSpec( + tb.ConditionSpecCheck("", "foo", tb.Args("bar")), + )), + } + d := test.Data{ + PipelineRuns: prs, + Pipelines: ps, + Tasks: ts, + TaskRuns: trs, + Conditions: cs, + } + + testAssets, cancel := getPipelineRunController(t, d) + defer cancel() + c := testAssets.Controller + clients := testAssets.Clients + + if err := c.Reconciler.Reconcile(context.Background(), "foo/"+prOutOfSync.Name); err != nil { + t.Fatalf("Error reconciling: %s", err) + } + + // if len(clients.Pipeline.Actions()) != 2 { + // t.Fatalf("Expected client to have updated the TaskRun status for a completed PipelineRun, but it did not") + // } + + _, ok := clients.Pipeline.Actions()[1].(ktesting.UpdateAction).GetObject().(*v1alpha1.PipelineRun) + if !ok { + t.Errorf("Expected a PipelineRun to be updated, but it wasn't.") + } + t.Log(clients.Pipeline.Actions()) + actions := clients.Pipeline.Actions() + pipelineUpdates := 0 + for _, action := range actions { + if action != nil { + switch { + case action.Matches("create", "taskruns"): + t.Errorf("Expected client to not have created a TaskRun, but it did") + case action.Matches("update", "pipelineruns"): + pipelineUpdates++ + default: + continue + } + } + } + if pipelineUpdates != 2 { + // If only the pipelinerun status changed, we expect one update + t.Fatalf("Expected client to have updated the pipelinerun once, but it did %d times", pipelineUpdates) + } + + // Check that the PipelineRun was reconciled correctly + reconciledRun, err := clients.Pipeline.TektonV1alpha1().PipelineRuns("foo").Get(prOutOfSync.Name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Somehow had error getting completed reconciled run out of fake client: %s", err) + } + + // This PipelineRun should still be running and the status should reflect that + if !reconciledRun.Status.GetCondition(apis.ConditionSucceeded).IsUnknown() { + t.Errorf("Expected PipelineRun status to be running, but was %v", reconciledRun.Status.GetCondition(apis.ConditionSucceeded)) + } + + expectedTaskRunsStatus := make(map[string]*v1alpha1.PipelineRunTaskRunStatus) + // taskRunDone did not change + expectedTaskRunsStatus[taskRunDone.Name] = &v1alpha1.PipelineRunTaskRunStatus{ + PipelineTaskName: "hello-world-1", + Status: &v1alpha1.TaskRunStatus{ + Status: duckv1beta1.Status{ + Conditions: []apis.Condition{ + { + Type: apis.ConditionSucceeded, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + } + // taskRunOrphaned was recovered into the status + expectedTaskRunsStatus[taskRunOrphaned.Name] = &v1alpha1.PipelineRunTaskRunStatus{ + PipelineTaskName: "hello-world-2", + Status: &v1alpha1.TaskRunStatus{ + Status: duckv1beta1.Status{ + Conditions: []apis.Condition{ + { + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }, + }, + }, + }, + } + // taskRunWithCondition was recovered into the status. The condition did not change. + expectedTaskRunsStatus[taskRunWithCondition.Name] = &v1alpha1.PipelineRunTaskRunStatus{ + PipelineTaskName: "hello-world-3", + Status: &v1alpha1.TaskRunStatus{ + Status: duckv1beta1.Status{ + Conditions: []apis.Condition{ + { + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }, + }, + }, + }, + ConditionChecks: prccs3, + } + // taskRunWithOrphanedConditionName had the condition recovered into the status. No taskrun. + expectedTaskRunsStatus[taskRunWithOrphanedConditionName] = &v1alpha1.PipelineRunTaskRunStatus{ + PipelineTaskName: "hello-world-4", + ConditionChecks: prccs4, + } + + // We cannot just diff status directly because the taskrun name for the orphaned condition + // is dynamically generated, but we can change the name to allow us to then diff. + for taskRunName, taskRunStatus := range reconciledRun.Status.TaskRuns { + if strings.HasPrefix(taskRunName, taskRunWithOrphanedConditionName) { + reconciledRun.Status.TaskRuns[taskRunWithOrphanedConditionName] = taskRunStatus + delete(reconciledRun.Status.TaskRuns, taskRunName) + break + } + } + if d := cmp.Diff(reconciledRun.Status.TaskRuns, expectedTaskRunsStatus); d != "" { + t.Fatalf("Expected PipelineRun status to match TaskRun(s) status, but got a mismatch: %s", d) + } +} diff --git a/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go b/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go index 4866226b59a..e2fa94e2396 100644 --- a/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go +++ b/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go @@ -317,7 +317,7 @@ func ResolvePipelineRun( rprt := ResolvedPipelineRunTask{ PipelineTask: &pt, - TaskRunName: getTaskRunName(pipelineRun.Status.TaskRuns, pt.Name, pipelineRun.Name), + TaskRunName: GetTaskRunName(pipelineRun.Status.TaskRuns, pt.Name, pipelineRun.Name), } // Find the Task that this PipelineTask is using @@ -397,8 +397,8 @@ func getConditionCheckName(taskRunStatus map[string]*v1alpha1.PipelineRunTaskRun return names.SimpleNameGenerator.RestrictLengthWithRandomSuffix(fmt.Sprintf("%s-%s", trName, conditionRegisterName)) } -// getTaskRunName should return a unique name for a `TaskRun` if one has not already been defined, and the existing one otherwise. -func getTaskRunName(taskRunsStatus map[string]*v1alpha1.PipelineRunTaskRunStatus, ptName, prName string) string { +// GetTaskRunName should return a unique name for a `TaskRun` if one has not already been defined, and the existing one otherwise. +func GetTaskRunName(taskRunsStatus map[string]*v1alpha1.PipelineRunTaskRunStatus, ptName, prName string) string { for k, v := range taskRunsStatus { if v.PipelineTaskName == ptName { return k