Add granular termination reason in container termination message

Related to tektoncd#7539 and tektoncd#7223. To report a specific termination reason for each Step, we need to know why its container finished; we use the termination message to store a new "state" with this information, and we add a new field to store this information per step. Co-authored-by: JeromeJu <[email protected]> Co-authored-by: Chitrang Patel <[email protected]>
renzodavid9 · Jan 13, 2024 · 19ccad8 · 19ccad8
1 parent 933f2a0
commit 19ccad8
Show file tree

Hide file tree

Showing 13 changed files with 751 additions and 33 deletions.
diff --git a/cmd/entrypoint/main.go b/cmd/entrypoint/main.go
@@ -178,7 +178,7 @@ func main() {
 	if err := e.Go(); err != nil {
 		breakpointExitPostFile := e.PostFile + breakpointExitSuffix
 		switch t := err.(type) { //nolint:errorlint // checking for multiple types with errors.As is ugly.
-		case skipError:
+		case entrypoint.SkipError:
 			log.Print("Skipping step because a previous step failed")
 			os.Exit(1)
 		case termination.MessageLengthError:

diff --git a/cmd/entrypoint/waiter.go b/cmd/entrypoint/waiter.go
@@ -71,7 +71,7 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool,
 			if breakpointOnFailure {
 				return nil
 			}
-			return skipError("error file present, bail and skip the step")
+			return entrypoint.ErrSkipPreviousStepFailed
 		}
 		select {
 		case <-ctx.Done():
@@ -86,9 +86,3 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool,
 		}
 	}
 }
-
-type skipError string
-
-func (e skipError) Error() string {
-	return string(e)
-}
diff --git a/cmd/entrypoint/waiter_test.go b/cmd/entrypoint/waiter_test.go
@@ -153,7 +153,7 @@ func TestRealWaiterWaitWithErrorWaitfile(t *testing.T) {
 		if err == nil {
 			t.Errorf("expected skipError upon encounter error waitfile")
 		}
-		var skipErr skipError
+		var skipErr entrypoint.SkipError
 		if errors.As(err, &skipErr) {
 			close(doneCh)
 		} else {
@@ -292,7 +292,7 @@ func TestRealWaiterWaitContextWithErrorWaitfile(t *testing.T) {
 		if err == nil {
 			t.Errorf("expected skipError upon encounter error waitfile")
 		}
-		var skipErr skipError
+		var skipErr entrypoint.SkipError
 		if errors.As(err, &skipErr) {
 			close(doneCh)
 		} else {

diff --git a/docs/pipeline-api.md b/docs/pipeline-api.md
@@ -4638,6 +4638,16 @@ string
 <td>
 </td>
 </tr>
+<tr>
+<td>
+<code>terminationReason</code><br/>
+<em>
+string
+</em>
+</td>
+<td>
+</td>
+</tr>
 </tbody>
 </table>
 <h3 id="tekton.dev/v1.StepTemplate">StepTemplate

diff --git a/pkg/apis/pipeline/v1/openapi_generated.go b/pkg/apis/pipeline/v1/openapi_generated.go
diff --git a/pkg/apis/pipeline/v1/swagger.json b/pkg/apis/pipeline/v1/swagger.json
@@ -1609,6 +1609,9 @@
           "description": "Details about a terminated container",
           "$ref": "#/definitions/v1.ContainerStateTerminated"
         },
+        "terminationReason": {
+          "type": "string"
+        },
         "waiting": {
           "description": "Details about a waiting container",
           "$ref": "#/definitions/v1.ContainerStateWaiting"

diff --git a/pkg/apis/pipeline/v1/taskrun_types.go b/pkg/apis/pipeline/v1/taskrun_types.go
@@ -357,6 +357,7 @@ type StepState struct {
 	Container             string              `json:"container,omitempty"`
 	ImageID               string              `json:"imageID,omitempty"`
 	Results               []TaskRunStepResult `json:"results,omitempty"`
+	TerminationReason     string              `json:"terminationReason,omitempty"`
 }
 
 // SidecarState reports the results of running a sidecar in a Task.

diff --git a/pkg/entrypoint/entrypointer.go b/pkg/entrypoint/entrypointer.go
@@ -53,11 +53,19 @@ func (e ContextError) Error() string {
 	return string(e)
 }
 
+type SkipError string
+
+func (e SkipError) Error() string {
+	return string(e)
+}
+
 var (
 	// ErrContextDeadlineExceeded is the error returned when the context deadline is exceeded
 	ErrContextDeadlineExceeded = ContextError(context.DeadlineExceeded.Error())
 	// ErrContextCanceled is the error returned when the context is canceled
 	ErrContextCanceled = ContextError(context.Canceled.Error())
+	// ErrSkipPreviousStepFailed is the error returned when the step is skipped due to previous step error
+	ErrSkipPreviousStepFailed = SkipError("error file present, bail and skip the step")
 )
 
 // IsContextDeadlineError determine whether the error is context deadline
@@ -165,6 +173,11 @@ func (e Entrypointer) Go() error {
 				Value:      time.Now().Format(timeFormat),
 				ResultType: result.InternalTektonResultType,
 			})
+
+			if errors.Is(err, ErrSkipPreviousStepFailed) {
+				output = append(output, e.outputRunResult(pod.TerminationReasonSkipped))
+			}
+
 			return err
 		}
 	}
@@ -194,26 +207,18 @@ func (e Entrypointer) Go() error {
 			}
 		}()
 		err = e.Runner.Run(ctx, e.Command...)
-		if errors.Is(err, ErrContextDeadlineExceeded) {
-			output = append(output, result.RunResult{
-				Key:        "Reason",
-				Value:      "TimeoutExceeded",
-				ResultType: result.InternalTektonResultType,
-			})
-		}
 	}
 
 	var ee *exec.ExitError
 	switch {
 	case err != nil && errors.Is(err, ErrContextCanceled):
 		logger.Info("Step was canceling")
-		output = append(output, result.RunResult{
-			Key:        "Reason",
-			Value:      "Cancelled",
-			ResultType: result.InternalTektonResultType,
-		})
+		output = append(output, e.outputRunResult(pod.TerminationReasonCancelled))
 		e.WritePostFile(e.PostFile, ErrContextCanceled)
 		e.WriteExitCodeFile(e.StepMetadataDir, syscall.SIGKILL.String())
+	case errors.Is(err, ErrContextDeadlineExceeded):
+		e.WritePostFile(e.PostFile, err)
+		output = append(output, e.outputRunResult(pod.TerminationReasonTimeoutExceeded))
 	case err != nil && e.BreakpointOnFailure:
 		logger.Info("Skipping writing to PostFile")
 	case e.OnError == ContinueOnError && errors.As(err, &ee):
@@ -336,3 +341,12 @@ func (e Entrypointer) waitingCancellation(ctx context.Context, cancel context.Ca
 	cancel()
 	return nil
 }
+
+// outputRunResult returns the run reason for a termination
+func (e Entrypointer) outputRunResult(terminationReason string) result.RunResult {
+	return result.RunResult{
+		Key:        "Reason",
+		Value:      terminationReason,
+		ResultType: result.InternalTektonResultType,
+	}
+}
diff --git a/pkg/entrypoint/entrypointer_test.go b/pkg/entrypoint/entrypointer_test.go
@@ -747,29 +747,203 @@ func TestIsContextCanceledError(t *testing.T) {
 	}
 }
 
+func TestTerminationReason(t *testing.T) {
+	tests := []struct {
+		desc              string
+		waitFiles         []string
+		onError           string
+		runError          error
+		expectedRunErr    error
+		expectedExitCode  *string
+		expectedWrotefile *string
+		expectedStatus    []result.RunResult
+	}{
+		{
+			desc:              "reason completed",
+			expectedExitCode:  ptr("0"),
+			expectedWrotefile: ptr("postfile"),
+			expectedStatus: []result.RunResult{
+				{
+					Key:        "StartedAt",
+					ResultType: result.InternalTektonResultType,
+				},
+			},
+		},
+		{
+			desc:              "reason continued",
+			onError:           ContinueOnError,
+			runError:          ptr(exec.ExitError{}),
+			expectedRunErr:    ptr(exec.ExitError{}),
+			expectedExitCode:  ptr("-1"),
+			expectedWrotefile: ptr("postfile"),
+			expectedStatus: []result.RunResult{
+				{
+					Key:        "ExitCode",
+					Value:      "-1",
+					ResultType: result.InternalTektonResultType,
+				},
+				{
+					Key:        "StartedAt",
+					ResultType: result.InternalTektonResultType,
+				},
+			},
+		},
+		{
+			desc:              "reason errored",
+			runError:          ptr(exec.Error{}),
+			expectedRunErr:    ptr(exec.Error{}),
+			expectedWrotefile: ptr("postfile.err"),
+			expectedStatus: []result.RunResult{
+				{
+					Key:        "StartedAt",
+					ResultType: result.InternalTektonResultType,
+				},
+			},
+		},
+		{
+			desc:              "reason timedout",
+			runError:          ErrContextDeadlineExceeded,
+			expectedRunErr:    ErrContextDeadlineExceeded,
+			expectedWrotefile: ptr("postfile.err"),
+			expectedStatus: []result.RunResult{
+				{
+					Key:        "Reason",
+					Value:      pod.TerminationReasonTimeoutExceeded,
+					ResultType: result.InternalTektonResultType,
+				},
+				{
+					Key:        "StartedAt",
+					ResultType: result.InternalTektonResultType,
+				},
+			},
+		},
+		{
+			desc:              "reason skipped",
+			waitFiles:         []string{"file"},
+			expectedRunErr:    ErrSkipPreviousStepFailed,
+			expectedWrotefile: ptr("postfile.err"),
+			expectedStatus: []result.RunResult{
+				{
+					Key:        "Reason",
+					Value:      pod.TerminationReasonSkipped,
+					ResultType: result.InternalTektonResultType,
+				},
+				{
+					Key:        "StartedAt",
+					ResultType: result.InternalTektonResultType,
+				},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.desc, func(t *testing.T) {
+			fw, fr, fpw := &fakeWaiter{skipStep: true}, &fakeRunner{runError: test.runError}, &fakePostWriter{}
+
+			tmpFolder, err := os.MkdirTemp("", "")
+			if err != nil {
+				t.Fatalf("unexpected error creating temporary folder: %v", err)
+			} else {
+				defer os.RemoveAll(tmpFolder)
+			}
+
+			terminationFile, err := os.CreateTemp(tmpFolder, "termination")
+			if err != nil {
+				t.Fatalf("unexpected error creating termination file: %v", err)
+			}
+
+			e := Entrypointer{
+				Command:             append([]string{}, []string{}...),
+				WaitFiles:           test.waitFiles,
+				PostFile:            "postfile",
+				Waiter:              fw,
+				Runner:              fr,
+				PostWriter:          fpw,
+				TerminationPath:     terminationFile.Name(),
+				BreakpointOnFailure: false,
+				StepMetadataDir:     tmpFolder,
+				OnError:             test.onError,
+			}
+
+			err = e.Go()
+
+			if d := cmp.Diff(test.expectedRunErr, err); d != "" {
+				t.Fatalf("entrypoint error doesn't match %s", diff.PrintWantGot(d))
+			}
+
+			if d := cmp.Diff(test.expectedExitCode, fpw.exitCode); d != "" {
+				t.Fatalf("exitCode doesn't match %s", diff.PrintWantGot(d))
+			}
+
+			if d := cmp.Diff(test.expectedWrotefile, fpw.wrote); d != "" {
+				t.Fatalf("wrote file doesn't match %s", diff.PrintWantGot(d))
+			}
+
+			termination, err := getTermination(t, terminationFile.Name())
+			if err != nil {
+				t.Fatalf("error getting termination output: %v", err)
+			}
+
+			if d := cmp.Diff(test.expectedStatus, termination); d != "" {
+				t.Fatalf("termination status doesn't match %s", diff.PrintWantGot(d))
+			}
+		})
+	}
+}
+
+func getTermination(t *testing.T, terminationFile string) ([]result.RunResult, error) {
+	t.Helper()
+	fileContents, err := os.ReadFile(terminationFile)
+	if err != nil {
+		return nil, err
+	}
+
+	logger, _ := logging.NewLogger("", "status")
+	terminationStatus, err := termination.ParseMessage(logger, string(fileContents))
+	if err != nil {
+		return nil, err
+	}
+
+	for i, termination := range terminationStatus {
+		if termination.Key == "StartedAt" {
+			terminationStatus[i].Value = ""
+		}
+	}
+
+	return terminationStatus, nil
+}
+
 type fakeWaiter struct {
 	sync.Mutex
 	waited             []string
 	waitCancelDuration time.Duration
+	skipStep           bool
 }
 
 func (f *fakeWaiter) Wait(ctx context.Context, file string, _ bool, _ bool) error {
-	if file == pod.DownwardMountCancelFile && f.waitCancelDuration > 0 {
+	switch {
+	case file == pod.DownwardMountCancelFile && f.waitCancelDuration > 0:
 		time.Sleep(f.waitCancelDuration)
-	} else if file == pod.DownwardMountCancelFile {
+	case file == pod.DownwardMountCancelFile:
 		return nil
+	case f.skipStep:
+		return ErrSkipPreviousStepFailed
 	}
+
 	f.Lock()
 	f.waited = append(f.waited, file)
 	f.Unlock()
 	return nil
 }
 
-type fakeRunner struct{ args *[]string }
+type fakeRunner struct {
+	args     *[]string
+	runError error
+}
 
 func (f *fakeRunner) Run(ctx context.Context, args ...string) error {
 	f.args = &args
-	return nil
+	return f.runError
 }
 
 type fakePostWriter struct {
@@ -903,3 +1077,7 @@ func getMockSpireClient(ctx context.Context) (spire.EntrypointerAPIClient, spire
 
 	return sc, sc, tr
 }
+
+func ptr[T any](value T) *T {
+	return &value
+}