Skip to content

Commit

Permalink
Add granular termination reason in container termination message
Browse files Browse the repository at this point in the history
Related to tektoncd#7223.

To report specific termination reasons for Steps, we need to know why each Step's container finished; we use the termination message to store a new "state" with this information. We evaluated changing the container `reason` directly, but it looks like k8s doesn't allow this.
  • Loading branch information
renzodavid9 committed Nov 17, 2023
1 parent 97184c3 commit 05bef69
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 20 deletions.
2 changes: 1 addition & 1 deletion cmd/entrypoint/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ func main() {
if err := e.Go(); err != nil {
breakpointExitPostFile := e.PostFile + breakpointExitSuffix
switch t := err.(type) { //nolint:errorlint // checking for multiple types with errors.As is ugly.
case skipError:
case entrypoint.SkipError:
log.Print("Skipping step because a previous step failed")
os.Exit(1)
case termination.MessageLengthError:
Expand Down
8 changes: 1 addition & 7 deletions cmd/entrypoint/waiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool,
if breakpointOnFailure {
return nil
}
return skipError("error file present, bail and skip the step")
return entrypoint.SkipErroPreviousStepFailed
}
select {
case <-ctx.Done():
Expand All @@ -86,9 +86,3 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool,
}
}
}

// skipError is a string-backed error type; the waiter returns it when the
// error file is present and the step has to be skipped.
type skipError string

// Error implements the error interface by returning the underlying string.
func (s skipError) Error() string {
	return string(s)
}
44 changes: 32 additions & 12 deletions pkg/entrypoint/entrypointer.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,19 @@ func (e ContextError) Error() string {
return string(e)
}

// SkipError is a string-backed error type used to signal that a step was
// skipped rather than executed.
type SkipError string

// Error implements the error interface by returning the underlying string.
func (s SkipError) Error() string {
	return string(s)
}

var (
	// ErrContextDeadlineExceeded is the error returned when the context deadline is exceeded
	ErrContextDeadlineExceeded = ContextError(context.DeadlineExceeded.Error())
	// ErrContextCanceled is the error returned when the context is canceled
	ErrContextCanceled = ContextError(context.Canceled.Error())
	// ErrSkipPreviousStepFailed is the error returned when the step is skipped
	// because a previous step failed (its error file is present).
	ErrSkipPreviousStepFailed = SkipError("error file present, bail and skip the step")
	// SkipErroPreviousStepFailed is the original, misspelled name of
	// ErrSkipPreviousStepFailed. It holds the same comparable value, so
	// errors.Is matches against either name; it is kept so that existing
	// callers keep compiling. New code should use ErrSkipPreviousStepFailed.
	SkipErroPreviousStepFailed = ErrSkipPreviousStepFailed
)

// IsContextDeadlineError determine whether the error is context deadline
Expand Down Expand Up @@ -160,6 +168,11 @@ func (e Entrypointer) Go() error {
Value: time.Now().Format(timeFormat),
ResultType: result.InternalTektonResultType,
})

if errors.Is(err, SkipErroPreviousStepFailed) {
output = append(output, e.getTerminationReason(result.TerminationReasonSkipped))
}

return err
}
}
Expand Down Expand Up @@ -189,28 +202,22 @@ func (e Entrypointer) Go() error {
}
}()
err = e.Runner.Run(ctx, e.Command...)
if errors.Is(err, ErrContextDeadlineExceeded) {
output = append(output, result.RunResult{
Key: "Reason",
Value: "TimeoutExceeded",
ResultType: result.InternalTektonResultType,
})
}
}

var ee *exec.ExitError
switch {
case err != nil && errors.Is(err, ErrContextCanceled):
logger.Info("Step was canceling")
output = append(output, result.RunResult{
Key: "Reason",
Value: "Cancelled",
ResultType: result.InternalTektonResultType,
})
e.WritePostFile(e.PostFile, ErrContextCanceled)
e.WriteExitCodeFile(e.StepMetadataDir, syscall.SIGKILL.String())
output = append(output, e.getTerminationReason(result.TerminationReasonCancelled))

case errors.Is(err, ErrContextDeadlineExceeded):
output = append(output, e.getTerminationReason(result.TerminationReasonTimeoutExceeded))

case err != nil && e.BreakpointOnFailure:
logger.Info("Skipping writing to PostFile")

case e.OnError == ContinueOnError && errors.As(err, &ee):
// with continue on error and an ExitError, write non-zero exit code and a post file
exitCode := strconv.Itoa(ee.ExitCode())
Expand All @@ -221,13 +228,18 @@ func (e Entrypointer) Go() error {
})
e.WritePostFile(e.PostFile, nil)
e.WriteExitCodeFile(e.StepMetadataDir, exitCode)
output = append(output, e.getTerminationReason(result.TerminationReasonContinued))

case err == nil:
// if err is nil, write zero exit code and a post file
e.WritePostFile(e.PostFile, nil)
e.WriteExitCodeFile(e.StepMetadataDir, "0")
output = append(output, e.getTerminationReason(result.TerminationReasonCompleted))

default:
// for a step without continue on error and any error, write a post file with .err
e.WritePostFile(e.PostFile, err)
output = append(output, e.getTerminationReason(result.TerminationReasonError))
}

// strings.Split(..) with an empty string returns an array that contains one element, an empty string.
Expand Down Expand Up @@ -317,3 +329,11 @@ func (e Entrypointer) waitingCancellation(ctx context.Context, cancel context.Ca
cancel()
return nil
}

// getTerminationReason wraps terminationReason in a RunResult stored under the
// "Reason" key and typed as an internal Tekton result. The receiver is not
// consulted; the method only builds and returns the value.
func (Entrypointer) getTerminationReason(terminationReason string) result.RunResult {
	reason := result.RunResult{ResultType: result.InternalTektonResultType}
	reason.Key = "Reason"
	reason.Value = terminationReason
	return reason
}
13 changes: 13 additions & 0 deletions pkg/result/result.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,19 @@ const (
InternalTektonResultType = 3
// UnknownResultType default unknown result type value
UnknownResultType = 10

// TerminationReasonCompleted indicates a step finished successfully.
TerminationReasonCompleted = "Completed"
// TerminationReasonContinued indicates a step errored but was ignored since onError was set to continue.
TerminationReasonContinued = "Continued"
// TerminationReasonError indicates a step failed with a non-zero exit code.
TerminationReasonError = "Error"
// TerminationReasonTimeoutExceeded indicates a step execution timed out.
TerminationReasonTimeoutExceeded = "TimeoutExceeded"
// TerminationReasonSkipped indicates a step execution was skipped because a previous step failed.
TerminationReasonSkipped = "Skipped"
// TerminationReasonCancelled indicates a step was cancelled by user.
TerminationReasonCancelled = "Cancelled"
)

// RunResult is used to write key/value pairs to TaskRun pod termination messages.
Expand Down

0 comments on commit 05bef69

Please sign in to comment.