From 9412bef9fe76ccb14fcacce85e3ccbbda213a153 Mon Sep 17 00:00:00 2001 From: Cedric Lamoriniere Date: Thu, 7 Sep 2017 18:29:08 +0200 Subject: [PATCH] Update job workload doc with backoff failure policy (#5319) Add to the Jobs documentation how to use the new backoffLimit field that limits the number of Pod failures before considering the Job as failed. --- docs/concepts/workloads/controllers/job.yaml | 1 + .../workloads/controllers/jobs-run-to-completion.md | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/docs/concepts/workloads/controllers/job.yaml b/docs/concepts/workloads/controllers/job.yaml index ece4512a8acfc..eb8af28fb6930 100644 --- a/docs/concepts/workloads/controllers/job.yaml +++ b/docs/concepts/workloads/controllers/job.yaml @@ -12,4 +12,5 @@ spec: image: perl command: ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"] restartPolicy: Never + backoffLimit: 4 diff --git a/docs/concepts/workloads/controllers/jobs-run-to-completion.md b/docs/concepts/workloads/controllers/jobs-run-to-completion.md index 4ba2f74b006ad..401368d0356ba 100644 --- a/docs/concepts/workloads/controllers/jobs-run-to-completion.md +++ b/docs/concepts/workloads/controllers/jobs-run-to-completion.md @@ -198,6 +198,12 @@ sometimes be started twice. If you do specify `.spec.parallelism` and `.spec.completions` both greater than 1, then there may be multiple pods running at once. Therefore, your pods must also be tolerant of concurrency. +### Pod Backoff failure policy + +There are situations where you want to fail a Job after a certain number of retries, for example because of a logical error in its configuration. +To do so, set `.spec.backoffLimit` to specify the number of retries before considering a Job as failed. +The back-off limit is set by default to 6. Failed Pods associated with the Job are recreated by the Job controller with an exponential back-off delay (10s, 20s, 40s ...) 
capped at six minutes. The back-off count is reset if no new failed Pods appear before the Job's next status check. + ## Job Termination and Cleanup When a Job completes, no more Pods are created, but the Pods are not deleted either. Since they are terminated, @@ -232,6 +238,7 @@ spec: image: perl command: ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"] restartPolicy: Never + backoffLimit: 5 ``` Note that both the Job Spec and the Pod Template Spec within the Job have a field with the same name.