diff --git a/resource_customizations/serving.kserve.io/InferenceService/health.lua b/resource_customizations/serving.kserve.io/InferenceService/health.lua index 85da1161f315f..94959de841d59 100644 --- a/resource_customizations/serving.kserve.io/InferenceService/health.lua +++ b/resource_customizations/serving.kserve.io/InferenceService/health.lua @@ -1,50 +1,59 @@ --- isInferenceServiceInRawDeploymentMode determines if the inference service deployed in RawDeployment mode --- KServe v12 and above supports Rawdeployment for Inference graphs. For Inference services, KServe has supported RawDeployment model since [v0.7.0](https://github.com/kserve/kserve/releases/tag/v0.7.0). -function isInferenceServiceInRawDeploymentMode(obj) - if obj.metadata.annotations == nil then - return false - end - local deploymentMode = obj.metadata.annotations["serving.kserve.io/deploymentMode"] - return deploymentMode ~= nil and deploymentMode == "RawDeployment" -end - local health_status = {} + health_status.status = "Progressing" -health_status.message = "Waiting for status update." -if obj.status ~= nil and obj.status.conditions ~= nil then - local status_true = 0 +health_status.message = "Waiting for InferenceService to report status..." + +if obj.status ~= nil then + + local progressing = false + local degraded = false local status_false = 0 local status_unknown = 0 - health_status.message = "" - for i, condition in pairs(obj.status.conditions) do - if condition.status == "True" and (condition.type == "IngressReady" or condition.type == "PredictorConfigurationReady" or condition.type == "PredictorReady" or condition.type == "PredictorRouteReady" or condition.type == "Ready") then - status_true = status_true + 1 - elseif condition.status == "False" or condition.status == "Unknown" then - msg = condition.type .. " is " .. condition.status - if condition.reason ~= nil and condition.reason ~= "" then - msg = msg .. ", since " .. condition.reason .. "." - end - if condition.message ~= nil and condition.message ~= "" then - msg = msg .. " " .. condition.message - end - health_status.message = health_status.message .. msg .. "\n" - if condition.status == "False" then - status_false = status_false + 1 + local msg = "" + + if obj.status.modelStatus ~= nil then + if obj.status.modelStatus.transitionStatus ~= "UpToDate" then + if obj.status.modelStatus.transitionStatus == "InProgress" then + progressing = true else - status_unknown = status_unknown + 1 + degraded = true end + msg = msg .. "0: transitionStatus | " .. obj.status.modelStatus.transitionStatus end end - if ((isInferenceServiceInRawDeploymentMode(obj) and status_true == 3) or status_true == 5) and status_false == 0 and status_unknown == 0 then - health_status.message = "Inference Service is healthy." - health_status.status = "Healthy" - return health_status - elseif status_false > 0 then - health_status.status = "Degraded" - return health_status - else - health_status.status = "Progressing" - return health_status + + if obj.status.conditions ~= nil then + for i, condition in pairs(obj.status.conditions) do + + if condition.status == "Unknown" then + status_unknown = status_unknown + 1 + elseif condition.status == "False" then + status_false = status_false + 1 + end + + if condition.status ~= "True" then + msg = msg .. " | " .. i .. ": " .. condition.type .. " | " .. condition.status + if condition.reason ~= nil and condition.reason ~= "" then + msg = msg .. " | " .. condition.reason + end + if condition.message ~= nil and condition.message ~= "" then + msg = msg .. " | " .. condition.message + end + end + + end + + if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then + health_status.status = "Healthy" + msg = "InferenceService is healthy." + elseif degraded == false and status_unknown >= 0 then + health_status.status = "Progressing" + else + health_status.status = "Degraded" + end + + health_status.message = msg end end -return health_status \ No newline at end of file + +return health_status diff --git a/resource_customizations/serving.kserve.io/InferenceService/health_test.yaml b/resource_customizations/serving.kserve.io/InferenceService/health_test.yaml index 1dc5576f93f3a..670b194f79d41 100644 --- a/resource_customizations/serving.kserve.io/InferenceService/health_test.yaml +++ b/resource_customizations/serving.kserve.io/InferenceService/health_test.yaml @@ -1,17 +1,41 @@ tests: - healthStatus: status: Progressing - message: "PredictorConfigurationReady is Unknown\nPredictorReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\nPredictorRouteReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\nReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\n" + message: ' | 1: PredictorConfigurationReady | Unknown | 2: PredictorReady | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready. | 3: PredictorRouteReady | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready. | 4: Ready | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready.' inputPath: testdata/progressing.yaml +- healthStatus: + status: Progressing + message: '0: transitionStatus | InProgress | 1: LatestDeploymentReady | Unknown | PredictorConfigurationReady not ready | 2: PredictorConfigurationReady | Unknown | 3: PredictorReady | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 4: PredictorRouteReady | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 5: Ready | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 6: RoutesReady | Unknown | PredictorRouteReady not ready' + inputPath: testdata/progressing_ocp.yaml +- healthStatus: + status: Progressing + message: "0: transitionStatus | InProgress | 1: PredictorReady | False | 2: Ready | False" + inputPath: testdata/progressing_modelmesh.yaml - healthStatus: status: Degraded - message: "IngressReady is False, since Predictor ingress not created.\nPredictorConfigurationReady is False, since RevisionFailed. Revision \"helloworld-00002\" failed with message: Container failed with: container exited with no error.\nPredictorReady is False, since RevisionFailed. Revision \"helloworld-00002\" failed with message: Container failed with: container exited with no error.\nReady is False, since Predictor ingress not created.\n" + message: '0: transitionStatus | BlockedByFailedLoad | 1: IngressReady | False | Predictor ingress not created | 2: PredictorConfigurationReady | False | RevisionFailed | Revision "helloworld-00002" failed with message: Container failed with: container exited with no error. | 3: PredictorReady | False | RevisionFailed | Revision "helloworld-00002" failed with message: Container failed with: container exited with no error. | 5: Ready | False | Predictor ingress not created' inputPath: testdata/degraded.yaml +- healthStatus: + status: Degraded + message: '0: transitionStatus | BlockedByFailedLoad | 1: LatestDeploymentReady | False | PredictorConfigurationReady not ready | 2: PredictorConfigurationReady | False | RevisionFailed | Revision "helloworld-predictor-00002" failed with message: . | 3: PredictorReady | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 4: PredictorRouteReady | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 5: Ready | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 6: RoutesReady | False | PredictorRouteReady not ready' + inputPath: testdata/degraded_ocp.yaml +- healthStatus: + status: Degraded + message: "0: transitionStatus | BlockedByFailedLoad" + inputPath: testdata/degraded_modelmesh.yaml - healthStatus: status: Healthy - message: Inference Service is healthy. + message: InferenceService is healthy. inputPath: testdata/healthy.yaml - healthStatus: status: Healthy - message: Inference Service is healthy. + message: InferenceService is healthy. + inputPath: testdata/healthy_ocp.yaml +- healthStatus: + status: Healthy + message: InferenceService is healthy. + inputPath: testdata/healthy_modelmesh.yaml +- healthStatus: + status: Healthy + message: InferenceService is healthy. inputPath: testdata/healthy_raw.yaml diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded.yaml index 0cd337860c670..291e4392f59f8 100644 --- a/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded.yaml +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded.yaml @@ -28,3 +28,5 @@ status: reason: Predictor ingress not created status: "False" type: Ready + modelStatus: + transitionStatus: BlockedByFailedLoad \ No newline at end of file diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded_modelmesh.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded_modelmesh.yaml new file mode 100644 index 0000000000000..54ac46fa59356 --- /dev/null +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded_modelmesh.yaml @@ -0,0 +1,16 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: helloworld + namespace: default +spec: {} +status: + conditions: + - lastTransitionTime: '2024-05-30T22:43:16Z' + status: 'True' + type: PredictorReady + - lastTransitionTime: '2024-05-30T22:43:16Z' + status: 'True' + type: Ready + modelStatus: + transitionStatus: BlockedByFailedLoad diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded_ocp.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded_ocp.yaml new file mode 100644 index 0000000000000..d85c755dea51b --- /dev/null +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/degraded_ocp.yaml @@ -0,0 +1,42 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: helloworld + namespace: default +spec: {} +status: + conditions: + - lastTransitionTime: '2024-05-30T23:03:45Z' + reason: PredictorConfigurationReady not ready + severity: Info + status: 'False' + type: LatestDeploymentReady + - lastTransitionTime: '2024-05-30T23:03:45Z' + message: 'Revision "helloworld-predictor-00002" failed with message: .' + reason: RevisionFailed + severity: Info + status: 'False' + type: PredictorConfigurationReady + - lastTransitionTime: '2024-05-30T23:03:45Z' + message: Configuration "helloworld-predictor" does not have any ready Revision. + reason: RevisionMissing + status: 'False' + type: PredictorReady + - lastTransitionTime: '2024-05-30T23:03:45Z' + message: Configuration "helloworld-predictor" does not have any ready Revision. + reason: RevisionMissing + severity: Info + status: 'False' + type: PredictorRouteReady + - lastTransitionTime: '2024-05-30T23:03:45Z' + message: Configuration "helloworld-predictor" does not have any ready Revision. + reason: RevisionMissing + status: 'False' + type: Ready + - lastTransitionTime: '2024-05-30T23:03:45Z' + reason: PredictorRouteReady not ready + severity: Info + status: 'False' + type: RoutesReady + modelStatus: + transitionStatus: BlockedByFailedLoad diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_modelmesh.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_modelmesh.yaml new file mode 100644 index 0000000000000..290171afe2cdd --- /dev/null +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_modelmesh.yaml @@ -0,0 +1,16 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: helloworld + namespace: default +spec: {} +status: + conditions: + - lastTransitionTime: '2024-05-30T22:43:16Z' + status: 'True' + type: PredictorReady + - lastTransitionTime: '2024-05-30T22:43:16Z' + status: 'True' + type: Ready + modelStatus: + transitionStatus: UpToDate diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_ocp.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_ocp.yaml new file mode 100644 index 0000000000000..9d65c2b379e05 --- /dev/null +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_ocp.yaml @@ -0,0 +1,35 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: helloworld + namespace: default +spec: {} +status: + conditions: + - lastTransitionTime: '2024-05-30T22:14:31Z' + status: 'True' + type: IngressReady + - lastTransitionTime: '2024-05-30T22:14:30Z' + severity: Info + status: 'True' + type: LatestDeploymentReady + - lastTransitionTime: '2024-05-30T22:14:30Z' + severity: Info + status: 'True' + type: PredictorConfigurationReady + - lastTransitionTime: '2024-05-30T22:14:31Z' + status: 'True' + type: PredictorReady + - lastTransitionTime: '2024-05-30T22:14:31Z' + severity: Info + status: 'True' + type: PredictorRouteReady + - lastTransitionTime: '2024-05-30T22:14:31Z' + status: 'True' + type: Ready + - lastTransitionTime: '2024-05-30T22:14:31Z' + severity: Info + status: 'True' + type: RoutesReady + modelStatus: + transitionStatus: UpToDate diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/progressing_modelmesh.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/progressing_modelmesh.yaml new file mode 100644 index 0000000000000..1edb429504e33 --- /dev/null +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/progressing_modelmesh.yaml @@ -0,0 +1,16 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: helloworld + namespace: default +spec: {} +status: + conditions: + - lastTransitionTime: '2024-05-30T22:43:16Z' + status: 'False' + type: PredictorReady + - lastTransitionTime: '2024-05-30T22:43:16Z' + status: 'False' + type: Ready + modelStatus: + transitionStatus: InProgress diff --git a/resource_customizations/serving.kserve.io/InferenceService/testdata/progressing_ocp.yaml b/resource_customizations/serving.kserve.io/InferenceService/testdata/progressing_ocp.yaml new file mode 100644 index 0000000000000..aa476e80cebb4 --- /dev/null +++ b/resource_customizations/serving.kserve.io/InferenceService/testdata/progressing_ocp.yaml @@ -0,0 +1,40 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: helloworld + namespace: default +spec: {} +status: + conditions: + - lastTransitionTime: '2024-05-30T22:29:46Z' + reason: PredictorConfigurationReady not ready + severity: Info + status: Unknown + type: LatestDeploymentReady + - lastTransitionTime: '2024-05-30T22:29:46Z' + severity: Info + status: Unknown + type: PredictorConfigurationReady + - lastTransitionTime: '2024-05-30T22:29:46Z' + message: Configuration "helloworld-predictor" is waiting for a Revision to become ready. + reason: RevisionMissing + status: Unknown + type: PredictorReady + - lastTransitionTime: '2024-05-30T22:29:46Z' + message: Configuration "helloworld-predictor" is waiting for a Revision to become ready. + reason: RevisionMissing + severity: Info + status: Unknown + type: PredictorRouteReady + - lastTransitionTime: '2024-05-30T22:29:46Z' + message: Configuration "helloworld-predictor" is waiting for a Revision to become ready. + reason: RevisionMissing + status: Unknown + type: Ready + - lastTransitionTime: '2024-05-30T22:29:46Z' + reason: PredictorRouteReady not ready + severity: Info + status: Unknown + type: RoutesReady + modelStatus: + transitionStatus: InProgress