Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: KServe InferenceService Health Checks support for ModelMesh #20142

Merged
merged 2 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,50 +1,59 @@
-- isInferenceServiceInRawDeploymentMode reports whether the InferenceService is
-- deployed in RawDeployment mode, based on its
-- "serving.kserve.io/deploymentMode" annotation.
-- KServe v0.12 and above supports RawDeployment for Inference graphs (the
-- original note said "v12"; TODO confirm the exact version). For Inference
-- services, KServe has supported RawDeployment mode since
-- [v0.7.0](https://github.com/kserve/kserve/releases/tag/v0.7.0).
--
-- @param obj the resource object; the annotation is read from obj.metadata.annotations
-- @return true only when the annotation is exactly "RawDeployment"; false when
--         the object, its metadata, its annotations, or the annotation itself
--         is missing, or when the annotation holds any other value
function isInferenceServiceInRawDeploymentMode(obj)
  -- Indexing a nil value raises in Lua, so every level of the path is checked
  -- before the annotation lookup.
  if obj == nil or obj.metadata == nil or obj.metadata.annotations == nil then
    return false
  end
  -- Comparing against a string literal is already false for a nil annotation,
  -- so no separate nil check is needed.
  return obj.metadata.annotations["serving.kserve.io/deploymentMode"] == "RawDeployment"
end

local health_status = {}

health_status.status = "Progressing"
health_status.message = "Waiting for status update."
if obj.status ~= nil and obj.status.conditions ~= nil then
local status_true = 0
health_status.message = "Waiting for InferenceService to report status..."

if obj.status ~= nil then

local progressing = false
local degraded = false
local status_false = 0
local status_unknown = 0
health_status.message = ""
for i, condition in pairs(obj.status.conditions) do
if condition.status == "True" and (condition.type == "IngressReady" or condition.type == "PredictorConfigurationReady" or condition.type == "PredictorReady" or condition.type == "PredictorRouteReady" or condition.type == "Ready") then
status_true = status_true + 1
elseif condition.status == "False" or condition.status == "Unknown" then
msg = condition.type .. " is " .. condition.status
if condition.reason ~= nil and condition.reason ~= "" then
msg = msg .. ", since " .. condition.reason .. "."
end
if condition.message ~= nil and condition.message ~= "" then
msg = msg .. " " .. condition.message
end
health_status.message = health_status.message .. msg .. "\n"
if condition.status == "False" then
status_false = status_false + 1
local msg = ""

if obj.status.modelStatus ~= nil then
if obj.status.modelStatus.transitionStatus ~= "UpToDate" then
if obj.status.modelStatus.transitionStatus == "InProgress" then
progressing = true
else
status_unknown = status_unknown + 1
degraded = true
end
msg = msg .. "0: transitionStatus | " .. obj.status.modelStatus.transitionStatus
end
end
if ((isInferenceServiceInRawDeploymentMode(obj) and status_true == 3) or status_true == 5) and status_false == 0 and status_unknown == 0 then
health_status.message = "Inference Service is healthy."
health_status.status = "Healthy"
return health_status
elseif status_false > 0 then
health_status.status = "Degraded"
return health_status
else
health_status.status = "Progressing"
return health_status

if obj.status.conditions ~= nil then
for i, condition in pairs(obj.status.conditions) do

if condition.status == "Unknown" then
status_unknown = status_unknown + 1
elseif condition.status == "False" then
status_false = status_false + 1
end

if condition.status ~= "True" then
msg = msg .. " | " .. i .. ": " .. condition.type .. " | " .. condition.status
if condition.reason ~= nil and condition.reason ~= "" then
msg = msg .. " | " .. condition.reason
end
if condition.message ~= nil and condition.message ~= "" then
msg = msg .. " | " .. condition.message
end
end

end

if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then
health_status.status = "Healthy"
msg = "InferenceService is healthy."
elseif degraded == false and status_unknown >= 0 then
health_status.status = "Progressing"
else
health_status.status = "Degraded"
end

health_status.message = msg
end
end
return health_status

return health_status
Original file line number Diff line number Diff line change
@@ -1,17 +1,41 @@
tests:
- healthStatus:
status: Progressing
message: "PredictorConfigurationReady is Unknown\nPredictorReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\nPredictorRouteReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\nReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\n"
message: ' | 1: PredictorConfigurationReady | Unknown | 2: PredictorReady | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready. | 3: PredictorRouteReady | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready. | 4: Ready | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready.'
inputPath: testdata/progressing.yaml
- healthStatus:
status: Progressing
message: '0: transitionStatus | InProgress | 1: LatestDeploymentReady | Unknown | PredictorConfigurationReady not ready | 2: PredictorConfigurationReady | Unknown | 3: PredictorReady | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 4: PredictorRouteReady | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 5: Ready | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 6: RoutesReady | Unknown | PredictorRouteReady not ready'
inputPath: testdata/progressing_ocp.yaml
- healthStatus:
status: Progressing
message: "0: transitionStatus | InProgress | 1: PredictorReady | False | 2: Ready | False"
inputPath: testdata/progressing_modelmesh.yaml
- healthStatus:
status: Degraded
message: "IngressReady is False, since Predictor ingress not created.\nPredictorConfigurationReady is False, since RevisionFailed. Revision \"helloworld-00002\" failed with message: Container failed with: container exited with no error.\nPredictorReady is False, since RevisionFailed. Revision \"helloworld-00002\" failed with message: Container failed with: container exited with no error.\nReady is False, since Predictor ingress not created.\n"
message: '0: transitionStatus | BlockedByFailedLoad | 1: IngressReady | False | Predictor ingress not created | 2: PredictorConfigurationReady | False | RevisionFailed | Revision "helloworld-00002" failed with message: Container failed with: container exited with no error. | 3: PredictorReady | False | RevisionFailed | Revision "helloworld-00002" failed with message: Container failed with: container exited with no error. | 5: Ready | False | Predictor ingress not created'
inputPath: testdata/degraded.yaml
- healthStatus:
status: Degraded
message: '0: transitionStatus | BlockedByFailedLoad | 1: LatestDeploymentReady | False | PredictorConfigurationReady not ready | 2: PredictorConfigurationReady | False | RevisionFailed | Revision "helloworld-predictor-00002" failed with message: . | 3: PredictorReady | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 4: PredictorRouteReady | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 5: Ready | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 6: RoutesReady | False | PredictorRouteReady not ready'
inputPath: testdata/degraded_ocp.yaml
- healthStatus:
status: Degraded
message: "0: transitionStatus | BlockedByFailedLoad"
inputPath: testdata/degraded_modelmesh.yaml
- healthStatus:
status: Healthy
message: Inference Service is healthy.
message: InferenceService is healthy.
inputPath: testdata/healthy.yaml
- healthStatus:
status: Healthy
message: Inference Service is healthy.
message: InferenceService is healthy.
inputPath: testdata/healthy_ocp.yaml
- healthStatus:
status: Healthy
message: InferenceService is healthy.
inputPath: testdata/healthy_modelmesh.yaml
- healthStatus:
status: Healthy
message: InferenceService is healthy.
inputPath: testdata/healthy_raw.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ status:
reason: Predictor ingress not created
status: "False"
type: Ready
modelStatus:
transitionStatus: BlockedByFailedLoad
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'True'
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'True'
type: Ready
modelStatus:
transitionStatus: BlockedByFailedLoad
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T23:03:45Z'
reason: PredictorConfigurationReady not ready
severity: Info
status: 'False'
type: LatestDeploymentReady
- lastTransitionTime: '2024-05-30T23:03:45Z'
message: 'Revision "helloworld-predictor-00002" failed with message: .'
reason: RevisionFailed
severity: Info
status: 'False'
type: PredictorConfigurationReady
- lastTransitionTime: '2024-05-30T23:03:45Z'
message: Configuration "helloworld-predictor" does not have any ready Revision.
reason: RevisionMissing
status: 'False'
type: PredictorReady
- lastTransitionTime: '2024-05-30T23:03:45Z'
message: Configuration "helloworld-predictor" does not have any ready Revision.
reason: RevisionMissing
severity: Info
status: 'False'
type: PredictorRouteReady
- lastTransitionTime: '2024-05-30T23:03:45Z'
message: Configuration "helloworld-predictor" does not have any ready Revision.
reason: RevisionMissing
status: 'False'
type: Ready
- lastTransitionTime: '2024-05-30T23:03:45Z'
reason: PredictorRouteReady not ready
severity: Info
status: 'False'
type: RoutesReady
modelStatus:
transitionStatus: BlockedByFailedLoad
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'True'
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'True'
type: Ready
modelStatus:
transitionStatus: UpToDate
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:14:31Z'
status: 'True'
type: IngressReady
- lastTransitionTime: '2024-05-30T22:14:30Z'
severity: Info
status: 'True'
type: LatestDeploymentReady
- lastTransitionTime: '2024-05-30T22:14:30Z'
severity: Info
status: 'True'
type: PredictorConfigurationReady
- lastTransitionTime: '2024-05-30T22:14:31Z'
status: 'True'
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:14:31Z'
severity: Info
status: 'True'
type: PredictorRouteReady
- lastTransitionTime: '2024-05-30T22:14:31Z'
status: 'True'
type: Ready
- lastTransitionTime: '2024-05-30T22:14:31Z'
severity: Info
status: 'True'
type: RoutesReady
modelStatus:
transitionStatus: UpToDate
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'False'
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'False'
type: Ready
modelStatus:
transitionStatus: InProgress
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:29:46Z'
reason: PredictorConfigurationReady not ready
severity: Info
status: Unknown
type: LatestDeploymentReady
- lastTransitionTime: '2024-05-30T22:29:46Z'
severity: Info
status: Unknown
type: PredictorConfigurationReady
- lastTransitionTime: '2024-05-30T22:29:46Z'
message: Configuration "helloworld-predictor" is waiting for a Revision to become ready.
reason: RevisionMissing
status: Unknown
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:29:46Z'
message: Configuration "helloworld-predictor" is waiting for a Revision to become ready.
reason: RevisionMissing
severity: Info
status: Unknown
type: PredictorRouteReady
- lastTransitionTime: '2024-05-30T22:29:46Z'
message: Configuration "helloworld-predictor" is waiting for a Revision to become ready.
reason: RevisionMissing
status: Unknown
type: Ready
- lastTransitionTime: '2024-05-30T22:29:46Z'
reason: PredictorRouteReady not ready
severity: Info
status: Unknown
type: RoutesReady
modelStatus:
transitionStatus: InProgress
Loading