forked from argoproj/argo-cd
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: update health check to support modelmesh (argoproj#20142)
Signed-off-by: Trevor Royer <[email protected]> Co-authored-by: Dan Garfield <[email protected]> Signed-off-by: Moleus <[email protected]>
- Loading branch information
Showing
9 changed files
with
244 additions
and
44 deletions.
There are no files selected for viewing
89 changes: 49 additions & 40 deletions
89
resource_customizations/serving.kserve.io/InferenceService/health.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,59 @@ | ||
-- isInferenceServiceInRawDeploymentMode reports whether the InferenceService
-- carries the annotation serving.kserve.io/deploymentMode = "RawDeployment".
-- Returns false when the object has no annotations or the annotation is absent
-- or set to any other value.
-- Background: KServe "v12" (sic -- presumably v0.12, TODO confirm) and above
-- supports RawDeployment for Inference graphs. For Inference services, KServe
-- has supported RawDeployment mode since
-- [v0.7.0](https://github.com/kserve/kserve/releases/tag/v0.7.0).
function isInferenceServiceInRawDeploymentMode(obj)
  local annotations = obj.metadata.annotations
  if annotations ~= nil then
    -- Comparing a missing (nil) annotation against the string yields false,
    -- so no separate nil check on the looked-up value is needed.
    return annotations["serving.kserve.io/deploymentMode"] == "RawDeployment"
  end
  return false
end
|
||
local health_status = {} | ||
|
||
health_status.status = "Progressing" | ||
health_status.message = "Waiting for status update." | ||
if obj.status ~= nil and obj.status.conditions ~= nil then | ||
local status_true = 0 | ||
health_status.message = "Waiting for InferenceService to report status..." | ||
|
||
if obj.status ~= nil then | ||
|
||
local progressing = false | ||
local degraded = false | ||
local status_false = 0 | ||
local status_unknown = 0 | ||
health_status.message = "" | ||
for i, condition in pairs(obj.status.conditions) do | ||
if condition.status == "True" and (condition.type == "IngressReady" or condition.type == "PredictorConfigurationReady" or condition.type == "PredictorReady" or condition.type == "PredictorRouteReady" or condition.type == "Ready") then | ||
status_true = status_true + 1 | ||
elseif condition.status == "False" or condition.status == "Unknown" then | ||
msg = condition.type .. " is " .. condition.status | ||
if condition.reason ~= nil and condition.reason ~= "" then | ||
msg = msg .. ", since " .. condition.reason .. "." | ||
end | ||
if condition.message ~= nil and condition.message ~= "" then | ||
msg = msg .. " " .. condition.message | ||
end | ||
health_status.message = health_status.message .. msg .. "\n" | ||
if condition.status == "False" then | ||
status_false = status_false + 1 | ||
local msg = "" | ||
|
||
if obj.status.modelStatus ~= nil then | ||
if obj.status.modelStatus.transitionStatus ~= "UpToDate" then | ||
if obj.status.modelStatus.transitionStatus == "InProgress" then | ||
progressing = true | ||
else | ||
status_unknown = status_unknown + 1 | ||
degraded = true | ||
end | ||
msg = msg .. "0: transitionStatus | " .. obj.status.modelStatus.transitionStatus | ||
end | ||
end | ||
if ((isInferenceServiceInRawDeploymentMode(obj) and status_true == 3) or status_true == 5) and status_false == 0 and status_unknown == 0 then | ||
health_status.message = "Inference Service is healthy." | ||
health_status.status = "Healthy" | ||
return health_status | ||
elseif status_false > 0 then | ||
health_status.status = "Degraded" | ||
return health_status | ||
else | ||
health_status.status = "Progressing" | ||
return health_status | ||
|
||
-- Fold the reported conditions into the final health verdict.
-- Uses locals declared earlier in this script: `msg`, `status_unknown`,
-- `status_false`, `progressing`, `degraded` and `health_status`.
if obj.status.conditions ~= nil then
  -- ipairs walks the list in index order, so the "<index>: <type>" entries
  -- appended to the message come out in a stable, ascending order. pairs
  -- makes no ordering guarantee.
  for i, condition in ipairs(obj.status.conditions) do

    -- Count conditions that are not yet satisfied.
    if condition.status == "Unknown" then
      status_unknown = status_unknown + 1
    elseif condition.status == "False" then
      status_false = status_false + 1
    end

    -- Every condition that is not "True" is appended to the message as
    -- " | <index>: <type> | <status>[ | <reason>][ | <message>]".
    if condition.status ~= "True" then
      msg = msg .. " | " .. i .. ": " .. condition.type .. " | " .. condition.status
      if condition.reason ~= nil and condition.reason ~= "" then
        msg = msg .. " | " .. condition.reason
      end
      if condition.message ~= nil and condition.message ~= "" then
        msg = msg .. " | " .. condition.message
      end
    end

  end

  if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then
    -- Nothing in flight and every condition is "True".
    health_status.status = "Healthy"
    msg = "InferenceService is healthy."
  elseif degraded == false then
    -- Anything short of healthy that was not flagged as degraded is reported
    -- as Progressing. (The former extra test `status_unknown >= 0` was always
    -- true for a counter and has been dropped without changing the outcome.)
    health_status.status = "Progressing"
  else
    health_status.status = "Degraded"
  end

  health_status.message = msg
end
end | ||
return health_status | ||
|
||
return health_status |
32 changes: 28 additions & 4 deletions
32
resource_customizations/serving.kserve.io/InferenceService/health_test.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,41 @@ | ||
tests: | ||
- healthStatus: | ||
status: Progressing | ||
message: "PredictorConfigurationReady is Unknown\nPredictorReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\nPredictorRouteReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\nReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\n" | ||
message: ' | 1: PredictorConfigurationReady | Unknown | 2: PredictorReady | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready. | 3: PredictorRouteReady | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready. | 4: Ready | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready.' | ||
inputPath: testdata/progressing.yaml | ||
- healthStatus: | ||
status: Progressing | ||
message: '0: transitionStatus | InProgress | 1: LatestDeploymentReady | Unknown | PredictorConfigurationReady not ready | 2: PredictorConfigurationReady | Unknown | 3: PredictorReady | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 4: PredictorRouteReady | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 5: Ready | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 6: RoutesReady | Unknown | PredictorRouteReady not ready' | ||
inputPath: testdata/progressing_ocp.yaml | ||
- healthStatus: | ||
status: Progressing | ||
message: "0: transitionStatus | InProgress | 1: PredictorReady | False | 2: Ready | False" | ||
inputPath: testdata/progressing_modelmesh.yaml | ||
- healthStatus: | ||
status: Degraded | ||
message: "IngressReady is False, since Predictor ingress not created.\nPredictorConfigurationReady is False, since RevisionFailed. Revision \"helloworld-00002\" failed with message: Container failed with: container exited with no error.\nPredictorReady is False, since RevisionFailed. Revision \"helloworld-00002\" failed with message: Container failed with: container exited with no error.\nReady is False, since Predictor ingress not created.\n" | ||
message: '0: transitionStatus | BlockedByFailedLoad | 1: IngressReady | False | Predictor ingress not created | 2: PredictorConfigurationReady | False | RevisionFailed | Revision "helloworld-00002" failed with message: Container failed with: container exited with no error. | 3: PredictorReady | False | RevisionFailed | Revision "helloworld-00002" failed with message: Container failed with: container exited with no error. | 5: Ready | False | Predictor ingress not created' | ||
inputPath: testdata/degraded.yaml | ||
- healthStatus: | ||
status: Degraded | ||
message: '0: transitionStatus | BlockedByFailedLoad | 1: LatestDeploymentReady | False | PredictorConfigurationReady not ready | 2: PredictorConfigurationReady | False | RevisionFailed | Revision "helloworld-predictor-00002" failed with message: . | 3: PredictorReady | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 4: PredictorRouteReady | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 5: Ready | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 6: RoutesReady | False | PredictorRouteReady not ready' | ||
inputPath: testdata/degraded_ocp.yaml | ||
- healthStatus: | ||
status: Degraded | ||
message: "0: transitionStatus | BlockedByFailedLoad" | ||
inputPath: testdata/degraded_modelmesh.yaml | ||
- healthStatus: | ||
status: Healthy | ||
message: Inference Service is healthy. | ||
message: InferenceService is healthy. | ||
inputPath: testdata/healthy.yaml | ||
- healthStatus: | ||
status: Healthy | ||
message: Inference Service is healthy. | ||
message: InferenceService is healthy. | ||
inputPath: testdata/healthy_ocp.yaml | ||
- healthStatus: | ||
status: Healthy | ||
message: InferenceService is healthy. | ||
inputPath: testdata/healthy_modelmesh.yaml | ||
- healthStatus: | ||
status: Healthy | ||
message: InferenceService is healthy. | ||
inputPath: testdata/healthy_raw.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
resource_customizations/serving.kserve.io/InferenceService/testdata/degraded_modelmesh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
apiVersion: serving.kserve.io/v1beta1 | ||
kind: InferenceService | ||
metadata: | ||
name: helloworld | ||
namespace: default | ||
spec: {} | ||
status: | ||
conditions: | ||
- lastTransitionTime: '2024-05-30T22:43:16Z' | ||
status: 'True' | ||
type: PredictorReady | ||
- lastTransitionTime: '2024-05-30T22:43:16Z' | ||
status: 'True' | ||
type: Ready | ||
modelStatus: | ||
transitionStatus: BlockedByFailedLoad |
42 changes: 42 additions & 0 deletions
42
resource_customizations/serving.kserve.io/InferenceService/testdata/degraded_ocp.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
apiVersion: serving.kserve.io/v1beta1 | ||
kind: InferenceService | ||
metadata: | ||
name: helloworld | ||
namespace: default | ||
spec: {} | ||
status: | ||
conditions: | ||
- lastTransitionTime: '2024-05-30T23:03:45Z' | ||
reason: PredictorConfigurationReady not ready | ||
severity: Info | ||
status: 'False' | ||
type: LatestDeploymentReady | ||
- lastTransitionTime: '2024-05-30T23:03:45Z' | ||
message: 'Revision "helloworld-predictor-00002" failed with message: .' | ||
reason: RevisionFailed | ||
severity: Info | ||
status: 'False' | ||
type: PredictorConfigurationReady | ||
- lastTransitionTime: '2024-05-30T23:03:45Z' | ||
message: Configuration "helloworld-predictor" does not have any ready Revision. | ||
reason: RevisionMissing | ||
status: 'False' | ||
type: PredictorReady | ||
- lastTransitionTime: '2024-05-30T23:03:45Z' | ||
message: Configuration "helloworld-predictor" does not have any ready Revision. | ||
reason: RevisionMissing | ||
severity: Info | ||
status: 'False' | ||
type: PredictorRouteReady | ||
- lastTransitionTime: '2024-05-30T23:03:45Z' | ||
message: Configuration "helloworld-predictor" does not have any ready Revision. | ||
reason: RevisionMissing | ||
status: 'False' | ||
type: Ready | ||
- lastTransitionTime: '2024-05-30T23:03:45Z' | ||
reason: PredictorRouteReady not ready | ||
severity: Info | ||
status: 'False' | ||
type: RoutesReady | ||
modelStatus: | ||
transitionStatus: BlockedByFailedLoad |
16 changes: 16 additions & 0 deletions
16
resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_modelmesh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
apiVersion: serving.kserve.io/v1beta1 | ||
kind: InferenceService | ||
metadata: | ||
name: helloworld | ||
namespace: default | ||
spec: {} | ||
status: | ||
conditions: | ||
- lastTransitionTime: '2024-05-30T22:43:16Z' | ||
status: 'True' | ||
type: PredictorReady | ||
- lastTransitionTime: '2024-05-30T22:43:16Z' | ||
status: 'True' | ||
type: Ready | ||
modelStatus: | ||
transitionStatus: UpToDate |
35 changes: 35 additions & 0 deletions
35
resource_customizations/serving.kserve.io/InferenceService/testdata/healthy_ocp.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
apiVersion: serving.kserve.io/v1beta1 | ||
kind: InferenceService | ||
metadata: | ||
name: helloworld | ||
namespace: default | ||
spec: {} | ||
status: | ||
conditions: | ||
- lastTransitionTime: '2024-05-30T22:14:31Z' | ||
status: 'True' | ||
type: IngressReady | ||
- lastTransitionTime: '2024-05-30T22:14:30Z' | ||
severity: Info | ||
status: 'True' | ||
type: LatestDeploymentReady | ||
- lastTransitionTime: '2024-05-30T22:14:30Z' | ||
severity: Info | ||
status: 'True' | ||
type: PredictorConfigurationReady | ||
- lastTransitionTime: '2024-05-30T22:14:31Z' | ||
status: 'True' | ||
type: PredictorReady | ||
- lastTransitionTime: '2024-05-30T22:14:31Z' | ||
severity: Info | ||
status: 'True' | ||
type: PredictorRouteReady | ||
- lastTransitionTime: '2024-05-30T22:14:31Z' | ||
status: 'True' | ||
type: Ready | ||
- lastTransitionTime: '2024-05-30T22:14:31Z' | ||
severity: Info | ||
status: 'True' | ||
type: RoutesReady | ||
modelStatus: | ||
transitionStatus: UpToDate |
16 changes: 16 additions & 0 deletions
16
...rce_customizations/serving.kserve.io/InferenceService/testdata/progressing_modelmesh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
apiVersion: serving.kserve.io/v1beta1 | ||
kind: InferenceService | ||
metadata: | ||
name: helloworld | ||
namespace: default | ||
spec: {} | ||
status: | ||
conditions: | ||
- lastTransitionTime: '2024-05-30T22:43:16Z' | ||
status: 'False' | ||
type: PredictorReady | ||
- lastTransitionTime: '2024-05-30T22:43:16Z' | ||
status: 'False' | ||
type: Ready | ||
modelStatus: | ||
transitionStatus: InProgress |
40 changes: 40 additions & 0 deletions
40
resource_customizations/serving.kserve.io/InferenceService/testdata/progressing_ocp.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
apiVersion: serving.kserve.io/v1beta1 | ||
kind: InferenceService | ||
metadata: | ||
name: helloworld | ||
namespace: default | ||
spec: {} | ||
status: | ||
conditions: | ||
- lastTransitionTime: '2024-05-30T22:29:46Z' | ||
reason: PredictorConfigurationReady not ready | ||
severity: Info | ||
status: Unknown | ||
type: LatestDeploymentReady | ||
- lastTransitionTime: '2024-05-30T22:29:46Z' | ||
severity: Info | ||
status: Unknown | ||
type: PredictorConfigurationReady | ||
- lastTransitionTime: '2024-05-30T22:29:46Z' | ||
message: Configuration "helloworld-predictor" is waiting for a Revision to become ready. | ||
reason: RevisionMissing | ||
status: Unknown | ||
type: PredictorReady | ||
- lastTransitionTime: '2024-05-30T22:29:46Z' | ||
message: Configuration "helloworld-predictor" is waiting for a Revision to become ready. | ||
reason: RevisionMissing | ||
severity: Info | ||
status: Unknown | ||
type: PredictorRouteReady | ||
- lastTransitionTime: '2024-05-30T22:29:46Z' | ||
message: Configuration "helloworld-predictor" is waiting for a Revision to become ready. | ||
reason: RevisionMissing | ||
status: Unknown | ||
type: Ready | ||
- lastTransitionTime: '2024-05-30T22:29:46Z' | ||
reason: PredictorRouteReady not ready | ||
severity: Info | ||
status: Unknown | ||
type: RoutesReady | ||
modelStatus: | ||
transitionStatus: InProgress |