From ab701655d768488759162b47061f93b4f2025de0 Mon Sep 17 00:00:00 2001 From: Siddharth Venkatesan Date: Tue, 3 Sep 2024 12:25:02 -0700 Subject: [PATCH] =?UTF-8?q?[fix][sf]=20fix=20bug=20with=20PyPredictor=20to?= =?UTF-8?q?=20remove=20worker,=20add=20specific=20fla=E2=80=A6=20(#2356)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../main/java/ai/djl/python/engine/PyPredictor.java | 6 +++--- .../java/ai/djl/serving/models/ModelManager.java | 12 ++++++++++++ wlm/src/main/java/ai/djl/serving/wlm/ModelInfo.java | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/engines/python/src/main/java/ai/djl/python/engine/PyPredictor.java b/engines/python/src/main/java/ai/djl/python/engine/PyPredictor.java index bc597c640..213cd68df 100644 --- a/engines/python/src/main/java/ai/djl/python/engine/PyPredictor.java +++ b/engines/python/src/main/java/ai/djl/python/engine/PyPredictor.java @@ -64,13 +64,13 @@ public PyPredictor( @Override @SuppressWarnings("unchecked") public List batchPredict(List inputs) throws TranslateException { + if (process.isModelUnrecoverable()) { + throw new EngineException("Backend Python process is unrecoverable."); + } if (!process.isReady()) { // TODO: wait for restart throw new TranslateException("Backend Python process is stopped."); } - if (process.isModelUnrecoverable()) { - throw new EngineException("Backend Python process is unrecoverable."); - } Object first = inputs.get(0); if (first instanceof Input) { int size = inputs.size(); diff --git a/serving/src/main/java/ai/djl/serving/models/ModelManager.java b/serving/src/main/java/ai/djl/serving/models/ModelManager.java index 482bb10b2..515cb22bc 100644 --- a/serving/src/main/java/ai/djl/serving/models/ModelManager.java +++ b/serving/src/main/java/ai/djl/serving/models/ModelManager.java @@ -29,6 +29,7 @@ import ai.djl.serving.wlm.WorkerPool; import ai.djl.serving.wlm.WorkerPoolConfig; import ai.djl.serving.workflow.Workflow; +import ai.djl.util.Utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -416,6 +417,17 @@ public CompletableFuture> workerStatus() { if (wlm.getWorkerPool(wpc).isFullyScaled()) { data.put(modelName, new StatusResponse("Healthy")); } else { + boolean sageMakerHealthCheckOverride = + Boolean.parseBoolean( + Utils.getEnvOrSystemProperty( + "SAGEMAKER_HEALTH_CHECK_OVERRIDE")); + if (sageMakerHealthCheckOverride) { + logger.info( + "SAGEMAKER_HEALTH_CHECK_OVERRIDE is" + + " enabled. Failing ping as" + + " requested"); + hasFailure = true; + } data.put(modelName, new StatusResponse("Unhealthy")); } break; diff --git a/wlm/src/main/java/ai/djl/serving/wlm/ModelInfo.java b/wlm/src/main/java/ai/djl/serving/wlm/ModelInfo.java index 3981de701..b2685ebe6 100644 --- a/wlm/src/main/java/ai/djl/serving/wlm/ModelInfo.java +++ b/wlm/src/main/java/ai/djl/serving/wlm/ModelInfo.java @@ -396,7 +396,7 @@ public Status getStatus() { Boolean.parseBoolean( Utils.getEnvOrSystemProperty("SERVING_HEALTH_CHECK_OVERRIDE")); if (isHealthCheckOverrideEnabled) { - logger.error( + logger.info( "SERVING_HEALTH_CHECK_OVERRIDE is enabled. At least 1 model worker" + " has exhausted all retries. Not marking model as failed"); return Status.READY;