From e5b2521525f5f22761dbc3ca265ba82656951a9a Mon Sep 17 00:00:00 2001 From: Taylor Goodhart Date: Tue, 23 Aug 2022 10:31:59 -0700 Subject: [PATCH 1/4] Add support for auto-detecting number of neuron cores --- .../amazonaws/ml/mms/util/ConfigManager.java | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java index ad09e2e0e..c14adce7b 100644 --- a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java +++ b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java @@ -12,6 +12,7 @@ */ package com.amazonaws.ml.mms.util; +import com.amazonaws.ml.mms.util.JsonUtils; import io.netty.handler.ssl.SslContext; import io.netty.handler.ssl.SslContextBuilder; import io.netty.handler.ssl.util.SelfSignedCertificate; @@ -19,6 +20,8 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; import java.lang.reflect.Field; import java.net.InetAddress; import java.net.UnknownHostException; @@ -67,6 +70,7 @@ public final class ConfigManager { private static final String MMS_NETTY_CLIENT_THREADS = "netty_client_threads"; private static final String MMS_JOB_QUEUE_SIZE = "job_queue_size"; private static final String MMS_NUMBER_OF_GPU = "number_of_gpu"; + private static final String MMS_NUMBER_OF_NEURON_CORES = "number_of_neuron_cores"; private static final String MMS_ASYNC_LOGGING = "async_logging"; private static final String MMS_CORS_ALLOWED_ORIGIN = "cors_allowed_origin"; private static final String MMS_CORS_ALLOWED_METHODS = "cors_allowed_methods"; @@ -143,6 +147,13 @@ private ConfigManager(Arguments args) { getAvailableGpu(), getIntProperty(MMS_NUMBER_OF_GPU, Integer.MAX_VALUE)))); + prop.setProperty( + MMS_NUMBER_OF_NEURON_CORES, + String.valueOf( + Integer.min( + 
getAvailableNeuronCores(), + getIntProperty(MMS_NUMBER_OF_NEURON_CORES, Integer.MAX_VALUE)))); + String pythonExecutable = args.getPythonExecutable(); if (pythonExecutable != null) { prop.setProperty("PYTHON_EXECUTABLE", pythonExecutable); @@ -258,6 +269,10 @@ public int getNumberOfGpu() { return getIntProperty(MMS_NUMBER_OF_GPU, 0); } + public int getNumberOfNeuronCores() { + return getIntProperty(MMS_NUMBER_OF_NEURON_CORES, 0); + } + public String getMmsDefaultServiceHandler() { return getProperty(MMS_DEFAULT_SERVICE_HANDLER, null); } @@ -283,6 +298,9 @@ public int getDefaultWorkers() { if (workers == 0) { workers = getNumberOfGpu(); } + if (workers == 0) { + workers = getNumberOfNeuronCores(); + } if (workers == 0) { workers = Runtime.getRuntime().availableProcessors(); } @@ -453,6 +471,8 @@ public String dumpConfigurations() { + System.getProperty("java.io.tmpdir") + "\nNumber of GPUs: " + getNumberOfGpu() + + "\nNumber of Neuron Cores: " + + getNumberOfNeuronCores() + "\nNumber of CPUs: " + runtime.availableProcessors() + "\nMax heap size: " @@ -587,6 +607,26 @@ private static int getAvailableGpu() { } } + private static final class NeuronConfig{ + int nc_count; + } + + private static int getAvailableNeuronCores() { + try { + Process process = + Runtime.getRuntime().exec("neuron-ls --json-output"); + int ret = process.waitFor(); + if (ret != 0) { + return 0; + } + Reader reader = new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8); + NeuronConfig[] results = JsonUtils.GSON.fromJson(reader, NeuronConfig[].class); + return Arrays.stream(results).mapToInt(r -> r.nc_count).sum(); + } catch (IOException | InterruptedException e) { + return 0; + } + } + public static final class Arguments { private String mmsConfigFile; From 42542b586802186cfcb233d74a3c25d0763617c3 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 23 Aug 2022 18:02:39 +0000 Subject: [PATCH 2/4] Fix style issues --- .../com/amazonaws/ml/mms/util/ConfigManager.java | 13 
+++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java index c14adce7b..8c79e0d8e 100644 --- a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java +++ b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java @@ -12,7 +12,7 @@ */ package com.amazonaws.ml.mms.util; -import com.amazonaws.ml.mms.util.JsonUtils; +import com.google.gson.annotations.SerializedName; import io.netty.handler.ssl.SslContext; import io.netty.handler.ssl.SslContextBuilder; import io.netty.handler.ssl.util.SelfSignedCertificate; @@ -607,10 +607,6 @@ private static int getAvailableGpu() { } } - private static final class NeuronConfig{ - int nc_count; - } - private static int getAvailableNeuronCores() { try { Process process = @@ -621,12 +617,17 @@ private static int getAvailableNeuronCores() { } Reader reader = new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8); NeuronConfig[] results = JsonUtils.GSON.fromJson(reader, NeuronConfig[].class); - return Arrays.stream(results).mapToInt(r -> r.nc_count).sum(); + return Arrays.stream(results).mapToInt(c -> c.numNeuronCores).sum(); } catch (IOException | InterruptedException e) { return 0; } } + private static final class NeuronConfig{ + @SerializedName("nc_count") + private int numNeuronCores; + } + public static final class Arguments { private String mmsConfigFile; From 4ad29a1df90efbb4d0f3b16b47cb6c7586996638 Mon Sep 17 00:00:00 2001 From: Taylor Goodhart Date: Tue, 23 Aug 2022 21:55:02 +0000 Subject: [PATCH 3/4] Fix more formatting issues --- .../java/com/amazonaws/ml/mms/util/ConfigManager.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java 
b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java index 8c79e0d8e..9b445128a 100644 --- a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java +++ b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java @@ -609,23 +609,22 @@ private static int getAvailableGpu() { private static int getAvailableNeuronCores() { try { - Process process = - Runtime.getRuntime().exec("neuron-ls --json-output"); + Process process = Runtime.getRuntime().exec("neuron-ls --json-output"); int ret = process.waitFor(); if (ret != 0) { return 0; } Reader reader = new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8); - NeuronConfig[] results = JsonUtils.GSON.fromJson(reader, NeuronConfig[].class); + NeuronConfig[] results = JsonUtils.GSON.fromJson(reader, NeuronConfig[].class); return Arrays.stream(results).mapToInt(c -> c.numNeuronCores).sum(); } catch (IOException | InterruptedException e) { return 0; } } - private static final class NeuronConfig{ - @SerializedName("nc_count") - private int numNeuronCores; + private static final class NeuronConfig { + @SerializedName("nc_count") + int numNeuronCores; } public static final class Arguments { From ad1351785d0eaef45c947005a6871ec37bfcbc62 Mon Sep 17 00:00:00 2001 From: aws-taylor <57725958+aws-taylor@users.noreply.github.com> Date: Wed, 24 Aug 2022 10:19:26 -0700 Subject: [PATCH 4/4] Update configuration.md --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 77bf13265..10963a95b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -172,7 +172,7 @@ Most of those properties are designed for performance tuning. Adjusting those nu * enable_envvars_config: Enable configuring MMS through environment variables. When this option is set to "true", all the static configurations of MMS can come through environment variables as well. 
default: false * number_of_netty_threads: number frontend netty thread, default: number of logical processors available to the JVM. * netty_client_threads: number of backend netty thread, default: number of logical processors available to the JVM. -* default_workers_per_model: number of workers to create for each model that loaded at startup time, default: available GPUs in system or number of logical processors available to the JVM. +* default_workers_per_model: number of workers to create for each model that is loaded at startup time, default: the number of available GPUs in the system; if there are none, the number of available Neuron cores; if there are none of those either, the number of logical processors available to the JVM. * job_queue_size: number inference jobs that frontend will queue before backend can serve, default 100. Useful in cases where certain requests take predictably longer to complete. * async_logging: enable asynchronous logging for higher throughput, log output may be delayed if this is enabled, default: false. * default_response_timeout: Timeout, in seconds, used for model's backend workers before they are deemed unresponsive and rebooted. default: 120 seconds.