From e5b2521525f5f22761dbc3ca265ba82656951a9a Mon Sep 17 00:00:00 2001
From: Taylor Goodhart <goodhart@amazon.com>
Date: Tue, 23 Aug 2022 10:31:59 -0700
Subject: [PATCH 1/4] Add support for auto-detecting number of neuron cores

---
 .../amazonaws/ml/mms/util/ConfigManager.java  | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java
index ad09e2e0e..c14adce7b 100644
--- a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java
+++ b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java
@@ -12,6 +12,7 @@
  */
 package com.amazonaws.ml.mms.util;
 
+import com.amazonaws.ml.mms.util.JsonUtils;
 import io.netty.handler.ssl.SslContext;
 import io.netty.handler.ssl.SslContextBuilder;
 import io.netty.handler.ssl.util.SelfSignedCertificate;
@@ -19,6 +20,8 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
 import java.lang.reflect.Field;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
@@ -67,6 +70,7 @@ public final class ConfigManager {
     private static final String MMS_NETTY_CLIENT_THREADS = "netty_client_threads";
     private static final String MMS_JOB_QUEUE_SIZE = "job_queue_size";
     private static final String MMS_NUMBER_OF_GPU = "number_of_gpu";
+    private static final String MMS_NUMBER_OF_NEURON_CORES = "number_of_neuron_cores";
     private static final String MMS_ASYNC_LOGGING = "async_logging";
     private static final String MMS_CORS_ALLOWED_ORIGIN = "cors_allowed_origin";
     private static final String MMS_CORS_ALLOWED_METHODS = "cors_allowed_methods";
@@ -143,6 +147,13 @@ private ConfigManager(Arguments args) {
                                 getAvailableGpu(),
                                 getIntProperty(MMS_NUMBER_OF_GPU, Integer.MAX_VALUE))));
 
+        prop.setProperty(
+                MMS_NUMBER_OF_NEURON_CORES,
+                String.valueOf(
+                        Integer.min(
+                                getAvailableNeuronCores(),
+                                getIntProperty(MMS_NUMBER_OF_NEURON_CORES, Integer.MAX_VALUE))));
+
         String pythonExecutable = args.getPythonExecutable();
         if (pythonExecutable != null) {
             prop.setProperty("PYTHON_EXECUTABLE", pythonExecutable);
@@ -258,6 +269,10 @@ public int getNumberOfGpu() {
         return getIntProperty(MMS_NUMBER_OF_GPU, 0);
     }
 
+    public int getNumberOfNeuronCores() {
+        return getIntProperty(MMS_NUMBER_OF_NEURON_CORES, 0);
+    }
+
     public String getMmsDefaultServiceHandler() {
         return getProperty(MMS_DEFAULT_SERVICE_HANDLER, null);
     }
@@ -283,6 +298,9 @@ public int getDefaultWorkers() {
         if (workers == 0) {
             workers = getNumberOfGpu();
         }
+        if (workers == 0) {
+            workers = getNumberOfNeuronCores();
+        }
         if (workers == 0) {
             workers = Runtime.getRuntime().availableProcessors();
         }
@@ -453,6 +471,8 @@ public String dumpConfigurations() {
                 + System.getProperty("java.io.tmpdir")
                 + "\nNumber of GPUs: "
                 + getNumberOfGpu()
+                + "\nNumber of Neuron Cores: "
+                + getNumberOfNeuronCores()
                 + "\nNumber of CPUs: "
                 + runtime.availableProcessors()
                 + "\nMax heap size: "
@@ -587,6 +607,26 @@ private static int getAvailableGpu() {
         }
     }
 
+    private static final class NeuronConfig{
+        int nc_count;
+    }
+
+    private static int getAvailableNeuronCores() {
+        try {
+            Process process =
+                    Runtime.getRuntime().exec("neuron-ls --json-output");
+            int ret = process.waitFor();
+            if (ret != 0) {
+                return 0;
+            }
+            Reader reader = new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8);
+            NeuronConfig[] results  = JsonUtils.GSON.fromJson(reader, NeuronConfig[].class);
+            return Arrays.stream(results).mapToInt(r -> r.nc_count).sum();
+        } catch (IOException | InterruptedException e) {
+            return 0;
+        }
+    }
+
     public static final class Arguments {
 
         private String mmsConfigFile;

From 42542b586802186cfcb233d74a3c25d0763617c3 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-58-150.us-west-2.compute.internal>
Date: Tue, 23 Aug 2022 18:02:39 +0000
Subject: [PATCH 2/4] Fix style issues

---
 .../com/amazonaws/ml/mms/util/ConfigManager.java    | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java
index c14adce7b..8c79e0d8e 100644
--- a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java
+++ b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java
@@ -12,7 +12,7 @@
  */
 package com.amazonaws.ml.mms.util;
 
-import com.amazonaws.ml.mms.util.JsonUtils;
+import com.google.gson.annotations.SerializedName;
 import io.netty.handler.ssl.SslContext;
 import io.netty.handler.ssl.SslContextBuilder;
 import io.netty.handler.ssl.util.SelfSignedCertificate;
@@ -607,10 +607,6 @@ private static int getAvailableGpu() {
         }
     }
 
-    private static final class NeuronConfig{
-        int nc_count;
-    }
-
     private static int getAvailableNeuronCores() {
         try {
             Process process =
@@ -621,12 +617,17 @@ private static int getAvailableNeuronCores() {
             }
             Reader reader = new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8);
             NeuronConfig[] results  = JsonUtils.GSON.fromJson(reader, NeuronConfig[].class);
-            return Arrays.stream(results).mapToInt(r -> r.nc_count).sum();
+            return Arrays.stream(results).mapToInt(c -> c.numNeuronCores).sum();
         } catch (IOException | InterruptedException e) {
             return 0;
         }
     }
 
+    private static final class NeuronConfig{
+	@SerializedName("nc_count")
+	private int numNeuronCores;
+    }
+
     public static final class Arguments {
 
         private String mmsConfigFile;

From 4ad29a1df90efbb4d0f3b16b47cb6c7586996638 Mon Sep 17 00:00:00 2001
From: Taylor Goodhart <goodhart@amazon.com>
Date: Tue, 23 Aug 2022 21:55:02 +0000
Subject: [PATCH 3/4] Fix more formatting issues

---
 .../java/com/amazonaws/ml/mms/util/ConfigManager.java | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java
index 8c79e0d8e..9b445128a 100644
--- a/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java
+++ b/frontend/server/src/main/java/com/amazonaws/ml/mms/util/ConfigManager.java
@@ -609,23 +609,22 @@ private static int getAvailableGpu() {
 
     private static int getAvailableNeuronCores() {
         try {
-            Process process =
-                    Runtime.getRuntime().exec("neuron-ls --json-output");
+            Process process = Runtime.getRuntime().exec("neuron-ls --json-output");
             int ret = process.waitFor();
             if (ret != 0) {
                 return 0;
             }
             Reader reader = new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8);
-            NeuronConfig[] results  = JsonUtils.GSON.fromJson(reader, NeuronConfig[].class);
+            NeuronConfig[] results = JsonUtils.GSON.fromJson(reader, NeuronConfig[].class);
             return Arrays.stream(results).mapToInt(c -> c.numNeuronCores).sum();
         } catch (IOException | InterruptedException e) {
             return 0;
         }
     }
 
-    private static final class NeuronConfig{
-	@SerializedName("nc_count")
-	private int numNeuronCores;
+    private static final class NeuronConfig {
+        @SerializedName("nc_count")
+        int numNeuronCores;
     }
 
     public static final class Arguments {

From ad1351785d0eaef45c947005a6871ec37bfcbc62 Mon Sep 17 00:00:00 2001
From: aws-taylor <57725958+aws-taylor@users.noreply.github.com>
Date: Wed, 24 Aug 2022 10:19:26 -0700
Subject: [PATCH 4/4] Update configuration.md

---
 docs/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 77bf13265..10963a95b 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -172,7 +172,7 @@ Most of those properties are designed for performance tuning. Adjusting those nu
 * enable_envvars_config: Enable configuring MMS through environment variables. When this option is set to "true", all the static configurations of MMS can come through environment variables as well. default: false
 * number_of_netty_threads: number frontend netty thread, default: number of logical processors available to the JVM.
 * netty_client_threads: number of backend netty thread, default: number of logical processors available to the JVM.
-* default_workers_per_model: number of workers to create for each model that loaded at startup time, default: available GPUs in system or number of logical processors available to the JVM.
+* default_workers_per_model: number of workers to create for each model that is loaded at startup time, default: the number of available GPUs in the system; if there are none, the number of available Neuron cores in the system; if there are none of those either, the number of logical processors available to the JVM.
 * job_queue_size: number inference jobs that frontend will queue before backend can serve, default 100. Useful in cases where certain requests take predictably longer to complete.
 * async_logging: enable asynchronous logging for higher throughput, log output may be delayed if this is enabled, default: false.
 * default_response_timeout: Timeout, in seconds, used for model's backend workers before they are deemed unresponsive and rebooted. default: 120 seconds.