Add max_enqueued_batches option for model servers
PiperOrigin-RevId: 654998187
deqiangc authored and tensorflow-copybara committed Jul 23, 2024
1 parent 6b9cf7c commit 7c99259
Showing 2 changed files with 5 additions and 0 deletions.
@@ -236,6 +236,8 @@ absl::Status TfrtSavedModelFactory::CreateTfrtSavedModelWithMetadata(
   compile_options.use_gpu_compile_and_execute_op =
       config_.tfrt_use_fused_gpu_op();
   compile_options.min_num_batch_threads = config_.tfrt_min_num_batch_threads();
+  compile_options.min_max_enqueued_batches =
+      config_.tfrt_min_max_enqueued_batches();

   options.graph_execution_options.run_placer_grappler_on_functions =
       config_.run_placer_grappler_on_functions();
@@ -221,6 +221,9 @@ message TfrtSavedModelConfig {
     IFRT_CLIENT_PATHWAYS = 2;
   }
   IfrtClientType ifrt_client_type = 2024;
+
+  // The minimum value for the maximum number of outstanding enqueued batches.
+  int64 tfrt_min_max_enqueued_batches = 2025;
 }
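
For illustration, the new field could be set alongside the existing batch-threads option in a `TfrtSavedModelConfig` text proto. This is a sketch only: the field names come from this diff, but the surrounding config file layout and the chosen values are assumptions.

```textproto
# Hypothetical TfrtSavedModelConfig fragment (values are illustrative).
# tfrt_min_num_batch_threads appears in the C++ hunk above;
# tfrt_min_max_enqueued_batches is the field added by this commit.
tfrt_min_num_batch_threads: 4
tfrt_min_max_enqueued_batches: 64
```

Per the factory change above, the value is forwarded to `compile_options.min_max_enqueued_batches`, so it acts as a floor on the batching queue's maximum depth.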

// Config proto for TfrtSavedModelSourceAdapter.