Add flags for GPU multi-streaming support.

- A flag to specify the number of virtual GPU devices (stream groups) to create.
- A flag to indicate whether to use the new fused kernel (`gpurt.compile_and_execute`).

Also initialize GpuRunner in the model server.

PiperOrigin-RevId: 575365834
tensorflow · Oct 21, 2023 · 77cabde · 77cabde
1 parent 2cd560b
commit 77cabde
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 0 deletions.
diff --git a/tensorflow_serving/servables/tensorflow/tfrt_saved_model_factory.cc b/tensorflow_serving/servables/tensorflow/tfrt_saved_model_factory.cc
@@ -227,10 +227,14 @@ absl::Status TfrtSavedModelFactory::CreateTfrtSavedModelWithMetadata(
       config_.use_tpu_host_allocator_for_inputs();
   compile_options.tpu_allow_unpadded_batch =
       ToTpuAllowUnpaddedBatch(config_.tpu_unpadded_batch_mode());
+  compile_options.use_gpu_compile_and_execute_op =
+      config_.tfrt_use_fused_gpu_op();
 
   options.graph_execution_options.run_placer_grappler_on_functions =
       config_.run_placer_grappler_on_functions();
   options.graph_execution_options.enable_tfrt_gpu = config_.enable_tfrt_gpu();
+  options.graph_execution_options.tfrt_gpu_parallelism =
+      config_.tfrt_gpu_parallelism();
   options.graph_execution_options.gpu_system_memory_size_in_mb =
       config_.gpu_system_memory_size_in_mb();
   options.graph_execution_options.enable_grappler_function_optimizer =

diff --git a/tensorflow_serving/servables/tensorflow/tfrt_saved_model_source_adapter.proto b/tensorflow_serving/servables/tensorflow/tfrt_saved_model_source_adapter.proto
@@ -192,6 +192,12 @@ message TfrtSavedModelConfig {
   // Disables compilations after model initialization is complete
   // (ignored if enable_model_warmup is false)
   bool freeze_after_init = 2016;
+
+  // The number of virtual GPUs to create on a physical GPU.
+  int32 tfrt_gpu_parallelism = 2017;
+
+  // Whether to use fused op for GPU compile, execute and data transfer.
+  bool tfrt_use_fused_gpu_op = 2018;
 }
 
 // Config proto for TfrtSavedModelSourceAdapter.