diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index cae95d20ca23d..1623ebb3aa74c 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -903,6 +903,11 @@ def create_engine_config(self) -> EngineConfig: "--enable-prefix-caching is currently not " "supported for multimodal models and has been disabled.") self.enable_prefix_caching = False + if model_config.is_encoder_decoder_model: + logger.warning( + "Block Manager v2 does not support encoder-decoder models" + " currently. Using Block Manager v1 as fallback.") + self.use_v2_block_manager = False cache_config = CacheConfig( block_size=self.block_size if self.device != "neuron" else