From 5e6de91bc8c70fa52a792fd450f5342c3d49afff Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 3 Mar 2023 15:35:23 +0100 Subject: [PATCH 01/16] add encoder decoder model --- optimum/exporters/onnx/config.py | 30 +++++++++++++++++- optimum/exporters/onnx/model_configs.py | 6 ++-- optimum/exporters/tasks.py | 5 +++ optimum/onnxruntime/modeling_seq2seq.py | 33 ++++++++++++++++++++ optimum/utils/normalized_config.py | 1 + tests/exporters/exporters_utils.py | 3 +- tests/onnxruntime/test_modeling.py | 21 +++++++++++-- tests/onnxruntime/utils_onnxruntime_tests.py | 3 +- 8 files changed, 95 insertions(+), 7 deletions(-) diff --git a/optimum/exporters/onnx/config.py b/optimum/exporters/onnx/config.py index 28d32a55fb..780ff0a3d8 100644 --- a/optimum/exporters/onnx/config.py +++ b/optimum/exporters/onnx/config.py @@ -267,7 +267,7 @@ def torch_to_onnx_input_map(self) -> Dict[str, str]: return {} -class EncoderDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): +class DummyEncoderDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator,) def __init__( @@ -341,6 +341,34 @@ def __init__( self.DUMMY_INPUT_GENERATOR_CLASSES += self._past_key_values_generator + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + common_inputs = {} + if self._behavior is not ConfigBehavior.DECODER: + common_inputs["input_ids"] = {0: "batch_size", 1: "encoder_sequence_length"} + + common_inputs["attention_mask"] = {0: "batch_size", 1: "encoder_sequence_length"} + + if self._behavior is not ConfigBehavior.ENCODER: + # TODO: it is likely this pop() is unwanted as we then always hit + # https://github.com/huggingface/transformers/blob/v4.26.0/src/transformers/models/t5/modeling_t5.py#L965-L969 + common_inputs.pop("attention_mask") + + if self.use_past_in_inputs: + # TODO: validate the axis name for attention_mask + # common_inputs["attention_mask"][1] = "past_encoder_sequence_length + sequence_length" + common_inputs["decoder_input_ids"] = {0: "batch_size"} + else: + common_inputs["decoder_input_ids"] = {0: "batch_size", 1: "decoder_sequence_length"} + + if self.use_past_in_inputs: + self.add_past_key_values(common_inputs, direction="inputs") + + if self._behavior is ConfigBehavior.DECODER: + common_inputs["encoder_outputs"] = {0: "batch_size", 1: "encoder_sequence_length"} + + return common_inputs + @property def torch_to_onnx_input_map(self) -> Dict[str, str]: if self._behavior is ConfigBehavior.DECODER: diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index db0256e4d0..f31c764a4a 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -47,7 +47,7 @@ from .config import ( AudioOnnxConfig, AudioToTextOnnxConfig, - EncoderDecoderOnnxConfig, + DummyEncoderDecoderOnnxConfig, TextAndVisionOnnxConfig, TextDecoderOnnxConfig, TextEncoderOnnxConfig, @@ -1168,7 +1168,7 @@ class TrOCROnnxConfig(TextSeq2SeqOnnxConfig): ) -class VisionEncoderDecoderOnnxConfig(EncoderDecoderOnnxConfig): +class VisionEncoderDecoderOnnxConfig(DummyEncoderDecoderOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig ATOL_FOR_VALIDATION = 1e-3 @@ -1439,3 +1439,5 @@ def overwrite_shape_and_generate_input( dummy_input = dummy_input_gen.generate(input_name, framework=framework) return dummy_input +class EncoderDecoderOnnxConfig(DummyEncoderDecoderOnnxConfig): + NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 
0ebcfc2759..b29f95fd54 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -497,6 +497,11 @@ class TasksManager: onnx="ElectraOnnxConfig", tflite="ElectraTFLiteConfig", ), + "encoder-decoder": supported_tasks_mapping( + "seq2seq-lm", + "seq2seq-lm-with-past", + onnx="EncoderDecoderOnnxConfig", + ), "flaubert": supported_tasks_mapping( "feature-extraction", "fill-mask", diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py index c436a900cb..e011b81f89 100644 --- a/optimum/onnxruntime/modeling_seq2seq.py +++ b/optimum/onnxruntime/modeling_seq2seq.py @@ -1092,6 +1092,39 @@ class ORTModelForSeq2SeqLM(ORTModelForConditionalGeneration, GenerationMixin): auto_model_class = AutoModelForSeq2SeqLM main_input_name = "input_ids" + def __init__( + self, + encoder_session: ort.InferenceSession, + decoder_session: ort.InferenceSession, + config: "PretrainedConfig", + decoder_with_past_session: Optional[ort.InferenceSession] = None, + use_io_binding: Optional[bool] = None, + model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, + preprocessors: Optional[List] = None, + generation_config: Optional[GenerationConfig] = None, + **kwargs, + ): + super().__init__( + encoder_session, + decoder_session, + config, + decoder_with_past_session, + use_io_binding, + model_save_dir, + preprocessors, + generation_config, + **kwargs, + ) + + if config.model_type == "encoder-decoder": + self.encoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( + config.encoder.model_type + )(config.encoder) + + self.decoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( + config.decoder.model_type + )(config.decoder) + def _initialize_encoder(self, session: ort.InferenceSession) -> ORTEncoder: return ORTEncoder(session, self) diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index c5f3d5ce4c..e65c3c42d6 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -220,6 +220,7 @@ class NormalizedConfigManager: "distilbert": NormalizedTextConfig.with_args(num_attention_heads="n_heads", hidden_size="dim"), "donut-swin": NormalizedVisionConfig, "electra": NormalizedTextConfig, + "encoder-decoder": NormalizedEncoderDecoderConfig, "gpt2": GPT2LikeNormalizedTextConfig, "gpt-bigcode": GPT2LikeNormalizedTextConfig, "gpt_neo": NormalizedTextConfig.with_args(num_attention_heads="num_heads"), diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 7a20fa4528..9ea8472786 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -161,6 +161,7 @@ "camembert": "camembert-base", "clip": "openai/clip-vit-base-patch32", "convbert": "YituTech/conv-bert-base", + "convnext": "facebook/convnext-tiny-224", "codegen": "hf-internal-testing/tiny-random-CodeGenModel", # Not using Salesforce/codegen-350M-multi because it takes too much time for testing. "data2vec-text": "facebook/data2vec-text-base", "data2vec-vision": "facebook/data2vec-vision-base", @@ -168,10 +169,10 @@ "deberta": "hf-internal-testing/tiny-random-DebertaModel", # Not using microsoft/deberta-base because it takes too much time for testing. "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model", # Not using microsoft/deberta-v2-xlarge because it takes too much time for testing. 
"deit": "facebook/deit-small-patch16-224", - "convnext": "facebook/convnext-tiny-224", "detr": "hf-internal-testing/tiny-random-detr", # Not using facebook/detr-resnet-50 because it takes too much time for testing. "distilbert": "distilbert-base-cased", "electra": "google/electra-base-generator", + "encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", "flaubert": "hf-internal-testing/tiny-random-flaubert", # TODO "gpt2": "gpt2", "gpt-neo": "EleutherAI/gpt-neo-125M", diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index f28a3676be..1da97e121e 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3059,6 +3059,7 @@ class ORTModelForSeq2SeqLMIntegrationTest(ORTModelTestMixin): # "bigbird_pegasus", "blenderbot", "blenderbot_small", + "encoder-decoder", "longt5", "m2m_100", "marian", @@ -3097,11 +3098,13 @@ def test_load_vanilla_transformers_which_is_not_supported(self): @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]})) def test_generate_utils(self, test_name: str, model_arch: str, use_cache: str): + if model_arch == "encoder-decoder": + use_cache = False model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache} self._setup(model_args) model_id = MODEL_NAMES[model_arch] - model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name]) + model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_cache=use_cache) tokenizer = get_preprocessor(model_id) text = "This is a sample output" tokens = tokenizer(text, return_tensors="pt") @@ -3164,6 +3167,8 @@ def test_merge_from_onnx_and_save(self, model_arch): @parameterized.expand(grid_parameters(FULL_GRID)) def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): + if model_arch == "encoder-decoder" and use_cache is True: + return if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3173,6 +3178,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach "use_cache": use_cache, "use_merged": use_merged, } + self._setup(model_args) model_id = MODEL_NAMES[model_arch] @@ -3224,6 +3230,8 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach @parameterized.expand(grid_parameters(FULL_GRID)) def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): + if model_arch == "encoder-decoder" and use_cache is True: + return if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3233,6 +3241,7 @@ def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cac "use_cache": use_cache, "use_merged": use_merged, } + self._setup(model_args) model_id = MODEL_NAMES[model_arch] @@ -3287,6 +3296,8 @@ def test_pipeline_model_is_none(self): @require_torch_gpu @pytest.mark.gpu_test def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool): + if model_arch == "encoder-decoder": + use_cache = False model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache} self._setup(model_args) @@ -3358,7 +3369,7 @@ def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: st @parameterized.expand(SUPPORTED_ARCHITECTURES) @pytest.mark.gpu_test # mark as GPU test as well to run the without/with cache timing test on the slow tests 
def test_compare_with_and_without_past_key_values(self, model_arch: str): - if model_arch == "m2m_100": + if model_arch == "m2m_100" and model_arch == "encoder-decoder": return # TODO: this test is failing for m2m_100 model_args = {"test_name": model_arch + "_False", "model_arch": model_arch, "use_cache": False} self._setup(model_args) @@ -3446,6 +3457,8 @@ def test_compare_merged_and_not_merged_models_outputs(self, test_name: str, mode @require_torch_gpu @pytest.mark.gpu_test def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): + if model_arch == "encoder-decoder": + use_cache = False if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3455,6 +3468,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: "use_cache": use_cache, "use_merged": use_merged, } + self._setup(model_args) model_id = MODEL_NAMES[model_arch] @@ -3491,6 +3505,8 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: def test_compare_generation_to_io_binding( self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool ): + if model_arch == "encoder-decoder": + use_cache = False if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3500,6 +3516,7 @@ def test_compare_generation_to_io_binding( "use_cache": use_cache, "use_merged": use_merged, } + self._setup(model_args) model_id = MODEL_NAMES[model_arch] diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index be0f3d0c31..066e0757fb 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -39,6 +39,7 @@ "camembert": "hf-internal-testing/tiny-random-camembert", "clip": "hf-internal-testing/tiny-random-CLIPModel", "convbert": "hf-internal-testing/tiny-random-ConvBertModel", + "convnext": "hf-internal-testing/tiny-random-convnext", "codegen": "hf-internal-testing/tiny-random-CodeGenModel", "data2vec_text": "hf-internal-testing/tiny-random-Data2VecTextModel", "data2vec_vision": "hf-internal-testing/tiny-random-Data2VecVisionModel", @@ -46,10 +47,10 @@ "deberta": "hf-internal-testing/tiny-random-DebertaModel", "deberta_v2": "hf-internal-testing/tiny-random-DebertaV2Model", "deit": "hf-internal-testing/tiny-random-DeiTModel", - "convnext": "hf-internal-testing/tiny-random-convnext", "detr": "hf-internal-testing/tiny-random-detr", "distilbert": "hf-internal-testing/tiny-random-DistilBertModel", "electra": "hf-internal-testing/tiny-random-ElectraModel", + "encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", "flaubert": "hf-internal-testing/tiny-random-flaubert", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", From 8b44014929e3b1e35fd1117c522c840f2f146a0a Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 3 Mar 2023 16:25:56 +0100 Subject: [PATCH 02/16] update tests --- optimum/onnxruntime/modeling_seq2seq.py | 1 + tests/onnxruntime/test_modeling.py | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py index e011b81f89..3183e15de0 100644 --- a/optimum/onnxruntime/modeling_seq2seq.py +++ b/optimum/onnxruntime/modeling_seq2seq.py @@ -1186,6 +1186,7 @@ def prepare_inputs_for_generation( input_ids, past_key_values=None, 
attention_mask=None, + token_type_ids=None, head_mask=None, decoder_head_mask=None, cross_attn_head_mask=None, diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 1da97e121e..94947641fd 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3472,12 +3472,12 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: self._setup(model_args) model_id = MODEL_NAMES[model_arch] - onnx_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_io_binding=False).to( - "cuda" - ) - io_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_io_binding=True).to( - "cuda" - ) + onnx_model = ORTModelForSeq2SeqLM.from_pretrained( + self.onnx_model_dirs[test_name], use_io_binding=False, use_cache=use_cache + ).to("cuda") + io_model = ORTModelForSeq2SeqLM.from_pretrained( + self.onnx_model_dirs[test_name], use_io_binding=True, use_cache=use_cache + ).to("cuda") self.assertFalse(onnx_model.use_io_binding) self.assertTrue(io_model.use_io_binding) @@ -3520,12 +3520,12 @@ def test_compare_generation_to_io_binding( self._setup(model_args) model_id = MODEL_NAMES[model_arch] - onnx_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_io_binding=False).to( - "cuda" - ) - io_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_io_binding=True).to( - "cuda" - ) + onnx_model = ORTModelForSeq2SeqLM.from_pretrained( + self.onnx_model_dirs[test_name], use_io_binding=False, use_cache=use_cache + ).to("cuda") + io_model = ORTModelForSeq2SeqLM.from_pretrained( + self.onnx_model_dirs[test_name], use_io_binding=True, use_cache=use_cache + ).to("cuda") tokenizer = get_preprocessor(model_id) tokens = tokenizer("This is a sample output", return_tensors="pt").to("cuda") From 6e680350f827a9848770e2b4d5e49fd3e7e86660 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 6 Mar 2023 09:22:00 +0100 Subject: [PATCH 03/16] update docs and tests --- docs/source/exporters/onnx/overview.mdx | 2 ++ tests/exporters/exporters_utils.py | 3 ++- tests/onnxruntime/test_modeling.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index e70e8afa84..9852ec162c 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -40,6 +40,7 @@ Supported architectures: - DistilBert - Donut-Swin - Electra +- Encoder Decoder - Flaubert - GPT-2 - GPT-BigCode @@ -88,6 +89,7 @@ Supported architectures: - TROCR - UniSpeech - UniSpeech SAT +- Vision Encoder Decoder - Vit - Wav2Vec2 - Wav2Vec2 Conformer diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 9ea8472786..9e720c2abc 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -42,6 +42,7 @@ "camembert": "hf-internal-testing/tiny-random-camembert", "clip": "hf-internal-testing/tiny-random-CLIPModel", "convbert": "hf-internal-testing/tiny-random-ConvBertModel", + "convnext": "hf-internal-testing/tiny-random-convnext", "codegen": "hf-internal-testing/tiny-random-CodeGenModel", "cvt": "hf-internal-testing/tiny-random-CvTModel", "data2vec-text": "hf-internal-testing/tiny-random-Data2VecTextModel", @@ -51,10 +52,10 @@ "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model", "deit": "hf-internal-testing/tiny-random-DeiTModel", "donut-swin": 
"hf-internal-testing/tiny-random-DonutSwinModel", - "convnext": "hf-internal-testing/tiny-random-convnext", "detr": "hf-internal-testing/tiny-random-DetrModel", # hf-internal-testing/tiny-random-detr is larger "distilbert": "hf-internal-testing/tiny-random-DistilBertModel", "electra": "hf-internal-testing/tiny-random-ElectraModel", + "encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", "flaubert": "hf-internal-testing/tiny-random-flaubert", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt-bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 94947641fd..da50b1da7c 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3369,7 +3369,7 @@ def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: st @parameterized.expand(SUPPORTED_ARCHITECTURES) @pytest.mark.gpu_test # mark as GPU test as well to run the without/with cache timing test on the slow tests def test_compare_with_and_without_past_key_values(self, model_arch: str): - if model_arch == "m2m_100" and model_arch == "encoder-decoder": + if model_arch == "m2m_100" or model_arch == "encoder-decoder": return # TODO: this test is failing for m2m_100 model_args = {"test_name": model_arch + "_False", "model_arch": model_arch, "use_cache": False} self._setup(model_args) From aa74a82f635fd0065b8100402da9cd4ee1c79184 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 6 Mar 2023 09:34:30 +0000 Subject: [PATCH 04/16] fixed tests --- tests/exporters/onnx/test_onnx_export.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index 7e172452cd..b7d8690c78 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -161,6 +161,9 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: + if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + continue + onnx_config_constructor = TasksManager.get_exporter_config_constructor( model_type=model_type, exporter="onnx", task=task, model_name=model_name ) From 5ad004e46c79046c52ea2aefd9073678ff01c413 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 6 Mar 2023 11:37:19 +0000 Subject: [PATCH 05/16] update tests --- tests/exporters/onnx/test_exporters_onnx_cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index 1d25240c18..1d28aaafa5 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -57,6 +57,8 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: + if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + continue onnx_config_class = TasksManager.get_exporter_config_constructor( "onnx", task=task, model_type=model_type ) From a88eee31183a4992169b988a40e07e87f905f6b2 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 7 Mar 2023 10:29:10 +0100 Subject: [PATCH 06/16] update tests --- tests/exporters/onnx/test_exporters_onnx_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index 1d28aaafa5..f7aec73301 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py 
+++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -119,7 +119,7 @@ def _get_models_to_test(export_models_dict: Dict): # TODO: segformer task can not be automatically inferred # TODO: xlm-roberta model auto-infers text-generation, but we don't support it # TODO: perceiver auto-infers default, but we don't support it (why?) - if model_type not in ["segformer", "xlm-roberta", "perceiver", "vision-encoder-decoder"]: + if model_type not in ["segformer", "xlm-roberta", "perceiver", "vision-encoder-decoder", "encoder-decoder"]: models_to_test.append( (f"{model_type}_no_task", model_type, model_name, "auto", "default", False, False) ) From 2a0abff6319561c2b3c93c0d3ff797e33c02643d Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 21 Mar 2023 12:10:55 +0100 Subject: [PATCH 07/16] update tests --- tests/exporters/exporters_utils.py | 2 +- tests/exporters/onnx/test_exporters_onnx_cli.py | 1 + tests/exporters/onnx/test_onnx_export.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 9e720c2abc..53d08f58af 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -55,7 +55,7 @@ "detr": "hf-internal-testing/tiny-random-DetrModel", # hf-internal-testing/tiny-random-detr is larger "distilbert": "hf-internal-testing/tiny-random-DistilBertModel", "electra": "hf-internal-testing/tiny-random-ElectraModel", - "encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", + "encoder-decoder": "hf-internal-testing/tiny-random-EncoderDecoderModel-bert-bert", "flaubert": "hf-internal-testing/tiny-random-flaubert", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt-bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index f7aec73301..fe7d687dc5 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -58,6 +58,7 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + # The model uses bert as decoder and does not support past key values continue onnx_config_class = TasksManager.get_exporter_config_constructor( "onnx", task=task, model_type=model_type diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index b7d8690c78..dd9b6f64d7 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -162,6 +162,7 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + # The model uses bert as decoder and do not have support past key values continue onnx_config_constructor = TasksManager.get_exporter_config_constructor( From bbafef600b21f36665a9a4f94a022f9b88880f45 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 21 Mar 2023 12:13:16 +0100 Subject: [PATCH 08/16] update tests --- tests/exporters/onnx/test_onnx_export.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index dd9b6f64d7..b7d8690c78 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -162,7 +162,6 @@ def _get_models_to_test(export_models_dict: Dict): for 
model_name, tasks in model_tasks.items(): for task in tasks: if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": - # The model uses bert as decoder and do not have support past key values continue onnx_config_constructor = TasksManager.get_exporter_config_constructor( From 9933f1a1b0537b888e2adbb1017a2535846e3ee2 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 21 Mar 2023 12:18:22 +0100 Subject: [PATCH 09/16] update tests --- tests/exporters/onnx/test_onnx_export.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index b7d8690c78..fc2d143aec 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -162,6 +162,7 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + # The model uses bert as decoder and does not support past key values continue onnx_config_constructor = TasksManager.get_exporter_config_constructor( From fb9b1af647e478d7212b4014a1e4654c62f9bee9 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Thu, 20 Apr 2023 09:21:01 +0200 Subject: [PATCH 10/16] update tests --- optimum/exporters/onnx/config.py | 2 +- optimum/exporters/onnx/model_configs.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/optimum/exporters/onnx/config.py b/optimum/exporters/onnx/config.py index 780ff0a3d8..2db092ead1 100644 --- a/optimum/exporters/onnx/config.py +++ b/optimum/exporters/onnx/config.py @@ -267,7 +267,7 @@ def torch_to_onnx_input_map(self) -> Dict[str, str]: return {} -class DummyEncoderDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): +class EncoderDecoderBaseOnnxConfig(OnnxSeq2SeqConfigWithPast): DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator,) def __init__( diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index f31c764a4a..260d6ab308 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -47,7 +47,7 @@ from .config import ( AudioOnnxConfig, AudioToTextOnnxConfig, - DummyEncoderDecoderOnnxConfig, + EncoderDecoderBaseOnnxConfig, TextAndVisionOnnxConfig, TextDecoderOnnxConfig, TextEncoderOnnxConfig, @@ -1168,7 +1168,7 @@ class TrOCROnnxConfig(TextSeq2SeqOnnxConfig): ) -class VisionEncoderDecoderOnnxConfig(DummyEncoderDecoderOnnxConfig): +class VisionEncoderDecoderOnnxConfig(EncoderDecoderBaseOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig ATOL_FOR_VALIDATION = 1e-3 @@ -1439,5 +1439,6 @@ def overwrite_shape_and_generate_input( dummy_input = dummy_input_gen.generate(input_name, framework=framework) return dummy_input -class EncoderDecoderOnnxConfig(DummyEncoderDecoderOnnxConfig): + +class EncoderDecoderOnnxConfig(EncoderDecoderBaseOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig From 5e3193e49362dedb655e8dff5cd3a51e222d9031 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Thu, 20 Apr 2023 12:54:08 +0530 Subject: [PATCH 11/16] Update tests/onnxruntime/test_modeling.py Co-authored-by: fxmarty <9808326+fxmarty@users.noreply.github.com> --- tests/onnxruntime/test_modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index da50b1da7c..49511323f9 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3168,7 +3168,7 
@@ def test_merge_from_onnx_and_save(self, model_arch): @parameterized.expand(grid_parameters(FULL_GRID)) def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): if model_arch == "encoder-decoder" and use_cache is True: - return + self.skipTest("encoder-decoder model type with use_cache=True is not supported") if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") From 5f637597cb14b76f3cccde66b6610e09a82f02e8 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Thu, 20 Apr 2023 12:54:43 +0530 Subject: [PATCH 12/16] Apply suggestions from code review Co-authored-by: fxmarty <9808326+fxmarty@users.noreply.github.com> --- tests/onnxruntime/test_modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 49511323f9..c8e5bd4d74 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3370,7 +3370,7 @@ def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: st @pytest.mark.gpu_test # mark as GPU test as well to run the without/with cache timing test on the slow tests def test_compare_with_and_without_past_key_values(self, model_arch: str): if model_arch == "m2m_100" or model_arch == "encoder-decoder": - return # TODO: this test is failing for m2m_100 + self.skipTest("m2m_100 and encoder-decoder comparison with/without pkv fail or is not supported") model_args = {"test_name": model_arch + "_False", "model_arch": model_arch, "use_cache": False} self._setup(model_args) model_args = {"test_name": model_arch + "_True", "model_arch": model_arch, "use_cache": True} From 6efa5d2200c349d77d0297f24876f6a634eb8dd0 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Thu, 20 Apr 2023 09:30:26 +0200 Subject: [PATCH 13/16] update tests --- tests/onnxruntime/test_modeling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index c8e5bd4d74..5ccb75a5b8 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3231,7 +3231,8 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach @parameterized.expand(grid_parameters(FULL_GRID)) def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): if model_arch == "encoder-decoder" and use_cache is True: - return + self.skipTest("encoder-decoder model type with use_cache=True is not supported") + if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") From b5180ccc82aa451e4221c79ca03f162098937beb Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 28 Aug 2023 14:52:22 +0200 Subject: [PATCH 14/16] change seq2seq-lm to text-generation --- optimum/exporters/onnx/model_configs.py | 1 + optimum/exporters/tasks.py | 4 ++-- tests/exporters/onnx/test_exporters_onnx_cli.py | 8 +++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 260d6ab308..d20b668884 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -1440,5 +1440,6 @@ def overwrite_shape_and_generate_input( return dummy_input + class EncoderDecoderOnnxConfig(EncoderDecoderBaseOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig diff --git
a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index b29f95fd54..b292d33266 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -498,8 +498,8 @@ class TasksManager: tflite="ElectraTFLiteConfig", ), "encoder-decoder": supported_tasks_mapping( - "seq2seq-lm", - "seq2seq-lm-with-past", + "text-generation", + "text-generation-with-past", onnx="EncoderDecoderOnnxConfig", ), "flaubert": supported_tasks_mapping( diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index fe7d687dc5..a73d21307d 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -120,7 +120,13 @@ def _get_models_to_test(export_models_dict: Dict): # TODO: segformer task can not be automatically inferred # TODO: xlm-roberta model auto-infers text-generation, but we don't support it # TODO: perceiver auto-infers default, but we don't support it (why?) - if model_type not in ["segformer", "xlm-roberta", "perceiver", "vision-encoder-decoder", "encoder-decoder"]: + if model_type not in [ + "segformer", + "xlm-roberta", + "perceiver", + "vision-encoder-decoder", + "encoder-decoder", + ]: models_to_test.append( (f"{model_type}_no_task", model_type, model_name, "auto", "default", False, False) ) From 9b56a505b32614ef08cd0a8e56bbbbd7d82bd421 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 28 Aug 2023 15:47:12 +0200 Subject: [PATCH 15/16] fix task --- optimum/exporters/tasks.py | 4 ++-- tests/exporters/onnx/test_exporters_onnx_cli.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index b292d33266..f4908dcb35 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -498,8 +498,8 @@ class TasksManager: tflite="ElectraTFLiteConfig", ), "encoder-decoder": supported_tasks_mapping( - "text-generation", - "text-generation-with-past", + "text2text-generation", + "text2text-generation-with-past", onnx="EncoderDecoderOnnxConfig", ), "flaubert": supported_tasks_mapping( diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index a73d21307d..23cea094f5 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -57,7 +57,7 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: - if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + if model_type == "encoder-decoder" and task == "text2text-generation-with-past": # The model uses bert as decoder and does not support past key values continue onnx_config_class = TasksManager.get_exporter_config_constructor( "onnx", task=task, model_type=model_type ) From 72af8667338e0ed4374a59ff213d53cbdc7dc7b3 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 1 Sep 2023 10:19:41 +0200 Subject: [PATCH 16/16] fix tests --- optimum/onnxruntime/modeling_seq2seq.py | 4 +++ .../exporters/onnx/test_exporters_onnx_cli.py | 2 +- tests/onnxruntime/test_modeling.py | 30 ++++++++++++++----- tests/onnxruntime/utils_onnxruntime_tests.py | 2 +- 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py index 3183e15de0..42952a2581 100644 --- a/optimum/onnxruntime/modeling_seq2seq.py +++ b/optimum/onnxruntime/modeling_seq2seq.py @@ -1097,7 +1097,9 @@ def __init__( encoder_session: ort.InferenceSession, decoder_session: ort.InferenceSession,
config: "PretrainedConfig", + onnx_paths: List[str], decoder_with_past_session: Optional[ort.InferenceSession] = None, + use_cache: bool = True, use_io_binding: Optional[bool] = None, model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, preprocessors: Optional[List] = None, @@ -1108,7 +1110,9 @@ def __init__( encoder_session, decoder_session, config, + onnx_paths, decoder_with_past_session, + use_cache, use_io_binding, model_save_dir, preprocessors, diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index 23cea094f5..b9291fa407 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -120,11 +120,11 @@ def _get_models_to_test(export_models_dict: Dict): # TODO: segformer task can not be automatically inferred # TODO: xlm-roberta model auto-infers text-generation, but we don't support it # TODO: perceiver auto-infers default, but we don't support it (why?) + # TODO: encoder-decoder auto-infers text3text-generation, but it uses bert as decoder and does not support past key values if model_type not in [ "segformer", "xlm-roberta", "perceiver", - "vision-encoder-decoder", "encoder-decoder", ]: models_to_test.append( diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 5ccb75a5b8..c8aff67cde 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3098,8 +3098,8 @@ def test_load_vanilla_transformers_which_is_not_supported(self): @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]})) def test_generate_utils(self, test_name: str, model_arch: str, use_cache: str): - if model_arch == "encoder-decoder": - use_cache = False + if model_arch == "encoder-decoder" and use_cache is True: + self.skipTest("encoder-decoder model type with use_cache=True is not supported for bert as a decoder") model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache} self._setup(model_args) @@ -3123,6 +3123,9 @@ def test_generate_utils(self, test_name: str, model_arch: str, use_cache: str): @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_merge_from_transformers_and_save(self, model_arch): + if model_arch == "encoder-decoder": + self.skipTest("encoder-decoder model type with use_merged=True is not supported for bert as a decoder") + if "text2text-generation-with-past" not in TasksManager.get_supported_tasks_for_model_type( model_arch.replace("_", "-"), exporter="onnx" ): @@ -3142,6 +3145,9 @@ def test_merge_from_transformers_and_save(self, model_arch): @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_merge_from_onnx_and_save(self, model_arch): + if model_arch == "encoder-decoder": + self.skipTest("encoder-decoder model type with use_merged=True is not supported for bert as a decoder") + model_id = MODEL_NAMES[model_arch] task = "text2text-generation-with-past" @@ -3168,7 +3174,8 @@ def test_merge_from_onnx_and_save(self, model_arch): @parameterized.expand(grid_parameters(FULL_GRID)) def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): if model_arch == "encoder-decoder" and use_cache is True: - self.skipTest("encoder-decoder model type with use_cache=True is not supported") + self.skipTest("encoder-decoder model type with use_cache=True is not supported for bert as a decoder") + if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are 
uncompatible") @@ -3207,6 +3214,9 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach tokenizer = get_preprocessor(model_id) tokens = tokenizer("This is a sample output", return_tensors="pt") decoder_start_token_id = transformers_model.config.decoder_start_token_id if model_arch != "mbart" else 2 + if model_arch == "encoder-decoder": + decoder_start_token_id = tokenizer.cls_token_id + decoder_inputs = {"decoder_input_ids": torch.ones((1, 1), dtype=torch.long) * decoder_start_token_id} with torch.no_grad(): @@ -3231,7 +3241,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach @parameterized.expand(grid_parameters(FULL_GRID)) def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): if model_arch == "encoder-decoder" and use_cache is True: - self.skipTest("encoder-decoder model type with use_cache=True is not supported") + self.skipTest("encoder-decoder model type with use_cache=True is not supported for bert as a decoder") if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3249,24 +3259,28 @@ def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cac onnx_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_cache=use_cache) tokenizer = get_preprocessor(model_id) + decoder_start_token_id = onnx_model.config.decoder_start_token_id if model_arch != "mbart" else 2 + if model_arch == "encoder-decoder": + decoder_start_token_id = tokenizer.cls_token_id + # Text2Text generation pipe = pipeline("text2text-generation", model=onnx_model, tokenizer=tokenizer) text = "This is a test" - outputs = pipe(text) + outputs = pipe(text, decoder_start_token_id=decoder_start_token_id) self.assertEqual(pipe.device, onnx_model.device) self.assertIsInstance(outputs[0]["generated_text"], str) # Summarization pipe = pipeline("summarization", model=onnx_model, tokenizer=tokenizer) text = "This is a test" - outputs = pipe(text) + outputs = pipe(text, decoder_start_token_id=decoder_start_token_id) self.assertEqual(pipe.device, onnx_model.device) self.assertIsInstance(outputs[0]["summary_text"], str) # Translation pipe = pipeline("translation_en_to_de", model=onnx_model, tokenizer=tokenizer) text = "This is a test" - outputs = pipe(text) + outputs = pipe(text, decoder_start_token_id=decoder_start_token_id) self.assertEqual(pipe.device, onnx_model.device) self.assertIsInstance(outputs[0]["translation_text"], str) @@ -3413,6 +3427,8 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str): @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]})) def test_compare_merged_and_not_merged_models_outputs(self, test_name: str, model_arch: str, use_cache: bool): + if model_arch == "encoder-decoder" and use_cache is True: + self.skipTest("encoder-decoder model type with use_cache=True is not supported for bert as a decoder") model_args = { "test_name": test_name + "_True", "model_arch": model_arch, diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index 066e0757fb..cf776f11ed 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -50,7 +50,7 @@ "detr": "hf-internal-testing/tiny-random-detr", "distilbert": "hf-internal-testing/tiny-random-DistilBertModel", "electra": "hf-internal-testing/tiny-random-ElectraModel", - 
"encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", + "encoder-decoder": "hf-internal-testing/tiny-random-EncoderDecoderModel-bert-bert", "flaubert": "hf-internal-testing/tiny-random-flaubert", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",