From 5e6de91bc8c70fa52a792fd450f5342c3d49afff Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 3 Mar 2023 15:35:23 +0100 Subject: [PATCH 01/16] add encoder decoder model --- optimum/exporters/onnx/config.py | 30 +++++++++++++++++- optimum/exporters/onnx/model_configs.py | 6 ++-- optimum/exporters/tasks.py | 5 +++ optimum/onnxruntime/modeling_seq2seq.py | 33 ++++++++++++++++++++ optimum/utils/normalized_config.py | 1 + tests/exporters/exporters_utils.py | 3 +- tests/onnxruntime/test_modeling.py | 21 +++++++++++-- tests/onnxruntime/utils_onnxruntime_tests.py | 3 +- 8 files changed, 95 insertions(+), 7 deletions(-) diff --git a/optimum/exporters/onnx/config.py b/optimum/exporters/onnx/config.py index 28d32a55fb..780ff0a3d8 100644 --- a/optimum/exporters/onnx/config.py +++ b/optimum/exporters/onnx/config.py @@ -267,7 +267,7 @@ def torch_to_onnx_input_map(self) -> Dict[str, str]: return {} -class EncoderDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): +class DummyEncoderDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator,) def __init__( @@ -341,6 +341,34 @@ def __init__( self.DUMMY_INPUT_GENERATOR_CLASSES += self._past_key_values_generator + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + common_inputs = {} + if self._behavior is not ConfigBehavior.DECODER: + common_inputs["input_ids"] = {0: "batch_size", 1: "encoder_sequence_length"} + + common_inputs["attention_mask"] = {0: "batch_size", 1: "encoder_sequence_length"} + + if self._behavior is not ConfigBehavior.ENCODER: + # TODO: it is likely this pop() is unwanted as we then always hit + # https://github.com/huggingface/transformers/blob/v4.26.0/src/transformers/models/t5/modeling_t5.py#L965-L969 + common_inputs.pop("attention_mask") + + if self.use_past_in_inputs: + # TODO: validate the axis name for attention_mask + # common_inputs["attention_mask"][1] = "past_encoder_sequence_length + sequence_length" + common_inputs["decoder_input_ids"] = {0: "batch_size"} + else: + common_inputs["decoder_input_ids"] = {0: "batch_size", 1: "decoder_sequence_length"} + + if self.use_past_in_inputs: + self.add_past_key_values(common_inputs, direction="inputs") + + if self._behavior is ConfigBehavior.DECODER: + common_inputs["encoder_outputs"] = {0: "batch_size", 1: "encoder_sequence_length"} + + return common_inputs + @property def torch_to_onnx_input_map(self) -> Dict[str, str]: if self._behavior is ConfigBehavior.DECODER: diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index db0256e4d0..f31c764a4a 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -47,7 +47,7 @@ from .config import ( AudioOnnxConfig, AudioToTextOnnxConfig, - EncoderDecoderOnnxConfig, + DummyEncoderDecoderOnnxConfig, TextAndVisionOnnxConfig, TextDecoderOnnxConfig, TextEncoderOnnxConfig, @@ -1168,7 +1168,7 @@ class TrOCROnnxConfig(TextSeq2SeqOnnxConfig): ) -class VisionEncoderDecoderOnnxConfig(EncoderDecoderOnnxConfig): +class VisionEncoderDecoderOnnxConfig(DummyEncoderDecoderOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig ATOL_FOR_VALIDATION = 1e-3 @@ -1439,3 +1439,5 @@ def overwrite_shape_and_generate_input( dummy_input = dummy_input_gen.generate(input_name, framework=framework) return dummy_input +class EncoderDecoderOnnxConfig(DummyEncoderDecoderOnnxConfig): + NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 
0ebcfc2759..b29f95fd54 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -497,6 +497,11 @@ class TasksManager: onnx="ElectraOnnxConfig", tflite="ElectraTFLiteConfig", ), + "encoder-decoder": supported_tasks_mapping( + "seq2seq-lm", + "seq2seq-lm-with-past", + onnx="EncoderDecoderOnnxConfig", + ), "flaubert": supported_tasks_mapping( "feature-extraction", "fill-mask", diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py index c436a900cb..e011b81f89 100644 --- a/optimum/onnxruntime/modeling_seq2seq.py +++ b/optimum/onnxruntime/modeling_seq2seq.py @@ -1092,6 +1092,39 @@ class ORTModelForSeq2SeqLM(ORTModelForConditionalGeneration, GenerationMixin): auto_model_class = AutoModelForSeq2SeqLM main_input_name = "input_ids" + def __init__( + self, + encoder_session: ort.InferenceSession, + decoder_session: ort.InferenceSession, + config: "PretrainedConfig", + decoder_with_past_session: Optional[ort.InferenceSession] = None, + use_io_binding: Optional[bool] = None, + model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, + preprocessors: Optional[List] = None, + generation_config: Optional[GenerationConfig] = None, + **kwargs, + ): + super().__init__( + encoder_session, + decoder_session, + config, + decoder_with_past_session, + use_io_binding, + model_save_dir, + preprocessors, + generation_config, + **kwargs, + ) + + if config.model_type == "encoder-decoder": + self.encoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( + config.encoder.model_type + )(config.encoder) + + self.decoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( + config.decoder.model_type + )(config.decoder) + def _initialize_encoder(self, session: ort.InferenceSession) -> ORTEncoder: return ORTEncoder(session, self) diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index c5f3d5ce4c..e65c3c42d6 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -220,6 +220,7 @@ class NormalizedConfigManager: "distilbert": NormalizedTextConfig.with_args(num_attention_heads="n_heads", hidden_size="dim"), "donut-swin": NormalizedVisionConfig, "electra": NormalizedTextConfig, + "encoder-decoder": NormalizedEncoderDecoderConfig, "gpt2": GPT2LikeNormalizedTextConfig, "gpt-bigcode": GPT2LikeNormalizedTextConfig, "gpt_neo": NormalizedTextConfig.with_args(num_attention_heads="num_heads"), diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 7a20fa4528..9ea8472786 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -161,6 +161,7 @@ "camembert": "camembert-base", "clip": "openai/clip-vit-base-patch32", "convbert": "YituTech/conv-bert-base", + "convnext": "facebook/convnext-tiny-224", "codegen": "hf-internal-testing/tiny-random-CodeGenModel", # Not using Salesforce/codegen-350M-multi because it takes too much time for testing. "data2vec-text": "facebook/data2vec-text-base", "data2vec-vision": "facebook/data2vec-vision-base", @@ -168,10 +169,10 @@ "deberta": "hf-internal-testing/tiny-random-DebertaModel", # Not using microsoft/deberta-base because it takes too much time for testing. "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model", # Not using microsoft/deberta-v2-xlarge because it takes too much time for testing. 
"deit": "facebook/deit-small-patch16-224", - "convnext": "facebook/convnext-tiny-224", "detr": "hf-internal-testing/tiny-random-detr", # Not using facebook/detr-resnet-50 because it takes too much time for testing. "distilbert": "distilbert-base-cased", "electra": "google/electra-base-generator", + "encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", "flaubert": "hf-internal-testing/tiny-random-flaubert", # TODO "gpt2": "gpt2", "gpt-neo": "EleutherAI/gpt-neo-125M", diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index f28a3676be..1da97e121e 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3059,6 +3059,7 @@ class ORTModelForSeq2SeqLMIntegrationTest(ORTModelTestMixin): # "bigbird_pegasus", "blenderbot", "blenderbot_small", + "encoder-decoder", "longt5", "m2m_100", "marian", @@ -3097,11 +3098,13 @@ def test_load_vanilla_transformers_which_is_not_supported(self): @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]})) def test_generate_utils(self, test_name: str, model_arch: str, use_cache: str): + if model_arch == "encoder-decoder": + use_cache = False model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache} self._setup(model_args) model_id = MODEL_NAMES[model_arch] - model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name]) + model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_cache=use_cache) tokenizer = get_preprocessor(model_id) text = "This is a sample output" tokens = tokenizer(text, return_tensors="pt") @@ -3164,6 +3167,8 @@ def test_merge_from_onnx_and_save(self, model_arch): @parameterized.expand(grid_parameters(FULL_GRID)) def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): + if model_arch == "encoder-decoder" and use_cache is True: + return if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3173,6 +3178,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach "use_cache": use_cache, "use_merged": use_merged, } + self._setup(model_args) model_id = MODEL_NAMES[model_arch] @@ -3224,6 +3230,8 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach @parameterized.expand(grid_parameters(FULL_GRID)) def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): + if model_arch == "encoder-decoder" and use_cache is True: + return if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3233,6 +3241,7 @@ def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cac "use_cache": use_cache, "use_merged": use_merged, } + self._setup(model_args) model_id = MODEL_NAMES[model_arch] @@ -3287,6 +3296,8 @@ def test_pipeline_model_is_none(self): @require_torch_gpu @pytest.mark.gpu_test def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool): + if model_arch == "encoder-decoder": + use_cache = False model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache} self._setup(model_args) @@ -3358,7 +3369,7 @@ def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: st @parameterized.expand(SUPPORTED_ARCHITECTURES) @pytest.mark.gpu_test # mark as GPU test as well to run the without/with cache timing test on the slow tests 
def test_compare_with_and_without_past_key_values(self, model_arch: str): - if model_arch == "m2m_100": + if model_arch == "m2m_100" and model_arch == "encoder-decoder": return # TODO: this test is failing for m2m_100 model_args = {"test_name": model_arch + "_False", "model_arch": model_arch, "use_cache": False} self._setup(model_args) @@ -3446,6 +3457,8 @@ def test_compare_merged_and_not_merged_models_outputs(self, test_name: str, mode @require_torch_gpu @pytest.mark.gpu_test def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): + if model_arch == "encoder-decoder": + use_cache = False if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3455,6 +3468,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: "use_cache": use_cache, "use_merged": use_merged, } + self._setup(model_args) model_id = MODEL_NAMES[model_arch] @@ -3491,6 +3505,8 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: def test_compare_generation_to_io_binding( self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool ): + if model_arch == "encoder-decoder": + use_cache = False if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3500,6 +3516,7 @@ def test_compare_generation_to_io_binding( "use_cache": use_cache, "use_merged": use_merged, } + self._setup(model_args) model_id = MODEL_NAMES[model_arch] diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index be0f3d0c31..066e0757fb 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -39,6 +39,7 @@ "camembert": "hf-internal-testing/tiny-random-camembert", "clip": "hf-internal-testing/tiny-random-CLIPModel", "convbert": "hf-internal-testing/tiny-random-ConvBertModel", + "convnext": "hf-internal-testing/tiny-random-convnext", "codegen": "hf-internal-testing/tiny-random-CodeGenModel", "data2vec_text": "hf-internal-testing/tiny-random-Data2VecTextModel", "data2vec_vision": "hf-internal-testing/tiny-random-Data2VecVisionModel", @@ -46,10 +47,10 @@ "deberta": "hf-internal-testing/tiny-random-DebertaModel", "deberta_v2": "hf-internal-testing/tiny-random-DebertaV2Model", "deit": "hf-internal-testing/tiny-random-DeiTModel", - "convnext": "hf-internal-testing/tiny-random-convnext", "detr": "hf-internal-testing/tiny-random-detr", "distilbert": "hf-internal-testing/tiny-random-DistilBertModel", "electra": "hf-internal-testing/tiny-random-ElectraModel", + "encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", "flaubert": "hf-internal-testing/tiny-random-flaubert", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", From 8b44014929e3b1e35fd1117c522c840f2f146a0a Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 3 Mar 2023 16:25:56 +0100 Subject: [PATCH 02/16] update tests --- optimum/onnxruntime/modeling_seq2seq.py | 1 + tests/onnxruntime/test_modeling.py | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py index e011b81f89..3183e15de0 100644 --- a/optimum/onnxruntime/modeling_seq2seq.py +++ b/optimum/onnxruntime/modeling_seq2seq.py @@ -1186,6 +1186,7 @@ def prepare_inputs_for_generation( input_ids, past_key_values=None, 
attention_mask=None, + token_type_ids=None, head_mask=None, decoder_head_mask=None, cross_attn_head_mask=None, diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 1da97e121e..94947641fd 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3472,12 +3472,12 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: self._setup(model_args) model_id = MODEL_NAMES[model_arch] - onnx_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_io_binding=False).to( - "cuda" - ) - io_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_io_binding=True).to( - "cuda" - ) + onnx_model = ORTModelForSeq2SeqLM.from_pretrained( + self.onnx_model_dirs[test_name], use_io_binding=False, use_cache=use_cache + ).to("cuda") + io_model = ORTModelForSeq2SeqLM.from_pretrained( + self.onnx_model_dirs[test_name], use_io_binding=True, use_cache=use_cache + ).to("cuda") self.assertFalse(onnx_model.use_io_binding) self.assertTrue(io_model.use_io_binding) @@ -3520,12 +3520,12 @@ def test_compare_generation_to_io_binding( self._setup(model_args) model_id = MODEL_NAMES[model_arch] - onnx_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_io_binding=False).to( - "cuda" - ) - io_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_io_binding=True).to( - "cuda" - ) + onnx_model = ORTModelForSeq2SeqLM.from_pretrained( + self.onnx_model_dirs[test_name], use_io_binding=False, use_cache=use_cache + ).to("cuda") + io_model = ORTModelForSeq2SeqLM.from_pretrained( + self.onnx_model_dirs[test_name], use_io_binding=True, use_cache=use_cache + ).to("cuda") tokenizer = get_preprocessor(model_id) tokens = tokenizer("This is a sample output", return_tensors="pt").to("cuda") From 6e680350f827a9848770e2b4d5e49fd3e7e86660 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 6 Mar 2023 09:22:00 +0100 Subject: [PATCH 03/16] update docs and tests --- docs/source/exporters/onnx/overview.mdx | 2 ++ tests/exporters/exporters_utils.py | 3 ++- tests/onnxruntime/test_modeling.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index e70e8afa84..9852ec162c 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -40,6 +40,7 @@ Supported architectures: - DistilBert - Donut-Swin - Electra +- Encoder Decoder - Flaubert - GPT-2 - GPT-BigCode @@ -88,6 +89,7 @@ Supported architectures: - TROCR - UniSpeech - UniSpeech SAT +- Vision Encoder Decoder - Vit - Wav2Vec2 - Wav2Vec2 Conformer diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 9ea8472786..9e720c2abc 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -42,6 +42,7 @@ "camembert": "hf-internal-testing/tiny-random-camembert", "clip": "hf-internal-testing/tiny-random-CLIPModel", "convbert": "hf-internal-testing/tiny-random-ConvBertModel", + "convnext": "hf-internal-testing/tiny-random-convnext", "codegen": "hf-internal-testing/tiny-random-CodeGenModel", "cvt": "hf-internal-testing/tiny-random-CvTModel", "data2vec-text": "hf-internal-testing/tiny-random-Data2VecTextModel", @@ -51,10 +52,10 @@ "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model", "deit": "hf-internal-testing/tiny-random-DeiTModel", "donut-swin": 
"hf-internal-testing/tiny-random-DonutSwinModel", - "convnext": "hf-internal-testing/tiny-random-convnext", "detr": "hf-internal-testing/tiny-random-DetrModel", # hf-internal-testing/tiny-random-detr is larger "distilbert": "hf-internal-testing/tiny-random-DistilBertModel", "electra": "hf-internal-testing/tiny-random-ElectraModel", + "encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", "flaubert": "hf-internal-testing/tiny-random-flaubert", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt-bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 94947641fd..da50b1da7c 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3369,7 +3369,7 @@ def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: st @parameterized.expand(SUPPORTED_ARCHITECTURES) @pytest.mark.gpu_test # mark as GPU test as well to run the without/with cache timing test on the slow tests def test_compare_with_and_without_past_key_values(self, model_arch: str): - if model_arch == "m2m_100" and model_arch == "encoder-decoder": + if model_arch == "m2m_100" or model_arch == "encoder-decoder": return # TODO: this test is failing for m2m_100 model_args = {"test_name": model_arch + "_False", "model_arch": model_arch, "use_cache": False} self._setup(model_args) From aa74a82f635fd0065b8100402da9cd4ee1c79184 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 6 Mar 2023 09:34:30 +0000 Subject: [PATCH 04/16] fixed tests --- tests/exporters/onnx/test_onnx_export.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index 7e172452cd..b7d8690c78 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -161,6 +161,9 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: + if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + continue + onnx_config_constructor = TasksManager.get_exporter_config_constructor( model_type=model_type, exporter="onnx", task=task, model_name=model_name ) From 5ad004e46c79046c52ea2aefd9073678ff01c413 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 6 Mar 2023 11:37:19 +0000 Subject: [PATCH 05/16] update tests --- tests/exporters/onnx/test_exporters_onnx_cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index 1d25240c18..1d28aaafa5 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -57,6 +57,8 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: + if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + continue onnx_config_class = TasksManager.get_exporter_config_constructor( "onnx", task=task, model_type=model_type ) From a88eee31183a4992169b988a40e07e87f905f6b2 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 7 Mar 2023 10:29:10 +0100 Subject: [PATCH 06/16] update tests --- tests/exporters/onnx/test_exporters_onnx_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index 1d28aaafa5..f7aec73301 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py 
+++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -119,7 +119,7 @@ def _get_models_to_test(export_models_dict: Dict): # TODO: segformer task can not be automatically inferred # TODO: xlm-roberta model auto-infers text-generation, but we don't support it # TODO: perceiver auto-infers default, but we don't support it (why?) - if model_type not in ["segformer", "xlm-roberta", "perceiver", "vision-encoder-decoder"]: + if model_type not in ["segformer", "xlm-roberta", "perceiver", "vision-encoder-decoder", "encoder-decoder"]: models_to_test.append( (f"{model_type}_no_task", model_type, model_name, "auto", "default", False, False) ) From 2a0abff6319561c2b3c93c0d3ff797e33c02643d Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 21 Mar 2023 12:10:55 +0100 Subject: [PATCH 07/16] update tests --- tests/exporters/exporters_utils.py | 2 +- tests/exporters/onnx/test_exporters_onnx_cli.py | 1 + tests/exporters/onnx/test_onnx_export.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 9e720c2abc..53d08f58af 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -55,7 +55,7 @@ "detr": "hf-internal-testing/tiny-random-DetrModel", # hf-internal-testing/tiny-random-detr is larger "distilbert": "hf-internal-testing/tiny-random-DistilBertModel", "electra": "hf-internal-testing/tiny-random-ElectraModel", - "encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", + "encoder-decoder": "hf-internal-testing/tiny-random-EncoderDecoderModel-bert-bert", "flaubert": "hf-internal-testing/tiny-random-flaubert", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt-bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index f7aec73301..fe7d687dc5 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -58,6 +58,7 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + # The model uses bert as decoder and does not support past key values continue onnx_config_class = TasksManager.get_exporter_config_constructor( "onnx", task=task, model_type=model_type diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index b7d8690c78..dd9b6f64d7 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -162,6 +162,7 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + # The model uses bert as decoder and do not have support past key values continue onnx_config_constructor = TasksManager.get_exporter_config_constructor( From bbafef600b21f36665a9a4f94a022f9b88880f45 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 21 Mar 2023 12:13:16 +0100 Subject: [PATCH 08/16] update tests --- tests/exporters/onnx/test_onnx_export.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index dd9b6f64d7..b7d8690c78 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -162,7 +162,6 @@ def _get_models_to_test(export_models_dict: Dict): for 
model_name, tasks in model_tasks.items(): for task in tasks: if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": - # The model uses bert as decoder and do not have support past key values continue onnx_config_constructor = TasksManager.get_exporter_config_constructor( From 9933f1a1b0537b888e2adbb1017a2535846e3ee2 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 21 Mar 2023 12:18:22 +0100 Subject: [PATCH 09/16] update tests --- tests/exporters/onnx/test_onnx_export.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index b7d8690c78..fc2d143aec 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -162,6 +162,7 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + # The model uses bert as decoder and does not support past key values continue onnx_config_constructor = TasksManager.get_exporter_config_constructor( From fb9b1af647e478d7212b4014a1e4654c62f9bee9 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Thu, 20 Apr 2023 09:21:01 +0200 Subject: [PATCH 10/16] update tests --- optimum/exporters/onnx/config.py | 2 +- optimum/exporters/onnx/model_configs.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/optimum/exporters/onnx/config.py b/optimum/exporters/onnx/config.py index 780ff0a3d8..2db092ead1 100644 --- a/optimum/exporters/onnx/config.py +++ b/optimum/exporters/onnx/config.py @@ -267,7 +267,7 @@ def torch_to_onnx_input_map(self) -> Dict[str, str]: return {} -class DummyEncoderDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): +class EncoderDecoderBaseOnnxConfig(OnnxSeq2SeqConfigWithPast): DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator,) def __init__( diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index f31c764a4a..260d6ab308 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -47,7 +47,7 @@ from .config import ( AudioOnnxConfig, AudioToTextOnnxConfig, - DummyEncoderDecoderOnnxConfig, + EncoderDecoderBaseOnnxConfig, TextAndVisionOnnxConfig, TextDecoderOnnxConfig, TextEncoderOnnxConfig, @@ -1168,7 +1168,7 @@ class TrOCROnnxConfig(TextSeq2SeqOnnxConfig): ) -class VisionEncoderDecoderOnnxConfig(DummyEncoderDecoderOnnxConfig): +class VisionEncoderDecoderOnnxConfig(EncoderDecoderBaseOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig ATOL_FOR_VALIDATION = 1e-3 @@ -1439,5 +1439,6 @@ def overwrite_shape_and_generate_input( dummy_input = dummy_input_gen.generate(input_name, framework=framework) return dummy_input -class EncoderDecoderOnnxConfig(DummyEncoderDecoderOnnxConfig): + +class EncoderDecoderOnnxConfig(EncoderDecoderBaseOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig From 5e3193e49362dedb655e8dff5cd3a51e222d9031 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Thu, 20 Apr 2023 12:54:08 +0530 Subject: [PATCH 11/16] Update tests/onnxruntime/test_modeling.py Co-authored-by: fxmarty <9808326+fxmarty@users.noreply.github.com> --- tests/onnxruntime/test_modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index da50b1da7c..49511323f9 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3168,7 +3168,7 
@@ def test_merge_from_onnx_and_save(self, model_arch): @parameterized.expand(grid_parameters(FULL_GRID)) def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): if model_arch == "encoder-decoder" and use_cache is True: - return + self.skipTest("encoder-decoder model type with use_cache=True is not supported") if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") From 5f637597cb14b76f3cccde66b6610e09a82f02e8 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Thu, 20 Apr 2023 12:54:43 +0530 Subject: [PATCH 12/16] Apply suggestions from code review Co-authored-by: fxmarty <9808326+fxmarty@users.noreply.github.com> --- tests/onnxruntime/test_modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 49511323f9..c8e5bd4d74 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3370,7 +3370,7 @@ def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: st @pytest.mark.gpu_test # mark as GPU test as well to run the without/with cache timing test on the slow tests def test_compare_with_and_without_past_key_values(self, model_arch: str): if model_arch == "m2m_100" or model_arch == "encoder-decoder": - return # TODO: this test is failing for m2m_100 + self.skipTest("m2m_100 and encoder-decoder comparison with/without pkv fail or is not supported") model_args = {"test_name": model_arch + "_False", "model_arch": model_arch, "use_cache": False} self._setup(model_args) model_args = {"test_name": model_arch + "_True", "model_arch": model_arch, "use_cache": True} From 6efa5d2200c349d77d0297f24876f6a634eb8dd0 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Thu, 20 Apr 2023 09:30:26 +0200 Subject: [PATCH 13/16] update tests --- tests/onnxruntime/test_modeling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index c8e5bd4d74..5ccb75a5b8 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3231,7 +3231,8 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach @parameterized.expand(grid_parameters(FULL_GRID)) def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): if model_arch == "encoder-decoder" and use_cache is True: - return + self.skipTest("encoder-decoder model type with use_cache=True is not supported") + if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") From b5180ccc82aa451e4221c79ca03f162098937beb Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 28 Aug 2023 14:52:22 +0200 Subject: [PATCH 14/16] change seq2seq-lm to text-generation --- optimum/exporters/onnx/model_configs.py | 1 + optimum/exporters/tasks.py | 4 ++-- tests/exporters/onnx/test_exporters_onnx_cli.py | 8 +++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 260d6ab308..d20b668884 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -1440,5 +1440,6 @@ def overwrite_shape_and_generate_input( return dummy_input + class EncoderDecoderOnnxConfig(EncoderDecoderBaseOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig diff --git
a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index b29f95fd54..b292d33266 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -498,8 +498,8 @@ class TasksManager: tflite="ElectraTFLiteConfig", ), "encoder-decoder": supported_tasks_mapping( - "seq2seq-lm", - "seq2seq-lm-with-past", + "text-generation", + "text-generation-with-past", onnx="EncoderDecoderOnnxConfig", ), "flaubert": supported_tasks_mapping( diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index fe7d687dc5..a73d21307d 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -120,7 +120,13 @@ def _get_models_to_test(export_models_dict: Dict): # TODO: segformer task can not be automatically inferred # TODO: xlm-roberta model auto-infers text-generation, but we don't support it # TODO: perceiver auto-infers default, but we don't support it (why?) - if model_type not in ["segformer", "xlm-roberta", "perceiver", "vision-encoder-decoder", "encoder-decoder"]: + if model_type not in [ + "segformer", + "xlm-roberta", + "perceiver", + "vision-encoder-decoder", + "encoder-decoder", + ]: models_to_test.append( (f"{model_type}_no_task", model_type, model_name, "auto", "default", False, False) ) From 9b56a505b32614ef08cd0a8e56bbbbd7d82bd421 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 28 Aug 2023 15:47:12 +0200 Subject: [PATCH 15/16] fix task --- optimum/exporters/tasks.py | 4 ++-- tests/exporters/onnx/test_exporters_onnx_cli.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index b292d33266..f4908dcb35 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -498,8 +498,8 @@ class TasksManager: tflite="ElectraTFLiteConfig", ), "encoder-decoder": supported_tasks_mapping( - "text-generation", - "text-generation-with-past", + "text2text-generation", + "text2text-generation-with-past", onnx="EncoderDecoderOnnxConfig", ), "flaubert": supported_tasks_mapping( diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index a73d21307d..23cea094f5 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -57,7 +57,7 @@ def _get_models_to_test(export_models_dict: Dict): for model_name, tasks in model_tasks.items(): for task in tasks: - if model_type == "encoder-decoder" and task == "seq2seq-lm-with-past": + if model_type == "encoder-decoder" and task == "text2text-generation-with-past": # The model uses bert as decoder and does not support past key values continue onnx_config_class = TasksManager.get_exporter_config_constructor( "onnx", task=task, model_type=model_type ) From 72af8667338e0ed4374a59ff213d53cbdc7dc7b3 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 1 Sep 2023 10:19:41 +0200 Subject: [PATCH 16/16] fix tests --- optimum/onnxruntime/modeling_seq2seq.py | 4 +++ .../exporters/onnx/test_exporters_onnx_cli.py | 2 +- tests/onnxruntime/test_modeling.py | 30 ++++++++++++++----- tests/onnxruntime/utils_onnxruntime_tests.py | 2 +- 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py index 3183e15de0..42952a2581 100644 --- a/optimum/onnxruntime/modeling_seq2seq.py +++ b/optimum/onnxruntime/modeling_seq2seq.py @@ -1097,7 +1097,9 @@ def __init__( encoder_session: ort.InferenceSession, decoder_session: ort.InferenceSession,
config: "PretrainedConfig", + onnx_paths: List[str], decoder_with_past_session: Optional[ort.InferenceSession] = None, + use_cache: bool = True, use_io_binding: Optional[bool] = None, model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, preprocessors: Optional[List] = None, @@ -1108,7 +1110,9 @@ def __init__( encoder_session, decoder_session, config, + onnx_paths, decoder_with_past_session, + use_cache, use_io_binding, model_save_dir, preprocessors, diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index 23cea094f5..b9291fa407 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -120,11 +120,11 @@ def _get_models_to_test(export_models_dict: Dict): # TODO: segformer task can not be automatically inferred # TODO: xlm-roberta model auto-infers text-generation, but we don't support it # TODO: perceiver auto-infers default, but we don't support it (why?) + # TODO: encoder-decoder auto-infers text3text-generation, but it uses bert as decoder and does not support past key values if model_type not in [ "segformer", "xlm-roberta", "perceiver", - "vision-encoder-decoder", "encoder-decoder", ]: models_to_test.append( diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 5ccb75a5b8..c8aff67cde 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -3098,8 +3098,8 @@ def test_load_vanilla_transformers_which_is_not_supported(self): @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]})) def test_generate_utils(self, test_name: str, model_arch: str, use_cache: str): - if model_arch == "encoder-decoder": - use_cache = False + if model_arch == "encoder-decoder" and use_cache is True: + self.skipTest("encoder-decoder model type with use_cache=True is not supported for bert as a decoder") model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache} self._setup(model_args) @@ -3123,6 +3123,9 @@ def test_generate_utils(self, test_name: str, model_arch: str, use_cache: str): @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_merge_from_transformers_and_save(self, model_arch): + if model_arch == "encoder-decoder": + self.skipTest("encoder-decoder model type with use_merged=True is not supported for bert as a decoder") + if "text2text-generation-with-past" not in TasksManager.get_supported_tasks_for_model_type( model_arch.replace("_", "-"), exporter="onnx" ): @@ -3142,6 +3145,9 @@ def test_merge_from_transformers_and_save(self, model_arch): @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_merge_from_onnx_and_save(self, model_arch): + if model_arch == "encoder-decoder": + self.skipTest("encoder-decoder model type with use_merged=True is not supported for bert as a decoder") + model_id = MODEL_NAMES[model_arch] task = "text2text-generation-with-past" @@ -3168,7 +3174,8 @@ def test_merge_from_onnx_and_save(self, model_arch): @parameterized.expand(grid_parameters(FULL_GRID)) def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): if model_arch == "encoder-decoder" and use_cache is True: - self.skipTest("encoder-decoder model type with use_cache=True is not supported") + self.skipTest("encoder-decoder model type with use_cache=True is not supported for bert as a decoder") + if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are 
uncompatible") @@ -3207,6 +3214,9 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach tokenizer = get_preprocessor(model_id) tokens = tokenizer("This is a sample output", return_tensors="pt") decoder_start_token_id = transformers_model.config.decoder_start_token_id if model_arch != "mbart" else 2 + if model_arch == "encoder-decoder": + decoder_start_token_id = tokenizer.cls_token_id + decoder_inputs = {"decoder_input_ids": torch.ones((1, 1), dtype=torch.long) * decoder_start_token_id} with torch.no_grad(): @@ -3231,7 +3241,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach @parameterized.expand(grid_parameters(FULL_GRID)) def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool): if model_arch == "encoder-decoder" and use_cache is True: - self.skipTest("encoder-decoder model type with use_cache=True is not supported") + self.skipTest("encoder-decoder model type with use_cache=True is not supported for bert as a decoder") if use_cache is False and use_merged is True: self.skipTest("use_cache=False, use_merged=True are uncompatible") @@ -3249,24 +3259,28 @@ def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cac onnx_model = ORTModelForSeq2SeqLM.from_pretrained(self.onnx_model_dirs[test_name], use_cache=use_cache) tokenizer = get_preprocessor(model_id) + decoder_start_token_id = onnx_model.config.decoder_start_token_id if model_arch != "mbart" else 2 + if model_arch == "encoder-decoder": + decoder_start_token_id = tokenizer.cls_token_id + # Text2Text generation pipe = pipeline("text2text-generation", model=onnx_model, tokenizer=tokenizer) text = "This is a test" - outputs = pipe(text) + outputs = pipe(text, decoder_start_token_id=decoder_start_token_id) self.assertEqual(pipe.device, onnx_model.device) self.assertIsInstance(outputs[0]["generated_text"], str) # Summarization pipe = pipeline("summarization", model=onnx_model, tokenizer=tokenizer) text = "This is a test" - outputs = pipe(text) + outputs = pipe(text, decoder_start_token_id=decoder_start_token_id) self.assertEqual(pipe.device, onnx_model.device) self.assertIsInstance(outputs[0]["summary_text"], str) # Translation pipe = pipeline("translation_en_to_de", model=onnx_model, tokenizer=tokenizer) text = "This is a test" - outputs = pipe(text) + outputs = pipe(text, decoder_start_token_id=decoder_start_token_id) self.assertEqual(pipe.device, onnx_model.device) self.assertIsInstance(outputs[0]["translation_text"], str) @@ -3413,6 +3427,8 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str): @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]})) def test_compare_merged_and_not_merged_models_outputs(self, test_name: str, model_arch: str, use_cache: bool): + if model_arch == "encoder-decoder" and use_cache is True: + self.skipTest("encoder-decoder model type with use_cache=True is not supported for bert as a decoder") model_args = { "test_name": test_name + "_True", "model_arch": model_arch, diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index 066e0757fb..cf776f11ed 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -50,7 +50,7 @@ "detr": "hf-internal-testing/tiny-random-detr", "distilbert": "hf-internal-testing/tiny-random-DistilBertModel", "electra": "hf-internal-testing/tiny-random-ElectraModel", - 
"encoder-decoder": "patrickvonplaten/bert2bert_cnn_daily_mail", + "encoder-decoder": "hf-internal-testing/tiny-random-EncoderDecoderModel-bert-bert", "flaubert": "hf-internal-testing/tiny-random-flaubert", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",