From 2f5da0d930d56c8188b421535a0bd48f30049eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Marty?= <9808326+fxmarty@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:42:29 +0200 Subject: [PATCH 1/4] fix doc ortmodel --- .../onnx/usage_guides/export_a_model.mdx | 2 + optimum/onnxruntime/modeling_ort.py | 136 +++++------------- 2 files changed, 35 insertions(+), 103 deletions(-) diff --git a/docs/source/exporters/onnx/usage_guides/export_a_model.mdx b/docs/source/exporters/onnx/usage_guides/export_a_model.mdx index c6d37b2404..135dd9b457 100644 --- a/docs/source/exporters/onnx/usage_guides/export_a_model.mdx +++ b/docs/source/exporters/onnx/usage_guides/export_a_model.mdx @@ -320,6 +320,8 @@ main_export( ) ``` +For tasks that require only a single ONNX (e.g. encoder-only), an exported model with custom inputs/outputs can then be used with the class [`optimum.onnxruntime.ORTModelForCustomTasks`] for inference with ONNX Runtime on CPU or GPU. + ### Customize the export of Transformers models with custom modeling Optimum supports the export of Transformers models with custom modeling that use [`trust_remote_code=True`](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoModel.from_pretrained.trust_remote_code), not officially supported in the Transormers library but usable with its functionality as [pipelines](https://huggingface.co/docs/transformers/main_classes/pipelines) and [generation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationMixin.generate). diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index 1784766c6a..ee86d5525a 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -39,7 +39,7 @@ AutoModelForSequenceClassification, AutoModelForTokenClassification, ) -from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward +from transformers.file_utils import add_start_docstrings, add_end_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_outputs import ( BaseModelOutput, CausalLMOutput, @@ -85,16 +85,11 @@ _FEATURE_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor" _PROCESSOR_FOR_DOC = "AutoProcessor" -ONNX_MODEL_START_DOCSTRING = r""" +ONNX_MODEL_END_DOCSTRING = r""" This model inherits from [`~onnxruntime.modeling_ort.ORTModel`]. Check the superclass documentation for the generic methods the library implements for all its model (such as downloading or saving) - Args: - config (`transformers.PretrainedConfig`): [PretrainedConfig](https://huggingface.co/docs/transformers/main_classes/configuration#transformers.PretrainedConfig) is the Model configuration class with all the parameters of the model. - Initializing with a config file does not load the weights associated with the model, only the - configuration. Check out the [`~onnxruntime.modeling_ort.ORTModel.from_pretrained`] method to load the model weights. - model (`onnxruntime.InferenceSession`): [onnxruntime.InferenceSession](https://onnxruntime.ai/docs/api/python/api_summary.html#inferencesession) is the main class used to run a model. Check out the [`~onnxruntime.modeling_ort.ORTModel.load_model`] method for more information. - use_io_binding (`Optional[bool]`, defaults to `None`): Whether to use IOBinding during inference to avoid memory copy between the host and devices. Defaults to `True` if the device is CUDA, otherwise defaults to `False`. 
+ This class should be initialized using the [`onnxruntime.modeling_ort.ORTModel.from_pretrained`] method. """ ONNX_TEXT_INPUTS_DOCSTRING = r""" @@ -863,15 +858,10 @@ def raise_on_numpy_input_io_binding(self, use_torch: bool): """ -@add_start_docstrings( - """ - Onnx Model with a BaseModelOutput for feature-extraction tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForFeatureExtraction(ORTModel): """ - Feature Extraction model for ONNX. + ONNX Model for feature-extraction task. """ auto_model_class = AutoModel @@ -976,15 +966,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a MaskedLMOutput for masked language modeling tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForMaskedLM(ORTModel): """ - Masked language model for ONNX. + ONNX Model with a MaskedLMOutput for masked language modeling tasks. """ auto_model_class = AutoModelForMaskedLM @@ -1084,15 +1069,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a QuestionAnsweringModelOutput for extractive question-answering tasks like SQuAD. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForQuestionAnswering(ORTModel): """ - Question Answering model for ONNX. + ONNX Model with a QuestionAnsweringModelOutput for extractive question-answering tasks like SQuAD. """ auto_model_class = AutoModelForQuestionAnswering @@ -1211,16 +1191,11 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a sequence classification/regression head on top (a linear layer on top of the - pooled output) e.g. for GLUE tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForSequenceClassification(ORTModel): """ - Sequence Classification model for ONNX. + ONNX Model with a sequence classification/regression head on top (a linear layer on top of the + pooled output) e.g. for GLUE tasks. """ auto_model_class = AutoModelForSequenceClassification @@ -1317,16 +1292,11 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. - for Named-Entity-Recognition (NER) tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForTokenClassification(ORTModel): """ - Token Classification model for ONNX. + ONNX Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. + for Named-Entity-Recognition (NER) tasks. """ auto_model_class = AutoModelForTokenClassification @@ -1420,16 +1390,11 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a - softmax) e.g. for RocStories/SWAG tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForMultipleChoice(ORTModel): """ - Multiple choice model for ONNX. + ONNX Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a + softmax) e.g. for RocStories/SWAG tasks. """ auto_model_class = AutoModelForMultipleChoice @@ -1531,15 +1496,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model for image-classification tasks. 
- """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForImageClassification(ORTModel): """ - Image Classification model for ONNX. + ONNX Model for image-classification tasks. """ auto_model_class = AutoModelForImageClassification @@ -1630,15 +1590,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with an all-MLP decode head on top e.g. for ADE20k, CityScapes. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForSemanticSegmentation(ORTModel): """ - Semantic Segmentation model for ONNX. + ONNX Model for semantic-segmentation, with an all-MLP decode head on top e.g. for ADE20k, CityScapes. """ auto_model_class = AutoModelForSemanticSegmentation @@ -1741,16 +1696,11 @@ def _prepare_onnx_inputs(self, use_torch: bool, **kwargs): """ -@add_start_docstrings( - """ - Onnx Model with a sequence classification head on top (a linear layer over the pooled output) for tasks like - SUPERB Keyword Spotting. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForAudioClassification(ORTModel): """ - Audio Classification model for ONNX. + ONNX Model for audio-classification, with a sequence classification head on top (a linear layer over the pooled output) for tasks like + SUPERB Keyword Spotting. """ auto_model_class = AutoModelForAudioClassification @@ -1832,15 +1782,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a language modeling head on top for Connectionist Temporal Classification (CTC). - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForCTC(ORTModel): """ - CTC model for ONNX. + ONNX Model with a language modeling head on top for Connectionist Temporal Classification (CTC). """ auto_model_class = AutoModelForCTC @@ -1920,15 +1865,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with an XVector feature extraction head on top for tasks like Speaker Verification. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForAudioXVector(ORTModel): """ - Audio XVector model for ONNX. + ONNX Model with an XVector feature extraction head on top for tasks like Speaker Verification. """ auto_model_class = AutoModelForAudioXVector @@ -2014,15 +1954,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model for with a frame classification head on top for tasks like Speaker Diarization. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForAudioFrameClassification(ORTModel): """ - Audio Frame Classification model for ONNX. + ONNX Model for with a frame classification head on top for tasks like Speaker Diarization. """ auto_model_class = AutoModelForAudioFrameClassification @@ -2099,15 +2034,10 @@ def forward( """ -@add_start_docstrings( - """ - ONNX Model for any custom tasks. It can be used to leverage the inference acceleration for any single-file ONNX model. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForCustomTasks(ORTModel): """ - Model for any custom tasks if the ONNX model is stored in a single file. + ONNX Model for any custom tasks. It can be used to leverage the inference acceleration for any single-file ONNX model, that may use custom inputs and outputs. 
""" @add_start_docstrings_to_model_forward( From b9873a955e1a8f87e09b52a4f3d91eb162e3e97c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Marty?= <9808326+fxmarty@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:51:15 +0200 Subject: [PATCH 2/4] style --- optimum/onnxruntime/modeling_ort.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index ee86d5525a..a436e98aba 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -39,7 +39,7 @@ AutoModelForSequenceClassification, AutoModelForTokenClassification, ) -from transformers.file_utils import add_start_docstrings, add_end_docstrings, add_start_docstrings_to_model_forward +from transformers.file_utils import add_end_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_outputs import ( BaseModelOutput, CausalLMOutput, From 018b9dba85cf6062bdf12019dee7cbc3231e6be5 Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Fri, 11 Aug 2023 21:23:48 +0900 Subject: [PATCH 3/4] Update docs/source/exporters/onnx/usage_guides/export_a_model.mdx Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> --- docs/source/exporters/onnx/usage_guides/export_a_model.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/exporters/onnx/usage_guides/export_a_model.mdx b/docs/source/exporters/onnx/usage_guides/export_a_model.mdx index 135dd9b457..1ff74cb11c 100644 --- a/docs/source/exporters/onnx/usage_guides/export_a_model.mdx +++ b/docs/source/exporters/onnx/usage_guides/export_a_model.mdx @@ -320,7 +320,7 @@ main_export( ) ``` -For tasks that require only a single ONNX (e.g. encoder-only), an exported model with custom inputs/outputs can then be used with the class [`optimum.onnxruntime.ORTModelForCustomTasks`] for inference with ONNX Runtime on CPU or GPU. +For tasks that require only a single ONNX file (e.g. encoder-only), an exported model with custom inputs/outputs can then be used with the class [`optimum.onnxruntime.ORTModelForCustomTasks`] for inference with ONNX Runtime on CPU or GPU. ### Customize the export of Transformers models with custom modeling From 74b8d2627a60672e97017028c38442c399bc0a70 Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Fri, 11 Aug 2023 21:23:58 +0900 Subject: [PATCH 4/4] Update optimum/onnxruntime/modeling_ort.py Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> --- optimum/onnxruntime/modeling_ort.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index a436e98aba..75e44bd243 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -1957,7 +1957,7 @@ def forward( @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForAudioFrameClassification(ORTModel): """ - ONNX Model for with a frame classification head on top for tasks like Speaker Diarization. + ONNX Model with a frame classification head on top for tasks like Speaker Diarization. """ auto_model_class = AutoModelForAudioFrameClassification