From 2f5da0d930d56c8188b421535a0bd48f30049eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Marty?= <9808326+fxmarty@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:42:29 +0200 Subject: [PATCH 1/4] fix doc ortmodel --- .../onnx/usage_guides/export_a_model.mdx | 2 + optimum/onnxruntime/modeling_ort.py | 136 +++++------------- 2 files changed, 35 insertions(+), 103 deletions(-) diff --git a/docs/source/exporters/onnx/usage_guides/export_a_model.mdx b/docs/source/exporters/onnx/usage_guides/export_a_model.mdx index c6d37b2404..135dd9b457 100644 --- a/docs/source/exporters/onnx/usage_guides/export_a_model.mdx +++ b/docs/source/exporters/onnx/usage_guides/export_a_model.mdx @@ -320,6 +320,8 @@ main_export( ) ``` +For tasks that require only a single ONNX (e.g. encoder-only), an exported model with custom inputs/outputs can then be used with the class [`optimum.onnxruntime.ORTModelForCustomTasks`] for inference with ONNX Runtime on CPU or GPU. + ### Customize the export of Transformers models with custom modeling Optimum supports the export of Transformers models with custom modeling that use [`trust_remote_code=True`](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoModel.from_pretrained.trust_remote_code), not officially supported in the Transormers library but usable with its functionality as [pipelines](https://huggingface.co/docs/transformers/main_classes/pipelines) and [generation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationMixin.generate). diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index 1784766c6a..ee86d5525a 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -39,7 +39,7 @@ AutoModelForSequenceClassification, AutoModelForTokenClassification, ) -from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward +from transformers.file_utils import add_start_docstrings, add_end_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_outputs import ( BaseModelOutput, CausalLMOutput, @@ -85,16 +85,11 @@ _FEATURE_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor" _PROCESSOR_FOR_DOC = "AutoProcessor" -ONNX_MODEL_START_DOCSTRING = r""" +ONNX_MODEL_END_DOCSTRING = r""" This model inherits from [`~onnxruntime.modeling_ort.ORTModel`]. Check the superclass documentation for the generic methods the library implements for all its model (such as downloading or saving) - Args: - config (`transformers.PretrainedConfig`): [PretrainedConfig](https://huggingface.co/docs/transformers/main_classes/configuration#transformers.PretrainedConfig) is the Model configuration class with all the parameters of the model. - Initializing with a config file does not load the weights associated with the model, only the - configuration. Check out the [`~onnxruntime.modeling_ort.ORTModel.from_pretrained`] method to load the model weights. - model (`onnxruntime.InferenceSession`): [onnxruntime.InferenceSession](https://onnxruntime.ai/docs/api/python/api_summary.html#inferencesession) is the main class used to run a model. Check out the [`~onnxruntime.modeling_ort.ORTModel.load_model`] method for more information. - use_io_binding (`Optional[bool]`, defaults to `None`): Whether to use IOBinding during inference to avoid memory copy between the host and devices. Defaults to `True` if the device is CUDA, otherwise defaults to `False`. 
+ This class should be initialized using the [`onnxruntime.modeling_ort.ORTModel.from_pretrained`] method. """ ONNX_TEXT_INPUTS_DOCSTRING = r""" @@ -863,15 +858,10 @@ def raise_on_numpy_input_io_binding(self, use_torch: bool): """ -@add_start_docstrings( - """ - Onnx Model with a BaseModelOutput for feature-extraction tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForFeatureExtraction(ORTModel): """ - Feature Extraction model for ONNX. + ONNX Model for feature-extraction task. """ auto_model_class = AutoModel @@ -976,15 +966,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a MaskedLMOutput for masked language modeling tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForMaskedLM(ORTModel): """ - Masked language model for ONNX. + ONNX Model with a MaskedLMOutput for masked language modeling tasks. """ auto_model_class = AutoModelForMaskedLM @@ -1084,15 +1069,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a QuestionAnsweringModelOutput for extractive question-answering tasks like SQuAD. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForQuestionAnswering(ORTModel): """ - Question Answering model for ONNX. + ONNX Model with a QuestionAnsweringModelOutput for extractive question-answering tasks like SQuAD. """ auto_model_class = AutoModelForQuestionAnswering @@ -1211,16 +1191,11 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a sequence classification/regression head on top (a linear layer on top of the - pooled output) e.g. for GLUE tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForSequenceClassification(ORTModel): """ - Sequence Classification model for ONNX. + ONNX Model with a sequence classification/regression head on top (a linear layer on top of the + pooled output) e.g. for GLUE tasks. """ auto_model_class = AutoModelForSequenceClassification @@ -1317,16 +1292,11 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. - for Named-Entity-Recognition (NER) tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForTokenClassification(ORTModel): """ - Token Classification model for ONNX. + ONNX Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. + for Named-Entity-Recognition (NER) tasks. """ auto_model_class = AutoModelForTokenClassification @@ -1420,16 +1390,11 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a - softmax) e.g. for RocStories/SWAG tasks. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForMultipleChoice(ORTModel): """ - Multiple choice model for ONNX. + ONNX Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a + softmax) e.g. for RocStories/SWAG tasks. """ auto_model_class = AutoModelForMultipleChoice @@ -1531,15 +1496,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model for image-classification tasks. 
- """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForImageClassification(ORTModel): """ - Image Classification model for ONNX. + ONNX Model for image-classification tasks. """ auto_model_class = AutoModelForImageClassification @@ -1630,15 +1590,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with an all-MLP decode head on top e.g. for ADE20k, CityScapes. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForSemanticSegmentation(ORTModel): """ - Semantic Segmentation model for ONNX. + ONNX Model for semantic-segmentation, with an all-MLP decode head on top e.g. for ADE20k, CityScapes. """ auto_model_class = AutoModelForSemanticSegmentation @@ -1741,16 +1696,11 @@ def _prepare_onnx_inputs(self, use_torch: bool, **kwargs): """ -@add_start_docstrings( - """ - Onnx Model with a sequence classification head on top (a linear layer over the pooled output) for tasks like - SUPERB Keyword Spotting. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForAudioClassification(ORTModel): """ - Audio Classification model for ONNX. + ONNX Model for audio-classification, with a sequence classification head on top (a linear layer over the pooled output) for tasks like + SUPERB Keyword Spotting. """ auto_model_class = AutoModelForAudioClassification @@ -1832,15 +1782,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with a language modeling head on top for Connectionist Temporal Classification (CTC). - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForCTC(ORTModel): """ - CTC model for ONNX. + ONNX Model with a language modeling head on top for Connectionist Temporal Classification (CTC). """ auto_model_class = AutoModelForCTC @@ -1920,15 +1865,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model with an XVector feature extraction head on top for tasks like Speaker Verification. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForAudioXVector(ORTModel): """ - Audio XVector model for ONNX. + ONNX Model with an XVector feature extraction head on top for tasks like Speaker Verification. """ auto_model_class = AutoModelForAudioXVector @@ -2014,15 +1954,10 @@ def forward( """ -@add_start_docstrings( - """ - Onnx Model for with a frame classification head on top for tasks like Speaker Diarization. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForAudioFrameClassification(ORTModel): """ - Audio Frame Classification model for ONNX. + ONNX Model for with a frame classification head on top for tasks like Speaker Diarization. """ auto_model_class = AutoModelForAudioFrameClassification @@ -2099,15 +2034,10 @@ def forward( """ -@add_start_docstrings( - """ - ONNX Model for any custom tasks. It can be used to leverage the inference acceleration for any single-file ONNX model. - """, - ONNX_MODEL_START_DOCSTRING, -) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForCustomTasks(ORTModel): """ - Model for any custom tasks if the ONNX model is stored in a single file. + ONNX Model for any custom tasks. It can be used to leverage the inference acceleration for any single-file ONNX model, that may use custom inputs and outputs. 
""" @add_start_docstrings_to_model_forward( From b9873a955e1a8f87e09b52a4f3d91eb162e3e97c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Marty?= <9808326+fxmarty@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:51:15 +0200 Subject: [PATCH 2/4] style --- optimum/onnxruntime/modeling_ort.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index ee86d5525a..a436e98aba 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -39,7 +39,7 @@ AutoModelForSequenceClassification, AutoModelForTokenClassification, ) -from transformers.file_utils import add_start_docstrings, add_end_docstrings, add_start_docstrings_to_model_forward +from transformers.file_utils import add_end_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_outputs import ( BaseModelOutput, CausalLMOutput, From 018b9dba85cf6062bdf12019dee7cbc3231e6be5 Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Fri, 11 Aug 2023 21:23:48 +0900 Subject: [PATCH 3/4] Update docs/source/exporters/onnx/usage_guides/export_a_model.mdx Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> --- docs/source/exporters/onnx/usage_guides/export_a_model.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/exporters/onnx/usage_guides/export_a_model.mdx b/docs/source/exporters/onnx/usage_guides/export_a_model.mdx index 135dd9b457..1ff74cb11c 100644 --- a/docs/source/exporters/onnx/usage_guides/export_a_model.mdx +++ b/docs/source/exporters/onnx/usage_guides/export_a_model.mdx @@ -320,7 +320,7 @@ main_export( ) ``` -For tasks that require only a single ONNX (e.g. encoder-only), an exported model with custom inputs/outputs can then be used with the class [`optimum.onnxruntime.ORTModelForCustomTasks`] for inference with ONNX Runtime on CPU or GPU. +For tasks that require only a single ONNX file (e.g. encoder-only), an exported model with custom inputs/outputs can then be used with the class [`optimum.onnxruntime.ORTModelForCustomTasks`] for inference with ONNX Runtime on CPU or GPU. ### Customize the export of Transformers models with custom modeling From 74b8d2627a60672e97017028c38442c399bc0a70 Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Fri, 11 Aug 2023 21:23:58 +0900 Subject: [PATCH 4/4] Update optimum/onnxruntime/modeling_ort.py Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> --- optimum/onnxruntime/modeling_ort.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index a436e98aba..75e44bd243 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -1957,7 +1957,7 @@ def forward( @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForAudioFrameClassification(ORTModel): """ - ONNX Model for with a frame classification head on top for tasks like Speaker Diarization. + ONNX Model with a frame classification head on top for tasks like Speaker Diarization. """ auto_model_class = AutoModelForAudioFrameClassification