Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Precise ORTModel documentation #1268

Merged
merged 5 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/exporters/onnx/usage_guides/export_a_model.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,8 @@ main_export(
)
```

For tasks that require only a single ONNX file (e.g. encoder-only), an exported model with custom inputs/outputs can then be used with the class [`optimum.onnxruntime.ORTModelForCustomTasks`] for inference with ONNX Runtime on CPU or GPU.
fxmarty marked this conversation as resolved.
Show resolved Hide resolved

### Customize the export of Transformers models with custom modeling

Optimum supports the export of Transformers models with custom modeling that use [`trust_remote_code=True`](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoModel.from_pretrained.trust_remote_code), not officially supported in the Transformers library but usable with its functionality such as [pipelines](https://huggingface.co/docs/transformers/main_classes/pipelines) and [generation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationMixin.generate).
Expand Down
136 changes: 33 additions & 103 deletions optimum/onnxruntime/modeling_ort.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
AutoModelForSequenceClassification,
AutoModelForTokenClassification,
)
from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
from transformers.file_utils import add_end_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
from transformers.modeling_outputs import (
BaseModelOutput,
CausalLMOutput,
Expand Down Expand Up @@ -85,16 +85,11 @@
_FEATURE_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor"
_PROCESSOR_FOR_DOC = "AutoProcessor"

ONNX_MODEL_START_DOCSTRING = r"""
ONNX_MODEL_END_DOCSTRING = r"""
This model inherits from [`~onnxruntime.modeling_ort.ORTModel`]. Check the superclass documentation for the generic methods the
library implements for all its models (such as downloading or saving).

Args:
config (`transformers.PretrainedConfig`): [PretrainedConfig](https://huggingface.co/docs/transformers/main_classes/configuration#transformers.PretrainedConfig) is the Model configuration class with all the parameters of the model.
Initializing with a config file does not load the weights associated with the model, only the
configuration. Check out the [`~onnxruntime.modeling_ort.ORTModel.from_pretrained`] method to load the model weights.
model (`onnxruntime.InferenceSession`): [onnxruntime.InferenceSession](https://onnxruntime.ai/docs/api/python/api_summary.html#inferencesession) is the main class used to run a model. Check out the [`~onnxruntime.modeling_ort.ORTModel.load_model`] method for more information.
use_io_binding (`Optional[bool]`, defaults to `None`): Whether to use IOBinding during inference to avoid memory copy between the host and devices. Defaults to `True` if the device is CUDA, otherwise defaults to `False`.
This class should be initialized using the [`onnxruntime.modeling_ort.ORTModel.from_pretrained`] method.
"""

ONNX_TEXT_INPUTS_DOCSTRING = r"""
Expand Down Expand Up @@ -863,15 +858,10 @@ def raise_on_numpy_input_io_binding(self, use_torch: bool):
"""


@add_start_docstrings(
"""
Onnx Model with a BaseModelOutput for feature-extraction tasks.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForFeatureExtraction(ORTModel):
"""
Feature Extraction model for ONNX.
ONNX Model for feature-extraction task.
"""

auto_model_class = AutoModel
Expand Down Expand Up @@ -976,15 +966,10 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model with a MaskedLMOutput for masked language modeling tasks.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForMaskedLM(ORTModel):
"""
Masked language model for ONNX.
ONNX Model with a MaskedLMOutput for masked language modeling tasks.
"""

auto_model_class = AutoModelForMaskedLM
Expand Down Expand Up @@ -1084,15 +1069,10 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model with a QuestionAnsweringModelOutput for extractive question-answering tasks like SQuAD.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForQuestionAnswering(ORTModel):
"""
Question Answering model for ONNX.
ONNX Model with a QuestionAnsweringModelOutput for extractive question-answering tasks like SQuAD.
"""

auto_model_class = AutoModelForQuestionAnswering
Expand Down Expand Up @@ -1211,16 +1191,11 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model with a sequence classification/regression head on top (a linear layer on top of the
pooled output) e.g. for GLUE tasks.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForSequenceClassification(ORTModel):
"""
Sequence Classification model for ONNX.
ONNX Model with a sequence classification/regression head on top (a linear layer on top of the
pooled output) e.g. for GLUE tasks.
"""

auto_model_class = AutoModelForSequenceClassification
Expand Down Expand Up @@ -1317,16 +1292,11 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
for Named-Entity-Recognition (NER) tasks.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForTokenClassification(ORTModel):
"""
Token Classification model for ONNX.
ONNX Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
for Named-Entity-Recognition (NER) tasks.
"""

auto_model_class = AutoModelForTokenClassification
Expand Down Expand Up @@ -1420,16 +1390,11 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a
softmax) e.g. for RocStories/SWAG tasks.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForMultipleChoice(ORTModel):
"""
Multiple choice model for ONNX.
ONNX Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a
softmax) e.g. for RocStories/SWAG tasks.
"""

auto_model_class = AutoModelForMultipleChoice
Expand Down Expand Up @@ -1531,15 +1496,10 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model for image-classification tasks.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForImageClassification(ORTModel):
"""
Image Classification model for ONNX.
ONNX Model for image-classification tasks.
"""

auto_model_class = AutoModelForImageClassification
Expand Down Expand Up @@ -1630,15 +1590,10 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model with an all-MLP decode head on top e.g. for ADE20k, CityScapes.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForSemanticSegmentation(ORTModel):
"""
Semantic Segmentation model for ONNX.
ONNX Model for semantic-segmentation, with an all-MLP decode head on top e.g. for ADE20k, CityScapes.
"""

auto_model_class = AutoModelForSemanticSegmentation
Expand Down Expand Up @@ -1741,16 +1696,11 @@ def _prepare_onnx_inputs(self, use_torch: bool, **kwargs):
"""


@add_start_docstrings(
"""
Onnx Model with a sequence classification head on top (a linear layer over the pooled output) for tasks like
SUPERB Keyword Spotting.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForAudioClassification(ORTModel):
"""
Audio Classification model for ONNX.
ONNX Model for audio-classification, with a sequence classification head on top (a linear layer over the pooled output) for tasks like
SUPERB Keyword Spotting.
"""

auto_model_class = AutoModelForAudioClassification
Expand Down Expand Up @@ -1832,15 +1782,10 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model with a language modeling head on top for Connectionist Temporal Classification (CTC).
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForCTC(ORTModel):
"""
CTC model for ONNX.
ONNX Model with a language modeling head on top for Connectionist Temporal Classification (CTC).
"""

auto_model_class = AutoModelForCTC
Expand Down Expand Up @@ -1920,15 +1865,10 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model with an XVector feature extraction head on top for tasks like Speaker Verification.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForAudioXVector(ORTModel):
"""
Audio XVector model for ONNX.
ONNX Model with an XVector feature extraction head on top for tasks like Speaker Verification.
"""

auto_model_class = AutoModelForAudioXVector
Expand Down Expand Up @@ -2014,15 +1954,10 @@ def forward(
"""


@add_start_docstrings(
"""
Onnx Model for with a frame classification head on top for tasks like Speaker Diarization.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForAudioFrameClassification(ORTModel):
"""
Audio Frame Classification model for ONNX.
ONNX Model with a frame classification head on top for tasks like Speaker Diarization.
fxmarty marked this conversation as resolved.
Show resolved Hide resolved
"""

auto_model_class = AutoModelForAudioFrameClassification
Expand Down Expand Up @@ -2099,15 +2034,10 @@ def forward(
"""


@add_start_docstrings(
"""
ONNX Model for any custom tasks. It can be used to leverage the inference acceleration for any single-file ONNX model.
""",
ONNX_MODEL_START_DOCSTRING,
)
@add_end_docstrings(ONNX_MODEL_END_DOCSTRING)
class ORTModelForCustomTasks(ORTModel):
"""
Model for any custom tasks if the ONNX model is stored in a single file.
ONNX Model for any custom tasks. It can be used to leverage the inference acceleration for any single-file ONNX model that may use custom inputs and outputs.
"""

@add_start_docstrings_to_model_forward(
Expand Down
Loading