Ported Dinov2 to flax #25579

Closed
wants to merge 11 commits
2 changes: 1 addition & 1 deletion docs/source/en/index.md
@@ -333,7 +333,7 @@ Flax), PyTorch, and/or TensorFlow.
| DETA | ✅ | ❌ | ❌ |
| DETR | ✅ | ❌ | ❌ |
| DiNAT | ✅ | ❌ | ❌ |
| DINOv2 | ✅ | ❌ | ❌ |
| DINOv2 | ✅ | ❌ | ✅ |
| DistilBERT | ✅ | ✅ | ✅ |
| DonutSwin | ✅ | ❌ | ❌ |
| DPR | ✅ | ✅ | ❌ |
11 changes: 11 additions & 0 deletions docs/source/en/model_doc/dinov2.md
@@ -43,3 +43,14 @@ The original code can be found [here](https://github.com/facebookresearch/dinov2).

[[autodoc]] Dinov2ForImageClassification
- forward

## FlaxDinov2Model

[[autodoc]] FlaxDinov2Model
- __call__

## FlaxDinov2ForImageClassification

[[autodoc]] FlaxDinov2ForImageClassification
- __call__
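
A minimal usage sketch of the two new classes, for anyone trying the port locally (illustrative, not part of this diff; it assumes the PyTorch checkpoint `facebook/dinov2-base` on the Hub and weight conversion via `from_pt=True`):

```python
# Illustrative sketch: the checkpoint name and from_pt conversion are assumptions, not part of the PR.
import requests
from PIL import Image

from transformers import AutoImageProcessor, FlaxDinov2Model

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
# from_pt=True converts the PyTorch weights, since no native Flax checkpoint is assumed to exist.
model = FlaxDinov2Model.from_pretrained("facebook/dinov2-base", from_pt=True)

inputs = processor(images=image, return_tensors="np")  # Flax models take NumPy arrays
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # (batch_size, sequence_length, hidden_size)
```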

8 changes: 8 additions & 0 deletions src/transformers/__init__.py
@@ -4012,6 +4012,13 @@
"FlaxCLIPVisionPreTrainedModel",
]
)
_import_structure["models.dinov2"].extend(
[
"FlaxDinov2ForImageClassification",
"FlaxDinov2Model",
"FlaxDinov2PreTrainedModel",
]
)
_import_structure["models.distilbert"].extend(
[
"FlaxDistilBertForMaskedLM",
@@ -7476,6 +7483,7 @@
FlaxCLIPVisionModel,
FlaxCLIPVisionPreTrainedModel,
)
from .models.dinov2 import FlaxDinov2ForImageClassification, FlaxDinov2Model, FlaxDinov2PreTrainedModel
from .models.distilbert import (
FlaxDistilBertForMaskedLM,
FlaxDistilBertForMultipleChoice,
28 changes: 28 additions & 0 deletions src/transformers/modeling_flax_outputs.py
@@ -698,3 +698,31 @@ class FlaxSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
encoder_last_hidden_state: Optional[jnp.ndarray] = None
encoder_hidden_states: Optional[Tuple[jnp.ndarray]] = None
encoder_attentions: Optional[Tuple[jnp.ndarray]] = None


@flax.struct.dataclass
class FlaxImageClassifierOutput(ModelOutput):
"""
Base class for outputs of image classification models.

Args:
loss (`jnp.ndarray` of shape `(1,)`, *optional*, returned when `labels` is provided):
Classification (or regression if config.num_labels==1) loss.
logits (`jnp.ndarray` of shape `(batch_size, config.num_labels)`):
Classification (or regression if config.num_labels==1) scores (before SoftMax).
hidden_states (`tuple(jnp.ndarray)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
Tuple of `jnp.ndarray` (one for the output of the embeddings, if the model has an embedding layer, + one
for the output of each stage) of shape `(batch_size, sequence_length, hidden_size)`. Hidden-states (also
called feature maps) of the model at the output of each stage.
attentions (`tuple(jnp.ndarray)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
Tuple of `jnp.ndarray` (one for each layer) of shape `(batch_size, num_heads, patch_size,
sequence_length)`.

Attention weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
"""

loss: Optional[jnp.ndarray] = None
logits: jnp.ndarray = None
hidden_states: Optional[Tuple[jnp.ndarray]] = None
attentions: Optional[Tuple[jnp.ndarray]] = None
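
For orientation, a small sketch of how the new output class is constructed and read (values are illustrative, not taken from the diff):

```python
import jax.numpy as jnp

from transformers.modeling_flax_outputs import FlaxImageClassifierOutput

# Illustrative: logits for a batch of 2 images over 1000 labels, as a classification head would return.
output = FlaxImageClassifierOutput(
    logits=jnp.zeros((2, 1000)),  # (batch_size, config.num_labels)
    hidden_states=None,           # filled only when output_hidden_states=True
    attentions=None,              # filled only when output_attentions=True
)
predicted_class = output.logits.argmax(axis=-1)  # per-example index of the highest score
```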
2 changes: 2 additions & 0 deletions src/transformers/models/auto/modeling_flax_auto.py
@@ -37,6 +37,7 @@
("blenderbot-small", "FlaxBlenderbotSmallModel"),
("bloom", "FlaxBloomModel"),
("clip", "FlaxCLIPModel"),
("dinov2", "FlaxDinov2Model"),
("distilbert", "FlaxDistilBertModel"),
("electra", "FlaxElectraModel"),
("gpt-sw3", "FlaxGPT2Model"),
@@ -122,6 +123,7 @@
[
# Model for Image-classsification
("beit", "FlaxBeitForImageClassification"),
("dinov2", "FlaxDinov2ForImageClassification"),
("regnet", "FlaxRegNetForImageClassification"),
("resnet", "FlaxResNetForImageClassification"),
("vit", "FlaxViTForImageClassification"),
21 changes: 21 additions & 0 deletions src/transformers/models/dinov2/__init__.py
@@ -16,6 +16,7 @@
from ...utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_flax_available,
is_torch_available,
)

@@ -38,6 +39,18 @@
"Dinov2Backbone",
]

try:
if not is_flax_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_flax_dinov2"] = [
"FlaxDinov2ForImageClassification",
"FlaxDinov2Model",
"FlaxDinov2PreTrainedModel",
]

if TYPE_CHECKING:
from .configuration_dinov2 import DINOV2_PRETRAINED_CONFIG_ARCHIVE_MAP, Dinov2Config, Dinov2OnnxConfig

@@ -55,6 +68,14 @@
Dinov2PreTrainedModel,
)

try:
if not is_flax_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_flax_dinov2 import FlaxDinov2ForImageClassification, FlaxDinov2Model, FlaxDinov2PreTrainedModel

else:
import sys

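
Downstream code can mirror the availability guard used above before touching the new classes; a small sketch relying on the same `is_flax_available` utility the diff imports:

```python
# Sketch only: gate Flax-specific code paths the same way the package __init__ does.
from transformers.utils import is_flax_available

if is_flax_available():
    from transformers import FlaxDinov2Model  # resolved lazily through _import_structure
else:
    FlaxDinov2Model = None  # JAX/Flax not installed; fall back to the PyTorch classes instead
```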