From 46d00fba6000a7cccf96ac67e874559fe8ab78f3 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Thu, 12 Sep 2024 14:04:44 -0600
Subject: [PATCH 1/9] feat(exporters/onnx): Add GraniteOnnxConfig and task
 support list

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/exporters/onnx/model_configs.py | 10 ++++++++++
 optimum/exporters/tasks.py              |  7 +++++++
 2 files changed, 17 insertions(+)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index e77f649f69..f20078d06b 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -290,6 +290,16 @@ class GemmaOnnxConfig(LlamaOnnxConfig):
     pass
 
 
+class GraniteOnnxConfig(TextDecoderOnnxConfig):
+    # GG TODO: Bump past 4.44.2 once the next release is out
+    MIN_TRANSFORMERS_VERSION = version.parse("4.44.2")
+    DEFAULT_ONNX_OPSET = 14  # Granite follows Llama's default
+
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+
+
 class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
     DEFAULT_ONNX_OPSET = 14  # Phi now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
index a489f34fb0..fdc8bfcb53 100644
--- a/optimum/exporters/tasks.py
+++ b/optimum/exporters/tasks.py
@@ -915,6 +915,13 @@ class TasksManager:
             "text-classification",
             onnx="LlamaOnnxConfig",
         ),
+        "granite": supported_tasks_mapping(
+            "feature-extraction",
+            "feature-extraction-with-past",
+            "text-generation",
+            "text-generation-with-past",
+            onnx="GraniteOnnxConfig",
+        ),
         "pegasus": supported_tasks_mapping(
             "feature-extraction",
             "feature-extraction-with-past",

From 2ff14b41345817d331b0c84afaaad44d6984ac14 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Thu, 12 Sep 2024 14:05:24 -0600
Subject: [PATCH 2/9] feat: Add granite's normalized config for inference

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/utils/normalized_config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py
index 81207b7649..9ceed24c2d 100644
--- a/optimum/utils/normalized_config.py
+++ b/optimum/utils/normalized_config.py
@@ -281,6 +281,7 @@ class NormalizedConfigManager:
         "xlm-roberta": NormalizedTextConfig,
         "yolos": NormalizedVisionConfig,
         "qwen2": NormalizedTextConfig,
+        "granite": NormalizedTextConfigWithGQA,
     }
 
     @classmethod

From 43dfbae6a233745a33773d35c43e1f7a7932995d Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Thu, 12 Sep 2024 15:54:40 -0600
Subject: [PATCH 3/9] feat(onnx opt): Add onnx optimization support for granite

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/onnxruntime/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/onnxruntime/utils.py b/optimum/onnxruntime/utils.py
index 128e2406f1..9e92e0bd32 100644
--- a/optimum/onnxruntime/utils.py
+++ b/optimum/onnxruntime/utils.py
@@ -128,6 +128,7 @@ class ORTConfigManager:
         "gpt-neo": "gpt2",
         "gpt-neox": "gpt2",
         "gptj": "gpt2",
+        "granite": "gpt2",
         # longt5 with O4 results in segmentation fault
         "longt5": "bert",
         "llama": "gpt2",

From ac0ea8b6ba9855669683643b17bc462921acdbb2 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 8 Oct 2024 12:58:48 -0600
Subject: [PATCH 4/9] fix(onnx/granite): Use LlamaOnnxConfig as the base for
 GraniteOnnxConfig

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/exporters/onnx/model_configs.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index f20078d06b..c5faffcf0b 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -290,14 +290,8 @@ class GemmaOnnxConfig(LlamaOnnxConfig):
     pass
 
 
-class GraniteOnnxConfig(TextDecoderOnnxConfig):
-    # GG TODO: Bump past 4.44.2 once the next release is out
-    MIN_TRANSFORMERS_VERSION = version.parse("4.44.2")
-    DEFAULT_ONNX_OPSET = 14  # Granite follows Llama's default
-
-    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
-    DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
-    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+class GraniteOnnxConfig(LlamaOnnxConfig):
+    MIN_TRANSFORMERS_VERSION = version.parse("4.45.0")
 
 
 class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):

From d02856ce4ff35b7e0b1aa3c3f8107a6c234905be Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 8 Oct 2024 13:00:11 -0600
Subject: [PATCH 5/9] fix(onnxruntime): Add "granite" to list of model types
 with grouped attention

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/onnxruntime/modeling_decoder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
index bda3ec98d9..8e431a9f06 100644
--- a/optimum/onnxruntime/modeling_decoder.py
+++ b/optimum/onnxruntime/modeling_decoder.py
@@ -340,7 +340,7 @@ def prepare_past_key_values(
             if self.model_type == "gemma":
                 num_attention_heads = self.normalized_config.num_key_value_heads
                 embed_size_per_head = self.normalized_config.head_dim
-            elif self.model_type in {"mistral", "llama", "qwen2"}:
+            elif self.model_type in {"mistral", "llama", "qwen2", "granite"}:
                 num_attention_heads = self.normalized_config.num_key_value_heads
             else:
                 num_attention_heads = self.normalized_config.num_attention_heads

From 05c301eafcc1bb4a835ad043f70e4f8dc9985e99 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 8 Oct 2024 14:51:57 -0600
Subject: [PATCH 6/9] fix: Add granite to the list of models that require
 position_ids

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/exporters/onnx/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py
index 675566ba23..8df5ee4258 100644
--- a/optimum/exporters/onnx/utils.py
+++ b/optimum/exporters/onnx/utils.py
@@ -86,6 +86,7 @@
     "phi",
     "phi3",
     "qwen2",
+    "granite",
 }
 
 

From fbd051a1fc5e20a958ee989768b4199ffc3020b0 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 22 Oct 2024 16:17:17 -0600
Subject: [PATCH 7/9] fix(granite): Add MIN_TORCH_VERSION for recently fixed
 torch bug

Context for the torch bug and the resulting minimum torch version:
https://github.com/huggingface/optimum/pull/2043#issuecomment-2427975461

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/exporters/onnx/model_configs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index c5faffcf0b..4f5e47d5df 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -292,6 +292,7 @@ class GemmaOnnxConfig(LlamaOnnxConfig):
 
 class GraniteOnnxConfig(LlamaOnnxConfig):
     MIN_TRANSFORMERS_VERSION = version.parse("4.45.0")
+    MIN_TORCH_VERSION = version.parse("2.5.0")
 
 
 class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):

From ad17e28cb50bf281224bec1907d76ecfb60102c3 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 22 Oct 2024 16:17:45 -0600
Subject: [PATCH 8/9] test(granite): Add tiny random granite test for onnx
 exporter

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 tests/exporters/exporters_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index c8a33b0be3..ccccb5510b 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -100,6 +100,7 @@
     "gpt-neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "gpt-neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gptj": "hf-internal-testing/tiny-random-GPTJModel",
+    "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM",
     "groupvit": "hf-internal-testing/tiny-random-groupvit",
     "ibert": "hf-internal-testing/tiny-random-IBertModel",
     "imagegpt": "hf-internal-testing/tiny-random-ImageGPTModel",

From 22de7b77f1d686e4b7fbe4d2b52008a9bd4513d5 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Fri, 25 Oct 2024 11:31:42 -0600
Subject: [PATCH 9/9] test(onnxruntime): Add granite to onnxruntime tests

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 tests/onnxruntime/test_modeling.py           | 1 +
 tests/onnxruntime/utils_onnxruntime_tests.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py
index 33243da278..6819ed938d 100644
--- a/tests/onnxruntime/test_modeling.py
+++ b/tests/onnxruntime/test_modeling.py
@@ -2311,6 +2311,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):
         "gpt_neo",
         "gpt_neox",
         "gptj",
+        "granite",
         "llama",
         "mistral",
         "mpt",
diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py
index 5071d0081a..ca0b7151b6 100644
--- a/tests/onnxruntime/utils_onnxruntime_tests.py
+++ b/tests/onnxruntime/utils_onnxruntime_tests.py
@@ -104,6 +104,7 @@
     "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gptj": "hf-internal-testing/tiny-random-GPTJForCausalLM",
+    "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM",
     "groupvit": "hf-internal-testing/tiny-random-groupvit",
     "hubert": "hf-internal-testing/tiny-random-HubertModel",
     "ibert": "hf-internal-testing/tiny-random-IBertModel",