From 46d00fba6000a7cccf96ac67e874559fe8ab78f3 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Thu, 12 Sep 2024 14:04:44 -0600 Subject: [PATCH 1/9] feat(exporters/onnx): Add GraniteOnnxConfig and task support list Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/exporters/onnx/model_configs.py | 10 ++++++++++ optimum/exporters/tasks.py | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index e77f649f69..f20078d06b 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -290,6 +290,16 @@ class GemmaOnnxConfig(LlamaOnnxConfig): pass +class GraniteOnnxConfig(TextDecoderOnnxConfig): + # GG TODO: Bump past 4.44.2 once the next release is out + MIN_TRANSFORMERS_VERSION = version.parse("4.44.2") + DEFAULT_ONNX_OPSET = 14 # Granite follows Llama's default + + DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator) + DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + + class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): DEFAULT_ONNX_OPSET = 14 # Phi now uses F.scaled_dot_product_attention by default for torch>=2.1.1. NORMALIZED_CONFIG_CLASS = NormalizedTextConfig diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index a489f34fb0..fdc8bfcb53 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -915,6 +915,13 @@ class TasksManager: "text-classification", onnx="LlamaOnnxConfig", ), + "granite": supported_tasks_mapping( + "feature-extraction", + "feature-extraction-with-past", + "text-generation", + "text-generation-with-past", + onnx="GraniteOnnxConfig", + ), "pegasus": supported_tasks_mapping( "feature-extraction", "feature-extraction-with-past", From 2ff14b41345817d331b0c84afaaad44d6984ac14 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Thu, 12 Sep 2024 14:05:24 -0600 Subject: [PATCH 2/9] feat: Add granite's normalized config for inference Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/utils/normalized_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index 81207b7649..9ceed24c2d 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -281,6 +281,7 @@ class NormalizedConfigManager: "xlm-roberta": NormalizedTextConfig, "yolos": NormalizedVisionConfig, "qwen2": NormalizedTextConfig, + "granite": NormalizedTextConfigWithGQA, } @classmethod From 43dfbae6a233745a33773d35c43e1f7a7932995d Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Thu, 12 Sep 2024 15:54:40 -0600 Subject: [PATCH 3/9] feat(onnx opt): Add onnx optimization support for granite Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/onnxruntime/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/onnxruntime/utils.py b/optimum/onnxruntime/utils.py index 128e2406f1..9e92e0bd32 100644 --- a/optimum/onnxruntime/utils.py +++ b/optimum/onnxruntime/utils.py @@ -128,6 +128,7 @@ class ORTConfigManager: "gpt-neo": "gpt2", "gpt-neox": "gpt2", "gptj": "gpt2", + "granite": "gpt2", # longt5 with O4 results in segmentation fault "longt5": "bert", "llama": "gpt2", From ac0ea8b6ba9855669683643b17bc462921acdbb2 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 8 Oct 2024 12:58:48 -0600 Subject: [PATCH 4/9] fix(onnx/granite): Use LlamaOnnxConfig as the base for GraniteOnnxConfig Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/exporters/onnx/model_configs.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index f20078d06b..c5faffcf0b 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -290,14 +290,8 @@ class GemmaOnnxConfig(LlamaOnnxConfig): pass -class GraniteOnnxConfig(TextDecoderOnnxConfig): - # GG TODO: Bump past 4.44.2 once the next release is out - MIN_TRANSFORMERS_VERSION = version.parse("4.44.2") - DEFAULT_ONNX_OPSET = 14 # Granite follows Llama's default - - DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator) - DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator - NORMALIZED_CONFIG_CLASS = NormalizedTextConfig +class GraniteOnnxConfig(LlamaOnnxConfig): + MIN_TRANSFORMERS_VERSION = version.parse("4.45.0") class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): From d02856ce4ff35b7e0b1aa3c3f8107a6c234905be Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 8 Oct 2024 13:00:11 -0600 Subject: [PATCH 5/9] fix(onnxruntime): Add "granite" to list of model types with grouped attention Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/onnxruntime/modeling_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py index bda3ec98d9..8e431a9f06 100644 --- a/optimum/onnxruntime/modeling_decoder.py +++ b/optimum/onnxruntime/modeling_decoder.py @@ -340,7 +340,7 @@ def prepare_past_key_values( if self.model_type == "gemma": num_attention_heads = self.normalized_config.num_key_value_heads embed_size_per_head = self.normalized_config.head_dim - elif self.model_type in {"mistral", "llama", "qwen2"}: + elif self.model_type in {"mistral", "llama", "qwen2", "granite"}: num_attention_heads = self.normalized_config.num_key_value_heads else: num_attention_heads = self.normalized_config.num_attention_heads From 05c301eafcc1bb4a835ad043f70e4f8dc9985e99 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 8 Oct 2024 14:51:57 -0600 Subject: [PATCH 6/9] fix: Add granite to the list of models that require position_ids Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/exporters/onnx/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py index 675566ba23..8df5ee4258 100644 --- a/optimum/exporters/onnx/utils.py +++ b/optimum/exporters/onnx/utils.py @@ -86,6 +86,7 @@ "phi", "phi3", "qwen2", + "granite", } From fbd051a1fc5e20a958ee989768b4199ffc3020b0 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 22 Oct 2024 16:17:17 -0600 Subject: [PATCH 7/9] fix(granite): Add MIN_TORCH_VERSION for recently fixed torch bug https://github.com/huggingface/optimum/pull/2043#issuecomment-2427975461 Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/exporters/onnx/model_configs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index c5faffcf0b..4f5e47d5df 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -292,6 +292,7 @@ class GemmaOnnxConfig(LlamaOnnxConfig): class GraniteOnnxConfig(LlamaOnnxConfig): MIN_TRANSFORMERS_VERSION = version.parse("4.45.0") + MIN_TORCH_VERSION = version.parse("2.5.0") class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): From ad17e28cb50bf281224bec1907d76ecfb60102c3 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 22 Oct 2024 16:17:45 -0600 Subject: [PATCH 8/9] test(granite): Add tiny random granite test for onnx exporter Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- tests/exporters/exporters_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index c8a33b0be3..ccccb5510b 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -100,6 +100,7 @@ "gpt-neo": "hf-internal-testing/tiny-random-GPTNeoModel", "gpt-neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", "gptj": "hf-internal-testing/tiny-random-GPTJModel", + "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM", "groupvit": "hf-internal-testing/tiny-random-groupvit", "ibert": "hf-internal-testing/tiny-random-IBertModel", "imagegpt": "hf-internal-testing/tiny-random-ImageGPTModel", From 22de7b77f1d686e4b7fbe4d2b52008a9bd4513d5 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Fri, 25 Oct 2024 11:31:42 -0600 Subject: [PATCH 9/9] tests(onnxruntime): Add granite to onnxruntime tests Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- tests/onnxruntime/test_modeling.py | 1 + tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 33243da278..6819ed938d 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2311,6 +2311,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "gpt_neo", "gpt_neox", "gptj", + "granite", "llama", "mistral", "mpt", diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index 5071d0081a..ca0b7151b6 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -104,6 +104,7 @@ "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel", "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", "gptj": "hf-internal-testing/tiny-random-GPTJForCausalLM", + "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM", "groupvit": "hf-internal-testing/tiny-random-groupvit", "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-IBertModel",