From ef1b453c359d6d564eb2e5ceb54e987638ad1193 Mon Sep 17 00:00:00 2001
From: Dave <69651599+D4ve-R@users.noreply.github.com>
Date: Fri, 1 Mar 2024 11:13:48 +0100
Subject: [PATCH 1/5] add stablelm model impl.

---
 src/models.js | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
diff --git a/src/models.js b/src/models.js
index cf7807300..6e24b8022 100644
--- a/src/models.js
+++ b/src/models.js
@@ -5194,6 +5194,44 @@ export class SegformerForSemanticSegmentation extends SegformerPreTrainedModel {
 
 //////////////////////////////////////////////////
 
+//////////////////////////////////////////////////
+// StableLM models
+export class StableLMPreTrainedModel extends PreTrainedModel { }
+
+/**
+ * The bare StableLM Model transformer outputting raw hidden-states without any specific head on top.
+ */
+export class StableLMModel extends StableLMPreTrainedModel { }
+
+/**
+ * StableLM Model with a `language modeling` head on top for Causal Language Modeling (with past).
+ */
+export class StableLMForCausalLM extends StableLMPreTrainedModel {
+    /**
+     * Calls the model on new inputs and wraps the result in a `CausalLMOutputWithPast`.
+     * @param {Object} model_inputs The inputs to the model.
+     * @returns {Promise<CausalLMOutputWithPast>} An object containing the model's output logits for causal language modeling.
+     */
+    async _call(model_inputs) {
+        return new CausalLMOutputWithPast(await super._call(model_inputs));
+    }
+}
+
+/**
+ * StableLM Model with a sequence classification head on top (with past).
+ */
+export class StableLMForSequenceClassification extends StableLMPreTrainedModel {
+    /**
+     * Calls the model on new inputs and wraps the result in a `SequenceClassifierOutputWithPast`.
+     * @param {Object} model_inputs The inputs to the model.
+     * @returns {Promise<SequenceClassifierOutputWithPast>} An object containing the model's output logits for sequence classification.
+     */
+    async _call(model_inputs) {
+        return new SequenceClassifierOutputWithPast(await super._call(model_inputs));
+    }
+}
+
+//////////////////////////////////////////////////
 
 //////////////////////////////////////////////////
 // AutoModels, used to simplify construction of PreTrainedModels
@@ -5845,6 +5883,23 @@ export class SequenceClassifierOutput extends ModelOutput {
     }
 }
 
+/**
+ * Base class for outputs of sequence classification models that also carry `past_key_values`.
+ */
+export class SequenceClassifierOutputWithPast extends ModelOutput {
+    /**
+     * @param {Object} output The output of the model.
+     * @param {Tensor} output.logits Classification (or regression if config.num_labels==1) scores (before SoftMax).
+     * @param {Tensor} output.past_key_values Contains pre-computed hidden-states (key and values in the self-attention blocks)
+     * that can be used (see `past_key_values` input) to speed up sequential decoding.
+     */
+    constructor({ logits, past_key_values }) {
+        super();
+        this.logits = logits;
+        this.past_key_values = past_key_values;
+    }
+}
+
 /**
  * Base class for outputs of XVector models.
  */

From 01c8cf4112edc7eed6a73a6a24d658938ee58108 Mon Sep 17 00:00:00 2001
From: Dave <69651599+D4ve-R@users.noreply.github.com>
Date: Fri, 1 Mar 2024 11:20:43 +0100
Subject: [PATCH 2/5] add stablelm mapping

---
 src/models.js | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/models.js b/src/models.js
index 6e24b8022..9b3644001 100644
--- a/src/models.js
+++ b/src/models.js
@@ -5418,6 +5418,7 @@ const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
     ['mbart', ['MBartForSequenceClassification', MBartForSequenceClassification]],
     ['mobilebert', ['MobileBertForSequenceClassification', MobileBertForSequenceClassification]],
     ['squeezebert', ['SqueezeBertForSequenceClassification', SqueezeBertForSequenceClassification]],
+    ['stablelm', ['StableLMForSequenceClassification', StableLMForSequenceClassification]],
 ]);
 
 const MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = new Map([
@@ -5465,6 +5466,7 @@ const MODEL_WITH_LM_HEAD_MAPPING_NAMES = new Map([
     ['mistral', ['MistralForCausalLM', MistralForCausalLM]],
     ['falcon', ['FalconForCausalLM', FalconForCausalLM]],
     ['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]],
+    ['stablelm', ['StableLMForCausalLM', StableLMForCausalLM]],
 ]);
 
 const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([

From 1c8a7b8aa9c43b39e022c82c5a9184a1525ef64d Mon Sep 17 00:00:00 2001
From: Joshua Lochner <admin@xenova.com>
Date: Fri, 1 Mar 2024 21:09:08 +0000
Subject: [PATCH 3/5] Update `StableLMPreTrainedModel` class config

---
 src/models.js | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/models.js b/src/models.js
index 9b3644001..8f93ea482 100644
--- a/src/models.js
+++ b/src/models.js
@@ -5196,7 +5196,25 @@ export class SegformerForSemanticSegmentation extends SegformerPreTrainedModel {
 
 //////////////////////////////////////////////////
 // StableLM models
-export class StableLMPreTrainedModel extends PreTrainedModel { }
+export class StableLMPreTrainedModel extends PreTrainedModel {
+    /**
+     * Creates a new instance of the `StableLMPreTrainedModel` class.
+     * @param {Object} config The configuration of the model.
+     * @param {any} session The ONNX session containing the model weights.
+     * @param {GenerationConfig} generation_config The generation configuration.
+     */
+    constructor(config, session, generation_config) {
+        super(config, session);
+        this.generation_config = generation_config;
+
+        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
+        this.config.pad_token_id = this.config.eos_token_id;
+        // Grouped-query attention: prefer `num_key_value_heads` when the config provides it, else fall back to `num_attention_heads`.
+        this.num_heads = this.config.num_key_value_heads ?? this.config.num_attention_heads;
+        this.num_layers = this.config.num_hidden_layers;
+        this.dim_kv = this.config.hidden_size / this.config.num_attention_heads;
+    }
+}
 
 /**
  * The bare StableLM Model transformer outputting raw hidden-states without any specific head on top.

From 303cd1acd935bb1dd95346223f5634b2dfa6164e Mon Sep 17 00:00:00 2001
From: Joshua Lochner <admin@xenova.com>
Date: Sat, 2 Mar 2024 12:35:26 +0000
Subject: [PATCH 4/5] Update stablelm conversion script quantization settings

---
 scripts/convert.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/convert.py b/scripts/convert.py
index 5b8620471..e45f88c14 100644
--- a/scripts/convert.py
+++ b/scripts/convert.py
@@ -83,6 +83,10 @@
         'per_channel': False,
         'reduce_range': False,
     },
+    'stablelm': {
+        'per_channel': False,
+        'reduce_range': False,
+    },
 
     # Encoder-decoder models
     'whisper': {

From 35d14b6963417021e601a16c431440f8f48adb8a Mon Sep 17 00:00:00 2001
From: Joshua Lochner <admin@xenova.com>
Date: Sat, 2 Mar 2024 12:37:15 +0000
Subject: [PATCH 5/5] Add StableLm to list of supported models

---
 README.md                                | 1 +
 docs/snippets/6_supported-models.snippet | 1 +
 scripts/supported_models.py              | 8 ++++++++
 3 files changed, 10 insertions(+)

diff --git a/README.md b/README.md
index 4fcaf68f5..0f7f4f0c3 100644
--- a/README.md
+++ b/README.md
@@ -335,6 +335,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te
 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://arxiv.org/abs/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer.
 1. **[SpeechT5](https://huggingface.co/docs/transformers/model_doc/speecht5)** (from Microsoft Research) released with the paper [SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing](https://arxiv.org/abs/2110.07205) by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei.
 1. **[SqueezeBERT](https://huggingface.co/docs/transformers/model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer.
+1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
 1. **[Swin Transformer](https://huggingface.co/docs/transformers/model_doc/swin)** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo.
 1. **[Swin2SR](https://huggingface.co/docs/transformers/model_doc/swin2sr)** (from University of Würzburg) released with the paper [Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration](https://arxiv.org/abs/2209.11345) by Marcos V. Conde, Ui-Jin Choi, Maxime Burchi, Radu Timofte.
 1. **[T5](https://huggingface.co/docs/transformers/model_doc/t5)** (from Google AI) released with the paper [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/abs/1910.10683) by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu.
diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet
index ed2c1bdff..a21a2aeb1 100644
--- a/docs/snippets/6_supported-models.snippet
+++ b/docs/snippets/6_supported-models.snippet
@@ -70,6 +70,7 @@
 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://arxiv.org/abs/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer.
 1. **[SpeechT5](https://huggingface.co/docs/transformers/model_doc/speecht5)** (from Microsoft Research) released with the paper [SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing](https://arxiv.org/abs/2110.07205) by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei.
 1. **[SqueezeBERT](https://huggingface.co/docs/transformers/model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer.
+1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
 1. **[Swin Transformer](https://huggingface.co/docs/transformers/model_doc/swin)** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo.
 1. **[Swin2SR](https://huggingface.co/docs/transformers/model_doc/swin2sr)** (from University of Würzburg) released with the paper [Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration](https://arxiv.org/abs/2209.11345) by Marcos V. Conde, Ui-Jin Choi, Maxime Burchi, Radu Timofte.
 1. **[T5](https://huggingface.co/docs/transformers/model_doc/t5)** (from Google AI) released with the paper [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/abs/1910.10683) by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu.
diff --git a/scripts/supported_models.py b/scripts/supported_models.py
index 7d7a5c169..b987650c9 100644
--- a/scripts/supported_models.py
+++ b/scripts/supported_models.py
@@ -838,6 +838,14 @@
             'microsoft/speecht5_tts',
         ],
     },
+    'stablelm': {
+        # Text generation
+        'text-generation': [
+            'hf-internal-testing/tiny-random-StableLmForCausalLM',
+            'stabilityai/stablelm-2-1_6b',
+            'stabilityai/stablelm-2-zephyr-1_6b',
+        ],
+    },
     'squeezebert': {
         # Feature extraction
         'feature-extraction': [