From ef1b453c359d6d564eb2e5ceb54e987638ad1193 Mon Sep 17 00:00:00 2001 From: Dave <69651599+D4ve-R@users.noreply.github.com> Date: Fri, 1 Mar 2024 11:13:48 +0100 Subject: [PATCH 1/5] add stablelm model impl. --- src/models.js | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/models.js b/src/models.js index cf7807300..6e24b8022 100644 --- a/src/models.js +++ b/src/models.js @@ -5194,6 +5194,44 @@ export class SegformerForSemanticSegmentation extends SegformerPreTrainedModel { ////////////////////////////////////////////////// +////////////////////////////////////////////////// +// StableLM models +export class StableLMPreTrainedModel extends PreTrainedModel { } + +/** + * The bare StableLM Model transformer outputting raw hidden-states without any specific head on top. + */ +export class StableLMModel extends StableLMPreTrainedModel { } + +/** + * StableLM Model with a `language modeling` head on top for Causal Language Modeling (with past). + */ +export class StableLMForCausalLM extends StableLMPreTrainedModel { + /** + * Calls the model on new inputs. + * @param {Object} model_inputs The inputs to the model. + * @returns {Promise} An object containing the model's output logits for causal language modeling. + */ + async _call(model_inputs) { + return new CausalLMOutputWithPast(await super._call(model_inputs)); + } +} + +/** + * StableLM Model with a sequence classification head on top (with Past). + */ +export class StableLMForSequenceClassification extends StableLMPreTrainedModel { + /** + * Calls the model on new inputs. + * @param {Object} model_inputs The inputs to the model. + * @returns {Promise} An object containing the model's output logits for sequence classification. + */ + async _call(model_inputs) { + return new SequenceClassifierOutputWithPast(await super._call(model_inputs)); + } +} + +////////////////////////////////////////////////// ////////////////////////////////////////////////// // AutoModels, used to simplify construction of PreTrainedModels @@ -5845,6 +5883,23 @@ export class SequenceClassifierOutput extends ModelOutput { } } +/** + * Base class for outputs of sentence classification models. + */ +export class SequenceClassifierOutputWithPast extends ModelOutput { + /** + * @param {Object} output The output of the model. + * @param {Tensor} output.logits classification (or regression if config.num_labels==1) scores (before SoftMax). + * @param {Tensor} output.past_key_values Contains pre-computed hidden-states (key and values in the self-attention blocks) + * that can be used (see `past_key_values` input) to speed up sequential decoding. + */ + constructor({ logits, past_key_values }) { + super(); + this.logits = logits; + this.past_key_values = past_key_values; + } +} + /** * Base class for outputs of XVector models. */ From 01c8cf4112edc7eed6a73a6a24d658938ee58108 Mon Sep 17 00:00:00 2001 From: Dave <69651599+D4ve-R@users.noreply.github.com> Date: Fri, 1 Mar 2024 11:20:43 +0100 Subject: [PATCH 2/5] add stablelm mapping --- src/models.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/models.js b/src/models.js index 6e24b8022..9b3644001 100644 --- a/src/models.js +++ b/src/models.js @@ -5418,6 +5418,7 @@ const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([ ['mbart', ['MBartForSequenceClassification', MBartForSequenceClassification]], ['mobilebert', ['MobileBertForSequenceClassification', MobileBertForSequenceClassification]], ['squeezebert', ['SqueezeBertForSequenceClassification', SqueezeBertForSequenceClassification]], + ['stablelm', ['StableLMForSequenceClassification', StableLMForSequenceClassification]], ]); const MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = new Map([ @@ -5465,6 +5466,7 @@ const MODEL_WITH_LM_HEAD_MAPPING_NAMES = new Map([ ['mistral', ['MistralForCausalLM', MistralForCausalLM]], ['falcon', ['FalconForCausalLM', FalconForCausalLM]], ['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]], + ['stablelm', ['StableLMForCausalLM', StableLMForCausalLM]], ]); const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([ From 1c8a7b8aa9c43b39e022c82c5a9184a1525ef64d Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 1 Mar 2024 21:09:08 +0000 Subject: [PATCH 3/5] Update `StableLMPreTrainedModel` class config --- src/models.js | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/models.js b/src/models.js index 9b3644001..8f93ea482 100644 --- a/src/models.js +++ b/src/models.js @@ -5196,7 +5196,25 @@ export class SegformerForSemanticSegmentation extends SegformerPreTrainedModel { ////////////////////////////////////////////////// // StableLM models -export class StableLMPreTrainedModel extends PreTrainedModel { } +export class StableLMPreTrainedModel extends PreTrainedModel { + /** + * Creates a new instance of the `StableLMPreTrainedModel` class. + * @param {Object} config The configuration of the model. + * @param {any} session The ONNX session containing the model weights. + * @param {GenerationConfig} generation_config The generation configuration. + */ + constructor(config, session, generation_config) { + super(config, session); + this.generation_config = generation_config; + + // config doesn't contain pad_token_id, so we assume it is the eos_token_id + this.config.pad_token_id = this.config.eos_token_id + + this.num_heads = this.config.num_attention_heads; + this.num_layers = this.config.num_hidden_layers; + this.dim_kv = this.config.hidden_size / this.num_heads; + } +} /** * The bare StableLM Model transformer outputting raw hidden-states without any specific head on top. From 303cd1acd935bb1dd95346223f5634b2dfa6164e Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sat, 2 Mar 2024 12:35:26 +0000 Subject: [PATCH 4/5] Update stablelm conversion script quantization settings --- scripts/convert.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/convert.py b/scripts/convert.py index 5b8620471..e45f88c14 100644 --- a/scripts/convert.py +++ b/scripts/convert.py @@ -83,6 +83,10 @@ 'per_channel': False, 'reduce_range': False, }, + 'stablelm': { + 'per_channel': False, + 'reduce_range': False, + }, # Encoder-decoder models 'whisper': { From 35d14b6963417021e601a16c431440f8f48adb8a Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sat, 2 Mar 2024 12:37:15 +0000 Subject: [PATCH 5/5] Add StableLm to list of supported models --- README.md | 1 + docs/snippets/6_supported-models.snippet | 1 + scripts/supported_models.py | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/README.md b/README.md index 4fcaf68f5..0f7f4f0c3 100644 --- a/README.md +++ b/README.md @@ -335,6 +335,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://arxiv.org/abs/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SpeechT5](https://huggingface.co/docs/transformers/model_doc/speecht5)** (from Microsoft Research) released with the paper [SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing](https://arxiv.org/abs/2110.07205) by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei. 1. **[SqueezeBERT](https://huggingface.co/docs/transformers/model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer. +1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu. 1. **[Swin Transformer](https://huggingface.co/docs/transformers/model_doc/swin)** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo. 1. **[Swin2SR](https://huggingface.co/docs/transformers/model_doc/swin2sr)** (from University of Würzburg) released with the paper [Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration](https://arxiv.org/abs/2209.11345) by Marcos V. Conde, Ui-Jin Choi, Maxime Burchi, Radu Timofte. 1. **[T5](https://huggingface.co/docs/transformers/model_doc/t5)** (from Google AI) released with the paper [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/abs/1910.10683) by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index ed2c1bdff..a21a2aeb1 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -70,6 +70,7 @@ 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://arxiv.org/abs/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SpeechT5](https://huggingface.co/docs/transformers/model_doc/speecht5)** (from Microsoft Research) released with the paper [SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing](https://arxiv.org/abs/2110.07205) by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei. 1. **[SqueezeBERT](https://huggingface.co/docs/transformers/model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer. +1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu. 1. **[Swin Transformer](https://huggingface.co/docs/transformers/model_doc/swin)** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo. 1. **[Swin2SR](https://huggingface.co/docs/transformers/model_doc/swin2sr)** (from University of Würzburg) released with the paper [Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration](https://arxiv.org/abs/2209.11345) by Marcos V. Conde, Ui-Jin Choi, Maxime Burchi, Radu Timofte. 1. **[T5](https://huggingface.co/docs/transformers/model_doc/t5)** (from Google AI) released with the paper [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/abs/1910.10683) by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu. diff --git a/scripts/supported_models.py b/scripts/supported_models.py index 7d7a5c169..b987650c9 100644 --- a/scripts/supported_models.py +++ b/scripts/supported_models.py @@ -838,6 +838,14 @@ 'microsoft/speecht5_tts', ], }, + 'stablelm': { + # Text generation + 'text-generation': [ + 'hf-internal-testing/tiny-random-StableLmForCausalLM', + 'stabilityai/stablelm-2-1_6b', + 'stabilityai/stablelm-2-zephyr-1_6b', + ], + }, 'squeezebert': { # Feature extraction 'feature-extraction': [