diff --git a/README.md b/README.md index cbde6bdae..ade8639d4 100644 --- a/README.md +++ b/README.md @@ -335,6 +335,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://arxiv.org/abs/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SpeechT5](https://huggingface.co/docs/transformers/model_doc/speecht5)** (from Microsoft Research) released with the paper [SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing](https://arxiv.org/abs/2110.07205) by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei. 1. **[SqueezeBERT](https://huggingface.co/docs/transformers/model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer. +1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu. 1. **[Starcoder2](https://huggingface.co/docs/transformers/main/model_doc/starcoder2)** (from BigCode team) released with the paper [StarCoder 2 and The Stack v2: The Next Generation](https://arxiv.org/abs/2402.19173) by Anton Lozhkov, Raymond Li, Loubna Ben Allal, Federico Cassano, Joel Lamy-Poirier, Nouamane Tazi, Ao Tang, Dmytro Pykhtar, Jiawei Liu, Yuxiang Wei, Tianyang Liu, Max Tian, Denis Kocetkov, Arthur Zucker, Younes Belkada, Zijian Wang, Qian Liu, Dmitry Abulkhanov, Indraneil Paul, Zhuang Li, Wen-Ding Li, Megan Risdal, Jia Li, Jian Zhu, Terry Yue Zhuo, Evgenii Zheltonozhskii, Nii Osae Osae Dade, Wenhao Yu, Lucas Krauß, Naman Jain, Yixuan Su, Xuanli He, Manan Dey, Edoardo Abati, Yekun Chai, Niklas Muennighoff, Xiangru Tang, Muhtasham Oblokulov, Christopher Akiki, Marc Marone, Chenghao Mou, Mayank Mishra, Alex Gu, Binyuan Hui, Tri Dao, Armel Zebaze, Olivier Dehaene, Nicolas Patry, Canwen Xu, Julian McAuley, Han Hu, Torsten Scholak, Sebastien Paquet, Jennifer Robinson, Carolyn Jane Anderson, Nicolas Chapados, Mostofa Patwary, Nima Tajbakhsh, Yacine Jernite, Carlos Muñoz Ferrandis, Lingming Zhang, Sean Hughes, Thomas Wolf, Arjun Guha, Leandro von Werra, and Harm de Vries. 1. **[Swin Transformer](https://huggingface.co/docs/transformers/model_doc/swin)** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo. 1. **[Swin2SR](https://huggingface.co/docs/transformers/model_doc/swin2sr)** (from University of Würzburg) released with the paper [Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration](https://arxiv.org/abs/2209.11345) by Marcos V. Conde, Ui-Jin Choi, Maxime Burchi, Radu Timofte. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index 1c6644ad6..78948cf47 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -70,6 +70,7 @@ 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://arxiv.org/abs/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SpeechT5](https://huggingface.co/docs/transformers/model_doc/speecht5)** (from Microsoft Research) released with the paper [SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing](https://arxiv.org/abs/2110.07205) by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei. 1. **[SqueezeBERT](https://huggingface.co/docs/transformers/model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer. +1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu. 1. **[Starcoder2](https://huggingface.co/docs/transformers/main/model_doc/starcoder2)** (from BigCode team) released with the paper [StarCoder 2 and The Stack v2: The Next Generation](https://arxiv.org/abs/2402.19173) by Anton Lozhkov, Raymond Li, Loubna Ben Allal, Federico Cassano, Joel Lamy-Poirier, Nouamane Tazi, Ao Tang, Dmytro Pykhtar, Jiawei Liu, Yuxiang Wei, Tianyang Liu, Max Tian, Denis Kocetkov, Arthur Zucker, Younes Belkada, Zijian Wang, Qian Liu, Dmitry Abulkhanov, Indraneil Paul, Zhuang Li, Wen-Ding Li, Megan Risdal, Jia Li, Jian Zhu, Terry Yue Zhuo, Evgenii Zheltonozhskii, Nii Osae Osae Dade, Wenhao Yu, Lucas Krauß, Naman Jain, Yixuan Su, Xuanli He, Manan Dey, Edoardo Abati, Yekun Chai, Niklas Muennighoff, Xiangru Tang, Muhtasham Oblokulov, Christopher Akiki, Marc Marone, Chenghao Mou, Mayank Mishra, Alex Gu, Binyuan Hui, Tri Dao, Armel Zebaze, Olivier Dehaene, Nicolas Patry, Canwen Xu, Julian McAuley, Han Hu, Torsten Scholak, Sebastien Paquet, Jennifer Robinson, Carolyn Jane Anderson, Nicolas Chapados, Mostofa Patwary, Nima Tajbakhsh, Yacine Jernite, Carlos Muñoz Ferrandis, Lingming Zhang, Sean Hughes, Thomas Wolf, Arjun Guha, Leandro von Werra, and Harm de Vries. 1. **[Swin Transformer](https://huggingface.co/docs/transformers/model_doc/swin)** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo. 1. **[Swin2SR](https://huggingface.co/docs/transformers/model_doc/swin2sr)** (from University of Würzburg) released with the paper [Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration](https://arxiv.org/abs/2209.11345) by Marcos V. Conde, Ui-Jin Choi, Maxime Burchi, Radu Timofte. diff --git a/scripts/convert.py b/scripts/convert.py index 20c968dbf..7e05a111b 100644 --- a/scripts/convert.py +++ b/scripts/convert.py @@ -83,6 +83,10 @@ 'per_channel': False, 'reduce_range': False, }, + 'stablelm': { + 'per_channel': False, + 'reduce_range': False, + }, 'starcoder2': { 'per_channel': False, 'reduce_range': False, diff --git a/scripts/supported_models.py b/scripts/supported_models.py index 2dc8f6218..4728155a0 100644 --- a/scripts/supported_models.py +++ b/scripts/supported_models.py @@ -838,6 +838,14 @@ 'microsoft/speecht5_tts', ], }, + 'stablelm': { + # Text generation + 'text-generation': [ + 'hf-internal-testing/tiny-random-StableLmForCausalLM', + 'stabilityai/stablelm-2-1_6b', + 'stabilityai/stablelm-2-zephyr-1_6b', + ], + }, 'squeezebert': { # Feature extraction 'feature-extraction': [ diff --git a/src/models.js b/src/models.js index abb9c3bbd..208566aba 100644 --- a/src/models.js +++ b/src/models.js @@ -5316,6 +5316,62 @@ export class SegformerForSemanticSegmentation extends SegformerPreTrainedModel { ////////////////////////////////////////////////// +////////////////////////////////////////////////// +// StableLM models +export class StableLMPreTrainedModel extends PreTrainedModel { + /** + * Creates a new instance of the `StableLMPreTrainedModel` class. + * @param {Object} config The configuration of the model. + * @param {any} session The ONNX session containing the model weights. + * @param {GenerationConfig} generation_config The generation configuration. + */ + constructor(config, session, generation_config) { + super(config, session); + this.generation_config = generation_config; + + // config doesn't contain pad_token_id, so we assume it is the eos_token_id + this.config.pad_token_id = this.config.eos_token_id + + this.num_heads = this.config.num_attention_heads; + this.num_layers = this.config.num_hidden_layers; + this.dim_kv = this.config.hidden_size / this.num_heads; + } +} + +/** + * The bare StableLM Model transformer outputting raw hidden-states without any specific head on top. + */ +export class StableLMModel extends StableLMPreTrainedModel { } + +/** + * StableLM Model with a `language modeling` head on top for Causal Language Modeling (with past). + */ +export class StableLMForCausalLM extends StableLMPreTrainedModel { + /** + * Calls the model on new inputs. + * @param {Object} model_inputs The inputs to the model. + * @returns {Promise} An object containing the model's output logits for causal language modeling. + */ + async _call(model_inputs) { + return new CausalLMOutputWithPast(await super._call(model_inputs)); + } +} + +/** + * StableLM Model with a sequence classification head on top (with Past). + */ +export class StableLMForSequenceClassification extends StableLMPreTrainedModel { + /** + * Calls the model on new inputs. + * @param {Object} model_inputs The inputs to the model. + * @returns {Promise} An object containing the model's output logits for sequence classification. + */ + async _call(model_inputs) { + return new SequenceClassifierOutputWithPast(await super._call(model_inputs)); + } +} + +////////////////////////////////////////////////// ////////////////////////////////////////////////// // AutoModels, used to simplify construction of PreTrainedModels @@ -5505,6 +5561,7 @@ const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([ ['mbart', ['MBartForSequenceClassification', MBartForSequenceClassification]], ['mobilebert', ['MobileBertForSequenceClassification', MobileBertForSequenceClassification]], ['squeezebert', ['SqueezeBertForSequenceClassification', SqueezeBertForSequenceClassification]], + ['stablelm', ['StableLMForSequenceClassification', StableLMForSequenceClassification]], ]); const MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = new Map([ @@ -5553,6 +5610,7 @@ const MODEL_WITH_LM_HEAD_MAPPING_NAMES = new Map([ ['starcoder2', ['Starcoder2ForCausalLM', Starcoder2ForCausalLM]], ['falcon', ['FalconForCausalLM', FalconForCausalLM]], ['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]], + ['stablelm', ['StableLMForCausalLM', StableLMForCausalLM]], ]); const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([ @@ -5980,6 +6038,23 @@ export class SequenceClassifierOutput extends ModelOutput { } } +/** + * Base class for outputs of sentence classification models. + */ +export class SequenceClassifierOutputWithPast extends ModelOutput { + /** + * @param {Object} output The output of the model. + * @param {Tensor} output.logits classification (or regression if config.num_labels==1) scores (before SoftMax). + * @param {Tensor} output.past_key_values Contains pre-computed hidden-states (key and values in the self-attention blocks) + * that can be used (see `past_key_values` input) to speed up sequential decoding. + */ + constructor({ logits, past_key_values }) { + super(); + this.logits = logits; + this.past_key_values = past_key_values; + } +} + /** * Base class for outputs of XVector models. */