Merge pull request #521 from allenai/davidbrandfonbrener-patch-1
Fix k_norm dimension
davidbrandfonbrener authored Mar 26, 2024
2 parents 194012a + 322c8b6 commit 8472d0b
Showing 2 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Don't log garbage on nodes that aren't rank 0
- Don't crash in the HF code when we are referring to a tokenizer in a local file
+- Fixed the size calculation for qk layer norm

## [v0.2.5](https://github.com/allenai/OLMo/releases/tag/v0.2.5) - 2024-03-06

4 changes: 2 additions & 2 deletions olmo/model.py
@@ -425,10 +425,10 @@ def __init__(self, layer_id: int, config: ModelConfig, cache: BufferCache):
         self.k_norm: Optional[LayerNormBase] = None
         self.q_norm: Optional[LayerNormBase] = None
         if config.attention_layer_norm:
-            assert config.n_kv_heads is not None
+            assert config.effective_n_kv_heads is not None
             self.k_norm = LayerNormBase.build(
                 config,
-                size=config.d_model // config.effective_n_kv_heads,
+                size=(config.d_model // config.n_heads) * config.effective_n_kv_heads,
                 elementwise_affine=config.attention_layer_norm_with_affine,
             )
             self.q_norm = LayerNormBase.build(config, elementwise_affine=config.attention_layer_norm_with_affine)
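
For context, here is a minimal sketch of the dimension arithmetic behind the fix. It uses plain PyTorch and made-up config values rather than OLMo's LayerNormBase / ModelConfig API: with grouped-query attention, the key projection outputs (d_model // n_heads) * n_kv_heads features, so the key layer norm has to be built with that width, not d_model // n_kv_heads.

    import torch
    import torch.nn as nn

    # Hypothetical values for illustration only; not taken from an OLMo config.
    d_model = 4096     # model width
    n_heads = 32       # number of query heads
    n_kv_heads = 8     # number of key/value heads (grouped-query attention)

    head_dim = d_model // n_heads          # per-head width: 128
    k_proj_dim = head_dim * n_kv_heads     # key projection output width: 1024

    # The norm applied to the projected keys must match the key projection width.
    # The old expression, d_model // n_kv_heads, would give 512 here and not
    # match the 1024-wide key tensor.
    k_norm = nn.LayerNorm(k_proj_dim)

    keys = torch.randn(2, 16, k_proj_dim)  # (batch, seq_len, n_kv_heads * head_dim)
    print(k_norm(keys).shape)              # torch.Size([2, 16, 1024])

Note that in the standard multi-head case (n_kv_heads == n_heads) the corrected expression reduces to d_model, the width of the full key tensor, whereas the old expression would give the width of a single head.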
