NeMo-Mistral to HF converter bugfix. (NVIDIA#8353) (NVIDIA#8442)
Signed-off-by: Alexandros Koumparoulis <[email protected]>
Co-authored-by: akoumpa <[email protected]>
2 people authored and HuiyingLi committed Feb 21, 2024
1 parent 1effe30 commit 8a0115b
Showing 1 changed file with 2 additions and 2 deletions.
@@ -114,6 +114,7 @@ def convert(in_file, precision=None, cpu_only=True) -> None:
     embed_weights_base_name = f'model.language_model.embedding.word_embeddings.weight'
     state_dict[hf_embed_weight_name] = param_to_weights(ckpt[embed_weights_base_name])
 
+    head_num = model.cfg.num_attention_heads
     if nemo_config.num_query_groups is None or nemo_config.num_query_groups == head_num:
         num_query_groups = head_num
     else:
@@ -123,7 +124,6 @@ def convert(in_file, precision=None, cpu_only=True) -> None:
     assert nemo_config.activation.startswith('fast-'), 'mcore only supports fast version of gated linear unit.'
 
     hidden_size = model.cfg.hidden_size
-    head_num = model.cfg.num_attention_heads
     num_layers = model.cfg.num_layers
     num_query_groups = model.cfg.get("num_query_groups", head_num)  # different num_query_groups for 70B
 
@@ -191,7 +191,7 @@ def convert(in_file, precision=None, cpu_only=True) -> None:
 
         hf_post_attn_ln_weight_name = f'model.layers.{l}.post_attention_layernorm.weight'
         if mcore_gpt:
-            post_attn_ln_base_name = f'model.decoder.layers.{l}.self_attention.linear_qkv.layer_norm_weight'
+            post_attn_ln_base_name = f'model.decoder.layers.{l}.mlp.linear_fc1.layer_norm_weight'
         else:
             post_attn_ln_base_name = f'model.language_model.encoder.layers.{l}.post_attention_layernorm.weight'
        state_dict[hf_post_attn_ln_weight_name] = param_to_weights(ckpt[post_attn_ln_base_name])
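
The two changes address separate problems visible in the hunks above. First, head_num was read in the num_query_groups check (first hunk) before its assignment, which sat further down in the second hunk, so reaching that code path would raise a NameError; the fix moves the assignment above its first use. Second, for mcore checkpoints the HF post_attention_layernorm.weight is now taken from mlp.linear_fc1.layer_norm_weight (the pre-MLP norm that mcore/TransformerEngine fuses into the first MLP projection) instead of self_attention.linear_qkv.layer_norm_weight, which holds the input layernorm. The sketch below is not part of the commit; it is a minimal illustration of the corrected key mapping, and the helper name and example layer index are invented for the sketch.

# Illustrative only -- not code from the NeMo repository. Shows how the HF
# layernorm weight names line up with the mcore checkpoint keys after this fix.
def mcore_layernorm_key(layer_idx: int, hf_name: str) -> str:
    """Map an HF layernorm weight name to the corresponding mcore key (assumed mapping)."""
    if hf_name == f'model.layers.{layer_idx}.input_layernorm.weight':
        # Input layernorm is fused into the QKV projection in mcore/TE checkpoints.
        return f'model.decoder.layers.{layer_idx}.self_attention.linear_qkv.layer_norm_weight'
    if hf_name == f'model.layers.{layer_idx}.post_attention_layernorm.weight':
        # Pre-MLP layernorm is fused into the first MLP projection; this is the
        # key the converter now reads after the fix in the third hunk.
        return f'model.decoder.layers.{layer_idx}.mlp.linear_fc1.layer_norm_weight'
    raise KeyError(f'unrecognized layernorm name: {hf_name}')

if __name__ == '__main__':
    l = 0  # example layer index, chosen arbitrarily for the demo
    print(mcore_layernorm_key(l, f'model.layers.{l}.post_attention_layernorm.weight'))
    # -> model.decoder.layers.0.mlp.linear_fc1.layer_norm_weight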
