Commit 60ad9c5
Shift attention mask from 1:
After discussion with @molbap
pcuenca committed May 13, 2024
1 parent 404abd8 commit 60ad9c5
Showing 1 changed file with 1 addition and 1 deletion.
src/transformers/models/paligemma/modeling_paligemma.py (1 addition, 1 deletion)
@@ -463,7 +463,7 @@ def forward(
             if attention_mask.dim() == 4:
                 # take top or bottom row of the 4d mask.
                 # this should only be used in the initial pass with full attention on prefix.
-                shift_attention_mask = attention_mask[:, 0, 0, :-1].squeeze(1) if not left_padding else attention_mask[:, 0, -1, :-1].squeeze(1)
+                shift_attention_mask = attention_mask[:, 0, 0, 1:].squeeze(1) if not left_padding else attention_mask[:, 0, -1, 1:].squeeze(1)
             elif attention_mask.dim() == 2:
                 # take normal slice of the attn mask
                 shift_attention_mask = attention_mask[..., 1:]
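
For context: the loss computation in forward uses shifted next-token targets, and the 2D branch below the change already slices the mask with [..., 1:] so that it lines up with the shifted labels; this commit makes the 4D branch use the same 1: slice instead of :-1. A minimal sketch of that shifting pattern in plain PyTorch follows (illustration only, not the PaliGemma modeling code; all shapes and variable names are invented for the example):

# Minimal sketch of the shifted next-token loss (plain PyTorch, illustration
# only; shapes and names are invented and this is not the transformers /
# PaliGemma implementation).
import torch
import torch.nn.functional as F

batch, seq_len, vocab = 2, 6, 11
logits = torch.randn(batch, seq_len, vocab)             # model predictions per position
labels = torch.randint(0, vocab, (batch, seq_len))      # target token ids
attention_mask = torch.ones(batch, seq_len, dtype=torch.long)
attention_mask[1, 4:] = 0                               # pretend sample 1 is right-padded

# Position t predicts token t+1, so logits drop the last step and labels drop the first.
shift_logits = logits[:, :-1, :]
shift_labels = labels[:, 1:]
# The mask must be sliced the same way as the labels (from 1:), otherwise it is
# misaligned with the targets it is meant to select.
shift_attention_mask = attention_mask[:, 1:]

valid = shift_attention_mask.bool()
loss = F.cross_entropy(shift_logits[valid], shift_labels[valid])
print(loss)

With the previous :-1 slice, the mask is off by one relative to shift_labels, so a padded target can be kept (or a real one dropped) whenever the sequence contains padding.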
