diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py
index 475dda72c59295..9747cbe6fb98ae 100644
--- a/src/transformers/models/mistral/modeling_mistral.py
+++ b/src/transformers/models/mistral/modeling_mistral.py
@@ -1089,8 +1089,9 @@ def _update_causal_mask(
             exclude_mask = torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
             if self.config.sliding_window is not None:
                 if not using_sliding_window_cache or sequence_length > self.config.sliding_window:
-                    exclude_mask |= torch.arange(target_length, device=device) <= (
-                        cache_position.reshape(-1, 1) - self.config.sliding_window
+                    exclude_mask.bitwise_or_(
+                        torch.arange(target_length, device=device)
+                        <= (cache_position.reshape(-1, 1) - self.config.sliding_window)
                     )
             causal_mask *= exclude_mask
             causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)
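
For context, a minimal standalone sketch of what the patched lines compute. All concrete values below (target_length, sequence_length, sliding_window) are illustrative placeholders, not values taken from the model or its config:

import torch

target_length = 8     # total key/value length covered by the mask (assumed)
sequence_length = 8   # number of query positions in the current step (assumed)
sliding_window = 4    # stands in for self.config.sliding_window (assumed)
device = "cpu"

# Positions of the current queries within the full sequence.
cache_position = torch.arange(sequence_length, device=device)

# Standard causal exclusion: a query must not attend to future key positions.
exclude_mask = torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)

# Sliding-window exclusion, applied in place exactly as in the patch: also
# drop keys that fall more than sliding_window positions behind the query.
exclude_mask.bitwise_or_(
    torch.arange(target_length, device=device)
    <= (cache_position.reshape(-1, 1) - sliding_window)
)

print(exclude_mask.int())
# Row i is False (i.e. attendable) exactly on the window (i - sliding_window, i].

Functionally, Tensor.bitwise_or_ is the same in-place OR that the |= operator dispatched to; the patch only spells the in-place update out as an explicit method call instead of an augmented assignment.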