diff --git a/tests/models/flaubert/test_modeling_flaubert.py b/tests/models/flaubert/test_modeling_flaubert.py
index 40a9f68f69d103..99dbf927e18b1e 100644
--- a/tests/models/flaubert/test_modeling_flaubert.py
+++ b/tests/models/flaubert/test_modeling_flaubert.py
@@ -394,11 +394,7 @@ class FlaubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
     def is_pipeline_test_to_skip(
         self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
     ):
-        if pipeline_test_casse_name == "FillMaskPipelineTests":
-            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
-            # `FlaubertConfig` was never used in pipeline tests: cannot create a simple tokenizer
-            return True
-        elif (
+        if (
             pipeline_test_casse_name == "QAPipelineTests"
             and tokenizer_name is not None
             and not tokenizer_name.endswith("Fast")
diff --git a/tests/models/flaubert/test_modeling_tf_flaubert.py b/tests/models/flaubert/test_modeling_tf_flaubert.py
index 11740020359723..6b7f4fc0316ea7 100644
--- a/tests/models/flaubert/test_modeling_tf_flaubert.py
+++ b/tests/models/flaubert/test_modeling_tf_flaubert.py
@@ -310,11 +310,7 @@ class TFFlaubertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
     def is_pipeline_test_to_skip(
         self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
     ):
-        if pipeline_test_casse_name == "FillMaskPipelineTests":
-            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
-            # `FlaubertConfig` was never used in pipeline tests: cannot create a simple tokenizer
-            return True
-        elif (
+        if (
             pipeline_test_casse_name == "QAPipelineTests"
             and tokenizer_name is not None
             and not tokenizer_name.endswith("Fast")
diff --git a/tests/models/xlm/test_modeling_tf_xlm.py b/tests/models/xlm/test_modeling_tf_xlm.py
index 3f27dad5d4f7fc..2b1fb2f963c0a7 100644
--- a/tests/models/xlm/test_modeling_tf_xlm.py
+++ b/tests/models/xlm/test_modeling_tf_xlm.py
@@ -313,11 +313,7 @@ class TFXLMModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     def is_pipeline_test_to_skip(
         self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
     ):
-        if pipeline_test_casse_name == "FillMaskPipelineTests":
-            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
-            # `XLMConfig` was never used in pipeline tests: cannot create a simple tokenizer
-            return True
-        elif (
+        if (
             pipeline_test_casse_name == "QAPipelineTests"
             and tokenizer_name is not None
             and not tokenizer_name.endswith("Fast")
diff --git a/tests/models/xlm/test_modeling_xlm.py b/tests/models/xlm/test_modeling_xlm.py
index c86210e599eaba..d8a18441146a48 100644
--- a/tests/models/xlm/test_modeling_xlm.py
+++ b/tests/models/xlm/test_modeling_xlm.py
@@ -395,11 +395,7 @@ class XLMModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
     def is_pipeline_test_to_skip(
         self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
     ):
-        if pipeline_test_casse_name == "FillMaskPipelineTests":
-            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
-            # `XLMConfig` was never used in pipeline tests: cannot create a simple tokenizer
-            return True
-        elif (
+        if (
             pipeline_test_casse_name == "QAPipelineTests"
             and tokenizer_name is not None
             and not tokenizer_name.endswith("Fast")
diff --git a/tests/pipelines/test_pipelines_fill_mask.py b/tests/pipelines/test_pipelines_fill_mask.py
index 5426a854252eac..d6dec8db171b71 100644
--- a/tests/pipelines/test_pipelines_fill_mask.py
+++ b/tests/pipelines/test_pipelines_fill_mask.py
@@ -302,7 +302,8 @@ def run_test_targets(self, model, tokenizer):
         )
         target_ids = {vocab[el] for el in targets}
         self.assertEqual({el["token"] for el in outputs}, target_ids)
-        self.assertEqual({el["token_str"] for el in outputs}, set(targets))
+        processed_targets = [tokenizer.decode([x]) for x in target_ids]
+        self.assertEqual({el["token_str"] for el in outputs}, set(processed_targets))

         # Call argument
         fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
@@ -316,24 +317,29 @@ def run_test_targets(self, model, tokenizer):
         )
         target_ids = {vocab[el] for el in targets}
         self.assertEqual({el["token"] for el in outputs}, target_ids)
-        self.assertEqual({el["token_str"] for el in outputs}, set(targets))
+        processed_targets = [tokenizer.decode([x]) for x in target_ids]
+        self.assertEqual({el["token_str"] for el in outputs}, set(processed_targets))

         # Score equivalence
         outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets)
         tokens = [top_mask["token_str"] for top_mask in outputs]
         scores = [top_mask["score"] for top_mask in outputs]
-        unmasked_targets = fill_masker(f"This is a {tokenizer.mask_token}", targets=tokens)
-        target_scores = [top_mask["score"] for top_mask in unmasked_targets]
-        self.assertEqual(nested_simplify(scores), nested_simplify(target_scores))
+        # For some BPE tokenizers, `</w>` is removed during decoding, so `token_str` won't be the same as in `targets`.
+        if set(tokens) == set(targets):
+            unmasked_targets = fill_masker(f"This is a {tokenizer.mask_token}", targets=tokens)
+            target_scores = [top_mask["score"] for top_mask in unmasked_targets]
+            self.assertEqual(nested_simplify(scores), nested_simplify(target_scores))

         # Raises with invalid
-        with self.assertRaises(ValueError):
-            outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=[""])
         with self.assertRaises(ValueError):
             outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=[])
-        with self.assertRaises(ValueError):
-            outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets="")
+        # For some tokenizers, `""` is actually in the vocabulary and the expected error won't be raised
+        if "" not in tokenizer.get_vocab():
+            with self.assertRaises(ValueError):
+                outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=[""])
+            with self.assertRaises(ValueError):
+                outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets="")

     def run_test_top_k(self, model, tokenizer):
         fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, top_k=2)
         outputs = fill_masker(f"This is a {tokenizer.mask_token}")
@@ -368,10 +374,11 @@ def run_test_top_k_targets(self, model, tokenizer):
         # If we use the most probably targets, and filter differently, we should still
         # have the same results
         targets2 = [el["token_str"] for el in sorted(outputs, key=lambda x: x["score"], reverse=True)]
-        outputs2 = fill_masker(f"This is a {tokenizer.mask_token}", top_k=3, targets=targets2)
-
-        # They should yield exactly the same result
-        self.assertEqual(nested_simplify(outputs), nested_simplify(outputs2))
+        # For some BPE tokenizers, `</w>` is removed during decoding, so `token_str` won't be the same as in `targets`.
+        if set(targets2).issubset(targets):
+            outputs2 = fill_masker(f"This is a {tokenizer.mask_token}", top_k=3, targets=targets2)
+            # They should yield exactly the same result
+            self.assertEqual(nested_simplify(outputs), nested_simplify(outputs2))

     def fill_mask_with_duplicate_targets_and_top_k(self, model, tokenizer):
         fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
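The recurring comment in these hunks ("`</w>` is removed during decoding") is the reason the assertions were relaxed: for suffix-BPE tokenizers such as XLM's, the raw vocabulary key and the string decoded from the same id differ by the end-of-word marker, so `token_str` can never equal the raw target. A minimal sketch of the effect, assuming the `xlm-mlm-en-2048` checkpoint is available (any XLM checkpoint with a `</w>`-suffixed vocabulary behaves the same):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("xlm-mlm-en-2048")
vocab = tokenizer.get_vocab()

# Pick a vocabulary entry that carries the BPE end-of-word marker.
target = next(tok for tok in vocab if tok.endswith("</w>"))
target_id = vocab[target]

# Decoding strips `</w>`, so the round trip through ids is lossy:
# the decoded string no longer matches the raw vocab key.
decoded = tokenizer.decode([target_id])
print(repr(target), "->", repr(decoded))
assert decoded != target
```

This is why the tests now compare against `tokenizer.decode([x])` for each target id (`processed_targets`) rather than the raw `targets`, and why the score-equivalence and `top_k` round trips are guarded by the `set(...)` checks above.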