fix: Removed duplicate field definitions in some classes #31888

Merged
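All eight hunks below delete the same kind of dead code: a class-level name that is defined twice, of which Python silently keeps only the last binding. The first three hunks drop a repeated overwrite_cache dataclass field from the flax language-modeling examples. As a minimal sketch (not taken from this PR), the snippet below shows why removing such a duplicate cannot change behavior: re-annotating a name inside a dataclass body simply replaces its entry in __annotations__, so the class ends up with exactly one field either way.

from dataclasses import dataclass, field, fields

@dataclass
class DataTrainingArguments:
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )
    # Duplicate definition: this rebinds the same name, so the dataclass
    # machinery still sees exactly one field.
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )

print([f.name for f in fields(DataTrainingArguments)])  # prints ['overwrite_cache']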
examples/flax/language-modeling/run_clm_flax.py (0 additions, 3 deletions)
@@ -225,9 +225,6 @@ class DataTrainingArguments:
             )
         },
     )
-    overwrite_cache: bool = field(
-        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
-    )
     validation_split_percentage: Optional[int] = field(
         default=5,
         metadata={
@@ -163,9 +163,6 @@ class DataTrainingArguments:
     overwrite_cache: bool = field(
         default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
     )
-    overwrite_cache: bool = field(
-        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
-    )
     preprocessing_num_workers: Optional[int] = field(
         default=None,
         metadata={"help": "The number of processes to use for the preprocessing."},
@@ -156,9 +156,6 @@ class DataTrainingArguments:
             )
         },
     )
-    overwrite_cache: bool = field(
-        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
-    )
     preprocessing_num_workers: Optional[int] = field(
         default=None,
         metadata={"help": "The number of processes to use for the preprocessing."},
@@ -1080,7 +1080,6 @@ class DeformableDetrPreTrainedModel(PreTrainedModel):
     main_input_name = "pixel_values"
     supports_gradient_checkpointing = True
     _no_split_modules = [r"DeformableDetrConvEncoder", r"DeformableDetrEncoderLayer", r"DeformableDetrDecoderLayer"]
-    supports_gradient_checkpointing = True

     def _init_weights(self, module):
         std = self.config.init_std
@@ -126,7 +126,6 @@ class VideoLlavaPreTrainedModel(PreTrainedModel):
     _no_split_modules = ["VideoLlavaVisionAttention"]
     _skip_keys_device_placement = "past_key_values"
     _supports_flash_attn_2 = True
-    _no_split_modules = ["VideoLlavaVisionAttention"]

     def _init_weights(self, module):
         std = (
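The two hunks above remove duplicated plain class attributes (supports_gradient_checkpointing on DeformableDetrPreTrainedModel, _no_split_modules on VideoLlavaPreTrainedModel). A tiny illustration, again not from this PR: a class body executes top to bottom, so a second assignment merely rebinds the name that the first one created.

class Demo:
    supports_gradient_checkpointing = True
    supports_gradient_checkpointing = True  # duplicate: last assignment wins, no error is raised

print(vars(Demo)["supports_gradient_checkpointing"])  # True; the class holds one attribute, not two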
tests/models/fnet/test_modeling_fnet.py (0 additions, 1 deletion)
@@ -295,7 +295,6 @@ class FNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     # Skip Tests
     test_pruning = False
     test_head_masking = False
-    test_pruning = False

     # TODO: Fix the failed tests
     def is_pipeline_test_to_skip(
tests/models/mamba/test_modeling_mamba.py (0 additions, 1 deletion)
@@ -258,7 +258,6 @@ class MambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
     test_model_parallel = False
     test_pruning = False
     test_head_masking = False  # Mamba does not have attention heads
-    test_model_parallel = False
     pipeline_model_mapping = (
         {"feature-extraction": MambaModel, "text-generation": MambaForCausalLM} if is_torch_available() else {}
     )
@@ -298,7 +298,6 @@ class RecurrentGemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
     test_model_parallel = False
     test_pruning = False
     test_head_masking = False  # RecurrentGemma does not have attention heads
-    test_model_parallel = False

     # Need to remove 0.9 in `test_cpu_offload`
     # This is because we are hitting edge cases with the causal_mask buffer
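The three test hunks above follow the same pattern, dropping repeated test_pruning and test_model_parallel flags. Duplicates like these are easy to reintroduce during manual merges, and Python itself never complains. As a hypothetical follow-up check (not part of this PR and not an existing transformers CI script; the helper name and CLI below are assumptions for illustration), a short ast walk can flag any class-level name that is annotated or assigned more than once:

import ast
import sys
from collections import Counter

def duplicate_class_attrs(source: str) -> list[tuple[str, str]]:
    """Return (class_name, attribute) pairs defined more than once at class level."""
    duplicates = []
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.ClassDef):
            continue
        seen = Counter()
        for stmt in node.body:
            # Annotated definitions, e.g. "overwrite_cache: bool = field(...)"
            if isinstance(stmt, ast.AnnAssign) and isinstance(stmt.target, ast.Name):
                seen[stmt.target.id] += 1
            # Plain assignments, e.g. "test_pruning = False"
            elif isinstance(stmt, ast.Assign):
                for target in stmt.targets:
                    if isinstance(target, ast.Name):
                        seen[target.id] += 1
        duplicates += [(node.name, name) for name, count in seen.items() if count > 1]
    return duplicates

if __name__ == "__main__":
    for path in sys.argv[1:]:
        with open(path, encoding="utf-8") as handle:
            for cls, attr in duplicate_class_attrs(handle.read()):
                print(f"{path}: class {cls} defines {attr!r} more than once")

Run over the files in this PR before the fix, it would report, for example, that DataTrainingArguments defines 'overwrite_cache' more than once.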