Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add gather_use_object arguments #31514

Merged
merged 12 commits into from
Jun 28, 2024
7 changes: 7 additions & 0 deletions src/transformers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3831,6 +3831,13 @@ def evaluation_loop(
if is_torch_xla_available():
xm.mark_step()

if "use_gather_object" in inspect.signature(
self.gather_function
).parameters.keys() and is_accelerate_available("0.30.0"):
self.gather_function = functools.partial(
self.gather_function, use_gather_object=self.args.eval_use_gather_object
)

SangbumChoi marked this conversation as resolved.
Show resolved Hide resolved
# Update containers
if losses is not None:
losses = self.gather_function((losses.repeat(batch_size)))
Expand Down
10 changes: 10 additions & 0 deletions src/transformers/training_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,9 @@ class TrainingArguments:

eval_on_start(`bool`, *optional*, defaults to `False`):
Whether to perform an evaluation step (sanity check) before the training to ensure the validation step works correctly.

eval_use_gather_object(`bool`, *optional*, defaults to `False`):
SangbumChoi marked this conversation as resolved.
Show resolved Hide resolved
SangbumChoi marked this conversation as resolved.
Show resolved Hide resolved
Whether to recursively gather objects in a nested list/tuple/dictionary of objects from all devices.
SangbumChoi marked this conversation as resolved.
Show resolved Hide resolved
"""

framework = "pt"
Expand Down Expand Up @@ -1465,6 +1468,13 @@ class TrainingArguments:
},
)

eval_use_gather_object: Optional[bool] = field(
default=False,
metadata={
"help": "Whether to run recursively gather object in a nested list/tuple/dictionary of objects from all devices."
},
)

def __post_init__(self):
# Parse in args that could be `dict` sent in from the CLI as a string
for field in _VALID_DICT_FIELDS:
Expand Down