Merge pull request #3240 from flairNLP/variable_grad_norm_clipping
Making gradient clipping optional & max gradient norm variable
alanakbik authored Aug 8, 2023
2 parents 88a23be + 21e6ade commit 4a4fef1
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion flair/trainers/trainer.py
@@ -298,6 +298,7 @@ def train_custom(
         optimizer: Type[torch.optim.Optimizer] = SGD,
         train_with_dev: bool = False,
         train_with_test: bool = False,
+        max_grad_norm: Optional[float] = 5.0,
         # evaluation and monitoring
         main_evaluation_metric: Tuple[str, str] = ("micro avg", "f1-score"),
         monitor_test: bool = False,
@@ -345,6 +346,8 @@ def train_custom(
             monitor_train_sample: Set this to evaluate on a sample of the train data at the end of each epoch.
                 If you set an int, it will sample this many sentences to evaluate on. If you set a float, it will sample
                 a percentage of data points from train.
+            max_grad_norm (Optional[float]): If not None, gradients are clipped to this value before an optimizer.step is
+                called.
             use_final_model_for_eval (bool): If True, the final model is used for the final evaluation. If False, the
                 model from the best epoch as determined by main_evaluation_metric is used for the final evaluation.
             gold_label_dictionary_for_eval: Set to force evaluation to use a particular label dictionary
@@ -594,7 +597,8 @@ def train_custom(

                     # do the optimizer step
                     scaler.unscale_(self.optimizer)
-                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5.0)
+                    if max_grad_norm is not None:
+                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_grad_norm)
                     scale_before = scaler.get_scale()
                     scaler.step(self.optimizer)
                     scaler.update()
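For context (not part of the commit), a minimal usage sketch of the new option. The `tagger`, `corpus`, and output path below are placeholders assumed to exist; only the `max_grad_norm` keyword comes from this change.

```python
# Hypothetical usage sketch: `tagger`, `corpus`, and the output path are placeholders.
from flair.trainers import ModelTrainer

trainer = ModelTrainer(tagger, corpus)

# Clip gradients to a tighter norm than the previous hard-coded 5.0 ...
trainer.train_custom("resources/taggers/example", max_grad_norm=1.0)

# ... or pass None to skip gradient clipping entirely.
trainer.train_custom("resources/taggers/example", max_grad_norm=None)
```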

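A small standalone PyTorch sketch of the guarded-clipping pattern this diff introduces (toy model, not flair code), showing that `torch.nn.utils.clip_grad_norm_` caps the total gradient norm at `max_grad_norm` and that `None` disables the step:

```python
import torch

model = torch.nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

max_grad_norm = 5.0  # mirrors the new trainer default; set to None to skip clipping

# Inflate the loss so the raw gradient norm clearly exceeds max_grad_norm.
loss = 1000.0 * model(torch.randn(4, 10)).pow(2).sum()
loss.backward()

if max_grad_norm is not None:
    # clip_grad_norm_ rescales gradients in place and returns the norm *before* clipping.
    total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    print(f"gradient norm before clipping: {total_norm.item():.2f}, capped at {max_grad_norm}")

optimizer.step()
optimizer.zero_grad()
```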