ray-project · matthewdeng · Aug 17, 2022 · Aug 16, 2022
@@ -62,6 +62,8 @@ def trainer_init_per_worker(train_dataset, eval_dataset, **config):
     args = transformers.TrainingArguments(
         output_dir=f"{model_checkpoint}-wikitext2",
         evaluation_strategy="epoch",
+        save_strategy="epoch",
+        logging_strategy="epoch",
         learning_rate=2e-5,
         weight_decay=0.01,
         no_cuda=True,  # Set to False for GPU training

@@ -30,7 +30,7 @@
         "id": "sQbdfyWQhYbO"
       },
       "source": [
-        "Uncomment and run the following line in order to install all the necessary dependencies:"
+        "Uncomment and run the following line in order to install all the necessary dependencies (this notebook is tested with `transformers==4.19.1`):"
       ]
     },
     {
@@ -747,6 +747,7 @@
         "        name,\n",
         "        evaluation_strategy=\"epoch\",\n",
         "        save_strategy=\"epoch\",\n",
+        "        logging_strategy=\"epoch\",\n",
         "        learning_rate=2e-5,\n",
         "        per_device_train_batch_size=batch_size,\n",
         "        per_device_eval_batch_size=batch_size,\n",

diff --git a/python/ray/air/examples/huggingface/huggingface_basic_language_modeling_example.py b/python/ray/air/examples/huggingface/huggingface_basic_language_modeling_example.py
@@ -1,6 +1,8 @@
 # Based on
 # huggingface/notebooks/examples/language_modeling_from_scratch.ipynb
 
+# This example is tested with transformers==4.19.1
+
 import argparse
 import tempfile
 
@@ -88,11 +90,12 @@ def train_function(train_dataset, eval_dataset=None, **config):
         training_args = TrainingArguments(
             training_dir,
             evaluation_strategy="epoch",
+            save_strategy="epoch",
+            logging_strategy="epoch",
             num_train_epochs=num_epochs,
             learning_rate=2e-5,
             weight_decay=0.01,
             disable_tqdm=True,
-            save_strategy="epoch",
             # Required to avoid an exception
             no_cuda=not torch.cuda.is_available(),
         )

diff --git a/python/ray/train/huggingface/huggingface_trainer.py b/python/ray/train/huggingface/huggingface_trainer.py
@@ -207,6 +207,8 @@ def trainer_init_per_worker(train_dataset, eval_dataset, **config):
                 args = transformers.TrainingArguments(
                     output_dir=f"{model_checkpoint}-wikitext2",
                     evaluation_strategy="epoch",
+                    save_strategy="epoch",
+                    logging_strategy="epoch",
                     learning_rate=2e-5,
                     weight_decay=0.01,
                 )