
Commit

rlhf batches (#197)
* ppo_batch_size as param

* ppo batch_size default
pascal-pfeiffer authored Jun 30, 2023
1 parent d9e0883 commit 61bf5b3
Showing 2 changed files with 8 additions and 5 deletions.
@@ -215,13 +215,13 @@ def __post_init__(self):
         self._possible_values["scaling_factor_value_loss"] = (0.01, 1, 0.01)
         self._possible_values["ppo_epochs"] = (1, 16, 1)
         self._possible_values["ppo_generate_temperature"] = (0.1, 1.0, 0.1)
-        self._possible_values["ppo_batch_size"] = (1, 1024, 1)
+        self._possible_values["ppo_batch_size"] = (1, 256, 1)
 
         self._visibility["loss_class"] = -1
         self._visibility["drop_last_batch"] = -1
         self._visibility["differential_learning_rate_layers"] = 1
         self._visibility["differential_learning_rate"] = 1
-        self._visibility["ppo_batch_size"] = -1
+        self._visibility["ppo_batch_size"] = 1
 
         self._nesting.add(
             ["differential_learning_rate"],
9 changes: 6 additions & 3 deletions llm_studio/src/trl/trainer.py
@@ -388,12 +388,15 @@ def collator(data: List[Dict[str, torch.Tensor]]):
         num_updates = 0
 
         if (
-            self.cfg.training.ppo_epochs * self.cfg.training.ppo_batch_size
-        ) % self.cfg.training.grad_accumulation != 0:
+            self.cfg.training.ppo_epochs * self.cfg.training.grad_accumulation
+        ) % self.cfg.training.ppo_batch_size != 0:
             raise ValueError(
-                "ppo_epochs*ppo_batch_size must be multiply of grad_accumulation"
+                "ppo_epochs * grad_accumulation must be a multiple of ppo_batch_size"
             )
 
+        if self.cfg.training.ppo_batch_size > self.cfg.training.batch_size:
+            raise ValueError("ppo_batch_size must not be larger than the batch_size")
+
         for _ in range(self.cfg.training.ppo_epochs):
            for batch in mini_batch_dataloader:
                num_updates += 1
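For context, here is a minimal standalone sketch of the two checks this commit adds to trainer.py, assuming the relevant config values are passed in as plain integers; the helper name validate_ppo_batching and its signature are hypothetical and not part of the commit.

    def validate_ppo_batching(
        ppo_epochs: int, grad_accumulation: int, ppo_batch_size: int, batch_size: int
    ) -> None:
        # Raise if ppo_epochs * grad_accumulation is not evenly divisible
        # by ppo_batch_size, mirroring the first new check in the diff.
        if (ppo_epochs * grad_accumulation) % ppo_batch_size != 0:
            raise ValueError(
                "ppo_epochs * grad_accumulation must be a multiple of ppo_batch_size"
            )
        # Raise if the PPO mini-batch size exceeds the overall training batch size,
        # mirroring the second new check in the diff.
        if ppo_batch_size > batch_size:
            raise ValueError("ppo_batch_size must not be larger than the batch_size")

    # Example: 4 * 2 = 8 is divisible by ppo_batch_size=4, and 4 <= batch_size=8,
    # so this call passes without raising.
    validate_ppo_batching(ppo_epochs=4, grad_accumulation=2, ppo_batch_size=4, batch_size=8)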