Commit a422c2e

Your Name committed Oct 15, 2024 · 1 parent ca8ebbd
Showing 4 changed files with 23 additions and 6 deletions.
benchmarks/lightning/main.py (1 addition & 2 deletions)

@@ -4,10 +4,9 @@
import argparse
import os

# FIXME this is HPU only
os.environ["PT_HPU_LAZY_MODE"] = str(int(int(os.getenv("WORLD_SIZE", -1)) <= 0))

from habana_frameworks.torch import hpu; hpu.init()

import torch
import torch.nn.functional as F
import lightning as L
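The one-liner above folds a conditional into string arithmetic: PT_HPU_LAZY_MODE ends up "1" only when WORLD_SIZE is unset or non-positive, i.e. for single-process runs, and "0" under a distributed launcher. A minimal sketch of the same logic spelled out (assuming, as the placement before the habana_frameworks import suggests, that the variable must be set before that import):

import os

# WORLD_SIZE is exported by distributed launchers; default to -1 when absent.
world_size = int(os.getenv("WORLD_SIZE", -1))

if world_size <= 0:
    os.environ["PT_HPU_LAZY_MODE"] = "1"  # single process: enable HPU lazy mode
else:
    os.environ["PT_HPU_LAZY_MODE"] = "0"  # distributed run: eager mode
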
benchmarks/llm/recipes/lora_finetune_single_device.py (5 additions & 1 deletion)

@@ -529,7 +529,8 @@ def train(self) -> None:
"""
The core training loop.
"""

import torchcompat.core as accelerator

if self._model_compile:
log.info(
"NOTE: torch.compile is enabled and model is compiled in first forward. Expect a relatively slow first iteration."
@@ -580,10 +581,13 @@ def train(self) -> None:
loss = self._loss_fn(logits, labels) / self._gradient_accumulation_steps
running_loss += loss
loss.backward()
accelerator.mark_step()

# Step with optimizer
if (idx + 1) % self._gradient_accumulation_steps == 0:
self._optimizer.step()
accelerator.mark_step()

self._optimizer.zero_grad(set_to_none=True)
self._lr_scheduler.step()
# Update the number of steps when the weights are updated
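In HPU lazy mode, operations accumulate into a device graph that executes only when the step is marked, so the loop flushes once after loss.backward() and once after self._optimizer.step(). A sketch of what the torchcompat shim is assumed to provide (its actual implementation may differ); habana_frameworks.torch.core.mark_step() is the underlying Habana call:

def mark_step():
    """Flush the lazily accumulated device graph; no-op off HPU."""
    try:
        import habana_frameworks.torch.core as htcore
        htcore.mark_step()  # dispatch the pending HPU graph
    except ImportError:
        pass  # CUDA/CPU execute eagerly; nothing to flush
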
benchmarks/rlhf/main.py (7 additions & 0 deletions)

@@ -2,6 +2,7 @@

import shutil

import accelerate
from accelerate import PartialState
from datasets import load_dataset
from transformers import (
@@ -15,10 +16,16 @@
from trl.trainer.ppov2_trainer import PPOv2Config, PPOv2Trainer
from trl.trainer.utils import SIMPLE_QUERY_CHAT_TEMPLATE

import torchcompat.core as compat


class PPOv2TrainerIntrumented(PPOv2Trainer):
def __init__(self, config: PPOv2Config, *args, **kwargs):
config.report_to = []

        # FIXME: is there a better way to monkeypatch this?
# Use the compatibility accelerator class
accelerate.Accelerator = compat.accelerate.Accelerator
super().__init__(config, *args, **kwargs)

def batch_size_fn(batch):
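The rebinding is assumed to work because PPOv2Trainer.__init__ looks the class up as accelerate.Accelerator at construction time, which is why the assignment happens before super().__init__(...); patching after construction would be too late, and any module that had already done `from accelerate import Accelerator` keeps its own unpatched reference.
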
scripts/article/run_hpu.sh (10 additions & 3 deletions)

@@ -11,7 +11,7 @@ export MILABENCH_WORDIR="$(pwd)/$MILABENCH_GPU_ARCH"
export MILABENCH_BASE="$MILABENCH_WORDIR/results"
export MILABENCH_VENV="$MILABENCH_WORDIR/env"
export BENCHMARK_VENV="$MILABENCH_WORDIR/results/venv/torch"

export PT_HPU_LAZY_MODE=0

if [ -z "${MILABENCH_SOURCE}" ]; then
export MILABENCH_CONFIG="$MILABENCH_WORDIR/milabench/config/standard.yaml"
@@ -84,6 +84,8 @@ install_prepare() {
#
# Generate/download datasets, download models etc...
#
sed -i 's/pic.numpy(force=True)/pic.numpy()/' $BENCHMARK_VENV/lib/python3.10/dist-packages/torchvision/transforms/functional.py
sed -i 's/range(hpu.device_count())/range(len(available_modules))/' $BENCHMARK_VENV/lib/site-packages/habana_frameworks/torch/hpu/_utils.py
milabench prepare $ARGS
}
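
These two sed patches edit installed packages in place: the first drops the force=True argument from torchvision's tensor-to-numpy conversion (presumably because the bundled Habana build rejects that keyword), and the second makes Habana's device enumeration iterate over the modules it actually detected instead of range(hpu.device_count()). A hedged sketch of the failure mode the first sed works around, with pic_to_numpy as a hypothetical stand-in for the torchvision helper:

import torch

def pic_to_numpy(pic: torch.Tensor):
    # torchvision calls pic.numpy(force=True); the patch assumes the HPU
    # backend rejects that keyword, so fall back to an explicit host copy.
    try:
        return pic.numpy(force=True)
    except TypeError:
        return pic.detach().cpu().numpy()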

@@ -95,12 +97,17 @@ else
fi


(
. $BENCHMARK_VENV/bin/activate
pip install lightning-habana
pip install habana-media-loader
    # git clone git@github.com:Delaunay/torchcompat.git
    # git clone git@github.com:Delaunay/voir.git
pip install -e $MILABENCH_WORDIR/torchcompat
pip install -e $MILABENCH_WORDIR/voir
pip install -e $MILABENCH_WORDIR/optimum-habana
# pip install habana_dataloader
)

if [ "$MILABENCH_PREPARE" -eq 0 ]; then
cd $MILABENCH_WORDIR
