Merge branch 'master' into updated_docker
Delaunay authored Mar 1, 2024
2 parents 4311da8 + 0bf6348 commit 925c8a0
Showing 7 changed files with 200 additions and 189 deletions.
60 changes: 60 additions & 0 deletions .github/workflows/report_container.yml
@@ -0,0 +1,60 @@
name: Publish Docker image for reports

on:
  # Allow manual runs
  workflow_dispatch:

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

permissions:
  packages: write

# define build arguments
jobs:
  build-image:
    runs-on: ubuntu-22.04

    strategy:
      fail-fast: false

    permissions:
      contents: read
      packages: write

    steps:
      - name: Check out the repo
        uses: actions/checkout@v3

      - name: Get Image Tag Name
        env:
          GITHUB_REF_NAME_ENV: ${{ github.ref_name }}
        run: |
          echo "IMAGE_TAG=$GITHUB_REF_NAME_ENV" >> $GITHUB_ENV

      - name: Log in to the registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for the image
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=raw,value=report-${{ env.IMAGE_TAG }}

      - name: Build and push the image
        uses: docker/build-push-action@v3
        with:
          context: .
          push: true
          file: docker/Dockerfile-report
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            CONFIG=standard.yaml
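Since the workflow is only triggered via `workflow_dispatch`, a hypothetical way to exercise it end to end (assuming the GitHub CLI is installed and authenticated against this repository) would be:

```bash
# Manually dispatch the workflow for a given ref
gh workflow run report_container.yml --ref master

# Once the job finishes, the image is published under a report-<ref> tag
docker pull ghcr.io/mila-iqia/milabench:report-master
```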
267 changes: 82 additions & 185 deletions README.md
@@ -1,197 +1,94 @@

# Milabench

[Documentation](https://milabench.readthedocs.io/en/stable/)

Benchmarking framework for machine learning and artificial intelligence, geared toward
evaluating current and future hardware in a research environment.

* Simple / Hands-off
* Wide selection of models on diverse applications
* Multi-GPU
* Multi-node
* nlp / transformer / llm / rl / rnn
* vision / classification / convnet / resnet / transformer
* audio
* Docker Container
* Works on Slurm
* Automatic batch resize
* Focused on training
* Ease of use
* PyTorch focused
* ROCm & NVIDIA
* Independent

## Getting Started

The easiest way to run milabench is to use one of its Docker images, which include all of the necessary data.


```bash
# Choose the image you want to use
export MILABENCH_IMAGE=ghcr.io/mila-iqia/milabench:cuda-nightly

# Pull the image we are going to run
docker pull $MILABENCH_IMAGE

# Run milabench
docker run -it --rm --ipc=host --gpus=all \
    -v $(pwd)/results:/milabench/envs/runs \
    $MILABENCH_IMAGE \
    milabench run
```

```
=================
Benchmark results
=================
fail n perf sem% std% peak_memory score weight
bert-fp16 0 8 155.08 0.3% 4.3% 24552 1241.260310 0.00
bert-fp32 0 8 29.52 0.0% 0.5% 31524 236.337218 0.00
bert-tf32 0 8 120.46 0.4% 6.1% 31524 964.713297 0.00
bert-tf32-fp16 0 8 154.76 0.3% 4.1% 24552 1238.477257 3.00
convnext_large-fp16 0 8 337.48 0.9% 14.0% 27658 2741.604444 0.00
convnext_large-fp32 0 8 44.61 0.8% 12.6% 49786 354.207225 0.00
convnext_large-tf32 0 8 135.99 0.7% 11.2% 49786 1089.394916 0.00
convnext_large-tf32-fp16 0 8 338.58 0.8% 13.0% 27658 2744.325170 3.00
davit_large 0 8 312.79 0.3% 6.7% 35058 2515.326450 1.00
davit_large-multi 0 1 2401.65 1.0% 7.7% 42232 2401.651720 5.00
dlrm 0 1 188777.20 1.8% 14.0% 3194 188777.203190 1.00
focalnet 0 8 400.47 0.2% 5.4% 26604 3215.431924 2.00
opt-1_3b 0 1 26.71 0.1% 0.4% 44116 26.714365 5.00
opt-1_3b-multinode 0 2 34.62 0.2% 1.0% 43552 34.618292 10.00
opt-6_7b 0 1 14.32 0.0% 0.1% 55750 14.319587 5.00
opt-6_7b-multinode 0 2 10.79 0.1% 0.7% 49380 10.792595 10.00
reformer 0 8 61.70 0.0% 0.9% 25376 494.110834 1.00
regnet_y_128gf 0 8 99.96 0.2% 5.0% 31840 803.012507 2.00
resnet152 0 8 710.18 0.3% 6.2% 36732 5710.828608 1.00
resnet152-multi 0 1 5367.34 1.0% 8.1% 38638 5367.338469 5.00
resnet50 0 8 984.43 0.9% 19.1% 5026 7927.257351 1.00
rwkv 0 8 428.65 0.2% 3.8% 5546 3435.097716 1.00
stargan 0 8 51.32 1.8% 40.8% 37848 413.238870 1.00
super-slomo 0 8 41.63 0.1% 2.3% 34082 332.395065 1.00
t5 0 8 48.05 0.2% 3.9% 35466 384.317023 2.00
whisper 0 8 248.16 0.0% 0.6% 37006 1985.861017 1.00

Scores
------
Failure rate: 0.00% (PASS)
Score: 219.06
```

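Because `/milabench/envs/runs` inside the container is mounted to `./results` on the host, the raw run artifacts (one auto-named directory per run, containing one JSON file per benchmark task) remain available after the container exits. A minimal sketch of inspecting them:

```bash
# List the run directories and the per-benchmark JSON outputs they contain
ls results/
ls results/*/
```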

## Details

The benchmark suite has been validated on the following configurations:

| Python version | GPU | Configuration file |
| - | - | - |
| 3.9.12 (conda) | 4x NVIDIA A100 80GB | config/standard.yaml |
| 3.9.12 (conda) | 4x NVIDIA RTX8000 48GB | config/standard.yaml |
| 3.9.16 (conda) | 2x NVIDIA K80 | config/ci.yaml |
| 3.9.16 (conda) | 2x AMD MI100 | config/ci.yaml |

We are working on validating it on more configurations and will update the above table as we do.


<!--
## Install

To install for development, clone the repo and use branch `v2`:
```bash
git clone -b v2 git@github.com:mila-iqia/milabench.git
cd milabench
# <Activate virtual environment>
# Make sure pip version is high enough to handle pyproject.toml
pip install --upgrade pip
# Install in editable mode
pip install -e .
```
This will install two commands, `milabench` and `voir`.
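As a quick sanity check (assuming the editable install above succeeded), both entry points should now be available on the PATH:

```bash
# Both CLIs are provided by the editable install of milabench
milabench --help
voir --help
```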
## Using milabench
To use `milabench`, you need:
* A YAML configuration file to define the benchmarks to install, prepare or run.
* The base directory for code, virtual environments, data and outputs, set either with the `$MILABENCH_BASE` environment variable or the `--base` option. The base directory will be automatically constructed by milabench and will be organized as follows:
```bash
$MILABENCH_BASE/
|- venv/                              # Virtual environments and dependencies
|  |- bench1/                         # venv for benchmark bench1
|  |- ...                             # etc
|- code/                              # Benchmark code
|  |- bench1/                         # Code for benchmark bench1
|  |- ...                             # etc
|- data/                              # Datasets
|  |- dataset1/                       # A dataset
|  |- ...                             # etc
|- runs/                              # Outputs of benchmark runs
|  |- calimero.2022-03-30_15:00:00/   # Auto-generated run name
|  |  |- bench1.0.json                # Output for the first run of bench1
|  |  |- bench1.1.json                # Output for the second run of bench1
|  |  |- ...                          # etc
|  |- blah/                           # Can set name with --run
```
It is possible to change this structure in the YAML, e.g. to force all benchmarks to use the same virtual environment.
### Important options
* Use the `--select` option with a comma-separated list of benchmarks in order to only install/prepare/run those benchmarks (or use `--exclude` to run all benchmarks except a specific set); see the sketch after this list.
* You may use `--use-current-env` to force the use of the currently active virtual environment (useful for development).
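A hypothetical invocation combining these flags (the benchmark names here are purely illustrative) might look like:

```bash
# Install and prepare only two selected benchmarks, reusing the active virtual environment
milabench install benchmarks.yaml --select mybench,otherbench --use-current-env
milabench prepare benchmarks.yaml --select mybench,otherbench --use-current-env

# Run everything except one benchmark
milabench run benchmarks.yaml --exclude mybench
```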
### milabench install
```bash
milabench install benchmarks.yaml --select mybench
```
* Copies the code for the benchmark (specified in the `definition` field of the benchmark's YAML, relative to the YAML file itself) into `$MILABENCH_BASE/code/mybench`. Only files listed by the `manifest` file are copied.
* Creates/reuses a virtual environment in `$MILABENCH_BASE/venv/mybench` and installs all pip dependencies in it.
* Optionally extracts a shallow git clone of an external repository containing model code into `$MILABENCH_BASE/code/mybench`.
### milabench prepare
```bash
milabench prepare benchmarks.yaml --select mybench
```
* Prepares data for the benchmark into `$MILABENCH_BASE/data/dataset_name`. Multiple benchmarks can share the same data. Some benchmarks need no preparation, so the prepare step does nothing.
### milabench run
```bash
milabench run benchmarks.yaml --select mybench
```
* Creates a certain number of tasks from the benchmark using the `plan` defined in the YAML. For instance, one plan might be to run it in parallel on each GPU on the machine.
* For each task, runs the benchmark installed in `$MILABENCH_BASE/code/mybench` in the appropriate virtual environment.
* The benchmark is run from that directory using a command like `voir [VOIR_OPTIONS] main.py [SCRIPT_OPTIONS]`
* Both option groups are defined in the YAML.
* The VOIR_OPTIONS determine which instruments to use and what data to forward to milabench.
* The SCRIPT_OPTIONS are benchmark dependent.
* Standard output/error and other data (training rates, etc.) are forwarded to the main dispatcher process and saved into `$MILABENCH_BASE/runs/run_name/mybench.run_number.json` (the name of the directory is printed out for easy reference).
### milabench report
TODO.
```bash
milabench report benchmarks.yaml --run <run_name>
```
## Benchmark configuration
The configuration has two sections:
* `defaults` defines a template for benchmarks.
* `benchmarks` defines the benchmarks. Each benchmark may include the defaults with the special directive `<<<: *defaults`. Note that the `<<<` operator performs a deep merge. For example:
```yaml
defaults: &defaults
  plan:
    method: njobs
    n: 2

benchmarks:
  test:
    <<<: *defaults
    plan:
      n: 3
```
is equivalent to:
```yaml
benchmarks:
  test:
    plan:
      method: njobs
      n: 3
```
### Fields
Let's say you have the following `benchmark.yaml` configuration:
```yaml
benchmarks:
  mnist:
    definition: ../benchmarks/mnist-pytorch-example
    dirs:
      code: code/{name}
      venv: venv/{name}
      data: data
      runs: runs
    plan:
      method: njobs
      n: 2
    voir:
      --stop: 200
      --forward:
        - "#stdout"
        - "#stderr"
        - loss
        - compute_rate
        - train_rate
        - loading_rate
      --compute-rate: true
      --train-rate: true
      --loading-rate: true
    argv:
      --batch-size: 64
```
* `definition` points to the *definition directory* for the benchmark (more on that later). Important note: the path is *relative to benchmark.yaml*.
* `dirs` defines the directories for the venv, code, data and runs. Normally, this is set in the defaults, but it is technically possible to override it for every benchmark. The paths are relative to `$MILABENCH_BASE` (or the argument to `--base`); for example, `code/{name}` expands to `code/mnist`.
* `plan` describes the way tasks will be created for this benchmark. The `njobs` method just launches a fixed number of parallel processes.
* `voir` are the arguments given to the `voir` command when running a task. The `--forward` argument is important because it defines what will end up in the final `json` output saved to disk. Some of the forwarded fields (e.g. `compute_rate`, `train_rate`, `loading_rate`) correspond to the output of the other flags listed here.
* `argv` are the arguments given to the benchmark script.
## Benchmark definition
To define a new benchmark, create a directory with roughly the following files:
```bash
mybench
|- manifest # Lists the files milabench install should copy (accepts wildcards)
|- benchfile.py # Benchmark definition file
|- voirfile.py # Probes and extra instruments
|- prepare.py # Executed by milabench prepare
|- main.py # Executed by milabench run
|- dev.yaml # Bench file to use for development
``` -->
2 changes: 2 additions & 0 deletions benchmarks/accelerate_opt/benchfile.py
@@ -44,6 +44,7 @@ def build_run_plan(self):
for rank, node in enumerate(nodes):
host = node["ip"]
user = node["user"]
port = node.get("port", 22)
options = dict()

if rank == 0:
@@ -63,6 +64,7 @@ def build_run_plan(self):
host=host,
user=user,
key=key,
port=port,
executor=DockerRunCommand(
AccelerateLaunchCommand(pack, rank=rank),
self.config["system"].get("docker_image"),
9 changes: 6 additions & 3 deletions benchmarks/accelerate_opt/main.py
@@ -35,6 +35,7 @@ def arguments():
parser.add_argument("--validation_split_percentage", required=True, type=int)
parser.add_argument("--dataset_name", required=True, type=str)
parser.add_argument("--dataset_config_name", required=True, type=str)
parser.add_argument("--dataset_rev", required=True, type=str)
parser.add_argument("--cache", required=True, type=str)
parser.add_argument("--model_name", required=True, type=str)
parser.add_argument("--prepare_only", action="store_true", default=False)
@@ -180,17 +181,19 @@ def mblog(data):
validation_split_percentage = config["validation_split_percentage"]
dataset_name = config["dataset_name"]
dataset_config_name = config["dataset_config_name"]
raw_datasets = load_dataset(dataset_name, dataset_config_name)
raw_datasets = load_dataset(dataset_name, dataset_config_name, revision=config["dataset_rev"])
if "validation" not in raw_datasets.keys():
raw_datasets["validation"] = load_dataset(
dataset_name,
dataset_config_name,
split=f"train[:{validation_split_percentage}%]",
split=f"train[:{validation_split_percentage}%]",
revision=config["dataset_rev"]
)
raw_datasets["train"] = load_dataset(
dataset_name,
dataset_config_name,
split=f"train[{validation_split_percentage}%:]",
split=f"train[{validation_split_percentage}%:]",
revision=config["dataset_rev"]
)

model_name = config["model_name"]
1 change: 1 addition & 0 deletions config/base.yaml
@@ -109,6 +109,7 @@ _accelerate_opt:
--max_train_steps: 100
--dataset_name: "wikitext"
--dataset_config_name: "wikitext-103-v1"
--dataset_rev: "b08601e"
--validation_split_percentage: 5
--per_gpu_batch_size: 1
--cpus_per_gpu: 8