From 78649eea004da892c46f37761d70f3a1b5d75b0a Mon Sep 17 00:00:00 2001
From: Sam Stoelinga <sammiestoel@gmail.com>
Date: Tue, 27 Aug 2024 15:59:30 -0700
Subject: [PATCH 1/3] add ARM support

---
 .github/workflows/build-push.yml | 19 ++++++++++++++-----
 README.md                        |  2 ++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml
index 0ca736f..595d493 100644
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@@ -39,10 +39,14 @@ jobs:
       #        run: sudo service docker restart
       - name: Checkout repository
         uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
       # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
       - name: Log in to the Container registry
         if: github.event_name == 'push'
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        uses: docker/login-action@v3
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
@@ -50,7 +54,7 @@ jobs:
 
       - name: Login to docker.io
         if: github.event_name == 'push'
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        uses: docker/login-action@v3
         with:
           username: ${{ vars.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -58,7 +62,7 @@ jobs:
       # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        uses: docker/metadata-action@v5
         with:
           images: |
             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
@@ -67,9 +71,10 @@ jobs:
       # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
       # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
       - name: Build and push Docker image
-        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        uses: docker/build-push-action@v6
         with:
           context: .
+          platforms: linux/amd64,linux/arm64
           push: ${{ github.event_name == 'push' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
@@ -83,7 +88,10 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
-      # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
       - name: Log in to the Container registry
         if: github.event_name == 'push'
         uses: docker/login-action@v3
@@ -113,6 +121,7 @@ jobs:
         with:
           context: "https://github.com/vllm-project/vllm.git#v0.5.5:."
           file: Dockerfile.cpu
+          platforms: linux/amd64,linux/arm64
           push: ${{ github.event_name == 'push' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
diff --git a/README.md b/README.md
index 58904d4..6fd74e3 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 vLLM is a fast and easy-to-use library for LLM inference and serving.
 This container image runs the OpenAI API server of vLLM.
 
+Supports Arm64 and x86_64 architectures.
+
 Image URLs:
 
 - `substratusai/vllm` (Docker Hub)

From ebcf5815b72e2dd15bb6c7bb14e7427ec27c2f02 Mon Sep 17 00:00:00 2001
From: Sam Stoelinga <sammiestoel@gmail.com>
Date: Tue, 27 Aug 2024 18:09:48 -0700
Subject: [PATCH 2/3] add forked Dockerfile without caching directives

---
 .github/workflows/build-push.yml |  7 +++--
 Dockerfile.cpu                   | 47 ++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile.cpu

diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml
index 595d493..6189db2 100644
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@@ -99,7 +99,6 @@ jobs:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Login to docker.io
         if: github.event_name == 'push'
         uses: docker/login-action@v3
@@ -116,10 +115,14 @@ jobs:
           images: |
             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
             ${{ env.IMAGE_NAME }}
+      # Shallow-clone only the pinned tag into ./vllm (used as the build context); full history is not fetched.
+      - name: Clone vLLM at specified tag
+        run: |
+          git clone --depth 1 --branch v0.5.5 https://github.com/vllm-project/vllm.git vllm
       - name: Build and push Docker image
         uses: docker/build-push-action@v6
         with:
-          context: "https://github.com/vllm-project/vllm.git#v0.5.5:."
+          context: vllm
           file: Dockerfile.cpu
           platforms: linux/amd64,linux/arm64
           push: ${{ github.event_name == 'push' }}
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
new file mode 100644
index 0000000..3a1fb1d
--- /dev/null
+++ b/Dockerfile.cpu
@@ -0,0 +1,47 @@
+# This vLLM Dockerfile is used to construct an image that can build and run vLLM on the x86 CPU platform.
+# This is copied from https://github.com/vllm-project/vllm/blob/09c7792610ada9f88bbf87d32b472dd44bf23cc2/Dockerfile.cpu
+# The original Dockerfile has caching directives that cause issues with docker buildx.
+
+FROM ubuntu:22.04 AS cpu-test-1
+
+RUN apt-get update -y \
+    && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
+    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+
+# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
+# intel-openmp provides additional performance improvement vs. openmp
+# tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access to commonly used objects.
+RUN pip install --no-cache-dir intel-openmp
+
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
+
+RUN echo 'ulimit -c 0' >> ~/.bashrc
+
+RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.4.0%2Bgitfbaa4bc-cp310-cp310-linux_x86_64.whl
+
+ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
+COPY requirements-build.txt requirements-build.txt
+RUN pip install --upgrade pip && \
+    pip install -r requirements-build.txt
+
+FROM cpu-test-1 AS build
+
+WORKDIR /workspace/vllm
+
+COPY requirements-cpu.txt requirements-cpu.txt
+RUN pip install -v -r requirements-cpu.txt
+
+COPY ./ ./
+
+# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
+ARG VLLM_CPU_DISABLE_AVX512
+ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
+
+RUN VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
+    pip install dist/*.whl
+
+WORKDIR /workspace/
+
+RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
\ No newline at end of file

From aa24d04e0dc574c4b48aa5ffaede4d9b7cc16468 Mon Sep 17 00:00:00 2001
From: Sam Stoelinga <sammiestoel@gmail.com>
Date: Tue, 27 Aug 2024 18:12:48 -0700
Subject: [PATCH 3/3] fix dockerfile.cpu

---
 Dockerfile.cpu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile.cpu b/Dockerfile.cpu
index 3a1fb1d..84fcdb7 100644
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -28,7 +28,7 @@ FROM cpu-test-1 AS build
 
 WORKDIR /workspace/vllm
 
-COPY requirements-cpu.txt requirements-cpu.txt
+COPY requirements-*.txt .
 RUN pip install -v -r requirements-cpu.txt
 
 COPY ./ ./