substratusai · samos123 · Aug 27, 2024 · Aug 28, 2024 · Aug 28, 2024
diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml
@@ -39,26 +39,30 @@ jobs:
       #        run: sudo service docker restart
       - name: Checkout repository
         uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
       # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
       - name: Log in to the Container registry
         if: github.event_name == 'push'
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        uses: docker/login-action@v3
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Login to docker.io
         if: github.event_name == 'push'
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        uses: docker/login-action@v3
         with:
           username: ${{ vars.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
 
       # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        uses: docker/metadata-action@v5
         with:
           images: |
             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
@@ -67,9 +71,10 @@ jobs:
       # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
       # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
       - name: Build and push Docker image
-        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        uses: docker/build-push-action@v6
         with:
           context: .
+          platforms: linux/amd64,linux/arm64
           push: ${{ github.event_name == 'push' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
@@ -83,15 +88,17 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
-      # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
       - name: Log in to the Container registry
         if: github.event_name == 'push'
         uses: docker/login-action@v3
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Login to docker.io
         if: github.event_name == 'push'
         uses: docker/login-action@v3
@@ -108,11 +115,16 @@ jobs:
           images: |
             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
             ${{ env.IMAGE_NAME }}
+      # Shallow-clone vLLM at the pinned tag; the build step below uses this checkout as its context.
+      - name: Clone vLLM at specified tag
+        run: |
+          git clone --depth 1 --branch v0.5.5 https://github.com/vllm-project/vllm.git
       - name: Build and push Docker image
         uses: docker/build-push-action@v6
         with:
-          context: "https://github.com/vllm-project/vllm.git#v0.5.5:."
+          context: vllm
           file: Dockerfile.cpu
+          platforms: linux/amd64,linux/arm64
           push: ${{ github.event_name == 'push' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
@@ -0,0 +1,60 @@
+# This vLLM Dockerfile builds an image that can build and run vLLM on an x86 CPU platform.
+# It is copied from https://github.com/vllm-project/vllm/blob/09c7792610ada9f88bbf87d32b472dd44bf23cc2/Dockerfile.cpu
+# The original Dockerfile has caching directives that cause issues with docker buildx, so this copy is used instead.
+#
+# NOTE(review): the build-push workflow builds this file for linux/arm64 as well, but the
+# LD_PRELOAD path and the intel-extension-for-pytorch wheel below are x86_64-specific.
+# Confirm that the arm64 build actually works.
+
+FROM ubuntu:22.04 AS cpu-test-1
+
+# Toolchain and runtime libraries. The apt package lists are removed in the same layer so they do not bloat the image.
+RUN apt-get update -y \
+    && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
+    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
+    && rm -rf /var/lib/apt/lists/*
+
+# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
+# intel-openmp provides additional performance improvement vs. openmp
+# tcmalloc provides better memory allocation efficiency, e.g, holding memory in caches to speed up access of commonly-used objects.
+RUN pip install --no-cache-dir intel-openmp
+
+# Preload tcmalloc and Intel OpenMP (libiomp5) into processes started in the container.
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
+
+# Disable core dumps for shells that source ~/.bashrc.
+RUN echo 'ulimit -c 0' >> ~/.bashrc
+
+RUN pip install --no-cache-dir https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.4.0%2Bgitfbaa4bc-cp310-cp310-linux_x86_64.whl
+
+# Add the PyTorch CPU wheel index as an extra index for every following pip install.
+ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
+COPY requirements-build.txt requirements-build.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements-build.txt
+
+FROM cpu-test-1 AS build
+
+WORKDIR /workspace/vllm
+
+# Copy only the requirement files first so this dependency layer can be cached independently of the source tree.
+COPY requirements-*.txt .
+RUN pip install --no-cache-dir -v -r requirements-cpu.txt
+
+COPY ./ ./
+
+# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
+ARG VLLM_CPU_DISABLE_AVX512
+ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
+
+# Build the vLLM wheel for the CPU target and install it into the image.
+RUN VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
+    pip install --no-cache-dir dist/*.whl
+
+WORKDIR /workspace/
+
+# Expose the tests, examples and benchmarks directories under /workspace via symlinks.
+RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
+
+# Start the vLLM OpenAI API server by default.
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
diff --git a/README.md b/README.md
@@ -2,6 +2,8 @@
 vLLM is a fast and easy-to-use library for LLM inference and serving.
 This container image runs the OpenAI API server of vLLM.
 
+Supports Arm64 and x86_64 architectures.
+
 Image URLs:
 
 - `substratusai/vllm` (Docker Hub)