From 78649eea004da892c46f37761d70f3a1b5d75b0a Mon Sep 17 00:00:00 2001
From: Sam Stoelinga
Date: Tue, 27 Aug 2024 15:59:30 -0700
Subject: [PATCH 1/3] add ARM support

---
Notes (review):
    Commentary placed after the three-dash line, where `git format-patch
    --notes` puts notes; it is not part of the commit message or the diff.

    * Inserts "Set up QEMU" and "Set up Docker Buildx" steps after the
      checkout step in two places of the workflow, and adds
      `platforms: linux/amd64,linux/arm64` to two image build steps.
    * Replaces the commit-SHA pins of docker/login-action,
      docker/metadata-action and docker/build-push-action with the
      version tags v3, v5 and v6.
    * Drops the explanatory comment that preceded the second
      "Log in to the Container registry" step.
    * README gains one sentence stating Arm64 and x86_64 support.

 .github/workflows/build-push.yml | 19 ++++++++++++++-----
 README.md                        |  2 ++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml
index 0ca736f..595d493 100644
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@@ -39,10 +39,14 @@ jobs:
       # run: sudo service docker restart
       - name: Checkout repository
         uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
       # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
       - name: Log in to the Container registry
         if: github.event_name == 'push'
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        uses: docker/login-action@v3
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
@@ -50,7 +54,7 @@ jobs:
 
       - name: Login to docker.io
         if: github.event_name == 'push'
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        uses: docker/login-action@v3
         with:
           username: ${{ vars.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -58,7 +62,7 @@ jobs:
       # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        uses: docker/metadata-action@v5
         with:
           images: |
             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
@@ -67,9 +71,10 @@ jobs:
       # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
       # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
       - name: Build and push Docker image
-        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        uses: docker/build-push-action@v6
         with:
           context: .
+          platforms: linux/amd64,linux/arm64
           push: ${{ github.event_name == 'push' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
@@ -83,7 +88,10 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
-      # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
       - name: Log in to the Container registry
         if: github.event_name == 'push'
         uses: docker/login-action@v3
@@ -113,6 +121,7 @@ jobs:
         with:
           context: "https://github.com/vllm-project/vllm.git#v0.5.5:."
           file: Dockerfile.cpu
+          platforms: linux/amd64,linux/arm64
           push: ${{ github.event_name == 'push' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
diff --git a/README.md b/README.md
index 58904d4..6fd74e3 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 vLLM is a fast and easy-to-use library for LLM inference and serving.
 This container image runs the OpenAI API server of vLLM.
 
+Supports Arm64 and x86_64 architectures.
+
 Image URLs:
 
 - `substratusai/vllm` (Docker Hub)

From ebcf5815b72e2dd15bb6c7bb14e7427ec27c2f02 Mon Sep 17 00:00:00 2001
From: Sam Stoelinga
Date: Tue, 27 Aug 2024 18:09:48 -0700
Subject: [PATCH 2/3] add forked Dockerfile without caching directives

---
Notes (review):
    Commentary placed after the three-dash line; not part of the commit.

    * Adds a repository-local Dockerfile.cpu. Its own header says it is
      copied from vLLM commit 09c7792 and that the original's caching
      directives caused issues with docker buildx.
    * The CPU build step no longer uses the remote git context
      "https://github.com/vllm-project/vllm.git#v0.5.5:."; a new step
      clones vLLM, checks out v0.5.5, and the build uses the local
      `vllm` directory as context with `file: Dockerfile.cpu`.
    * Removes one blank line before the "Login to docker.io" step.
    * Dockerfile.cpu is added without a trailing newline (see the
      "No newline at end of file" marker).
    * NOTE(review): this build step keeps
      `platforms: linux/amd64,linux/arm64`, but Dockerfile.cpu describes
      itself as targeting the "x86 CPU platform", installs a wheel whose
      filename ends in `linux_x86_64.whl`, and sets LD_PRELOAD to a path
      under /usr/lib/x86_64-linux-gnu/. Confirm the arm64 leg of this
      build can succeed, or restrict this step to linux/amd64.
    * NOTE(review): presumably `file: Dockerfile.cpu` is meant to pick up
      the file added here rather than the one inside the cloned `vllm`
      directory - TODO confirm how the action resolves `file` relative
      to `context`.

 .github/workflows/build-push.yml |  7 +++--
 Dockerfile.cpu                   | 47 ++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile.cpu

diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml
index 595d493..6189db2 100644
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@@ -99,7 +99,6 @@ jobs:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Login to docker.io
         if: github.event_name == 'push'
         uses: docker/login-action@v3
@@ -116,10 +115,14 @@ jobs:
           images: |
             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
             ${{ env.IMAGE_NAME }}
+      - name: Clone vLLM at specified tag
+        run: |
+          git clone https://github.com/vllm-project/vllm.git
+          cd vllm && git checkout v0.5.5
       - name: Build and push Docker image
         uses: docker/build-push-action@v6
         with:
-          context: "https://github.com/vllm-project/vllm.git#v0.5.5:."
+          context: vllm
           file: Dockerfile.cpu
           platforms: linux/amd64,linux/arm64
           push: ${{ github.event_name == 'push' }}
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
new file mode 100644
index 0000000..3a1fb1d
--- /dev/null
+++ b/Dockerfile.cpu
@@ -0,0 +1,47 @@
+# This vLLM Dockerfile is used to construct image that can build and run vLLM on x86 CPU platform.
+# This is copied from https://github.com/vllm-project/vllm/blob/09c7792610ada9f88bbf87d32b472dd44bf23cc2/Dockerfile.cpu
+# original dockerfile has caching directives causing issues with docker buildx
+
+FROM ubuntu:22.04 AS cpu-test-1
+
+RUN apt-get update -y \
+    && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
+    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+
+# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
+# intel-openmp provides additional performance improvement vs. openmp
+# tcmalloc provides better memory allocation efficiency, e.g, holding memory in caches to speed up access of commonly-used objects.
+RUN pip install --no-cache-dir intel-openmp
+
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
+
+RUN echo 'ulimit -c 0' >> ~/.bashrc
+
+RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.4.0%2Bgitfbaa4bc-cp310-cp310-linux_x86_64.whl
+
+ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
+COPY requirements-build.txt requirements-build.txt
+RUN pip install --upgrade pip && \
+    pip install -r requirements-build.txt
+
+FROM cpu-test-1 AS build
+
+WORKDIR /workspace/vllm
+
+COPY requirements-cpu.txt requirements-cpu.txt
+RUN pip install -v -r requirements-cpu.txt
+
+COPY ./ ./
+
+# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
+ARG VLLM_CPU_DISABLE_AVX512
+ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
+
+RUN VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
+    pip install dist/*.whl
+
+WORKDIR /workspace/
+
+RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
\ No newline at end of file

From aa24d04e0dc574c4b48aa5ffaede4d9b7cc16468 Mon Sep 17 00:00:00 2001
From: Sam Stoelinga
Date: Tue, 27 Aug 2024 18:12:48 -0700
Subject: [PATCH 3/3] fix dockerfile.cpu

---
Notes (review):
    Commentary placed after the three-dash line; not part of the commit.

    * In the `build` stage, copies every file matching
      `requirements-*.txt` into the working directory instead of only
      requirements-cpu.txt; the following `pip install -v -r
      requirements-cpu.txt` is unchanged.
    * NOTE(review): presumably requirements-cpu.txt pulls in another
      requirements file via `-r`, which is why the single-file copy
      failed - TODO confirm against the vLLM v0.5.5 tree.

 Dockerfile.cpu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile.cpu b/Dockerfile.cpu
index 3a1fb1d..84fcdb7 100644
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -28,7 +28,7 @@ FROM cpu-test-1 AS build
 
 WORKDIR /workspace/vllm
 
-COPY requirements-cpu.txt requirements-cpu.txt
+COPY requirements-*.txt .
 RUN pip install -v -r requirements-cpu.txt
 
 COPY ./ ./