Jinja docker template #426
Merged
@@ -0,0 +1,115 @@
# Autogenerated warning:
# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly.
# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd
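The warning above means that fixes belong in the template, not in the rendered files. As a minimal sketch of the idea (the real pipeline uses Jinja2 with dockerfile_template.yaml; the placeholder names and values below are hypothetical), a single template plus one context dict per target renders each Dockerfile variant:

```python
import re

def render(template: str, context: dict) -> str:
    """Substitute {{ name }} placeholders -- a tiny stand-in for Jinja2 rendering."""
    return re.sub(
        r"\{\{\s*(\w+)\s*\}\}",
        lambda m: str(context[m.group(1)]),
        template,
    )

# Hypothetical template fragment and per-target contexts:
template = "FROM {{ base_image }} AS base\nENV PYTHON={{ python }}"
contexts = {
    "amd": {"base_image": "rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0",
            "python": "python3.11"},
    "cpu": {"base_image": "ubuntu:22.04", "python": "python3.11"},
}
for name, ctx in contexts.items():
    print(f"# Dockerfile.{name}")
    print(render(template, ctx))
```

Regenerating all variants from one template is what keeps the three Dockerfiles from drifting apart.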
FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    PIP_NO_CACHE_DIR=off \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_IN_PROJECT=true \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    EXTRAS="all" \
    PYTHON="python3.11"
RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
WORKDIR /app
FROM base AS builder
# Define the version of Poetry to install (default is 1.7.1)
# Define the directory to install Poetry to (default is /opt/poetry)
ARG POETRY_VERSION=1.7.1
ARG POETRY_HOME=/opt/poetry
# Create a Python virtual environment for Poetry and install it
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON -
ENV PATH=$POETRY_HOME/bin:$PATH
# Test if Poetry is installed in the expected path
RUN echo "Poetry version:" && poetry --version
# Copy only the dependency manifests first, so this layer is reused until they change
COPY poetry.lock poetry.toml pyproject.toml README.md /app/
# Point the package source at the ROCm wheel index, drop the CUDA-pinned torch, and install dependencies only
RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|#|' pyproject.toml && rm poetry.lock
RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
COPY infinity_emb infinity_emb
# Install the infinity_emb package itself on top of the cached dependencies
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
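The two `sed` calls rewrite pyproject.toml in place before Poetry resolves anything, which is how one lockfile-free install can target a different wheel index per variant. A reproduction with a hypothetical minimal fragment (the real pyproject.toml is larger; only the edited keys are shown):

```shell
# Hypothetical fragment standing in for pyproject.toml, which declares a
# "pypi" package source and pins a CUDA build of torch.
cat > /tmp/pyproject_demo.toml <<'EOF'
[[tool.poetry.source]]
name = "pypi"

[tool.poetry.dependencies]
torch = "2.4.1"
EOF
# Same edits as the builder stage: retarget the source at the ROCm index
# and comment out the CUDA-pinned torch line.
sed -i 's|"pypi"|"pytorch_rocm"|' /tmp/pyproject_demo.toml
sed -i 's|torch = "2.4.1"|#|' /tmp/pyproject_demo.toml
cat /tmp/pyproject_demo.toml
```

Deleting poetry.lock afterwards is deliberate: the lockfile was resolved against the original source and would conflict with the rewritten one.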
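The split into a `--no-root` install before `COPY infinity_emb` and a full install after it is a layer-caching optimization: the expensive dependency layer survives source-only changes, and only the cheap project install reruns. The generic shape of the pattern (hypothetical paths):

```dockerfile
COPY pyproject.toml poetry.lock /app/
RUN poetry install --no-root   # dependencies only; cached until the manifests change
COPY src /app/src
RUN poetry install             # installs just the project itself on top
```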
FROM builder AS testing
# install lint and test dependencies
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
# lint
RUN poetry run ruff .
RUN poetry run black --check .
RUN poetry run mypy .
# pytest
COPY tests tests
# Run only a subset of the end-to-end tests because of the build duration in GitHub CI:
# the full tests/end_to_end suite on linux/amd64, otherwise only the dummy-model API test.
# poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
# TARGETPLATFORM is populated by buildx, but only when the ARG is declared in this stage.
ARG TARGETPLATFORM
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
        poetry run python -m pytest tests/end_to_end -x ; \
    else \
        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
    fi
RUN echo "all tests passed" > "test_results.txt"
# Multi-stage build: assemble the tested application on a clean base image
FROM base AS tested-builder
COPY --from=builder /app /app
# force testing stage to run
COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH
# do nothing
RUN echo "copied all files"
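The `COPY --from=testing /app/test_results.txt` line is what forces the testing stage to build at all: BuildKit only builds stages that the requested target depends on, and the production stages otherwise depend only on builder. The pattern in isolation (hypothetical stage names and paths):

```dockerfile
FROM alpine AS tests
RUN ./run_tests.sh && touch /ok

FROM alpine AS release
# Copying the sentinel file makes `release` depend on `tests`,
# so the tests must pass before `release` can be built.
COPY --from=tests /ok /ok
```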
# Export with tensorrt, not recommended.
# docker buildx build --target=production-tensorrt -f Dockerfile .
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=off \
    PYTHON="python3.11"
RUN apt-get update && apt-get install -y python3-dev python3-pip $PYTHON build-essential curl
COPY --from=builder /app /app
# force testing stage to run
COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/torch
ENV PATH=/app/.venv/bin:$PATH
RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*"
# Note: ${PYTHON}, not $(PYTHON) -- Docker only substitutes $VAR / ${VAR} in ENV
ENV LD_LIBRARY_PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/${PYTHON}/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
ENV PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt/bin:${PATH}
ENTRYPOINT ["infinity_emb"]
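The library-path lines above only work if `PYTHON` is expanded with `${PYTHON}` syntax; `$(PYTHON)` is Makefile syntax, which Docker's ENV substitution leaves untouched. The same distinction in plain shell:

```shell
PYTHON=python3.11
# ${PYTHON} is variable expansion and yields the intended path:
echo "/app/.venv/lib/${PYTHON}/site-packages"
# In Dockerfile ENV instructions only $VAR and ${VAR} are substituted;
# $(PYTHON) would pass through literally (and in shell it would even be
# command substitution, i.e. an attempt to run a program named python3.11).
```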
# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
#   --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
ARG ENGINE
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
ARG EXTRA_PACKAGES
RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
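The `|| [ $? -eq 3 ]` idiom makes the preload step tolerate exactly one "expected" exit code: the RUN succeeds if the command exits 0 or 3 and fails on anything else. Demonstrated with a stand-in command in place of infinity_emb:

```shell
# Succeed on exit code 0 or 3, fail on any other code.
tolerate3() { "$@" || [ $? -eq 3 ]; }

tolerate3 sh -c 'exit 0' && echo "code 0 accepted"
tolerate3 sh -c 'exit 3' && echo "code 3 accepted"
tolerate3 sh -c 'exit 1' || echo "code 1 rejected"
```

This is why the TODO asks for a better exit code: any tool that happens to exit 3 for an unrelated error would silently pass the build.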
ENTRYPOINT ["infinity_emb"]
# flash attention fa2
FROM tested-builder AS production-with-fa2
RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl

ENTRYPOINT ["infinity_emb"]
# Use a multi-stage build -> production version
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]
@@ -0,0 +1,115 @@
# Autogenerated warning:
# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly.
# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd
FROM ubuntu:22.04 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    PIP_NO_CACHE_DIR=off \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_IN_PROJECT=true \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    EXTRAS="all" \
    PYTHON="python3.11"
RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl

WORKDIR /app
FROM base AS builder
# Define the version of Poetry to install (default is 1.7.1)
# Define the directory to install Poetry to (default is /opt/poetry)
ARG POETRY_VERSION=1.7.1
ARG POETRY_HOME=/opt/poetry
# Create a Python virtual environment for Poetry and install it
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON -
ENV PATH=$POETRY_HOME/bin:$PATH
# Test if Poetry is installed in the expected path
RUN echo "Poetry version:" && poetry --version
# Copy only the dependency manifests first, so this layer is reused until they change
COPY poetry.lock poetry.toml pyproject.toml README.md /app/
# Point the package source at the CPU-only torch index and install dependencies only
RUN sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml && rm poetry.lock
RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
COPY infinity_emb infinity_emb
# Install the infinity_emb package itself on top of the cached dependencies
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
FROM builder AS testing
# install lint and test dependencies
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
# lint
RUN poetry run ruff .
RUN poetry run black --check .
RUN poetry run mypy .
# pytest
COPY tests tests
# Run only a subset of the end-to-end tests because of the build duration in GitHub CI:
# the full tests/end_to_end suite on linux/amd64, otherwise only the dummy-model API test.
# poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
# TARGETPLATFORM is populated by buildx, but only when the ARG is declared in this stage.
ARG TARGETPLATFORM
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
        poetry run python -m pytest tests/end_to_end -x ; \
    else \
        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
    fi
RUN echo "all tests passed" > "test_results.txt"
# Multi-stage build: assemble the tested application on a clean base image
FROM base AS tested-builder
COPY --from=builder /app /app
# force testing stage to run
COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH
# do nothing
RUN echo "copied all files"
# Export with tensorrt, not recommended.
# docker buildx build --target=production-tensorrt -f Dockerfile .
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt

Review comment (logic): Using CUDA base image for CPU build. This stage may not be necessary for a CPU-only Dockerfile.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=off \
    PYTHON="python3.11"
RUN apt-get update && apt-get install -y python3-dev python3-pip $PYTHON build-essential curl
COPY --from=builder /app /app
# force testing stage to run
COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/torch
ENV PATH=/app/.venv/bin:$PATH
RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*"
# Note: ${PYTHON}, not $(PYTHON) -- Docker only substitutes $VAR / ${VAR} in ENV
ENV LD_LIBRARY_PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/${PYTHON}/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
ENV PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt/bin:${PATH}
ENTRYPOINT ["infinity_emb"]
# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
#   --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
ARG ENGINE
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
ARG EXTRA_PACKAGES
RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]

ENTRYPOINT ["infinity_emb"]
# flash attention fa2
FROM tested-builder AS production-with-fa2
RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl

ENTRYPOINT ["infinity_emb"]
# Use a multi-stage build -> production version
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]
Review comment (logic): PYTHON is set to python3.11, but the base image ships python3.10. This mismatch may cause issues.

Author reply: Fair, but AMD is currently not working.