From c5647da04a1ae22a634c093b743b9ad7d723f4cc Mon Sep 17 00:00:00 2001 From: michaelfeil Date: Mon, 14 Oct 2024 23:41:42 -0700 Subject: [PATCH 1/3] add jinja docker templates --- .github/workflows/release.yaml | 29 ++++- .../workflows/release_docker_container.yaml | 13 +- infra/sap/sap-core-ai | 2 +- libs/infinity_emb/Dockerfile.amd_auto | 115 ++++++++++++++++++ libs/infinity_emb/Dockerfile.cpu_auto | 115 ++++++++++++++++++ libs/infinity_emb/Dockerfile.jinja2 | 115 ++++++++++++++++++ .../{Dockerfile => Dockerfile.nvidia_auto} | 16 +-- libs/infinity_emb/Makefile | 7 +- libs/infinity_emb/docker.template.yaml | 19 +++ .../infinity_emb/transformer/audio/torch.py | 2 +- libs/infinity_emb/poetry.lock | 67 ++++++---- libs/infinity_emb/pyproject.toml | 23 +++- 12 files changed, 485 insertions(+), 38 deletions(-) create mode 100644 libs/infinity_emb/Dockerfile.amd_auto create mode 100644 libs/infinity_emb/Dockerfile.cpu_auto create mode 100644 libs/infinity_emb/Dockerfile.jinja2 rename libs/infinity_emb/{Dockerfile => Dockerfile.nvidia_auto} (88%) create mode 100644 libs/infinity_emb/docker.template.yaml diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index dbdaddb6..1b84ffff 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -10,17 +10,40 @@ env: WORKDIR: "libs/infinity_emb" jobs: - docker-container-push: + docker-container-push-nvidia: uses: ./.github/workflows/release_docker_container.yaml with: # working-directory: libs/infinity_emb - dockerfile: libs/infinity_emb/Dockerfile + dockerfile: libs/infinity_emb/Dockerfile.nvidia_auto image: michaelf34/infinity + appendix_tag: "" + platforms: "linux/amd64,linux/arm64" secrets: inherit + + docker-container-push-cpu: + uses: + ./.github/workflows/release_docker_container.yaml + with: + # working-directory: libs/infinity_emb + dockerfile: libs/infinity_emb/Dockerfile.cpu_auto + image: michaelf34/infinity + appendix_tag: "-cpu" + platforms: "linux/amd64,linux/arm64" + 
secrets: inherit + + # docker-container-push-amd: + # uses: + # ./.github/workflows/release_docker_container.yaml + # with: + # # working-directory: libs/infinity_emb + # dockerfile: libs/infinity_emb/Dockerfile.amd_auto + # image: michaelf34/infinity + # appendix_tag: "-amd" + # platforms: "linux/amd64" + # secrets: inherit modal-deploy: - needs: docker-container-push uses: ./.github/workflows/release_modal_com.yaml secrets: inherit \ No newline at end of file diff --git a/.github/workflows/release_docker_container.yaml b/.github/workflows/release_docker_container.yaml index c7933557..d71e8039 100644 --- a/.github/workflows/release_docker_container.yaml +++ b/.github/workflows/release_docker_container.yaml @@ -11,11 +11,20 @@ on: required: true type: string description: "Name of the image to build" + appendix_tag: + required: false + type: string + description: "Appendix tag to add to the image, e.g. `-cpu`" context: required: false type: string description: "Path to the build context" default: "./libs/infinity_emb" + platforms: + required: false + type: string + description: "Platforms to build for" + default: "linux/amd64,linux/arm64" env: TEST_TAG: ${{ inputs.image }}:test @@ -78,6 +87,6 @@ jobs: # QEMU and base python image, for now build only for # linux/amd64 and linux/arm64 # cache-from: type=registry,ref=${{ env.LATEST_TAG }} - platforms: linux/amd64,linux/arm64 - tags: ${{ env.LATEST_TAG }},${{ env.VERSION_TAG }} + platforms: ${{ inputs.platforms }} + tags: ${{ env.LATEST_TAG }}${{ inputs.appendix_tag }},${{ env.VERSION_TAG }}${{ inputs.appendix_tag }} push: true diff --git a/infra/sap/sap-core-ai b/infra/sap/sap-core-ai index 8eb2ce3e..881d03d5 160000 --- a/infra/sap/sap-core-ai +++ b/infra/sap/sap-core-ai @@ -1 +1 @@ -Subproject commit 8eb2ce3e7994b4419ca98d8fc2c19690c2808eb0 +Subproject commit 881d03d5288efe293da2e31d8650b86bc6a5fa48 diff --git a/libs/infinity_emb/Dockerfile.amd_auto b/libs/infinity_emb/Dockerfile.amd_auto new file mode 100644 index
00000000..e648bf8a --- /dev/null +++ b/libs/infinity_emb/Dockerfile.amd_auto @@ -0,0 +1,115 @@ +# Autogenerated warning: +# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly. +# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd + +FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0 AS base + +ENV PYTHONUNBUFFERED=1 \ + \ + # pip + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + \ + # make poetry create the virtual environment in the project's root + # it gets named `.venv` + POETRY_VIRTUALENVS_IN_PROJECT=true \ + # do not ask any interactive question + POETRY_NO_INTERACTION=1 \ + EXTRAS="all" \ + PYTHON="python3.11" +RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y +WORKDIR /app + +FROM base as builder +# Set the working directory for the app +# Define the version of Poetry to install (default is 1.7.1) +# Define the directory to install Poetry to (default is /opt/poetry) +ARG POETRY_VERSION=1.7.1 +ARG POETRY_HOME=/opt/poetry +# Create a Python virtual environment for Poetry and install it +RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON - +ENV PATH=$POETRY_HOME/bin:$PATH +# Test if Poetry is installed in the expected path +RUN echo "Poetry version:" && poetry --version +# Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes) +COPY poetry.lock poetry.toml pyproject.toml README.md /app/ +# Install dependencies only +RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|#|' pyproject.toml && rm poetry.lock +RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all +COPY infinity_emb infinity_emb +# Install dependency 
with infinity_emb package +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all + +FROM builder as testing +# install lint and test dependencies +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all +# lint +RUN poetry run ruff . +RUN poetry run black --check . +RUN poetry run mypy . +# pytest +COPY tests tests +# run end to end tests because of duration of build in github ci. +# Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu +# poetry run python -m pytest tests/end_to_end -x # TODO: does not work. +RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ +poetry run python -m pytest tests/end_to_end -x ; \ +else \ +poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ +fi +RUN echo "all tests passed" > "test_results.txt" + + +# Use a multi-stage build -> production version, with download +FROM base AS tested-builder +COPY --from=builder /app /app +# force testing stage to run +COPY --from=testing /app/test_results.txt /app/test_results.txt +ENV HF_HOME=/app/.cache/huggingface +ENV PATH=/app/.venv/bin:$PATH +# do nothing +RUN echo "copied all files" + + +# Export with tensorrt, not recommended. +# docker buildx build --target=production-tensorrt -f Dockerfile . 
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt +ENV PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=off \ + PYTHON="python3.11" +RUN apt-get update && apt-get install python3-dev python3-pip $PYTHON build-essential curl -y +COPY --from=builder /app /app +# force testing stage to run +COPY --from=testing /app/test_results.txt /app/test_results.txt +ENV HF_HOME=/app/.cache/torch +ENV PATH=/app/.venv/bin:$PATH +RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*" +ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH} +ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH} +ENTRYPOINT ["infinity_emb"] + + +# Use a multi-stage build -> production version, with download +# docker buildx build --target=production-with-download \ +# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small . +FROM tested-builder AS production-with-download +# collect model name and engine from build args +ARG MODEL_NAME +RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi +ARG ENGINE +RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi +ARG EXTRA_PACKAGES +RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi +# will exit with 3 if model is downloaded # TODO: better exit code +RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? 
-eq 3 ] +ENTRYPOINT ["infinity_emb"] + +# flash attention fa2 +FROM tested-builder AS production-with-fa2 +RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl +ENTRYPOINT ["infinity_emb"] + +# Use a multi-stage build -> production version +FROM tested-builder AS production +ENTRYPOINT ["infinity_emb"] diff --git a/libs/infinity_emb/Dockerfile.cpu_auto b/libs/infinity_emb/Dockerfile.cpu_auto new file mode 100644 index 00000000..8db05cc3 --- /dev/null +++ b/libs/infinity_emb/Dockerfile.cpu_auto @@ -0,0 +1,115 @@ +# Autogenerated warning: +# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly. +# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd + +FROM ubuntu:22.04 AS base + +ENV PYTHONUNBUFFERED=1 \ + \ + # pip + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + \ + # make poetry create the virtual environment in the project's root + # it gets named `.venv` + POETRY_VIRTUALENVS_IN_PROJECT=true \ + # do not ask any interactive question + POETRY_NO_INTERACTION=1 \ + EXTRAS="all" \ + PYTHON="python3.11" +RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y +WORKDIR /app + +FROM base as builder +# Set the working directory for the app +# Define the version of Poetry to install (default is 1.7.1) +# Define the directory to install Poetry to (default is /opt/poetry) +ARG POETRY_VERSION=1.7.1 +ARG POETRY_HOME=/opt/poetry +# Create a Python virtual environment for Poetry and install it +RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON - +ENV PATH=$POETRY_HOME/bin:$PATH +# Test if Poetry is installed in the expected path +RUN echo "Poetry version:" && poetry --version +# Copy the rest of the 
app source code (this layer will be invalidated and rebuilt whenever the source code changes) +COPY poetry.lock poetry.toml pyproject.toml README.md /app/ +# Install dependencies only +RUN sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml && rm poetry.lock +RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all +COPY infinity_emb infinity_emb +# Install dependency with infinity_emb package +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all + +FROM builder as testing +# install lint and test dependencies +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all +# lint +RUN poetry run ruff . +RUN poetry run black --check . +RUN poetry run mypy . +# pytest +COPY tests tests +# run end to end tests because of duration of build in github ci. +# Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu +# poetry run python -m pytest tests/end_to_end -x # TODO: does not work. +RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ +poetry run python -m pytest tests/end_to_end -x ; \ +else \ +poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ +fi +RUN echo "all tests passed" > "test_results.txt" + + +# Use a multi-stage build -> production version, with download +FROM base AS tested-builder +COPY --from=builder /app /app +# force testing stage to run +COPY --from=testing /app/test_results.txt /app/test_results.txt +ENV HF_HOME=/app/.cache/huggingface +ENV PATH=/app/.venv/bin:$PATH +# do nothing +RUN echo "copied all files" + + +# Export with tensorrt, not recommended. +# docker buildx build --target=production-tensorrt -f Dockerfile . 
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt +ENV PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=off \ + PYTHON="python3.11" +RUN apt-get update && apt-get install python3-dev python3-pip $PYTHON build-essential curl -y +COPY --from=builder /app /app +# force testing stage to run +COPY --from=testing /app/test_results.txt /app/test_results.txt +ENV HF_HOME=/app/.cache/torch +ENV PATH=/app/.venv/bin:$PATH +RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*" +ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH} +ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH} +ENTRYPOINT ["infinity_emb"] + + +# Use a multi-stage build -> production version, with download +# docker buildx build --target=production-with-download \ +# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small . +FROM tested-builder AS production-with-download +# collect model name and engine from build args +ARG MODEL_NAME +RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi +ARG ENGINE +RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi +ARG EXTRA_PACKAGES +RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi +# will exit with 3 if model is downloaded # TODO: better exit code +RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? 
-eq 3 ] +ENTRYPOINT ["infinity_emb"] + +# flash attention fa2 +FROM tested-builder AS production-with-fa2 +RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl +ENTRYPOINT ["infinity_emb"] + +# Use a multi-stage build -> production version +FROM tested-builder AS production +ENTRYPOINT ["infinity_emb"] diff --git a/libs/infinity_emb/Dockerfile.jinja2 b/libs/infinity_emb/Dockerfile.jinja2 new file mode 100644 index 00000000..9c2079e6 --- /dev/null +++ b/libs/infinity_emb/Dockerfile.jinja2 @@ -0,0 +1,115 @@ +# Autogenerated warning: +# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly. +# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd + +FROM {{ base_image }} AS base + +ENV PYTHONUNBUFFERED=1 \ + \ + # pip + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + \ + # make poetry create the virtual environment in the project's root + # it gets named `.venv` + POETRY_VIRTUALENVS_IN_PROJECT=true \ + # do not ask any interactive question + POETRY_NO_INTERACTION=1 \ + EXTRAS="all" \ + PYTHON="python3.11" +RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y +WORKDIR /app + +FROM base as builder +# Set the working directory for the app +# Define the version of Poetry to install (default is 1.7.1) +# Define the directory to install Poetry to (default is /opt/poetry) +ARG POETRY_VERSION=1.7.1 +ARG POETRY_HOME=/opt/poetry +# Create a Python virtual environment for Poetry and install it +RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON - +ENV PATH=$POETRY_HOME/bin:$PATH +# Test if Poetry is installed in the expected path +RUN echo "Poetry version:" && poetry --version +# Copy the rest of the app 
source code (this layer will be invalidated and rebuilt whenever the source code changes) +COPY poetry.lock poetry.toml pyproject.toml README.md /app/ +# Install dependencies only +RUN {{pyproject_sed}} +RUN {{main_install}} && poetry cache clear pypi --all +COPY infinity_emb infinity_emb +# Install dependency with infinity_emb package +RUN {{main_install|replace("--no-root","")}} && poetry cache clear pypi --all + +FROM builder as testing +# install lint and test dependencies +RUN {{main_install|replace("--without", "--with")|replace("--no-root","")}} && poetry cache clear pypi --all +# lint +RUN poetry run ruff . +RUN poetry run black --check . +RUN poetry run mypy . +# pytest +COPY tests tests +# run end to end tests because of duration of build in github ci. +# Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu +# poetry run python -m pytest tests/end_to_end -x # TODO: does not work. +RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ +poetry run python -m pytest tests/end_to_end -x ; \ +else \ +poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ +fi +RUN echo "all tests passed" > "test_results.txt" + + +# Use a multi-stage build -> production version, with download +FROM base AS tested-builder +COPY --from=builder /app /app +# force testing stage to run +COPY --from=testing /app/test_results.txt /app/test_results.txt +ENV HF_HOME=/app/.cache/huggingface +ENV PATH=/app/.venv/bin:$PATH +# do nothing +RUN echo "copied all files" + + +# Export with tensorrt, not recommended. +# docker buildx build --target=production-tensorrt -f Dockerfile . 
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt +ENV PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=off \ + PYTHON="python3.11" +RUN apt-get update && apt-get install python3-dev python3-pip $PYTHON build-essential curl -y +COPY --from=builder /app /app +# force testing stage to run +COPY --from=testing /app/test_results.txt /app/test_results.txt +ENV HF_HOME=/app/.cache/torch +ENV PATH=/app/.venv/bin:$PATH +RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*" +ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH} +ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH} +ENTRYPOINT ["infinity_emb"] + + +# Use a multi-stage build -> production version, with download +# docker buildx build --target=production-with-download \ +# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small . +FROM tested-builder AS production-with-download +# collect model name and engine from build args +ARG MODEL_NAME +RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi +ARG ENGINE +RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi +ARG EXTRA_PACKAGES +RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi +# will exit with 3 if model is downloaded # TODO: better exit code +RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? 
-eq 3 ] +ENTRYPOINT ["infinity_emb"] + +# flash attention fa2 +FROM tested-builder AS production-with-fa2 +RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl +ENTRYPOINT ["infinity_emb"] + +# Use a multi-stage build -> production version +FROM tested-builder AS production +ENTRYPOINT ["infinity_emb"] diff --git a/libs/infinity_emb/Dockerfile b/libs/infinity_emb/Dockerfile.nvidia_auto similarity index 88% rename from libs/infinity_emb/Dockerfile rename to libs/infinity_emb/Dockerfile.nvidia_auto index 80193b83..6ec70999 100644 --- a/libs/infinity_emb/Dockerfile +++ b/libs/infinity_emb/Dockerfile.nvidia_auto @@ -1,4 +1,7 @@ -# Use the Python base image +# Autogenerated warning: +# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly. +# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd + FROM nvidia/cuda:12.1.1-base-ubuntu22.04 AS base ENV PYTHONUNBUFFERED=1 \ @@ -32,16 +35,15 @@ RUN echo "Poetry version:" && poetry --version # Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes) COPY poetry.lock poetry.toml pyproject.toml README.md /app/ # Install dependencies only -RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test +RUN true +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --no-root --without lint,test && poetry cache clear pypi --all COPY infinity_emb infinity_emb # Install dependency with infinity_emb package -RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test -# remove cache -RUN poetry cache clear pypi --all +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all FROM builder as testing # install lint and test 
dependencies -RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all # lint RUN poetry run ruff . RUN poetry run black --check . @@ -50,7 +52,7 @@ RUN poetry run mypy . COPY tests tests # run end to end tests because of duration of build in github ci. # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu -# poetry run python -m pytest tests/end_to_end -x +# poetry run python -m pytest tests/end_to_end -x # TODO: does not work. RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ poetry run python -m pytest tests/end_to_end -x ; \ else \ diff --git a/libs/infinity_emb/Makefile b/libs/infinity_emb/Makefile index b9e6faa4..4c98942e 100644 --- a/libs/infinity_emb/Makefile +++ b/libs/infinity_emb/Makefile @@ -1,4 +1,4 @@ -.PHONY: all clean docs_build docs_clean docs_linkcheck api_docs_build api_docs_clean api_docs_linkcheck format lint test tests test_watch integration_tests docker_tests help extended_tests +.PHONY: all clean docs_build docs_clean docs_linkcheck api_docs_build api_docs_clean api_docs_linkcheck format lint test tests test_watch template_docker integration_tests docker_tests help extended_tests # Default target executed when no arguments are given to make. 
all: help @@ -42,6 +42,11 @@ format format_diff: [ "$(PYTHON_FILES)" = "" ] || poetry run black $(PYTHON_FILES) [ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES) +template_docker: + jinja2 Dockerfile.jinja2 docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto + jinja2 Dockerfile.jinja2 docker.template.yaml --format=yaml -s cpu > Dockerfile.cpu_auto + jinja2 Dockerfile.jinja2 docker.template.yaml --format=yaml -s nvidia > Dockerfile.nvidia_auto + poetry_check: poetry check diff --git a/libs/infinity_emb/docker.template.yaml b/libs/infinity_emb/docker.template.yaml new file mode 100644 index 00000000..acfb2131 --- /dev/null +++ b/libs/infinity_emb/docker.template.yaml @@ -0,0 +1,19 @@ +# 1. Guide: pip install jinja2 jinja2-cli +nvidia: + # 2 .command: jinja2 Dockerfile.jinja2 docker.template.yaml --format=yaml -s nvidia > Dockerfile.nvidia_auto + base_image: 'nvidia/cuda:12.1.1-base-ubuntu22.04' + main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test + pyproject_sed: "true" + +cpu: + # 2. command: jinja2 Dockerfile.jinja2 docker.template.yaml --format=yaml -s cpu > Dockerfile.cpu_auto + base_image: 'ubuntu:22.04' + main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test + pyproject_sed: sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml && rm poetry.lock + +amd: + # 2 . 
command: jinja2 Dockerfile.jinja2 docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto + base_image: 'rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0' + main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test + pyproject_sed: sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|#|' pyproject.toml && rm poetry.lock + diff --git a/libs/infinity_emb/infinity_emb/transformer/audio/torch.py b/libs/infinity_emb/infinity_emb/transformer/audio/torch.py index 41ce443d..311cc8fd 100644 --- a/libs/infinity_emb/infinity_emb/transformer/audio/torch.py +++ b/libs/infinity_emb/infinity_emb/transformer/audio/torch.py @@ -16,7 +16,7 @@ if CHECK_TORCH.is_available: import torch -if CHECK_TRANSFORMERS.is_available: +if CHECK_TORCH.is_available and CHECK_TRANSFORMERS.is_available: from transformers import AutoModel, AutoProcessor # type: ignore diff --git a/libs/infinity_emb/poetry.lock b/libs/infinity_emb/poetry.lock index d5956cee..35b56e19 100644 --- a/libs/infinity_emb/poetry.lock +++ b/libs/infinity_emb/poetry.lock @@ -1339,6 +1339,26 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jinja2-cli" +version = "0.8.2" +description = "A CLI interface to Jinja2" +optional = false +python-versions = "*" +files = [ + {file = "jinja2-cli-0.8.2.tar.gz", hash = "sha256:a16bb1454111128e206f568c95938cdef5b5a139929378f72bb8cf6179e18e50"}, + {file = "jinja2_cli-0.8.2-py2.py3-none-any.whl", hash = "sha256:b91715c79496beaddad790171e7258a87db21c1a0b6d2b15bca3ba44b74aac5d"}, +] + +[package.dependencies] +jinja2 = "*" + +[package.extras] +tests = ["flake8", "jinja2", "pytest"] +toml = ["jinja2", "toml"] +xml = ["jinja2", "xmltodict"] +yaml = ["jinja2", "pyyaml"] + [[package]] name = "jiter" version = "0.6.1" @@ -2270,13 +2290,13 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] [[package]] name = "optimum" -version = "1.17.1" 
+version = "1.23.1" description = "Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to integrate third-party libraries from Hardware Partners and interface with their specific functionality." optional = true python-versions = ">=3.7.0" files = [ - {file = "optimum-1.17.1-py3-none-any.whl", hash = "sha256:508bc55db3c9434f4e8d5a30c39a46ac63c4cdb45bcc5a641b6c1c77cae88d23"}, - {file = "optimum-1.17.1.tar.gz", hash = "sha256:e59af717e8691b11903fe2cfb8c6efd6f6798b0417f3e70d231e578a02448ceb"}, + {file = "optimum-1.23.1-py3-none-any.whl", hash = "sha256:9a910601b665ac617ef14df99a44fe06e51040bcf945093f7b111d0e692fa5ac"}, + {file = "optimum-1.23.1.tar.gz", hash = "sha256:bdef34c20d702a0856b0f35720287f561e55854e0fc4655512a99365ac480dde"}, ] [package.dependencies] @@ -2294,30 +2314,35 @@ packaging = "*" protobuf = {version = ">=3.20.1", optional = true, markers = "extra == \"onnxruntime\""} sympy = "*" torch = ">=1.11" -transformers = {version = ">=4.26.0", extras = ["sentencepiece"]} +transformers = [ + {version = ">=4.29", extras = ["sentencepiece"]}, + {version = "<4.46.0", optional = true, markers = "extra == \"onnxruntime\""}, +] [package.extras] amd = ["optimum-amd"] benchmark = ["evaluate (>=0.2.0)", "optuna", "scikit-learn", "seqeval", "torchvision", "tqdm"] -dev = ["Pillow", "accelerate", "black (>=23.1,<24.0)", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest", "pytest-xdist", "requests", "rjieba", "ruff (==0.1.5)", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"] +dev = ["Pillow", "accelerate", "black (>=23.1,<24.0)", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest (<=8.0.0)", "pytest-xdist", "requests", "rjieba", "ruff (==0.1.5)", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"] diffusers = ["diffusers"] doc-build = ["accelerate"] -exporters = ["onnx", "onnxruntime", "timm"] -exporters-gpu = ["onnx", 
"onnxruntime-gpu", "timm"] -exporters-tf = ["h5py", "numpy (<1.24.0)", "onnx", "onnxruntime", "tensorflow (>=2.4,<=2.12.1)", "tf2onnx", "timm"] +exporters = ["onnx", "onnxruntime", "timm", "transformers (<4.46.0)"] +exporters-gpu = ["onnx", "onnxruntime-gpu", "timm", "transformers (<4.46.0)"] +exporters-tf = ["datasets (<=2.16)", "h5py", "numpy (<1.24.0)", "onnx", "onnxruntime", "tensorflow (>=2.4,<=2.12.1)", "tf2onnx", "timm", "transformers[sentencepiece] (>=4.26,<4.38)"] furiosa = ["optimum-furiosa"] graphcore = ["optimum-graphcore"] -habana = ["optimum-habana", "transformers (>=4.37.0,<4.38.0)"] -intel = ["optimum-intel (>=1.15.0)"] -neural-compressor = ["optimum-intel[neural-compressor] (>=1.15.0)"] -neuron = ["optimum-neuron[neuron]"] -neuronx = ["optimum-neuron[neuronx]"] -nncf = ["optimum-intel[nncf] (>=1.15.0)"] -onnxruntime = ["datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime (>=1.11.0)", "protobuf (>=3.20.1)"] -onnxruntime-gpu = ["accelerate", "datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime-gpu (>=1.11.0)", "protobuf (>=3.20.1)"] -openvino = ["optimum-intel[openvino] (>=1.15.0)"] +habana = ["optimum-habana", "transformers (>=4.43.0,<4.44.0)"] +intel = ["optimum-intel (>=1.18.0)"] +ipex = ["optimum-intel[ipex] (>=1.18.0)"] +neural-compressor = ["optimum-intel[neural-compressor] (>=1.18.0)"] +neuron = ["optimum-neuron[neuron] (>=0.0.20)", "transformers (>=4.36.2,<4.42.0)"] +neuronx = ["optimum-neuron[neuronx] (>=0.0.20)", "transformers (>=4.36.2,<4.42.0)"] +nncf = ["optimum-intel[nncf] (>=1.18.0)"] +onnxruntime = ["datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime (>=1.11.0)", "protobuf (>=3.20.1)", "transformers (<4.46.0)"] +onnxruntime-gpu = ["accelerate", "datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime-gpu (>=1.11.0)", "protobuf (>=3.20.1)", "transformers (<4.46.0)"] +openvino = ["optimum-intel[openvino] (>=1.18.0)"] quality = ["black (>=23.1,<24.0)", "ruff (==0.1.5)"] -tests = ["Pillow", "accelerate", "diffusers (>=0.17.0)", 
"einops", "invisible-watermark", "parameterized", "pytest", "pytest-xdist", "requests", "rjieba", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"] +quanto = ["optimum-quanto (>=0.2.4)"] +tests = ["Pillow", "accelerate", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest (<=8.0.0)", "pytest-xdist", "requests", "rjieba", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"] [[package]] name = "orjson" @@ -5054,7 +5079,7 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] -all = ["colpali-engine", "ctranslate2", "diskcache", "einops", "fastapi", "optimum", "orjson", "pillow", "posthog", "prometheus-fastapi-instrumentator", "pydantic", "rich", "sentence-transformers", "soundfile", "timm", "torch", "typer", "uvicorn"] +all = ["colpali-engine", "ctranslate2", "diskcache", "einops", "fastapi", "optimum", "orjson", "pillow", "posthog", "prometheus-fastapi-instrumentator", "pydantic", "rich", "sentence-transformers", "soundfile", "timm", "torch", "torchvision", "typer", "uvicorn"] audio = ["soundfile"] cache = ["diskcache"] ct2 = ["ctranslate2", "sentence-transformers", "torch", "transformers"] @@ -5065,9 +5090,9 @@ optimum = ["optimum"] server = ["fastapi", "orjson", "posthog", "prometheus-fastapi-instrumentator", "pydantic", "rich", "typer", "uvicorn"] tensorrt = ["tensorrt"] torch = ["sentence-transformers", "torch"] -vision = ["colpali-engine", "pillow", "timm"] +vision = ["colpali-engine", "pillow", "timm", "torchvision"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "ddb2234dae1ebb22503ff767029fcaa8a7f15b394ace3f15864d4547fd866966" +content-hash = "88abd4ba04370e75955d60208cbaa746d0de66247d123cfba0d8a5a4e79371dd" diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index acab8757..4180267e 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -1,3 +1,4 
@@ + [tool.poetry] name = "infinity_emb" version = "0.0.63" @@ -26,17 +27,20 @@ typer = {version = "^0.9.0", optional=true, extras = ["all"]} pydantic = {version = ">=2.4.0,<3", optional=true} posthog = {version = "*", optional=true} # backend +# pin torch to a specific source, but default to pypi. use sed to overwrite. torch = {version = ">=2.2.1", source = "pypi", optional=true} sentence-transformers = {version = "^3.0.1", optional=true} transformers = {version = ">4.34.0,<=5.0", optional=true} ctranslate2 = {version = "^4.0.0", optional=true} -optimum = {version = ">=1.16.2", optional=true, extras=["onnxruntime"]} +optimum = {version = ">=1.23.1", optional=true, extras=["onnxruntime"]} hf_transfer = {version=">=0.1.5"} einops = {version = "*", optional=true} # vision pillow = {version = "*", optional=true} timm = {version = "*", optional=true} colpali-engine = {version="^0.3.1", optional=true} +# pin torchvision to a specific source, but default to pypi. use sed to overwrite. +torchvision = {version = "*", source = "pypi", optional=true} # cache diskcache = {version = "*", optional=true} # gpu @@ -60,6 +64,8 @@ mypy = "^1.5.1" requests = "2.28.1" types-requests = "2.28.1" openai = "*" # 1.51.0 works +jinja2 = "*" +jinja2-cli = "*" # preferred dev dependencies torch = "2.4.1" @@ -90,7 +96,7 @@ torch=["sentence-transformers","torch"] einops=["einops"] logging=["rich"] cache=["diskcache"] -vision=["colpali-engine","pillow","timm"] +vision=["colpali-engine","pillow","timm","torchvision"] audio=["soundfile"] server=[ "fastapi", @@ -118,6 +124,7 @@ all=[ "sentence-transformers", "timm", "torch", + "torchvision", "typer", "uvicorn", "soundfile" @@ -126,6 +133,18 @@ all=[ tensorrt=["tensorrt"] onnxruntime-gpu=["onnxruntime-gpu"] +[[tool.poetry.source]] +# used for monkey-patching cpu onlu +name = "pytorch_cpu" +url = "https://download.pytorch.org/whl/cpu" +priority = "explicit" + +[[tool.poetry.source]] +# used for monkey-patching rocm only +name = "pytorch_rocm" +url = 
"https://download.pytorch.org/whl/rocm6.1" +priority = "explicit" + [tool.pytest.ini_options] markers = [ "performance: tests that measure performance (deselect with '-m \"not performance\"')", From cfa4f02a2cb4f99098668a3e52f6a572f8350c09 Mon Sep 17 00:00:00 2001 From: michaelfeil Date: Mon, 14 Oct 2024 23:44:45 -0700 Subject: [PATCH 2/3] add jinja docker templates 2 --- libs/infinity_emb/{Dockerfile.nvidia_auto => Dockerfile} | 6 +++--- libs/infinity_emb/Makefile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) rename libs/infinity_emb/{Dockerfile.nvidia_auto => Dockerfile} (92%) diff --git a/libs/infinity_emb/Dockerfile.nvidia_auto b/libs/infinity_emb/Dockerfile similarity index 92% rename from libs/infinity_emb/Dockerfile.nvidia_auto rename to libs/infinity_emb/Dockerfile index 6ec70999..4168262a 100644 --- a/libs/infinity_emb/Dockerfile.nvidia_auto +++ b/libs/infinity_emb/Dockerfile @@ -36,14 +36,14 @@ RUN echo "Poetry version:" && poetry --version COPY poetry.lock poetry.toml pyproject.toml README.md /app/ # Install dependencies only RUN true -RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --no-root --without lint,test && poetry cache clear pypi --all +RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all COPY infinity_emb infinity_emb # Install dependency with infinity_emb package -RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all FROM builder as testing # install lint and test dependencies -RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all # lint RUN poetry run ruff . 
RUN poetry run black --check . diff --git a/libs/infinity_emb/Makefile b/libs/infinity_emb/Makefile index 4c98942e..5392d0aa 100644 --- a/libs/infinity_emb/Makefile +++ b/libs/infinity_emb/Makefile @@ -3,7 +3,7 @@ # Default target executed when no arguments are given to make. all: help -precommit : | format spell_fix spell_check lint poetry_check cli_v2_docs openapi test +precommit : | format spell_fix spell_check lint poetry_check cli_v2_docs template_docker openapi test ###################### # TESTING AND COVERAGE From e300d76fbee8bd263d7aaa22c8c4063ff61b379c Mon Sep 17 00:00:00 2001 From: michaelfeil Date: Tue, 15 Oct 2024 00:09:25 -0700 Subject: [PATCH 3/3] fix docker --- libs/infinity_emb/{Dockerfile => Dockerfile.nvidia_auto} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename libs/infinity_emb/{Dockerfile => Dockerfile.nvidia_auto} (100%) diff --git a/libs/infinity_emb/Dockerfile b/libs/infinity_emb/Dockerfile.nvidia_auto similarity index 100% rename from libs/infinity_emb/Dockerfile rename to libs/infinity_emb/Dockerfile.nvidia_auto