diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index c527a1b0..1dbb42f2 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -32,6 +32,17 @@ jobs: platforms: "linux/amd64" secrets: inherit + docker-container-push-onnx-trt: + uses: + ./.github/workflows/release_docker_container.yaml + with: + # working-directory: libs/infinity_emb + dockerfile: libs/infinity_emb/Dockerfile.trt_onnx_auto + image: michaelf34/infinity + appendix_tag: "-trt-onnx" + platforms: "linux/amd64" + secrets: inherit + # docker-container-push-amd: # uses: # ./.github/workflows/release_docker_container.yaml diff --git a/libs/infinity_emb/Docker.template.yaml b/libs/infinity_emb/Docker.template.yaml index ce28fd8f..3f782264 100644 --- a/libs/infinity_emb/Docker.template.yaml +++ b/libs/infinity_emb/Docker.template.yaml @@ -1,20 +1,35 @@ +# run all commands here via: `make template_docker` + # 1. Guide: pip install jinja2 jinja2-cli nvidia: # 2 .command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s nvidia > Dockerfile.nvidia_auto base_image: 'nvidia/cuda:12.1.1-base-ubuntu22.04' - main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test - cpu: # 2. command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s cpu > Dockerfile.cpu_auto base_image: 'ubuntu:22.04' - main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test pyproject_sed: RUN sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml && rm poetry.lock amd: # 2 . command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto base_image: 'rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0' - main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test - pyproject_sed: RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml && sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock + pyproject_sed: | + RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml + RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml + RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock poetry_virtualenvs_create: "false" poetry_virtualenvs_in_project: "false" + poetry_extras: "all onnxruntime-gpu" python_version: python3.10 + +trt: + base_image: nvidia/cuda:12.1.1-devel-ubuntu22.04 + poetry_extras: "all onnxruntime-gpu" + extra_installs_main: | + # Install utils for tensorrt + RUN apt-get install -y --no-install-recommends openmpi-bin libopenmpi-dev git git-lfs python3-pip + RUN poetry run $PYTHON -m pip install --no-cache-dir flash-attn --no-build-isolation + RUN poetry run $PYTHON -m pip install --no-cache-dir "tensorrt==10.2.0" "tensorrt_lean==10.2.0" "tensorrt_dispatch==10.2.0" + ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/$PYTHON/dist-packages/tensorrt/ + # ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH} + # ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH} + python_version: python3.10 \ No newline at end of file diff --git a/libs/infinity_emb/Dockerfile.amd_auto b/libs/infinity_emb/Dockerfile.amd_auto index 49e64085..7db2e4bc 100644 --- a/libs/infinity_emb/Dockerfile.amd_auto +++ b/libs/infinity_emb/Dockerfile.amd_auto @@ -17,12 +17,13 @@ ENV PYTHONUNBUFFERED=1 \ POETRY_VIRTUALENVS_IN_PROJECT="false" \ # do not ask any interactive question POETRY_NO_INTERACTION=1 \ - EXTRAS="all" \ + EXTRAS="all onnxruntime-gpu" \ PYTHON="python3.10" -RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y +RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl WORKDIR /app FROM base as builder + # Set the working directory for the app # Define the version of Poetry to install (default is 1.7.1) # Define the directory to install Poetry to (default is /opt/poetry) @@ -36,29 +37,44 @@ RUN echo "Poetry version:" && poetry --version # Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes) COPY poetry.lock poetry.toml pyproject.toml README.md /app/ # Install dependencies only -RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml && sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock +RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml +RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml +RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock + RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all COPY infinity_emb infinity_emb # Install dependency with infinity_emb package RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all +# + FROM builder as testing # install lint and test dependencies RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all # lint -RUN poetry run ruff . -RUN poetry run black --check . +RUN poetry run ruff check . RUN poetry run mypy . # pytest COPY tests tests # run end to end tests because of duration of build in github ci. # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu # poetry run python -m pytest tests/end_to_end -x # TODO: does not work. -RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ -poetry run python -m pytest tests/end_to_end -x ; \ -else \ -poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ -fi +RUN if [ -z "$TARGETPLATFORM" ]; then \ + ARCH=$(uname -m); \ + if [ "$ARCH" = "x86_64" ]; then \ + TARGETPLATFORM="linux/amd64"; \ + elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \ + TARGETPLATFORM="linux/arm64"; \ + else \ + echo "Unsupported architecture: $ARCH"; exit 1; \ + fi; \ + fi; \ + echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \ + if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \ + poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ + else \ + poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \ + fi RUN echo "all tests passed" > "test_results.txt" @@ -100,17 +116,11 @@ ARG MODEL_NAME RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi ARG ENGINE RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi -ARG EXTRA_PACKAGES -RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi + # will exit with 3 if model is downloaded # TODO: better exit code RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ] ENTRYPOINT ["infinity_emb"] -# flash attention fa2 -FROM tested-builder AS production-with-fa2 -RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl -ENTRYPOINT ["infinity_emb"] - # Use a multi-stage build -> production version FROM tested-builder AS production ENTRYPOINT ["infinity_emb"] diff --git a/libs/infinity_emb/Dockerfile.cpu_auto b/libs/infinity_emb/Dockerfile.cpu_auto index cf06ef92..f69637c9 100644 --- a/libs/infinity_emb/Dockerfile.cpu_auto +++ b/libs/infinity_emb/Dockerfile.cpu_auto @@ -19,10 +19,11 @@ ENV PYTHONUNBUFFERED=1 \ POETRY_NO_INTERACTION=1 \ EXTRAS="all" \ PYTHON="python3.11" -RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y +RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl WORKDIR /app FROM base as builder + # Set the working directory for the app # Define the version of Poetry to install (default is 1.7.1) # Define the directory to install Poetry to (default is /opt/poetry) @@ -41,24 +42,36 @@ RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --w COPY infinity_emb infinity_emb # Install dependency with infinity_emb package RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all +# + FROM builder as testing # install lint and test dependencies RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all # lint -RUN poetry run ruff . -RUN poetry run black --check . +RUN poetry run ruff check . RUN poetry run mypy . # pytest COPY tests tests # run end to end tests because of duration of build in github ci. # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu # poetry run python -m pytest tests/end_to_end -x # TODO: does not work. -RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ -poetry run python -m pytest tests/end_to_end -x ; \ -else \ -poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ -fi +RUN if [ -z "$TARGETPLATFORM" ]; then \ + ARCH=$(uname -m); \ + if [ "$ARCH" = "x86_64" ]; then \ + TARGETPLATFORM="linux/amd64"; \ + elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \ + TARGETPLATFORM="linux/arm64"; \ + else \ + echo "Unsupported architecture: $ARCH"; exit 1; \ + fi; \ + fi; \ + echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \ + if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \ + poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ + else \ + poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \ + fi RUN echo "all tests passed" > "test_results.txt" @@ -100,17 +113,11 @@ ARG MODEL_NAME RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi ARG ENGINE RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi -ARG EXTRA_PACKAGES -RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi + # will exit with 3 if model is downloaded # TODO: better exit code RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ] ENTRYPOINT ["infinity_emb"] -# flash attention fa2 -FROM tested-builder AS production-with-fa2 -RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl -ENTRYPOINT ["infinity_emb"] - # Use a multi-stage build -> production version FROM tested-builder AS production ENTRYPOINT ["infinity_emb"] diff --git a/libs/infinity_emb/Dockerfile.jinja2 b/libs/infinity_emb/Dockerfile.jinja2 index 35e1d07c..0ced9436 100644 --- a/libs/infinity_emb/Dockerfile.jinja2 +++ b/libs/infinity_emb/Dockerfile.jinja2 @@ -17,12 +17,13 @@ ENV PYTHONUNBUFFERED=1 \ POETRY_VIRTUALENVS_IN_PROJECT="{{poetry_virtualenvs_in_project | default('true')}}" \ # do not ask any interactive question POETRY_NO_INTERACTION=1 \ - EXTRAS="all" \ + EXTRAS="{{poetry_extras | default('all')}}" \ PYTHON="{{python_version | default('python3.11')}}" -RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y +RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl WORKDIR /app FROM base as builder +{% set main_install2 = "poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test" %} # Set the working directory for the app # Define the version of Poetry to install (default is 1.7.1) # Define the directory to install Poetry to (default is /opt/poetry) @@ -37,28 +38,40 @@ RUN echo "Poetry version:" && poetry --version COPY poetry.lock poetry.toml pyproject.toml README.md /app/ # Install dependencies only {{pyproject_sed | default('#')}} -RUN {{main_install}} && poetry cache clear pypi --all +RUN {{main_install2}} && poetry cache clear pypi --all COPY infinity_emb infinity_emb # Install dependency with infinity_emb package -RUN {{main_install|replace("--no-root","")}} && poetry cache clear pypi --all +RUN {{main_install2|replace("--no-root","")}} && poetry cache clear pypi --all +{{extra_installs_main | default('#')}} + FROM builder as testing # install lint and test dependencies -RUN {{main_install|replace("--without", "--with")|replace("--no-root","")}} && poetry cache clear pypi --all +RUN {{main_install2|replace("--without", "--with")|replace("--no-root","")}} && poetry cache clear pypi --all # lint -RUN poetry run ruff . -RUN poetry run black --check . +RUN poetry run ruff check . RUN poetry run mypy . # pytest COPY tests tests # run end to end tests because of duration of build in github ci. # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu # poetry run python -m pytest tests/end_to_end -x # TODO: does not work. -RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ -poetry run python -m pytest tests/end_to_end -x ; \ -else \ -poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ -fi +RUN if [ -z "$TARGETPLATFORM" ]; then \ + ARCH=$(uname -m); \ + if [ "$ARCH" = "x86_64" ]; then \ + TARGETPLATFORM="linux/amd64"; \ + elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \ + TARGETPLATFORM="linux/arm64"; \ + else \ + echo "Unsupported architecture: $ARCH"; exit 1; \ + fi; \ + fi; \ + echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \ + if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \ + poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ + else \ + poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \ + fi RUN echo "all tests passed" > "test_results.txt" @@ -100,17 +113,11 @@ ARG MODEL_NAME RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi ARG ENGINE RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi -ARG EXTRA_PACKAGES -RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi + # will exit with 3 if model is downloaded # TODO: better exit code RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ] ENTRYPOINT ["infinity_emb"] -# flash attention fa2 -FROM tested-builder AS production-with-fa2 -RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl -ENTRYPOINT ["infinity_emb"] - # Use a multi-stage build -> production version FROM tested-builder AS production ENTRYPOINT ["infinity_emb"] diff --git a/libs/infinity_emb/Dockerfile.nvidia_auto b/libs/infinity_emb/Dockerfile.nvidia_auto index 56146b13..34750bee 100644 --- a/libs/infinity_emb/Dockerfile.nvidia_auto +++ b/libs/infinity_emb/Dockerfile.nvidia_auto @@ -19,10 +19,11 @@ ENV PYTHONUNBUFFERED=1 \ POETRY_NO_INTERACTION=1 \ EXTRAS="all" \ PYTHON="python3.11" -RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y +RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl WORKDIR /app FROM base as builder + # Set the working directory for the app # Define the version of Poetry to install (default is 1.7.1) # Define the directory to install Poetry to (default is /opt/poetry) @@ -41,24 +42,36 @@ RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --w COPY infinity_emb infinity_emb # Install dependency with infinity_emb package RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all +# + FROM builder as testing # install lint and test dependencies RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all # lint -RUN poetry run ruff . -RUN poetry run black --check . +RUN poetry run ruff check . RUN poetry run mypy . # pytest COPY tests tests # run end to end tests because of duration of build in github ci. # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu # poetry run python -m pytest tests/end_to_end -x # TODO: does not work. -RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ -poetry run python -m pytest tests/end_to_end -x ; \ -else \ -poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ -fi +RUN if [ -z "$TARGETPLATFORM" ]; then \ + ARCH=$(uname -m); \ + if [ "$ARCH" = "x86_64" ]; then \ + TARGETPLATFORM="linux/amd64"; \ + elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \ + TARGETPLATFORM="linux/arm64"; \ + else \ + echo "Unsupported architecture: $ARCH"; exit 1; \ + fi; \ + fi; \ + echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \ + if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \ + poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ + else \ + poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \ + fi RUN echo "all tests passed" > "test_results.txt" @@ -100,17 +113,11 @@ ARG MODEL_NAME RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi ARG ENGINE RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi -ARG EXTRA_PACKAGES -RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi + # will exit with 3 if model is downloaded # TODO: better exit code RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ] ENTRYPOINT ["infinity_emb"] -# flash attention fa2 -FROM tested-builder AS production-with-fa2 -RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl -ENTRYPOINT ["infinity_emb"] - # Use a multi-stage build -> production version FROM tested-builder AS production ENTRYPOINT ["infinity_emb"] diff --git a/libs/infinity_emb/Dockerfile.trt_onnx_auto b/libs/infinity_emb/Dockerfile.trt_onnx_auto new file mode 100644 index 00000000..98409f89 --- /dev/null +++ b/libs/infinity_emb/Dockerfile.trt_onnx_auto @@ -0,0 +1,130 @@ +# Autogenerated warning: +# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly. +# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd + +FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 AS base + +ENV PYTHONUNBUFFERED=1 \ + \ + # pip + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + \ + # make poetry create the virtual environment in the project's root + # it gets named `.venv` + POETRY_VIRTUALENVS_CREATE="true" \ + POETRY_VIRTUALENVS_IN_PROJECT="true" \ + # do not ask any interactive question + POETRY_NO_INTERACTION=1 \ + EXTRAS="all onnxruntime-gpu" \ + PYTHON="python3.10" +RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl +WORKDIR /app + +FROM base as builder + +# Set the working directory for the app +# Define the version of Poetry to install (default is 1.7.1) +# Define the directory to install Poetry to (default is /opt/poetry) +ARG POETRY_VERSION=1.7.1 +ARG POETRY_HOME=/opt/poetry +# Create a Python virtual environment for Poetry and install it +RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON - +ENV PATH=$POETRY_HOME/bin:$PATH +# Test if Poetry is installed in the expected path +RUN echo "Poetry version:" && poetry --version +# Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes) +COPY poetry.lock poetry.toml pyproject.toml README.md /app/ +# Install dependencies only +# +RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all +COPY infinity_emb infinity_emb +# Install dependency with infinity_emb package +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all +# Install utils for tensorrt +RUN apt-get install -y --no-install-recommends openmpi-bin libopenmpi-dev git git-lfs python3-pip +RUN poetry run $PYTHON -m pip install --no-cache-dir flash-attn --no-build-isolation +RUN poetry run $PYTHON -m pip install --no-cache-dir "tensorrt==10.2.0" "tensorrt_lean==10.2.0" "tensorrt_dispatch==10.2.0" +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/$PYTHON/dist-packages/tensorrt/ +# ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH} +# ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH} + + + +FROM builder as testing +# install lint and test dependencies +RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all +# lint +RUN poetry run ruff check . +RUN poetry run mypy . +# pytest +COPY tests tests +# run end to end tests because of duration of build in github ci. +# Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu +# poetry run python -m pytest tests/end_to_end -x # TODO: does not work. +RUN if [ -z "$TARGETPLATFORM" ]; then \ + ARCH=$(uname -m); \ + if [ "$ARCH" = "x86_64" ]; then \ + TARGETPLATFORM="linux/amd64"; \ + elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \ + TARGETPLATFORM="linux/arm64"; \ + else \ + echo "Unsupported architecture: $ARCH"; exit 1; \ + fi; \ + fi; \ + echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \ + if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \ + poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \ + else \ + poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \ + fi +RUN echo "all tests passed" > "test_results.txt" + + +# Use a multi-stage build -> production version, with download +FROM base AS tested-builder +COPY --from=builder /app /app +# force testing stage to run +COPY --from=testing /app/test_results.txt /app/test_results.txt +ENV HF_HOME=/app/.cache/huggingface +ENV PATH=/app/.venv/bin:$PATH +# do nothing +RUN echo "copied all files" + + +# Export with tensorrt, not recommended. +# docker buildx build --target=production-tensorrt -f Dockerfile . +FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt +ENV PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=off \ + PYTHON="python3.11" +RUN apt-get update && apt-get install python3-dev python3-pip $PYTHON build-essential curl -y +COPY --from=builder /app /app +# force testing stage to run +COPY --from=testing /app/test_results.txt /app/test_results.txt +ENV HF_HOME=/app/.cache/torch +ENV PATH=/app/.venv/bin:$PATH +RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*" +ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH} +ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH} +ENTRYPOINT ["infinity_emb"] + + +# Use a multi-stage build -> production version, with download +# docker buildx build --target=production-with-download \ +# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small . +FROM tested-builder AS production-with-download +# collect model name and engine from build args +ARG MODEL_NAME +RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi +ARG ENGINE +RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi + +# will exit with 3 if model is downloaded # TODO: better exit code +RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ] +ENTRYPOINT ["infinity_emb"] + +# Use a multi-stage build -> production version +FROM tested-builder AS production +ENTRYPOINT ["infinity_emb"] diff --git a/libs/infinity_emb/Makefile b/libs/infinity_emb/Makefile index 1502a507..f6ca0c0f 100644 --- a/libs/infinity_emb/Makefile +++ b/libs/infinity_emb/Makefile @@ -44,6 +44,7 @@ template_docker: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s cpu > Dockerfile.cpu_auto jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s nvidia > Dockerfile.nvidia_auto + jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s trt > Dockerfile.trt_onnx_auto poetry_check: poetry check