From 06a75c239cd68fb0d3c7386403341325a41604b3 Mon Sep 17 00:00:00 2001 From: gokturkDev Date: Thu, 18 Jul 2024 17:25:12 +0300 Subject: [PATCH 1/4] add dockerfile for flash_attn setup --- libs/infinity_emb/Dockerfile.flash | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 libs/infinity_emb/Dockerfile.flash diff --git a/libs/infinity_emb/Dockerfile.flash b/libs/infinity_emb/Dockerfile.flash new file mode 100644 index 00000000..c7cad453 --- /dev/null +++ b/libs/infinity_emb/Dockerfile.flash @@ -0,0 +1,18 @@ +FROM nvcr.io/nvidia/pytorch:24.06-py3 + + +WORKDIR /app + +RUN apt-get update && apt-get install build-essential python3-dev python3.10-venv python3.10 curl -y + +RUN python -m venv .venv +RUN source .venv/bin/activate +RUN pip install flash_attn + +COPY test.py /app + + +RUN pip install infinity-emb[all] + +RUN infinity_emb v2 --model-id dunzhang/stella_en_1.5B_v5 --engine torch --preload-only || [ $? -eq 3 ] +ENTRYPOINT ["infinity_emb"] \ No newline at end of file From 01c0257d148bd9e1c8cef575e5bcf0a68768ca26 Mon Sep 17 00:00:00 2001 From: gokturkDev Date: Thu, 18 Jul 2024 17:28:16 +0300 Subject: [PATCH 2/4] remove test.py --- libs/infinity_emb/Dockerfile.flash | 2 -- 1 file changed, 2 deletions(-) diff --git a/libs/infinity_emb/Dockerfile.flash b/libs/infinity_emb/Dockerfile.flash index c7cad453..cc9040a3 100644 --- a/libs/infinity_emb/Dockerfile.flash +++ b/libs/infinity_emb/Dockerfile.flash @@ -9,8 +9,6 @@ RUN python -m venv .venv RUN source .venv/bin/activate RUN pip install flash_attn -COPY test.py /app - RUN pip install infinity-emb[all] From 45e7d21feb03cd27a3cdefb6efc6604cb2d4bf02 Mon Sep 17 00:00:00 2001 From: gokturkDev Date: Thu, 18 Jul 2024 17:44:19 +0300 Subject: [PATCH 3/4] parametrize model name and engine --- libs/infinity_emb/Dockerfile.flash | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libs/infinity_emb/Dockerfile.flash b/libs/infinity_emb/Dockerfile.flash index cc9040a3..ab746391 100644 --- a/libs/infinity_emb/Dockerfile.flash +++ b/libs/infinity_emb/Dockerfile.flash @@ -12,5 +12,13 @@ RUN pip install flash_attn RUN pip install infinity-emb[all] -RUN infinity_emb v2 --model-id dunzhang/stella_en_1.5B_v5 --engine torch --preload-only || [ $? -eq 3 ] +ARG MODEL_NAME +RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi +ARG ENGINE +RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi +ARG EXTRA_PACKAGES +RUN if [ -n "${EXTRA_PACKAGES}" ]; then pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi + + +RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ] ENTRYPOINT ["infinity_emb"] \ No newline at end of file From 53b75aed3949513b1a4e96aab37298f5146aeb31 Mon Sep 17 00:00:00 2001 From: Michael Feil <63565275+michaelfeil@users.noreply.github.com> Date: Sun, 21 Jul 2024 14:49:07 -0700 Subject: [PATCH 4/4] Update Dockerfile --- libs/infinity_emb/Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/infinity_emb/Dockerfile b/libs/infinity_emb/Dockerfile index dfcf4985..33f827fc 100644 --- a/libs/infinity_emb/Dockerfile +++ b/libs/infinity_emb/Dockerfile @@ -113,6 +113,11 @@ RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${E RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ] ENTRYPOINT ["infinity_emb"] +# flash attention fa2 +FROM tested-builder AS production-with-fa2 +RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl +ENTRYPOINT ["infinity_emb"] + # Use a multi-stage build -> production version FROM tested-builder AS production ENTRYPOINT ["infinity_emb"]