Skip to content

Commit

Permalink
Add flash_attn support (#306)
Browse files Browse the repository at this point in the history
* add dockerfile for flash_attn setup

* remove test.py

* parametrize model name and engine

* Update Dockerfile

---------

Co-authored-by: Michael Feil <[email protected]>
  • Loading branch information
gokturkDev and michaelfeil authored Jul 21, 2024
1 parent 04b422d commit fddd955
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
5 changes: 5 additions & 0 deletions libs/infinity_emb/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${E
RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Production variant with FlashAttention-2 (fa2) layered on top of the tested builder stage.
FROM tested-builder AS production-with-fa2
# Prebuilt wheel pinned by filename to flash-attn 2.6.1 / CUDA 12.3 / torch 2.3 / CPython 3.10 / linux x86_64.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python -m pip install --no-cache-dir https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
ENTRYPOINT ["infinity_emb"]

# Multi-stage build: the default production image is the tested-builder stage as-is,
# with the infinity_emb CLI as the container entrypoint.
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]
24 changes: 24 additions & 0 deletions libs/infinity_emb/Dockerfile.flash
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Standalone image that installs flash_attn and infinity-emb on top of NVIDIA's PyTorch container
# and pre-downloads a model at build time.
#
# Build arguments:
#   MODEL_NAME      (required) value passed to `infinity_emb v2 --model-id`
#   ENGINE          (required) value passed to `infinity_emb v2 --engine`
#   EXTRA_PACKAGES  (optional) space-separated pip requirements installed in addition
FROM nvcr.io/nvidia/pytorch:24.06-py3

WORKDIR /app

# update + install + list cleanup in a single layer: the package index is never stale
# and never shipped in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        python3-dev \
        python3.10 \
        python3.10-venv \
    && rm -rf /var/lib/apt/lists/*

# Every RUN starts a fresh shell, so a separate `RUN source .venv/bin/activate` never affected
# the following steps (and `source` does not exist in a POSIX /bin/sh). Putting the venv's bin
# directory first on PATH via ENV activates it for all later RUN steps and for the container.
# --system-site-packages keeps packages from the base interpreter importable inside the venv
# (presumably the base image's preinstalled torch/CUDA stack -- confirm against the image).
ENV VIRTUAL_ENV=/app/.venv
RUN python -m venv --system-site-packages "${VIRTUAL_ENV}"
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

RUN pip install --no-cache-dir flash_attn

# Quoted so the shell cannot treat `[all]` as a glob pattern.
RUN pip install --no-cache-dir "infinity-emb[all]"

# Fail the build early when a required build argument is missing.
ARG MODEL_NAME
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
ARG ENGINE
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
ARG EXTRA_PACKAGES
# EXTRA_PACKAGES is intentionally left unquoted so several space-separated requirements split into arguments.
RUN if [ -n "${EXTRA_PACKAGES}" ]; then pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi

# Preload the model into the image. Exit status 3 is tolerated as success
# (NOTE(review): presumably the code infinity_emb returns after --preload-only -- confirm).
RUN infinity_emb v2 --model-id "$MODEL_NAME" --engine "$ENGINE" --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

0 comments on commit fddd955

Please sign in to comment.