diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index c527a1b0..1dbb42f2 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -32,6 +32,17 @@ jobs:
       platforms: "linux/amd64"
     secrets: inherit
   
+  # Calls the reusable release_docker_container workflow with the TensorRT/ONNX Dockerfile.
+  docker-container-push-onnx-trt:
+    uses: ./.github/workflows/release_docker_container.yaml
+    with:
+      # working-directory: libs/infinity_emb
+      dockerfile: libs/infinity_emb/Dockerfile.trt_onnx_auto
+      image: michaelf34/infinity
+      appendix_tag: "-trt-onnx"
+      platforms: "linux/amd64"
+    secrets: inherit
+  
   # docker-container-push-amd:
   #   uses:
   #     ./.github/workflows/release_docker_container.yaml
diff --git a/libs/infinity_emb/Docker.template.yaml b/libs/infinity_emb/Docker.template.yaml
index ce28fd8f..3f782264 100644
--- a/libs/infinity_emb/Docker.template.yaml
+++ b/libs/infinity_emb/Docker.template.yaml
@@ -1,20 +1,35 @@
+# run all commands here via: `make template_docker`
+
 # 1. Guide: pip install jinja2 jinja2-cli
 nvidia:
   # 2 .command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s nvidia > Dockerfile.nvidia_auto
   base_image: 'nvidia/cuda:12.1.1-base-ubuntu22.04'
-  main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test  
-
 cpu:
   # 2. command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s cpu > Dockerfile.cpu_auto
   base_image: 'ubuntu:22.04' 
-  main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test 
   pyproject_sed: RUN sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml && rm poetry.lock 
 
 amd:
   # 2 . command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto
   base_image: 'rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0'
-  main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test 
-  pyproject_sed: RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml && sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock 
+  pyproject_sed: |
+    RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml 
+    RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml 
+    RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock 
   poetry_virtualenvs_create: "false"
   poetry_virtualenvs_in_project: "false"
+  poetry_extras: "all onnxruntime-gpu"
   python_version: python3.10
+
+trt:
+  base_image: 'nvidia/cuda:12.1.1-devel-ubuntu22.04'
+  poetry_extras: "all onnxruntime-gpu"
+  extra_installs_main: |
+    # Install utils for tensorrt (refresh the apt index in the same layer so a cached, stale index is never used)
+    RUN apt-get update && apt-get install -y --no-install-recommends openmpi-bin libopenmpi-dev git git-lfs python3-pip
+    RUN poetry run $PYTHON -m pip install --no-cache-dir flash-attn --no-build-isolation
+    RUN poetry run $PYTHON -m pip install --no-cache-dir "tensorrt==10.2.0" "tensorrt_lean==10.2.0" "tensorrt_dispatch==10.2.0"
+    # `poetry run ... pip install` targets the in-project venv (/app/.venv), so expose the TensorRT libraries from there
+    ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/app/.venv/lib/$PYTHON/site-packages/tensorrt:/app/.venv/lib/$PYTHON/site-packages/tensorrt_libs
+    # NOTE(review): this runs in the builder stage; stages started `FROM base` do not inherit the ENV or the apt packages - confirm
+  python_version: python3.10
\ No newline at end of file
diff --git a/libs/infinity_emb/Dockerfile.amd_auto b/libs/infinity_emb/Dockerfile.amd_auto
index 49e64085..7db2e4bc 100644
--- a/libs/infinity_emb/Dockerfile.amd_auto
+++ b/libs/infinity_emb/Dockerfile.amd_auto
@@ -17,12 +17,13 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_VIRTUALENVS_IN_PROJECT="false" \
     # do not ask any interactive question
     POETRY_NO_INTERACTION=1 \
-    EXTRAS="all" \
+    EXTRAS="all onnxruntime-gpu" \
     PYTHON="python3.10"
-RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y 
+RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
 WORKDIR /app
 
 FROM base as builder
+
 # Set the working directory for the app
 # Define the version of Poetry to install (default is 1.7.1)
 # Define the directory to install Poetry to (default is /opt/poetry)
@@ -36,29 +37,44 @@ RUN echo "Poetry version:" && poetry --version
 # Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes)
 COPY poetry.lock poetry.toml pyproject.toml README.md /app/
 # Install dependencies only
-RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml && sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock
+RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml 
+RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml 
+RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock 
+
 RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
+#
+
 
 FROM builder as testing
 # install lint and test dependencies
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
 # lint 
-RUN poetry run ruff .
-RUN poetry run black --check .
+RUN poetry run ruff check .
 RUN poetry run mypy .
 # pytest
 COPY tests tests
 # run end to end tests because of duration of build in github ci.
 # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu
 # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
-poetry run python -m pytest tests/end_to_end -x ; \
-else \
-poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
-fi
+RUN if [ -z "$TARGETPLATFORM" ]; then \
+      ARCH=$(uname -m); \
+      if [ "$ARCH" = "x86_64" ]; then \
+          TARGETPLATFORM="linux/amd64"; \
+      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
+          TARGETPLATFORM="linux/arm64"; \
+      else \
+          echo "Unsupported architecture: $ARCH"; exit 1; \
+      fi; \
+    fi; \
+    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
+        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
+    else \
+        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
+    fi
 RUN echo "all tests passed" > "test_results.txt"
 
 
@@ -100,17 +116,11 @@ ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
-ARG EXTRA_PACKAGES
-RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
+
 # will exit with 3 if model is downloaded # TODO: better exit code
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
-# flash attention fa2
-FROM tested-builder AS production-with-fa2
-RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
-ENTRYPOINT ["infinity_emb"]
-
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]
diff --git a/libs/infinity_emb/Dockerfile.cpu_auto b/libs/infinity_emb/Dockerfile.cpu_auto
index cf06ef92..f69637c9 100644
--- a/libs/infinity_emb/Dockerfile.cpu_auto
+++ b/libs/infinity_emb/Dockerfile.cpu_auto
@@ -19,10 +19,11 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_NO_INTERACTION=1 \
     EXTRAS="all" \
     PYTHON="python3.11"
-RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y 
+RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
 WORKDIR /app
 
 FROM base as builder
+
 # Set the working directory for the app
 # Define the version of Poetry to install (default is 1.7.1)
 # Define the directory to install Poetry to (default is /opt/poetry)
@@ -41,24 +42,36 @@ RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --w
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
+#
+
 
 FROM builder as testing
 # install lint and test dependencies
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
 # lint 
-RUN poetry run ruff .
-RUN poetry run black --check .
+RUN poetry run ruff check .
 RUN poetry run mypy .
 # pytest
 COPY tests tests
 # run end to end tests because of duration of build in github ci.
 # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu
 # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
-poetry run python -m pytest tests/end_to_end -x ; \
-else \
-poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
-fi
+RUN if [ -z "$TARGETPLATFORM" ]; then \
+      ARCH=$(uname -m); \
+      if [ "$ARCH" = "x86_64" ]; then \
+          TARGETPLATFORM="linux/amd64"; \
+      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
+          TARGETPLATFORM="linux/arm64"; \
+      else \
+          echo "Unsupported architecture: $ARCH"; exit 1; \
+      fi; \
+    fi; \
+    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
+        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
+    else \
+        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
+    fi
 RUN echo "all tests passed" > "test_results.txt"
 
 
@@ -100,17 +113,11 @@ ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
-ARG EXTRA_PACKAGES
-RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
+
 # will exit with 3 if model is downloaded # TODO: better exit code
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
-# flash attention fa2
-FROM tested-builder AS production-with-fa2
-RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
-ENTRYPOINT ["infinity_emb"]
-
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]
diff --git a/libs/infinity_emb/Dockerfile.jinja2 b/libs/infinity_emb/Dockerfile.jinja2
index 35e1d07c..0ced9436 100644
--- a/libs/infinity_emb/Dockerfile.jinja2
+++ b/libs/infinity_emb/Dockerfile.jinja2
@@ -17,12 +17,13 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_VIRTUALENVS_IN_PROJECT="{{poetry_virtualenvs_in_project | default('true')}}" \
     # do not ask any interactive question
     POETRY_NO_INTERACTION=1 \
-    EXTRAS="all" \
+    EXTRAS="{{poetry_extras | default('all')}}" \
     PYTHON="{{python_version | default('python3.11')}}"
-RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y 
+RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
 WORKDIR /app
 
 FROM base as builder
+{% set main_install2 = "poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test" %}
 # Set the working directory for the app
 # Define the version of Poetry to install (default is 1.7.1)
 # Define the directory to install Poetry to (default is /opt/poetry)
@@ -37,28 +38,40 @@ RUN echo "Poetry version:" && poetry --version
 COPY poetry.lock poetry.toml pyproject.toml README.md /app/
 # Install dependencies only
 {{pyproject_sed | default('#')}}
-RUN {{main_install}} && poetry cache clear pypi --all
+RUN {{main_install2}} && poetry cache clear pypi --all
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
-RUN {{main_install|replace("--no-root","")}} && poetry cache clear pypi --all
+RUN {{main_install2|replace("--no-root","")}} && poetry cache clear pypi --all
+{{extra_installs_main | default('#')}}
+
 
 FROM builder as testing
 # install lint and test dependencies
-RUN {{main_install|replace("--without", "--with")|replace("--no-root","")}} && poetry cache clear pypi --all
+RUN {{main_install2|replace("--without", "--with")|replace("--no-root","")}} && poetry cache clear pypi --all
 # lint 
-RUN poetry run ruff .
-RUN poetry run black --check .
+RUN poetry run ruff check .
 RUN poetry run mypy .
 # pytest
 COPY tests tests
 # run end to end tests because of duration of build in github ci.
 # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu
 # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
-poetry run python -m pytest tests/end_to_end -x ; \
-else \
-poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
-fi
+RUN if [ -z "$TARGETPLATFORM" ]; then \
+      ARCH=$(uname -m); \
+      if [ "$ARCH" = "x86_64" ]; then \
+          TARGETPLATFORM="linux/amd64"; \
+      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
+          TARGETPLATFORM="linux/arm64"; \
+      else \
+          echo "Unsupported architecture: $ARCH"; exit 1; \
+      fi; \
+    fi; \
+    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
+        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
+    else \
+        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
+    fi
 RUN echo "all tests passed" > "test_results.txt"
 
 
@@ -100,17 +113,11 @@ ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
-ARG EXTRA_PACKAGES
-RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
+
 # will exit with 3 if model is downloaded # TODO: better exit code
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
-# flash attention fa2
-FROM tested-builder AS production-with-fa2
-RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
-ENTRYPOINT ["infinity_emb"]
-
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]
diff --git a/libs/infinity_emb/Dockerfile.nvidia_auto b/libs/infinity_emb/Dockerfile.nvidia_auto
index 56146b13..34750bee 100644
--- a/libs/infinity_emb/Dockerfile.nvidia_auto
+++ b/libs/infinity_emb/Dockerfile.nvidia_auto
@@ -19,10 +19,11 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_NO_INTERACTION=1 \
     EXTRAS="all" \
     PYTHON="python3.11"
-RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y 
+RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
 WORKDIR /app
 
 FROM base as builder
+
 # Set the working directory for the app
 # Define the version of Poetry to install (default is 1.7.1)
 # Define the directory to install Poetry to (default is /opt/poetry)
@@ -41,24 +42,36 @@ RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --w
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
+#
+
 
 FROM builder as testing
 # install lint and test dependencies
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
 # lint 
-RUN poetry run ruff .
-RUN poetry run black --check .
+RUN poetry run ruff check .
 RUN poetry run mypy .
 # pytest
 COPY tests tests
 # run end to end tests because of duration of build in github ci.
 # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu
 # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
-poetry run python -m pytest tests/end_to_end -x ; \
-else \
-poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
-fi
+RUN if [ -z "$TARGETPLATFORM" ]; then \
+      ARCH=$(uname -m); \
+      if [ "$ARCH" = "x86_64" ]; then \
+          TARGETPLATFORM="linux/amd64"; \
+      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
+          TARGETPLATFORM="linux/arm64"; \
+      else \
+          echo "Unsupported architecture: $ARCH"; exit 1; \
+      fi; \
+    fi; \
+    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
+        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
+    else \
+        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
+    fi
 RUN echo "all tests passed" > "test_results.txt"
 
 
@@ -100,17 +113,11 @@ ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
-ARG EXTRA_PACKAGES
-RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
+
 # will exit with 3 if model is downloaded # TODO: better exit code
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
-# flash attention fa2
-FROM tested-builder AS production-with-fa2
-RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
-ENTRYPOINT ["infinity_emb"]
-
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]
diff --git a/libs/infinity_emb/Dockerfile.trt_onnx_auto b/libs/infinity_emb/Dockerfile.trt_onnx_auto
new file mode 100644
index 00000000..98409f89
--- /dev/null
+++ b/libs/infinity_emb/Dockerfile.trt_onnx_auto
@@ -0,0 +1,130 @@
+# Autogenerated warning:
+# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly.
+# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd
+
+FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 AS base
+
+ENV PYTHONUNBUFFERED=1 \
+    \
+    # pip
+    PIP_NO_CACHE_DIR=off \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    PIP_DEFAULT_TIMEOUT=100 \
+    \
+    # make poetry create the virtual environment in the project's root
+    # it gets named `.venv`
+    POETRY_VIRTUALENVS_CREATE="true" \
+    POETRY_VIRTUALENVS_IN_PROJECT="true" \
+    # do not ask any interactive question
+    POETRY_NO_INTERACTION=1 \
+    EXTRAS="all onnxruntime-gpu" \
+    PYTHON="python3.10"
+RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
+WORKDIR /app
+
+FROM base as builder
+
+# Set the working directory for the app
+# Define the version of Poetry to install (default is 1.7.1)
+# Define the directory to install Poetry to (default is /opt/poetry)
+ARG POETRY_VERSION=1.7.1
+ARG POETRY_HOME=/opt/poetry
+# Create a Python virtual environment for Poetry and install it
+RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON -
+ENV PATH=$POETRY_HOME/bin:$PATH
+# Test if Poetry is installed in the expected path
+RUN echo "Poetry version:" && poetry --version
+# Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes)
+COPY poetry.lock poetry.toml pyproject.toml README.md /app/
+# Install dependencies only
+#
+RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
+COPY infinity_emb infinity_emb
+# Install dependency with infinity_emb package
+RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
+# Install utils for tensorrt (refresh the apt index in the same layer so a cached, stale index is never used)
+RUN apt-get update && apt-get install -y --no-install-recommends openmpi-bin libopenmpi-dev git git-lfs python3-pip
+RUN poetry run $PYTHON -m pip install --no-cache-dir flash-attn --no-build-isolation
+RUN poetry run $PYTHON -m pip install --no-cache-dir "tensorrt==10.2.0" "tensorrt_lean==10.2.0" "tensorrt_dispatch==10.2.0"
+# `poetry run ... pip install` targets the in-project venv (/app/.venv), so expose the TensorRT libraries from there
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/app/.venv/lib/$PYTHON/site-packages/tensorrt:/app/.venv/lib/$PYTHON/site-packages/tensorrt_libs
+# NOTE(review): this runs in the builder stage; stages started `FROM base` do not inherit the ENV or the apt packages - confirm
+
+
+
+FROM builder as testing
+# install lint and test dependencies
+RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
+# lint 
+RUN poetry run ruff check .
+RUN poetry run mypy .
+# pytest
+COPY tests tests
+# run end to end tests because of duration of build in github ci.
+# Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu
+# poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
+RUN if [ -z "$TARGETPLATFORM" ]; then \
+      ARCH=$(uname -m); \
+      if [ "$ARCH" = "x86_64" ]; then \
+          TARGETPLATFORM="linux/amd64"; \
+      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
+          TARGETPLATFORM="linux/arm64"; \
+      else \
+          echo "Unsupported architecture: $ARCH"; exit 1; \
+      fi; \
+    fi; \
+    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
+        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
+    else \
+        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
+    fi
+RUN echo "all tests passed" > "test_results.txt"
+
+
+# Use a multi-stage build -> production version, with download
+FROM base AS tested-builder
+COPY --from=builder /app /app
+# force testing stage to run
+COPY --from=testing /app/test_results.txt /app/test_results.txt
+ENV HF_HOME=/app/.cache/huggingface
+ENV PATH=/app/.venv/bin:$PATH
+# do nothing
+RUN echo "copied all files"
+
+
+# Export with tensorrt, not recommended.
+# docker buildx build --target=production-tensorrt -f Dockerfile .
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt
+ENV PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=off \
+    PYTHON="python3.11"
+RUN apt-get update && apt-get install python3-dev python3-pip $PYTHON build-essential curl -y 
+COPY --from=builder /app /app
+# force testing stage to run
+COPY --from=testing /app/test_results.txt /app/test_results.txt
+ENV HF_HOME=/app/.cache/torch
+ENV PATH=/app/.venv/bin:$PATH
+RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*"
+ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
+ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH}
+ENTRYPOINT ["infinity_emb"]
+
+
+# Use a multi-stage build -> production version, with download
+# docker buildx build --target=production-with-download \
+# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
+FROM tested-builder AS production-with-download
+# collect model name and engine from build args
+ARG MODEL_NAME
+RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
+ARG ENGINE
+RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
+
+# will exit with 3 if model is downloaded # TODO: better exit code
+RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
+ENTRYPOINT ["infinity_emb"]
+
+# Use a multi-stage build -> production version
+FROM tested-builder AS production
+ENTRYPOINT ["infinity_emb"]
diff --git a/libs/infinity_emb/Makefile b/libs/infinity_emb/Makefile
index 1502a507..f6ca0c0f 100644
--- a/libs/infinity_emb/Makefile
+++ b/libs/infinity_emb/Makefile
@@ -44,6 +44,7 @@ template_docker:
 	jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto
 	jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s cpu > Dockerfile.cpu_auto
 	jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s nvidia > Dockerfile.nvidia_auto
+	jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s trt > Dockerfile.trt_onnx_auto
 
 poetry_check:
 	poetry check