From 2c8e535ff2090eb4609251dadcdac3e57759e0eb Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Wed, 29 Mar 2023 16:13:09 +0200
Subject: [PATCH 01/21] creating a layer with Docker/docker-compose

---
 .dockerignore      |  2 ++
 .env.example       | 22 ++++++++++++++++++
 Dockerfile         | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 README.md          | 20 ++++++++++++++++-
 docker-compose.yml | 31 +++++++++++++++++++++++++
 5 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 .dockerignore
 create mode 100644 .env.example
 create mode 100644 Dockerfile
 create mode 100644 docker-compose.yml

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000..033948efda
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+/loras
+/models
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000000..7a5965cc1b
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,22 @@
+# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
+# however, to get it working I had to specify the exact version for my card (RTX 2060), which is 7.5
+# https://developer.nvidia.com/cuda-gpus you can find the version for your card here
+TORCH_CUDA_ARCH_LIST=7.5
+
+# these commands worked for me with roughly 4.5GB of vram
+CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices
+# example running 13b with 4bit/128 groupsize        : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
+# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
+
+# the port the webui binds to on the host
+HOST_PORT=7860
+# the port the webui binds to inside the container
+CONTAINER_PORT=7860
+
+# the port the api binds to on the host
+HOST_API_PORT=5000
+# the port the api binds to inside the container
+CONTAINER_API_PORT=5000
+
+# the hash used to install from after checkout, defaults to HEAD
+GPTQ_SHA=HEAD
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000..d0abd6c6f1
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,56 @@
+# GPTQ-for-LLaMa and Text Generation WebUI Dockerfile
+FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 as builder
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch torchvision torchaudio
+RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build
+
+WORKDIR /build
+
+ARG GPTQ_SHA
+RUN git reset --hard ${GPTQ_SHA}
+
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
+
+# https://developer.nvidia.com/cuda-gpus
+# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
+ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
+RUN python3 setup_cuda.py bdist_wheel -d .
+
+FROM ubuntu:22.04
+
+LABEL maintainer="Your Name <your.email@example.com>"
+LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y git python3 python3-pip && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch torchvision torchaudio
+
+COPY . /app/
+
+WORKDIR /app
+
+ARG WEBUI_SHA=HEAD
+RUN git reset --hard ${WEBUI_SHA}
+
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
+
+COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl
+
+ENV CLI_ARGS=""
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+RUN --mount=type=cache,target=/root/.cache/pip cd extensions/api && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip cd extensions/google_translate && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip cd extensions/silero_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip cd extensions/whisper_stt && pip3 install -r requirements.txt
+
+CMD python3 server.py ${CLI_ARGS}
diff --git a/README.md b/README.md
index 8736787710..59ed6ca36a 100644
--- a/README.md
+++ b/README.md
@@ -113,7 +113,25 @@ As an alternative to the recommended WSL method, you can install the web UI nati
 
 ### Alternative: Docker
 
-https://github.com/oobabooga/text-generation-webui/issues/174, https://github.com/oobabooga/text-generation-webui/issues/87
+dependencies:
+```bash
+yay -S docker docker-compose buildkit nvidia-container-runtime
+sudo systemctl restart docker # required by nvidia-container-runtime
+```
+
+Converted without group-size (better for the 7b model): https://github.com/oobabooga/text-generation-webui/pull/530#is>
+Converted with group-size (better from 13b upwards): https://github.com/oobabooga/text-generation-webui/pull/530#issue>
+
+download and place the folders inside the models folder
+
+edit .env values to your needs
+```bash
+cp .env.example .env
+nano .env
+```
+```bash
+docker-compose up --build
+```
 
 ## Downloading models
 
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000..9dbc5ae35f
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,31 @@
+version: "3.3"
+services:
+  text-generation-webui:
+    build:
+      context: .
+      args:
+        # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
+        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}
+        GPTQ_SHA: ${GPTQ_SHA}
+    env_file: .env
+    ports:
+      - "${HOST_PORT}:${CONTAINER_PORT}"
+      - "${HOST_API_PORT}:${CONTAINER_API_PORT}"
+    stdin_open: true
+    tty: true
+    volumes:
+      - ./characters:/app/characters
+      - ./extensions:/app/extensions
+      - ./loras:/app/loras
+      - ./models:/app/models
+      - ./presets:/app/presets
+      - ./prompts:/app/prompts
+      - ./softprompts:/app/softprompts
+      - ./training:/app/training
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]

From c0f3347def772b979ce0705a2736ededfeebee63 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Wed, 29 Mar 2023 20:26:56 +0200
Subject: [PATCH 02/21] using nvidia image in second stage as well to provide
 required libraries

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index d0abd6c6f1..dfa6fc5137 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
 ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN python3 setup_cuda.py bdist_wheel -d .
 
-FROM ubuntu:22.04
+FROM nvidia/cuda:11.7.0-devel-ubuntu22.04
 
 LABEL maintainer="Your Name <your.email@example.com>"
 LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"

From cf8196b090851ace1cbe20a5ee7bad4a852407f6 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Fri, 31 Mar 2023 22:40:18 +0200
Subject: [PATCH 03/21] GPTQ switch to cuda branch, minor update to
 nvidia/cuda:11.8.0-devel-ubuntu22.04 to delay deprecation of base image

---
 .env.example | 4 ++--
 Dockerfile   | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.env.example b/.env.example
index 7a5965cc1b..4d4d18a0de 100644
--- a/.env.example
+++ b/.env.example
@@ -18,5 +18,5 @@ HOST_API_PORT=5000
 # the port the api binds to inside the container
 CONTAINER_API_PORT=5000
 
-# the hash used to install from after checkout, defaults to HEAD
-GPTQ_SHA=HEAD
+# the hash used to install from after checkout, defaults to cuda
+GPTQ_SHA=cuda
diff --git a/Dockerfile b/Dockerfile
index dfa6fc5137..9997c9f701 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # GPTQ-for-LLaMa and Text Generation WebUI Dockerfile
-FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 as builder
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder
 
 RUN apt-get update && \
     apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \
@@ -11,7 +11,7 @@ RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build
 WORKDIR /build
 
 ARG GPTQ_SHA
-RUN git reset --hard ${GPTQ_SHA}
+RUN git checkout ${GPTQ_SHA}
 
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
 
@@ -20,7 +20,7 @@ RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
 ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN python3 setup_cuda.py bdist_wheel -d .
 
-FROM nvidia/cuda:11.7.0-devel-ubuntu22.04
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
 
 LABEL maintainer="Your Name <your.email@example.com>"
 LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"

From 1797fd5b3032735874d30d94df33d204920dfd65 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Sat, 1 Apr 2023 03:06:51 +0200
Subject: [PATCH 04/21] docs for ubuntu 22.04/manjaro installation of
 dependencies

---
 README.md             | 20 +---------
 docs/README_docker.md | 92 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 19 deletions(-)
 create mode 100644 docs/README_docker.md

diff --git a/README.md b/README.md
index 59ed6ca36a..53b009ed51 100644
--- a/README.md
+++ b/README.md
@@ -113,25 +113,7 @@ As an alternative to the recommended WSL method, you can install the web UI nati
 
 ### Alternative: Docker
 
-dependencies:
-```bash
-yay -S docker docker-compose buildkit nvidia-container-runtime
-sudo systemctl restart docker # required by nvidia-container-runtime
-```
-
-Converted without group-size (better for the 7b model): https://github.com/oobabooga/text-generation-webui/pull/530#is>
-Converted with group-size (better from 13b upwards): https://github.com/oobabooga/text-generation-webui/pull/530#issue>
-
-download and place the folders inside the models folder
-
-edit .env values to your needs
-```bash
-cp .env.example .env
-nano .env
-```
-```bash
-docker-compose up --build
-```
+[docker/docker-compose instructions](docs/README_docker.md)
 
 ## Downloading models
 
diff --git a/docs/README_docker.md b/docs/README_docker.md
new file mode 100644
index 0000000000..cac176408b
--- /dev/null
+++ b/docs/README_docker.md
@@ -0,0 +1,92 @@
+- [Linux](#linux)
+  - [Ubuntu 22.04](#ubuntu-2204)
+    - [update the drivers](#update-the-drivers)
+    - [reboot](#reboot)
+    - [docker \& container toolkit](#docker--container-toolkit)
+  - [Manjaro](#manjaro)
+    - [update the drivers](#update-the-drivers-1)
+    - [reboot](#reboot-1)
+    - [docker \& container toolkit](#docker--container-toolkit-1)
+  - [prepare environment \& startup](#prepare-environment--startup)
+    - [place models in models folder](#place-models-in-models-folder)
+    - [prepare .env file](#prepare-env-file)
+    - [startup docker container](#startup-docker-container)
+- [Windows](#windows)
+# Linux
+
+## Ubuntu 22.04
+
+### update the drivers
+in the “software updater”, update the drivers to the latest version of the proprietary driver.
+
+### reboot
+to switch to the new driver
+
+```bash
+sudo apt update
+sudo apt-get install curl
+
+sudo mkdir -m 0755 -p /etc/apt/keyrings
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
+
+echo \
+  "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
+  "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
+  sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
+
+sudo apt update
+sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin docker-compose -y
+
+sudo usermod -aG docker $USER
+newgrp docker
+```
+
+### docker & container toolkit
+```bash
+curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
+
+echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64 /" | \
+sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null 
+
+sudo apt update
+
+sudo apt install nvidia-docker2 -y
+sudo systemctl restart docker
+```
+
+## Manjaro
+
+### update the drivers
+```bash
+sudo mhwd -a pci nonfree 0300
+```
+### reboot
+```bash
+reboot
+```
+### docker & container toolkit
+```bash
+yay -S docker docker-compose buildkit nvidia-container-runtime
+sudo systemctl restart docker # required by nvidia-container-runtime
+```
+
+## prepare environment & startup
+
+### place models in models folder
+download and place the models inside the models folder
+
+### prepare .env file
+edit .env values to your needs
+```bash
+cp .env.example .env
+nano .env
+```
+
+### startup docker container
+```bash
+docker-compose up --build
+```
+
+
+# Windows
+coming soon
\ No newline at end of file

From d83a10cf3b4cd7e88ac6433adf8916f7e0138395 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Sat, 1 Apr 2023 12:50:43 +0200
Subject: [PATCH 05/21] unified arguments WEBUI_VERSION and GPTQ_VERSION

---
 .env.example | 7 +++++--
 Dockerfile   | 8 ++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/.env.example b/.env.example
index 4d4d18a0de..a2c615f6a9 100644
--- a/.env.example
+++ b/.env.example
@@ -18,5 +18,8 @@ HOST_API_PORT=5000
 # the port the api binds to inside the container
 CONTAINER_API_PORT=5000
 
-# the hash used to install from after checkout, defaults to cuda
-GPTQ_SHA=cuda
+# the version used to install GPTQ from, defaults to cuda
+GPTQ_VERSION=cuda
+
+# the version used to install text-generation-webui from
+WEBUI_VERSION=HEAD
diff --git a/Dockerfile b/Dockerfile
index 9997c9f701..1638b280ee 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,8 +10,8 @@ RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build
 
 WORKDIR /build
 
-ARG GPTQ_SHA
-RUN git checkout ${GPTQ_SHA}
+ARG GPTQ_VERSION
+RUN git checkout ${GPTQ_VERSION}
 
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
 
@@ -35,8 +35,8 @@ COPY . /app/
 
 WORKDIR /app
 
-ARG WEBUI_SHA=HEAD
-RUN git reset --hard ${WEBUI_SHA}
+ARG WEBUI_VERSION
+RUN git reset --hard ${WEBUI_VERSION}
 
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
 

From 6f05f2e8b196f39723a01a226de1465c895cc87a Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Sat, 1 Apr 2023 13:38:01 +0200
Subject: [PATCH 06/21] didnt save file

---
 docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 9dbc5ae35f..bb71a1fc57 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,7 +6,7 @@ services:
       args:
         # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
         TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}
-        GPTQ_SHA: ${GPTQ_SHA}
+        GPTQ_SHA: ${GPTQ_VERSION}
     env_file: .env
     ports:
       - "${HOST_PORT}:${CONTAINER_PORT}"

From 1fc2dca9926b422e0cb32379d52b03c937a463b1 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Sat, 1 Apr 2023 13:42:49 +0200
Subject: [PATCH 07/21] changes suggested by deece to allow running version
 with uncommitted changes

---
 Dockerfile         | 2 +-
 docker-compose.yml | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 1638b280ee..80cd6afec3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -36,7 +36,7 @@ COPY . /app/
 WORKDIR /app
 
 ARG WEBUI_VERSION
-RUN git reset --hard ${WEBUI_VERSION}
+RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION}
 
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
 
diff --git a/docker-compose.yml b/docker-compose.yml
index bb71a1fc57..509caee22e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,7 +6,8 @@ services:
       args:
         # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
         TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}
-        GPTQ_SHA: ${GPTQ_VERSION}
+        GPTQ_VERSION: ${GPTQ_VERSION}
+        WEBUI_VERSION: ${WEBUI_VERSION}
     env_file: .env
     ports:
       - "${HOST_PORT}:${CONTAINER_PORT}"

From 657ce70da7bdc8e66f3842dda679a25841b9f266 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Sat, 1 Apr 2023 20:36:08 +0200
Subject: [PATCH 08/21] updated version of gptq, linked in links to models used
 in testing

---
 .env.example          | 2 +-
 docs/README_docker.md | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.env.example b/.env.example
index a2c615f6a9..817805323c 100644
--- a/.env.example
+++ b/.env.example
@@ -19,7 +19,7 @@ HOST_API_PORT=5000
 CONTAINER_API_PORT=5000
 
 # the version used to install GPTQ from, defaults to cuda
-GPTQ_VERSION=cuda
+GPTQ_VERSION=608f3ba71e40596c75f8864d73506eaf57323c6e
 
 # the version used to install text-generation-webui from
 WEBUI_VERSION=HEAD
diff --git a/docs/README_docker.md b/docs/README_docker.md
index cac176408b..255f43cf5e 100644
--- a/docs/README_docker.md
+++ b/docs/README_docker.md
@@ -73,7 +73,10 @@ sudo systemctl restart docker # required by nvidia-container-runtime
 ## prepare environment & startup
 
 ### place models in models folder
-download and place the models inside the models folder
+download and place the models inside the models folder. tested with:
+
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105
 
 ### prepare .env file
 edit .env values to your needs

From 4551df7d6768eb7f72e9a8dbb104ae93f05cfd8e Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Sun, 2 Apr 2023 15:08:13 +0200
Subject: [PATCH 09/21] webui version line to not fail if no WEBUI_VERSION
 provided

---
 Dockerfile            | 2 +-
 docs/README_docker.md | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 80cd6afec3..2a45a8f11e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -36,7 +36,7 @@ COPY . /app/
 WORKDIR /app
 
 ARG WEBUI_VERSION
-RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION}
+RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"
 
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
 
diff --git a/docs/README_docker.md b/docs/README_docker.md
index 255f43cf5e..06dec78756 100644
--- a/docs/README_docker.md
+++ b/docs/README_docker.md
@@ -66,7 +66,9 @@ reboot
 ```
 ### docker & container toolkit
 ```bash
-yay -S docker docker-compose buildkit nvidia-container-runtime
+yay -S docker docker-compose buildkit gcc nvidia-docker
+sudo usermod -aG docker $USER
+newgrp docker
 sudo systemctl restart docker # required by nvidia-container-runtime
 ```
 

From 0ba16a80cf2b1f985f3e32f4bfcf04dba699f090 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Tue, 4 Apr 2023 02:01:48 +0200
Subject: [PATCH 10/21] replaced devel with runtime for final stage, removed
 env vars as already defined by cuda images

---
 Dockerfile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 2a45a8f11e..74d7c804b9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
 ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
 RUN python3 setup_cuda.py bdist_wheel -d .
 
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
+FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
 
 LABEL maintainer="Your Name <your.email@example.com>"
 LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
@@ -44,8 +44,6 @@ COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl
 
 ENV CLI_ARGS=""
-ENV NVIDIA_VISIBLE_DEVICES=all
-ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 
 RUN --mount=type=cache,target=/root/.cache/pip cd extensions/api && pip3 install -r requirements.txt
 RUN --mount=type=cache,target=/root/.cache/pip cd extensions/elevenlabs_tts && pip3 install -r requirements.txt

From df48ddbdb9e048af4833b5758187f2c6d033b26b Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Tue, 4 Apr 2023 10:55:41 +0200
Subject: [PATCH 11/21] added comment to point users with old cards to using an
 older GPTQ version

---
 .env.example | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.env.example b/.env.example
index 817805323c..c596b18508 100644
--- a/.env.example
+++ b/.env.example
@@ -20,6 +20,7 @@ CONTAINER_API_PORT=5000
 
 # the version used to install GPTQ from, defaults to cuda
 GPTQ_VERSION=608f3ba71e40596c75f8864d73506eaf57323c6e
+# older cards such as the k80 might have more luck with this GTPQ_VERSION=841feedde876785bc8022ca48fd9c3ff626587e2 https://github.com/qwopqwop200/GPTQ-for-LLaMa/issues/88#issuecomment-1485897212
 
 # the version used to install text-generation-webui from
 WEBUI_VERSION=HEAD

From 50ba3200c1409a1d9e1bc1b2a7a514aa6422a7cd Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Tue, 4 Apr 2023 13:41:18 +0200
Subject: [PATCH 12/21] added venv to Dockerfile to avoid error failing for
 transformers, related to
 https://github.com/huggingface/transformers/pull/22539

---
 Dockerfile | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 74d7c804b9..e4d3e9b393 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,11 +1,11 @@
-# GPTQ-for-LLaMa and Text Generation WebUI Dockerfile
 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder
 
 RUN apt-get update && \
     apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \
     rm -rf /var/lib/apt/lists/*
 
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch torchvision torchaudio
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
+
 RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build
 
 WORKDIR /build
@@ -13,12 +13,16 @@ WORKDIR /build
 ARG GPTQ_VERSION
 RUN git checkout ${GPTQ_VERSION}
 
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
+RUN virtualenv /build/venv
+RUN . /build/venv/bin/activate && \
+    pip3 install torch torchvision torchaudio && \
+    pip3 install -r requirements.txt
 
 # https://developer.nvidia.com/cuda-gpus
 # for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
 ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
-RUN python3 setup_cuda.py bdist_wheel -d .
+RUN . /build/venv/bin/activate && \
+    python3 setup_cuda.py bdist_wheel -d .
 
 FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
 
@@ -29,7 +33,7 @@ RUN apt-get update && \
     apt-get install --no-install-recommends -y git python3 python3-pip && \
     rm -rf /var/lib/apt/lists/*
 
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch torchvision torchaudio
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
 
 COPY . /app/
 
@@ -38,17 +42,21 @@ WORKDIR /app
 ARG WEBUI_VERSION
 RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"
 
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt
+RUN virtualenv /app/venv
+RUN . /app/venv/bin/activate && \
+    pip3 install torch torchvision torchaudio && \
+    pip3 install -r requirements.txt
 
 COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl
+RUN . /app/venv/bin/activate && \
+    pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl
 
 ENV CLI_ARGS=""
 
-RUN --mount=type=cache,target=/root/.cache/pip cd extensions/api && pip3 install -r requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip cd extensions/google_translate && pip3 install -r requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip cd extensions/silero_tts && pip3 install -r requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip cd extensions/whisper_stt && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/api && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/google_translate && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/silero_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/whisper_stt && pip3 install -r requirements.txt
 
-CMD python3 server.py ${CLI_ARGS}
+CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}

From 9571be8f8f8baadf334164f23c897260dcc6de2d Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Tue, 4 Apr 2023 14:18:02 +0200
Subject: [PATCH 13/21] Update Dockerfile

Co-authored-by: Xuehai Pan <XuehaiPan@outlook.com>
---
 Dockerfile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index e4d3e9b393..a93f153661 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,8 +4,6 @@ RUN apt-get update && \
     apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \
     rm -rf /var/lib/apt/lists/*
 
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
-
 RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build
 
 WORKDIR /build

From e8ed319bde5769f54d2f6f46658ee86c187da176 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Tue, 4 Apr 2023 14:18:10 +0200
Subject: [PATCH 14/21] Update Dockerfile

Co-authored-by: Xuehai Pan <XuehaiPan@outlook.com>
---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index a93f153661..88ff186121 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,7 +11,7 @@ WORKDIR /build
 ARG GPTQ_VERSION
 RUN git checkout ${GPTQ_VERSION}
 
-RUN virtualenv /build/venv
+RUN python3 -m venv /build/venv
 RUN . /build/venv/bin/activate && \
     pip3 install torch torchvision torchaudio && \
     pip3 install -r requirements.txt

From 7d0286b30d876c2fc157a8f7b3c611b0c44e5ef3 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Tue, 4 Apr 2023 14:18:17 +0200
Subject: [PATCH 15/21] Update Dockerfile

Co-authored-by: Xuehai Pan <XuehaiPan@outlook.com>
---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 88ff186121..f5b3a78a3a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder
 
 RUN apt-get update && \
-    apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \
+    apt-get install --no-install-recommends -y git build-essential python3-dev python3-venv && \
     rm -rf /var/lib/apt/lists/*
 
 RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build

From de45b5c8bd1619d411c5d3074a5303f5c28fc2eb Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Tue, 4 Apr 2023 14:20:39 +0200
Subject: [PATCH 16/21] updating pip prior to running pip installs

---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index f5b3a78a3a..854429b573 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,6 +13,7 @@ RUN git checkout ${GPTQ_VERSION}
 
 RUN python3 -m venv /build/venv
 RUN . /build/venv/bin/activate && \
+    pip3 install --upgrade pip setuptools && \
     pip3 install torch torchvision torchaudio && \
     pip3 install -r requirements.txt
 
@@ -42,6 +43,7 @@ RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Usi
 
 RUN virtualenv /app/venv
 RUN . /app/venv/bin/activate && \
+    pip3 install --upgrade pip setuptools && \
     pip3 install torch torchvision torchaudio && \
     pip3 install -r requirements.txt
 

From 9a5e27889bb669a381718a54338d485d54014bf9 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Tue, 4 Apr 2023 18:45:38 +0200
Subject: [PATCH 17/21] tested 8bit, added examples for 8bit model download/cli
 args to start

---
 .env.example          |  3 +++
 Dockerfile            | 12 +++++++-----
 docs/README_docker.md |  6 +++++-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/.env.example b/.env.example
index c596b18508..db54503563 100644
--- a/.env.example
+++ b/.env.example
@@ -5,8 +5,11 @@ TORCH_CUDA_ARCH_LIST=7.5
 
 # these commands worked for me with roughly 4.5GB of vram
 CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices
+
+# the following examples have been tested with the files linked in docs/README_docker.md:
 # example running 13b with 4bit/128 groupsize        : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
 # example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
+# example running 7b with 8bit groupsize             : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices
 
 # the port the webui binds to on the host
 HOST_PORT=7860
diff --git a/Dockerfile b/Dockerfile
index 854429b573..334f5a1ed1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -53,10 +53,12 @@ RUN . /app/venv/bin/activate && \
 
 ENV CLI_ARGS=""
 
-RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/api && pip3 install -r requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/google_translate && pip3 install -r requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/silero_tts && pip3 install -r requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/whisper_stt && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt
+
+RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
 
 CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}
diff --git a/docs/README_docker.md b/docs/README_docker.md
index 06dec78756..bdd00748a2 100644
--- a/docs/README_docker.md
+++ b/docs/README_docker.md
@@ -50,7 +50,7 @@ sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null
 
 sudo apt update
 
-sudo apt install nvidia-docker2 -y
+sudo apt install nvidia-docker2 nvidia-container-runtime -y
 sudo systemctl restart docker
 ```
 
@@ -77,9 +77,13 @@ sudo systemctl restart docker # required by nvidia-container-runtime
 ### place models in models folder
 download and place the models inside the models folder. tested with:
 
+4bit:
 https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617
 https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105
 
+8bit:
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789
+
 ### prepare .env file
 edit .env values to your needs
 ```bash

From 7d9728b719696b0ceba04eff570af15ac2b9dfaf Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Thu, 6 Apr 2023 20:58:24 +0200
Subject: [PATCH 18/21] added .env and dockerfile to .dockerignore

---
 .dockerignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.dockerignore b/.dockerignore
index 033948efda..30d7c69f08 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,2 +1,4 @@
 /loras
 /models
+.env
+Dockerfile

From 4806703043f911a2b47f3dca26a625b616e1bbe1 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 6 Apr 2023 21:43:46 -0300
Subject: [PATCH 19/21] Switch to oobabooga/GPTQ-for-LLaMa

---
 .env.example | 4 ----
 Dockerfile   | 5 +----
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/.env.example b/.env.example
index db54503563..d20300b776 100644
--- a/.env.example
+++ b/.env.example
@@ -21,9 +21,5 @@ HOST_API_PORT=5000
 # the port the api binds to inside the container
 CONTAINER_API_PORT=5000
 
-# the version used to install GPTQ from, defaults to cuda
-GPTQ_VERSION=608f3ba71e40596c75f8864d73506eaf57323c6e
-# older cards such as the k80 might have more luck with this GTPQ_VERSION=841feedde876785bc8022ca48fd9c3ff626587e2 https://github.com/qwopqwop200/GPTQ-for-LLaMa/issues/88#issuecomment-1485897212
-
 # the version used to install text-generation-webui from
 WEBUI_VERSION=HEAD
diff --git a/Dockerfile b/Dockerfile
index 334f5a1ed1..5aaf2db6b1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,13 +4,10 @@ RUN apt-get update && \
     apt-get install --no-install-recommends -y git build-essential python3-dev python3-venv && \
     rm -rf /var/lib/apt/lists/*
 
-RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build
+RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build
 
 WORKDIR /build
 
-ARG GPTQ_VERSION
-RUN git checkout ${GPTQ_VERSION}
-
 RUN python3 -m venv /build/venv
 RUN . /build/venv/bin/activate && \
     pip3 install --upgrade pip setuptools && \

From be7b3b7b6ccde4ccb42c0754569f1507ea9a08d4 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 6 Apr 2023 21:52:38 -0300
Subject: [PATCH 20/21] Add vim to the requirements

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 5aaf2db6b1..8a063539bc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder
 
 RUN apt-get update && \
-    apt-get install --no-install-recommends -y git build-essential python3-dev python3-venv && \
+    apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \
     rm -rf /var/lib/apt/lists/*
 
 RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build

From 6b479cd8513fd3de33233216abbbaaaaeaaf0a1c Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 6 Apr 2023 22:37:55 -0300
Subject: [PATCH 21/21] Add files to .dockerignore

---
 .dockerignore | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index 30d7c69f08..fdf0c4ce2b 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,10 @@
-/loras
-/models
 .env
 Dockerfile
+/characters
+/extensions
+/loras
+/models
+/presets
+/prompts
+/softprompts
+/training