From 2c8e535ff2090eb4609251dadcdac3e57759e0eb Mon Sep 17 00:00:00 2001 From: loeken Date: Wed, 29 Mar 2023 16:13:09 +0200 Subject: [PATCH 01/21] creating a layer with Docker/docker-compose --- .dockerignore | 2 ++ .env.example | 22 ++++++++++++++++++ Dockerfile | 56 ++++++++++++++++++++++++++++++++++++++++++++++ README.md | 20 ++++++++++++++++- docker-compose.yml | 31 +++++++++++++++++++++++++ 5 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 .dockerignore create mode 100644 .env.example create mode 100644 Dockerfile create mode 100644 docker-compose.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..033948efda --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +/loras +/models diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000..7a5965cc1b --- /dev/null +++ b/.env.example @@ -0,0 +1,22 @@ +# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX +# however for me to work i had to specify the exact version for my card ( 2060 ) it was 7.5 +# https://developer.nvidia.com/cuda-gpus you can find the version for your card here +TORCH_CUDA_ARCH_LIST=7.5 + +# these commands worked for me with roughly 4.5GB of vram +CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices +# example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25 +# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share + +# the port the webui binds to on the host +HOST_PORT=7860 +# the port the webui binds to inside the container +CONTAINER_PORT=7860 + +# the port the api binds to on the host +HOST_API_PORT=5000 +# the port the api binds to inside the container +CONTAINER_API_PORT=5000 + +# the hash used to install from after checkout, defaults to HEAD +GPTQ_SHA=HEAD diff --git a/Dockerfile b/Dockerfile new file 
mode 100644 index 0000000000..d0abd6c6f1 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,56 @@ +# GPTQ-for-LLaMa and Text Generation WebUI Dockerfile +FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 as builder + +RUN apt-get update && \ + apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \ + rm -rf /var/lib/apt/lists/* + +RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch torchvision torchaudio +RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build + +WORKDIR /build + +ARG GPTQ_SHA +RUN git reset --hard ${GPTQ_SHA} + +RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt + +# https://developer.nvidia.com/cuda-gpus +# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5" +ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX" +RUN python3 setup_cuda.py bdist_wheel -d . + +FROM ubuntu:22.04 + +LABEL maintainer="Your Name " +LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI" + +RUN apt-get update && \ + apt-get install --no-install-recommends -y git python3 python3-pip && \ + rm -rf /var/lib/apt/lists/* + +RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch torchvision torchaudio + +COPY . 
/app/ + +WORKDIR /app + +ARG WEBUI_SHA=HEAD +RUN git reset --hard ${WEBUI_SHA} + +RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt + +COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa +RUN --mount=type=cache,target=/root/.cache/pip pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl + +ENV CLI_ARGS="" +ENV NVIDIA_VISIBLE_DEVICES=all +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility + +RUN --mount=type=cache,target=/root/.cache/pip cd extensions/api && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip cd extensions/elevenlabs_tts && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip cd extensions/google_translate && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip cd extensions/silero_tts && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip cd extensions/whisper_stt && pip3 install -r requirements.txt + +CMD python3 server.py ${CLI_ARGS} diff --git a/README.md b/README.md index 8736787710..59ed6ca36a 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,25 @@ As an alternative to the recommended WSL method, you can install the web UI nati ### Alternative: Docker -https://github.com/oobabooga/text-generation-webui/issues/174, https://github.com/oobabooga/text-generation-webui/issues/87 +dependencies: +```bash +yay -S docker docker-compose buildkit nvidia-container-runtime +sudo systemctl restart docker # required by nvidia-container-runtime +``` + +Converted without group-size (better for the 7b model): https://github.com/oobabooga/text-generation-webui/pull/530#is> +Converted with group-size (better from 13b upwards): https://github.com/oobabooga/text-generation-webui/pull/530#issue> + +download and place the folders inside the models folder + +edit .env values to your needs +```bash +cp .env.example .env +nano .env +``` +```bash +docker-compose up --build +``` ## Downloading models diff --git 
a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000..9dbc5ae35f --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,31 @@ +version: "3.3" +services: + text-generation-webui: + build: + context: . + args: + # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus + TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST} + GPTQ_SHA: ${GPTQ_SHA} + env_file: .env + ports: + - "${HOST_PORT}:${CONTAINER_PORT}" + - "${HOST_API_PORT}:${CONTAINER_API_PORT}" + stdin_open: true + tty: true + volumes: + - ./characters:/app/characters + - ./extensions:/app/extensions + - ./loras:/app/loras + - ./models:/app/models + - ./presets:/app/presets + - ./prompts:/app/prompts + - ./softprompts:/app/softprompts + - ./training:/app/training + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] From c0f3347def772b979ce0705a2736ededfeebee63 Mon Sep 17 00:00:00 2001 From: loeken Date: Wed, 29 Mar 2023 20:26:56 +0200 Subject: [PATCH 02/21] using nvida image in second stage aswell to provide required libraries --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d0abd6c6f1..dfa6fc5137 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX" RUN python3 setup_cuda.py bdist_wheel -d . 
-FROM ubuntu:22.04 +FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 LABEL maintainer="Your Name " LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI" From cf8196b090851ace1cbe20a5ee7bad4a852407f6 Mon Sep 17 00:00:00 2001 From: loeken Date: Fri, 31 Mar 2023 22:40:18 +0200 Subject: [PATCH 03/21] GPTQ switch to cuda branch, minor update to nvidia/cuda:11.8.0-devel-ubuntu22.04 to delay deprecation of base image --- .env.example | 4 ++-- Dockerfile | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index 7a5965cc1b..4d4d18a0de 100644 --- a/.env.example +++ b/.env.example @@ -18,5 +18,5 @@ HOST_API_PORT=5000 # the port the api binds to inside the container CONTAINER_API_PORT=5000 -# the hash used to install from after checkout, defaults to HEAD -GPTQ_SHA=HEAD +# the hash used to install from after checkout, defaults to cuda +GPTQ_SHA=cuda diff --git a/Dockerfile b/Dockerfile index dfa6fc5137..9997c9f701 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # GPTQ-for-LLaMa and Text Generation WebUI Dockerfile -FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 as builder +FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder RUN apt-get update && \ apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \ @@ -11,7 +11,7 @@ RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build WORKDIR /build ARG GPTQ_SHA -RUN git reset --hard ${GPTQ_SHA} +RUN git checkout ${GPTQ_SHA} RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt @@ -20,7 +20,7 @@ RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX" RUN python3 setup_cuda.py bdist_wheel -d . 
-FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 +FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 LABEL maintainer="Your Name " LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI" From 1797fd5b3032735874d30d94df33d204920dfd65 Mon Sep 17 00:00:00 2001 From: loeken Date: Sat, 1 Apr 2023 03:06:51 +0200 Subject: [PATCH 04/21] docs for ubuntu 22.04/manjaro installation of dependencies --- README.md | 20 +--------- docs/README_docker.md | 92 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 19 deletions(-) create mode 100644 docs/README_docker.md diff --git a/README.md b/README.md index 59ed6ca36a..53b009ed51 100644 --- a/README.md +++ b/README.md @@ -113,25 +113,7 @@ As an alternative to the recommended WSL method, you can install the web UI nati ### Alternative: Docker -dependencies: -```bash -yay -S docker docker-compose buildkit nvidia-container-runtime -sudo systemctl restart docker # required by nvidia-container-runtime -``` - -Converted without group-size (better for the 7b model): https://github.com/oobabooga/text-generation-webui/pull/530#is> -Converted with group-size (better from 13b upwards): https://github.com/oobabooga/text-generation-webui/pull/530#issue> - -download and place the folders inside the models folder - -edit .env values to your needs -```bash -cp .env.example .env -nano .env -``` -```bash -docker-compose up --build -``` +[docker/docker-compose instructions](docs/README_docker.md) ## Downloading models diff --git a/docs/README_docker.md b/docs/README_docker.md new file mode 100644 index 0000000000..cac176408b --- /dev/null +++ b/docs/README_docker.md @@ -0,0 +1,92 @@ +- [Linux](#linux) + - [Ubuntu 22.04](#ubuntu-2204) + - [update the drivers](#update-the-drivers) + - [reboot](#reboot) + - [docker \& container toolkit](#docker--container-toolkit) + - [Manjaro](#manjaro) + - [update the drivers](#update-the-drivers-1) + - [reboot](#reboot-1) + - [docker \& container toolkit](#docker--container-toolkit-1) 
+ - [prepare environment \& startup](#prepare-environment--startup) + - [place models in models folder](#place-models-in-models-folder) + - [prepare .env file](#prepare-env-file) + - [startup docker container](#startup-docker-container) +- [Windows](#windows) +# Linux + +## Ubuntu 22.04 + +### update the drivers +in the “software updater”, update the drivers to the latest version of the proprietary driver. + +### reboot +to switch to the new driver + +```bash +sudo apt update +sudo apt-get install curl + +sudo mkdir -m 0755 -p /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + +echo \ + "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +sudo apt update +sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin docker-compose -y + +sudo usermod -aG docker $USER +newgrp docker +``` + +### docker & container toolkit +```bash +curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg + +echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64 /" | \ +sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null + +sudo apt update + +sudo apt install nvidia-docker2 -y +sudo systemctl restart docker +``` + +## Manjaro + +### update the drivers +```bash +sudo mhwd -a pci nonfree 0300 +``` +### reboot +```bash +reboot +``` +### docker & container toolkit +```bash +yay -S docker docker-compose buildkit nvidia-container-runtime +sudo systemctl restart docker # required by nvidia-container-runtime +``` + +## prepare environment & startup + +### place models in models folder +download and place 
the models inside the models folder + +### prepare .env file +edit .env values to your needs +```bash +cp .env.example .env +nano .env +``` + +### startup docker container +```bash +docker-compose up --build +``` + + +# Windows +coming soon \ No newline at end of file From d83a10cf3b4cd7e88ac6433adf8916f7e0138395 Mon Sep 17 00:00:00 2001 From: loeken Date: Sat, 1 Apr 2023 12:50:43 +0200 Subject: [PATCH 05/21] unified arguments WEBUI_VERSION and GPTQ_VERSION --- .env.example | 7 +++++-- Dockerfile | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index 4d4d18a0de..a2c615f6a9 100644 --- a/.env.example +++ b/.env.example @@ -18,5 +18,8 @@ HOST_API_PORT=5000 # the port the api binds to inside the container CONTAINER_API_PORT=5000 -# the hash used to install from after checkout, defaults to cuda -GPTQ_SHA=cuda +# the version used to install GPTQ from, defaults to cuda +GPTQ_VERSION=cuda + +# the version used to install text-generation-webui from +WEBUI_VERSION=HEAD diff --git a/Dockerfile b/Dockerfile index 9997c9f701..1638b280ee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,8 @@ RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build WORKDIR /build -ARG GPTQ_SHA -RUN git checkout ${GPTQ_SHA} +ARG GPTQ_VERSION +RUN git checkout ${GPTQ_VERSION} RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt @@ -35,8 +35,8 @@ COPY . 
/app/ WORKDIR /app -ARG WEBUI_SHA=HEAD -RUN git reset --hard ${WEBUI_SHA} +ARG WEBUI_VERSION +RUN git reset --hard ${WEBUI_VERSION} RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt From 6f05f2e8b196f39723a01a226de1465c895cc87a Mon Sep 17 00:00:00 2001 From: loeken Date: Sat, 1 Apr 2023 13:38:01 +0200 Subject: [PATCH 06/21] didnt save file --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 9dbc5ae35f..bb71a1fc57 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,7 @@ services: args: # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST} - GPTQ_SHA: ${GPTQ_SHA} + GPTQ_SHA: ${GPTQ_VERSION} env_file: .env ports: - "${HOST_PORT}:${CONTAINER_PORT}" From 1fc2dca9926b422e0cb32379d52b03c937a463b1 Mon Sep 17 00:00:00 2001 From: loeken Date: Sat, 1 Apr 2023 13:42:49 +0200 Subject: [PATCH 07/21] changes suggested by deece to allow running version with uncommited changes --- Dockerfile | 2 +- docker-compose.yml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1638b280ee..80cd6afec3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,7 +36,7 @@ COPY . 
/app/ WORKDIR /app ARG WEBUI_VERSION -RUN git reset --hard ${WEBUI_VERSION} +RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt diff --git a/docker-compose.yml b/docker-compose.yml index bb71a1fc57..509caee22e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,8 @@ services: args: # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST} - GPTQ_SHA: ${GPTQ_VERSION} + GPTQ_VERSION: ${GPTQ_VERSION} + WEBUI_VERSION: ${WEBUI_VERSION} env_file: .env ports: - "${HOST_PORT}:${CONTAINER_PORT}" From 657ce70da7bdc8e66f3842dda679a25841b9f266 Mon Sep 17 00:00:00 2001 From: loeken Date: Sat, 1 Apr 2023 20:36:08 +0200 Subject: [PATCH 08/21] updated version of gptq, linked in links to models used in testing --- .env.example | 2 +- docs/README_docker.md | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index a2c615f6a9..817805323c 100644 --- a/.env.example +++ b/.env.example @@ -19,7 +19,7 @@ HOST_API_PORT=5000 CONTAINER_API_PORT=5000 # the version used to install GPTQ from, defaults to cuda -GPTQ_VERSION=cuda +GPTQ_VERSION=608f3ba71e40596c75f8864d73506eaf57323c6e # the version used to install text-generation-webui from WEBUI_VERSION=HEAD diff --git a/docs/README_docker.md b/docs/README_docker.md index cac176408b..255f43cf5e 100644 --- a/docs/README_docker.md +++ b/docs/README_docker.md @@ -73,7 +73,10 @@ sudo systemctl restart docker # required by nvidia-container-runtime ## prepare environment & startup ### place models in models folder -download and place the models inside the models folder +download and place the models inside the models folder. 
tested with: + +https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617 +https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105 ### prepare .env file edit .env values to your needs From 4551df7d6768eb7f72e9a8dbb104ae93f05cfd8e Mon Sep 17 00:00:00 2001 From: loeken Date: Sun, 2 Apr 2023 15:08:13 +0200 Subject: [PATCH 09/21] webui version line to not fail if no WEBUI_VERSION provided --- Dockerfile | 2 +- docs/README_docker.md | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 80cd6afec3..2a45a8f11e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,7 +36,7 @@ COPY . /app/ WORKDIR /app ARG WEBUI_VERSION -RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} +RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source" RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt diff --git a/docs/README_docker.md b/docs/README_docker.md index 255f43cf5e..06dec78756 100644 --- a/docs/README_docker.md +++ b/docs/README_docker.md @@ -66,7 +66,9 @@ reboot ``` ### docker & container toolkit ```bash -yay -S docker docker-compose buildkit nvidia-container-runtime +yay -S docker docker-compose buildkit gcc nvidia-docker +sudo usermod -aG docker $USER +newgrp docker sudo systemctl restart docker # required by nvidia-container-runtime ``` From 0ba16a80cf2b1f985f3e32f4bfcf04dba699f090 Mon Sep 17 00:00:00 2001 From: loeken Date: Tue, 4 Apr 2023 02:01:48 +0200 Subject: [PATCH 10/21] replaced devel with runtime for final stage, removed env vars as already defined by cuda images --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2a45a8f11e..74d7c804b9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt ARG 
TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX" RUN python3 setup_cuda.py bdist_wheel -d . -FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 +FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 LABEL maintainer="Your Name " LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI" @@ -44,8 +44,6 @@ COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa RUN --mount=type=cache,target=/root/.cache/pip pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl ENV CLI_ARGS="" -ENV NVIDIA_VISIBLE_DEVICES=all -ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility RUN --mount=type=cache,target=/root/.cache/pip cd extensions/api && pip3 install -r requirements.txt RUN --mount=type=cache,target=/root/.cache/pip cd extensions/elevenlabs_tts && pip3 install -r requirements.txt From df48ddbdb9e048af4833b5758187f2c6d033b26b Mon Sep 17 00:00:00 2001 From: loeken Date: Tue, 4 Apr 2023 10:55:41 +0200 Subject: [PATCH 11/21] added comment to point users with old cards to using an older GPTQ version --- .env.example | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.example b/.env.example index 817805323c..c596b18508 100644 --- a/.env.example +++ b/.env.example @@ -20,6 +20,7 @@ CONTAINER_API_PORT=5000 # the version used to install GPTQ from, defaults to cuda GPTQ_VERSION=608f3ba71e40596c75f8864d73506eaf57323c6e +# older cards such as the k80 might have more luck with this GTPQ_VERSION=841feedde876785bc8022ca48fd9c3ff626587e2 https://github.com/qwopqwop200/GPTQ-for-LLaMa/issues/88#issuecomment-1485897212 # the version used to install text-generation-webui from WEBUI_VERSION=HEAD From 50ba3200c1409a1d9e1bc1b2a7a514aa6422a7cd Mon Sep 17 00:00:00 2001 From: loeken Date: Tue, 4 Apr 2023 13:41:18 +0200 Subject: [PATCH 12/21] added venv to Dockerfile to avoid error failing for transfomers, related to https://github.com/huggingface/transformers/pull/22539 --- Dockerfile | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git 
a/Dockerfile b/Dockerfile index 74d7c804b9..e4d3e9b393 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,11 @@ -# GPTQ-for-LLaMa and Text Generation WebUI Dockerfile FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder RUN apt-get update && \ apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \ rm -rf /var/lib/apt/lists/* -RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch torchvision torchaudio +RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv + RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build WORKDIR /build @@ -13,12 +13,16 @@ WORKDIR /build ARG GPTQ_VERSION RUN git checkout ${GPTQ_VERSION} -RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt +RUN virtualenv /build/venv +RUN . /build/venv/bin/activate && \ + pip3 install torch torchvision torchaudio && \ + pip3 install -r requirements.txt # https://developer.nvidia.com/cuda-gpus # for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5" ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX" -RUN python3 setup_cuda.py bdist_wheel -d . +RUN . /build/venv/bin/activate && \ + python3 setup_cuda.py bdist_wheel -d . FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 @@ -29,7 +33,7 @@ RUN apt-get update && \ apt-get install --no-install-recommends -y git python3 python3-pip && \ rm -rf /var/lib/apt/lists/* -RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch torchvision torchaudio +RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv COPY . /app/ @@ -38,17 +42,21 @@ WORKDIR /app ARG WEBUI_VERSION RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source" -RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r requirements.txt +RUN virtualenv /app/venv +RUN . 
/app/venv/bin/activate && \ + pip3 install torch torchvision torchaudio && \ + pip3 install -r requirements.txt COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa -RUN --mount=type=cache,target=/root/.cache/pip pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl +RUN . /app/venv/bin/activate && \ + pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl ENV CLI_ARGS="" -RUN --mount=type=cache,target=/root/.cache/pip cd extensions/api && pip3 install -r requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip cd extensions/elevenlabs_tts && pip3 install -r requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip cd extensions/google_translate && pip3 install -r requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip cd extensions/silero_tts && pip3 install -r requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip cd extensions/whisper_stt && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/api && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/elevenlabs_tts && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/google_translate && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/silero_tts && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/whisper_stt && pip3 install -r requirements.txt -CMD python3 server.py ${CLI_ARGS} +CMD . 
/app/venv/bin/activate && python3 server.py ${CLI_ARGS} From 9571be8f8f8baadf334164f23c897260dcc6de2d Mon Sep 17 00:00:00 2001 From: loeken Date: Tue, 4 Apr 2023 14:18:02 +0200 Subject: [PATCH 13/21] Update Dockerfile Co-authored-by: Xuehai Pan --- Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index e4d3e9b393..a93f153661 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,8 +4,6 @@ RUN apt-get update && \ apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \ rm -rf /var/lib/apt/lists/* -RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv - RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build WORKDIR /build From e8ed319bde5769f54d2f6f46658ee86c187da176 Mon Sep 17 00:00:00 2001 From: loeken Date: Tue, 4 Apr 2023 14:18:10 +0200 Subject: [PATCH 14/21] Update Dockerfile Co-authored-by: Xuehai Pan --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a93f153661..88ff186121 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ WORKDIR /build ARG GPTQ_VERSION RUN git checkout ${GPTQ_VERSION} -RUN virtualenv /build/venv +RUN python3 -m venv /build/venv RUN . 
/build/venv/bin/activate && \ pip3 install torch torchvision torchaudio && \ pip3 install -r requirements.txt From 7d0286b30d876c2fc157a8f7b3c611b0c44e5ef3 Mon Sep 17 00:00:00 2001 From: loeken Date: Tue, 4 Apr 2023 14:18:17 +0200 Subject: [PATCH 15/21] Update Dockerfile Co-authored-by: Xuehai Pan --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 88ff186121..f5b3a78a3a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder RUN apt-get update && \ - apt-get install --no-install-recommends -y git build-essential python3-dev python3-pip && \ + apt-get install --no-install-recommends -y git build-essential python3-dev python3-venv && \ rm -rf /var/lib/apt/lists/* RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build From de45b5c8bd1619d411c5d3074a5303f5c28fc2eb Mon Sep 17 00:00:00 2001 From: loeken Date: Tue, 4 Apr 2023 14:20:39 +0200 Subject: [PATCH 16/21] updating pip prior to running pip installs --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index f5b3a78a3a..854429b573 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,6 +13,7 @@ RUN git checkout ${GPTQ_VERSION} RUN python3 -m venv /build/venv RUN . /build/venv/bin/activate && \ + pip3 install --upgrade pip setuptools && \ pip3 install torch torchvision torchaudio && \ pip3 install -r requirements.txt @@ -42,6 +43,7 @@ RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Usi RUN virtualenv /app/venv RUN . 
/app/venv/bin/activate && \ + pip3 install --upgrade pip setuptools && \ pip3 install torch torchvision torchaudio && \ pip3 install -r requirements.txt From 9a5e27889bb669a381718a54338d485d54014bf9 Mon Sep 17 00:00:00 2001 From: loeken Date: Tue, 4 Apr 2023 18:45:38 +0200 Subject: [PATCH 17/21] tested 8bit, added examples for 8bit model download/cli args to start --- .env.example | 3 +++ Dockerfile | 12 +++++++----- docs/README_docker.md | 6 +++++- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index c596b18508..db54503563 100644 --- a/.env.example +++ b/.env.example @@ -5,8 +5,11 @@ TORCH_CUDA_ARCH_LIST=7.5 # these commands worked for me with roughly 4.5GB of vram CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices + +# the following examples have been tested with the files linked in docs/README_docker.md: # example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25 # example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share +# example running 7b with 8bit groupsize : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices # the port the webui binds to on the host HOST_PORT=7860 diff --git a/Dockerfile b/Dockerfile index 854429b573..334f5a1ed1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,10 +53,12 @@ RUN . /app/venv/bin/activate && \ ENV CLI_ARGS="" -RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/api && pip3 install -r requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/elevenlabs_tts && pip3 install -r requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/google_translate && pip3 install -r requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip . 
/app/venv/bin/activate cd extensions/silero_tts && pip3 install -r requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate cd extensions/whisper_stt && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt + +RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS} diff --git a/docs/README_docker.md b/docs/README_docker.md index 06dec78756..bdd00748a2 100644 --- a/docs/README_docker.md +++ b/docs/README_docker.md @@ -50,7 +50,7 @@ sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null sudo apt update -sudo apt install nvidia-docker2 -y +sudo apt install nvidia-docker2 nvidia-container-runtime -y sudo systemctl restart docker ``` @@ -77,9 +77,13 @@ sudo systemctl restart docker # required by nvidia-container-runtime ### place models in models folder download and place the models inside the models folder. 
tested with: +4bit https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617 https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105 +8bit: +https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789 + ### prepare .env file edit .env values to your needs ```bash From 7d9728b719696b0ceba04eff570af15ac2b9dfaf Mon Sep 17 00:00:00 2001 From: loeken Date: Thu, 6 Apr 2023 20:58:24 +0200 Subject: [PATCH 18/21] added .env and dockerfile to .dockerignore --- .dockerignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.dockerignore b/.dockerignore index 033948efda..30d7c69f08 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,2 +1,4 @@ /loras /models +.env +Dockerfile From 4806703043f911a2b47f3dca26a625b616e1bbe1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 6 Apr 2023 21:43:46 -0300 Subject: [PATCH 19/21] Switch to oobabooga/GPTQ-for-LLaMa --- .env.example | 4 ---- Dockerfile | 5 +---- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.env.example b/.env.example index db54503563..d20300b776 100644 --- a/.env.example +++ b/.env.example @@ -21,9 +21,5 @@ HOST_API_PORT=5000 # the port the api binds to inside the container CONTAINER_API_PORT=5000 -# the version used to install GPTQ from, defaults to cuda -GPTQ_VERSION=608f3ba71e40596c75f8864d73506eaf57323c6e -# older cards such as the k80 might have more luck with this GTPQ_VERSION=841feedde876785bc8022ca48fd9c3ff626587e2 https://github.com/qwopqwop200/GPTQ-for-LLaMa/issues/88#issuecomment-1485897212 - # the version used to install text-generation-webui from WEBUI_VERSION=HEAD diff --git a/Dockerfile b/Dockerfile index 334f5a1ed1..5aaf2db6b1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,13 +4,10 @@ RUN apt-get update && \ apt-get install --no-install-recommends -y git build-essential python3-dev python3-venv && \ rm -rf /var/lib/apt/lists/* -RUN git clone 
https://github.com/qwopqwop200/GPTQ-for-LLaMa /build +RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build WORKDIR /build -ARG GPTQ_VERSION -RUN git checkout ${GPTQ_VERSION} - RUN python3 -m venv /build/venv RUN . /build/venv/bin/activate && \ pip3 install --upgrade pip setuptools && \ From be7b3b7b6ccde4ccb42c0754569f1507ea9a08d4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 6 Apr 2023 21:52:38 -0300 Subject: [PATCH 20/21] Add vim to the requirements --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5aaf2db6b1..8a063539bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder RUN apt-get update && \ - apt-get install --no-install-recommends -y git build-essential python3-dev python3-venv && \ + apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \ rm -rf /var/lib/apt/lists/* RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build From 6b479cd8513fd3de33233216abbbaaaaeaaf0a1c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 6 Apr 2023 22:37:55 -0300 Subject: [PATCH 21/21] Add files to .dockerignore --- .dockerignore | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index 30d7c69f08..fdf0c4ce2b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,10 @@ -/loras -/models .env Dockerfile +/characters +/extensions +/loras +/models +/presets +/prompts +/softprompts +/training