bmaltais · bmaltais · Feb 18, 2024 · Feb 17, 2024 · Feb 17, 2024 · Feb 17, 2024
diff --git a/.dockerignore b/.dockerignore
@@ -5,3 +5,11 @@ bitsandbytes_windows_deprecated/
 dataset/
 __pycache__/
 venv/
+**/.hadolint.yml
+**/*.log
+**/.git
+**/.gitignore
+**/.env
+**/.github
+**/.vscode
+**/*.ps1
diff --git a/.hadolint.yml b/.hadolint.yml
@@ -0,0 +1,6 @@
+ignored:
+  - DL3042 # Avoid use of cache directory with pip. Use `pip install --no-cache-dir <package>`
+  - DL3013 # Pin versions in pip. Instead of `pip install <package>` use `pip install <package>==<version>`
+  - DL3008 # Pin versions in apt get install. Instead of `apt-get install <package>` use `apt-get install <package>=<version>`
+  - DL4006 # Set the SHELL option -o pipefail before RUN with a pipe in it
+  - SC2015 # Note that A && B || C is not if-then-else. C may run when A is true.
diff --git a/Dockerfile b/Dockerfile
@@ -1,54 +1,118 @@
-FROM nvcr.io/nvidia/pytorch:23.04-py3 as base
-ENV DEBIAN_FRONTEND=noninteractive
-ENV TZ=Europe/London
-
-RUN apt update && apt-get install -y software-properties-common
-RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    --mount=type=cache,target=/var/lib/apt,sharing=locked \
-    add-apt-repository ppa:deadsnakes/ppa && \
-    apt update && \
-    apt-get install -y git curl libgl1 libglib2.0-0 libgoogle-perftools-dev \
-    python3.10-dev python3.10-tk python3-html5lib python3-apt python3-pip python3.10-distutils && \
-    rm -rf /var/lib/apt/lists/*
+# syntax=docker/dockerfile:1
+ARG UID=1000
+ARG VERSION=EDGE
+ARG RELEASE=0
 
-# Set python 3.10 and cuda 11.8 as default
-RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 3 && \
-    update-alternatives --set python3 /usr/bin/python3.10 && \
-    update-alternatives --set cuda /usr/local/cuda-11.8
+FROM python:3.10-slim AS build
 
-RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3
+# Key the RUN cache mounts by target architecture so multi-arch builds do not share a cache: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
+ARG TARGETARCH
+ARG TARGETVARIANT
 
 WORKDIR /app
-RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install wheel
 
-# Todo: Install torch 2.1.0 for cu121 support (only available as nightly as of writing)
-## RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --pre torch ninja setuptools --extra-index-url https://download.pytorch.org/whl/nightly/cu121
+# Install under /root/.local
+ENV PIP_USER="true"
+ARG PIP_NO_WARN_SCRIPT_LOCATION=0
+ARG PIP_ROOT_USER_ACTION="ignore"
+
+# Install build dependencies (only /root/.local is copied out of this stage, so no `apt-get upgrade` is needed here)
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends python3-launchpadlib git curl && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
-# Todo: Install xformers nightly for Torch 2.1.0 support
-## RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
+# Install PyTorch and TensorFlow
+# The versions must align and be in sync with the requirements_linux_docker.txt
+# hadolint ignore=SC2102
+RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
+    pip install -U --extra-index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.nvidia.com \
+    torch==2.1.2 torchvision==0.16.2 \
+    xformers==0.0.23.post1 \
+    # Why [and-cuda]: https://github.com/tensorflow/tensorflow/issues/61468#issuecomment-1759462485
+    tensorflow[and-cuda]==2.14.0 \
+    ninja \
+    pip setuptools wheel
 
 # Install requirements
-COPY ./requirements.txt ./requirements_linux_docker.txt ./
-COPY ./setup/docker_setup.py ./setup.py
-RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -r ./requirements_linux_docker.txt
-RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -r ./requirements.txt
+RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
+    --mount=source=requirements_linux_docker.txt,target=requirements_linux_docker.txt \
+    --mount=source=requirements.txt,target=requirements.txt \
+    --mount=source=setup/docker_setup.py,target=setup.py \
+    pip install -r requirements_linux_docker.txt -r requirements.txt
+
+# Replace pillow with pillow-simd (Only for x86)
+ARG TARGETPLATFORM
+RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
+    apt-get update && apt-get install -y --no-install-recommends zlib1g-dev libjpeg62-turbo-dev build-essential && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip uninstall -y pillow && \
+    CC="cc -mavx2" pip install -U --force-reinstall pillow-simd; \
+    fi
+
+FROM python:3.10-slim AS final
 
-# Replace pillow with pillow-simd
-RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip uninstall -y pillow && \
-    CC="cc -mavx2" python3 -m pip install -U --force-reinstall pillow-simd
+ARG UID
+ARG VERSION
+ARG RELEASE
+
+LABEL name="bmaltais/kohya_ss" \
+    vendor="bmaltais" \
+    maintainer="bmaltais" \
+    # Dockerfile source repository
+    url="https://github.com/bmaltais/kohya_ss" \
+    version=${VERSION} \
+    # This should be a number, incremented with each change
+    release=${RELEASE} \
+    io.k8s.display-name="kohya_ss" \
+    summary="Kohya's GUI: This repository provides a Gradio GUI for Kohya's Stable Diffusion trainers(https://github.com/kohya-ss/sd-scripts)." \
+    description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss."
+
+# Install runtime dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libjpeg62 libtcl8.6 libtk8.6 libgoogle-perftools-dev dumb-init && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 # Fix missing libnvinfer7
-USER root
 RUN ln -s /usr/lib/x86_64-linux-gnu/libnvinfer.so /usr/lib/x86_64-linux-gnu/libnvinfer.so.7 && \
     ln -s /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7
 
-RUN useradd -m -s /bin/bash appuser && \
-    chown -R appuser: /app
-USER appuser
-COPY --chown=appuser . .
+# Create user
+RUN groupadd -g $UID $UID && \
+    useradd -l -u $UID -g $UID -m -s /bin/sh -N $UID
 
-STOPSIGNAL SIGINT
+# Create directories with correct permissions
+RUN install -d -m 775 -o $UID -g 0 /dataset && \
+    install -d -m 775 -o $UID -g 0 /licenses && \
+    install -d -m 775 -o $UID -g 0 /app
+
+# Copy dist and support arbitrary user ids (OpenShift best practice)
+COPY --chown=$UID:0 --chmod=775 \
+    --from=build /root/.local /home/$UID/.local
+
+WORKDIR /app
+COPY --chown=$UID:0 --chmod=775 . .
+
+# Copy licenses (OpenShift Policy)
+COPY --chmod=775 LICENSE.md /licenses/LICENSE.md
+
+ENV PATH="/home/$UID/.local/bin:$PATH"
+ENV PYTHONPATH="/home/$UID/.local/lib/python3.10/site-packages"
 ENV LD_PRELOAD=libtcmalloc.so
 ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
-ENV PATH="$PATH:/home/appuser/.local/bin"
-CMD python3 "./kohya_gui.py" ${CLI_ARGS} --listen 0.0.0.0 --server_port 7860
+
+VOLUME [ "/dataset" ]
+
+# 7860: Kohya GUI
+# 6006: TensorBoard
+EXPOSE 7860 6006
+
+USER $UID
+
+STOPSIGNAL SIGINT
+
+# Use dumb-init as PID 1 to handle signals properly
+ENTRYPOINT ["dumb-init", "--"]
+CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860"]
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -3,10 +3,12 @@ services:
   kohya-ss-gui:
     container_name: kohya-ss-gui
     image: kohya-ss-gui:latest
+    user: "1000:0" # quoted so YAML 1.1 parsers keep it a string instead of a base-60 integer
     build:
       context: .
+      args:
+        - UID=1000
     ports:
-      - 127.0.0.1:3000:3000
       - 7860:7860
       - 6006:6006
     tty: true
@@ -16,15 +18,15 @@ services:
       SAFETENSORS_FAST_GPU: 1
       DISPLAY: $DISPLAY
     tmpfs:
-      - /tmp      
+      - /tmp
     volumes:
-      - ./dataset:/dataset
-      - ./.cache/user:/home/appuser/.cache
-      - ./.cache/triton:/home/appuser/.triton    
-      - ./.cache/config:/app/appuser/.config
-      - ./.cache/nv:/home/appuser/.nv 
-      - ./.cache/keras:/home/appuser/.keras      
       - /tmp/.X11-unix:/tmp/.X11-unix
+      - ./dataset:/dataset
+      - ./.cache/user:/home/1000/.cache
+      - ./.cache/triton:/home/1000/.triton
+      - ./.cache/nv:/home/1000/.nv
+      - ./.cache/keras:/home/1000/.keras
+      - ./.cache/config:/home/1000/.config
     deploy:
       resources:
         reservations:

diff --git a/requirements_linux_docker.txt b/requirements_linux_docker.txt
@@ -1,5 +1,5 @@
-xformers==0.0.20
+xformers>=0.0.20
 bitsandbytes==0.41.1
-accelerate==0.19.0
+accelerate==0.25.0
 tensorboard==2.14.1
-tensorflow==2.14.0
+tensorflow==2.14.0
diff --git a/setup/docker_setup.py b/setup/docker_setup.py
@@ -1,3 +1,3 @@
 from setuptools import setup, find_packages
 
-setup(name="library", version="1.0.3", packages=find_packages())
+setup()