diff --git a/.github/workflows/build-conda-images.yml b/.github/workflows/build-conda-images.yml
index 43626533e..aed535473 100644
--- a/.github/workflows/build-conda-images.yml
+++ b/.github/workflows/build-conda-images.yml
@@ -26,7 +26,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        cuda_version: ["11.6", "11.7", "11.8", "cpu"]
+        cuda_version: ["11.7", "11.8", "12.1", "cpu"]
     env:
       CUDA_VERSION: ${{ matrix.cuda_version }}
     steps:
diff --git a/common/install_cuda.sh b/common/install_cuda.sh
index 359df5b3b..27d4b0c1c 100644
--- a/common/install_cuda.sh
+++ b/common/install_cuda.sh
@@ -85,6 +85,39 @@ function install_118 {
     ldconfig
 }
 
+# Install the CUDA 12.1.0 toolkit and point the /usr/local/cuda symlink at /usr/local/cuda-12.1,
+# then copy the cuDNN 8.8.1.3 and NCCL 2.17.1 headers and libraries into that tree.
+function install_121 {
+    echo "Installing CUDA 12.1 and cuDNN 8.8 and NCCL 2.17.1"
+    rm -rf /usr/local/cuda-12.1 /usr/local/cuda
+    # install CUDA 12.1.0 in the same container
+    wget -q https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run
+    chmod +x cuda_12.1.0_530.30.02_linux.run
+    ./cuda_12.1.0_530.30.02_linux.run --toolkit --silent
+    rm -f cuda_12.1.0_530.30.02_linux.run
+    rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda
+
+    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
+    mkdir tmp_cudnn && cd tmp_cudnn
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz
+    tar xf cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz
+    cp -a cudnn-linux-x86_64-8.8.1.3_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-8.8.1.3_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_cudnn
+    ldconfig
+
+    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
+    mkdir tmp_nccl && cd tmp_nccl
+    wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.17.1/nccl_2.17.1-1+cuda12.1_x86_64.txz
+    tar xf nccl_2.17.1-1+cuda12.1_x86_64.txz
+    cp -a nccl_2.17.1-1+cuda12.1_x86_64/include/* /usr/local/cuda/include/
+    cp -a nccl_2.17.1-1+cuda12.1_x86_64/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_nccl
+    ldconfig
+}
+
 function prune_116 {
     echo "Pruning CUDA 11.6 and CuDNN"
     #####################################################################################
@@ -178,6 +211,39 @@ function prune_118 {
     rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/
 }
 
+# Shrink the CUDA 12.1 install: nvprune the static libraries down to the listed GPU
+# architectures and delete the bundled visual tools (libnvvp, Nsight).
+function prune_121 {
+    echo "Pruning CUDA 12.1"
+    #####################################################################################
+    # CUDA 12.1 prune static libs
+    #####################################################################################
+    export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune"
+    export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64"
+
+    export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+    export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+
+    if [[ -n "$OVERRIDE_GENCODE" ]]; then
+        export GENCODE=$OVERRIDE_GENCODE
+    fi
+
+    # all CUDA libs except CuDNN and CuBLAS
+    ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
+      | xargs -I {} bash -c \
+                "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
+
+    # prune the CuBLAS static libs with GENCODE_CUDNN (same list as GENCODE plus sm_61)
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
+
+    #####################################################################################
+    # CUDA 12.1 prune visual tools
+    #####################################################################################
+    export CUDA_BASE="/usr/local/cuda-12.1/"
+    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2023.1.0 $CUDA_BASE/nsight-systems-2023.1.2/
+}
+
 # idiomatic parameter and option handling in sh
 while test $# -gt 0
 do
@@ -188,6 +254,8 @@ do
         ;;
     11.8) install_118; prune_118
         ;;
+    12.1) install_121; prune_121
+        ;;
     *) echo "bad argument $1"; exit 1
         ;;
     esac
diff --git a/conda/Dockerfile b/conda/Dockerfile
index c65e1ad99..a58a28511 100644
--- a/conda/Dockerfile
+++ b/conda/Dockerfile
@@ -60,6 +60,10 @@ FROM cuda as cuda11.8
 RUN bash ./install_cuda.sh 11.8
 ENV DESIRED_CUDA=11.8
 
+FROM cuda as cuda12.1
+RUN bash ./install_cuda.sh 12.1
+ENV DESIRED_CUDA=12.1
+
 # Install MNIST test data
 FROM base as mnist
 ADD ./common/install_mnist.sh install_mnist.sh
@@ -69,6 +73,7 @@ FROM base as all_cuda
 COPY --from=cuda11.6 /usr/local/cuda-11.6 /usr/local/cuda-11.6
 COPY --from=cuda11.7 /usr/local/cuda-11.7 /usr/local/cuda-11.7
 COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
+COPY --from=cuda12.1 /usr/local/cuda-12.1 /usr/local/cuda-12.1
 
 FROM ${BASE_TARGET} as final
 # Install LLVM
diff --git a/conda/build_all_docker.sh b/conda/build_all_docker.sh
index 1dc5ffe4f..551098f5b 100755
--- a/conda/build_all_docker.sh
+++ b/conda/build_all_docker.sh
@@ -4,6 +4,6 @@ set -eou pipefail
 
 TOPDIR=$(git rev-parse --show-toplevel)
 
-for CUDA_VERSION in 11.8 11.7 11.6 cpu; do
+for CUDA_VERSION in 12.1 11.8 11.7 cpu; do
   CUDA_VERSION="${CUDA_VERSION}" conda/build_docker.sh
 done