From dddc103fdbf1a8a72a9560aec73114cb1fbb9423 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Thu, 16 Feb 2023 14:50:24 -0500 Subject: [PATCH] cudaFlags: rewrite to capture all architectures and fix #215436 --- .../science/math/mxnet/default.nix | 2 +- .../compilers/cudatoolkit/flags.nix | 179 ++++++++++++------ .../compilers/cudatoolkit/gpus.nix | 110 +++++++++++ .../libraries/science/math/magma/default.nix | 2 +- .../python-modules/jaxlib/default.nix | 2 +- .../python-modules/tensorflow/default.nix | 2 +- .../python-modules/torch/default.nix | 47 ++++- .../python-modules/torchvision/default.nix | 6 +- 8 files changed, 281 insertions(+), 69 deletions(-) create mode 100644 pkgs/development/compilers/cudatoolkit/gpus.nix diff --git a/pkgs/applications/science/math/mxnet/default.nix b/pkgs/applications/science/math/mxnet/default.nix index a5c0ebc85b13b..c1a329c608864 100644 --- a/pkgs/applications/science/math/mxnet/default.nix +++ b/pkgs/applications/science/math/mxnet/default.nix @@ -50,7 +50,7 @@ stdenv.mkDerivation rec { "-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743 "-DCUDA_ARCH_NAME=All" "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc" - "-DMXNET_CUDA_ARCH=${cudaFlags.cudaCapabilitiesSemiColonString}" + "-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}" ] else [ "-DUSE_CUDA=OFF" ]) ++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF"; diff --git a/pkgs/development/compilers/cudatoolkit/flags.nix b/pkgs/development/compilers/cudatoolkit/flags.nix index 24f653ded29be..8e1e54723b2e4 100644 --- a/pkgs/development/compilers/cudatoolkit/flags.nix +++ b/pkgs/development/compilers/cudatoolkit/flags.nix @@ -2,7 +2,18 @@ , lib , cudatoolkit }: + +# Type aliases +# Gpu = { +# archName: String, # e.g., "Hopper" +# computeCapability: String, # e.g., "9.0" +# minCudaVersion: String, # e.g., "11.8" +# maxCudaVersion: String, # e.g., "12.0" +# } + let + inherit (lib) attrsets lists strings trivial versions; + 
cudaVersion = cudatoolkit.version; # Flags are determined based on your CUDA toolkit by default. You may benefit # from improved performance, reduced file size, or greater hardware suppport by @@ -13,66 +24,116 @@ let # # Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351 - defaultCudaCapabilities = rec { - cuda9 = [ - "3.0" - "3.5" - "5.0" - "5.2" - "6.0" - "6.1" - "7.0" - ]; - - cuda10 = cuda9 ++ [ - "7.5" - ]; - - cuda11 = [ - "3.5" - "5.0" - "5.2" - "6.0" - "6.1" - "7.0" - "7.5" - "8.0" - "8.6" - ]; - - }; - - cudaMicroarchitectureNames = { - "3" = "Kepler"; - "5" = "Maxwell"; - "6" = "Pascal"; - "7" = "Volta"; - "8" = "Ampere"; - "9" = "Hopper"; - }; - - defaultCudaArchList = defaultCudaCapabilities."cuda${lib.versions.major cudatoolkit.version}"; - cudaRealCapabilities = config.cudaCapabilities or defaultCudaArchList; - capabilitiesForward = "${lib.last cudaRealCapabilities}+PTX"; - - dropDot = ver: builtins.replaceStrings ["."] [""] ver; - - archMapper = feat: map (ver: "${feat}_${dropDot ver}"); - gencodeMapper = feat: map (ver: "-gencode=arch=compute_${dropDot ver},code=${feat}_${dropDot ver}"); - cudaRealArchs = archMapper "sm" cudaRealCapabilities; - cudaPTXArchs = archMapper "compute" cudaRealCapabilities; - cudaArchs = cudaRealArchs ++ [ (lib.last cudaPTXArchs) ]; - - cudaArchNames = lib.unique (map (v: cudaMicroarchitectureNames.${lib.versions.major v}) cudaRealCapabilities); - cudaCapabilities = cudaRealCapabilities ++ lib.optional (config.cudaForwardCompat or true) capabilitiesForward; - cudaGencode = gencodeMapper "sm" cudaRealCapabilities ++ lib.optionals (config.cudaForwardCompat or true) (gencodeMapper "compute" [ (lib.last cudaPTXArchs) ]); - - cudaCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaCapabilities; - cudaCapabilitiesSemiColonString = lib.strings.concatStringsSep ";" cudaCapabilities; - cudaRealCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaRealCapabilities; + # 
gpus :: List Gpu gpus = builtins.import ./gpus.nix; + + # isSupported :: Gpu -> Bool + isSupported = gpu: + let + inherit (gpu) minCudaVersion maxCudaVersion; + lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion; + upperBoundSatisfied = !(strings.versionOlder maxCudaVersion cudaVersion); + in + lowerBoundSatisfied && upperBoundSatisfied; + + # supportedGpus :: List Gpu + # GPUs which are supported by the provided CUDA version. + supportedGpus = builtins.filter isSupported gpus; + + # cudaArchNameToVersions :: AttrSet String (List String) + # Maps the name of a GPU architecture to different versions of that architecture. + # For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ]. + cudaArchNameToVersions = + lists.groupBy' + (versions: gpu: versions ++ [ gpu.computeCapability ]) + [ ] + (gpu: gpu.archName) + supportedGpus; + + # cudaArchNames :: List String + # NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here; + # otherwise, we'll get the names sorted in alphabetical order. The Nix list we read them + # from is already sorted, so we'll preserve that order here. + cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus); + + # cudaComputeCapabilityToName :: AttrSet String String + # Maps the version of a GPU architecture to the name of that architecture. + # For example, "8.0" maps to "Ampere". + cudaComputeCapabilityToName = builtins.listToAttrs ( + lists.map + (gpu: { + name = gpu.computeCapability; + value = gpu.archName; + }) + supportedGpus + ); + + # cudaComputeCapabilities :: List String + # NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here; + # otherwise, we'll get the versions sorted in alphabetical order. The Nix list we read them + # from is already sorted, so we'll preserve that order here. + # Use the user-provided list of CUDA capabilities if it's provided. 
+ cudaComputeCapabilities = config.cudaCapabilities + or (lists.map (gpu: gpu.computeCapability) supportedGpus); + + # cudaForwardComputeCapability :: String + cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX"; + + # cudaComputeCapabilitiesAndForward :: List String + # The list of supported CUDA architectures, including the forward compatibility architecture. + # If forward compatibility is disabled, this will be the same as cudaComputeCapabilities. + cudaComputeCapabilitiesAndForward = cudaComputeCapabilities + ++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability; + + # dropDot :: String -> String + dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver; + + # archMapper :: String -> List String -> List String + # Maps a feature across a list of architecture versions to produce a list of architectures. + # For example, "sm" and [ "8.0" "8.6" "8.7" ] produces [ "sm_80" "sm_86" "sm_87" ]. + archMapper = feat: lists.map (computeCapability: "${feat}_${dropDot computeCapability}"); + + # gencodeMapper :: String -> List String -> List String + # Maps a feature across a list of architecture versions to produce a list of gencode arguments. + # For example, "sm" and [ "8.0" "8.6" "8.7" ] produces [ "-gencode=arch=compute_80,code=sm_80" + # "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_87,code=sm_87" ]. + gencodeMapper = feat: lists.map ( + computeCapability: + "-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}" + ); + + # cudaRealArches :: List String + # The real architectures are physical architectures supported by the CUDA version. + # For example, "sm_80". + cudaRealArches = archMapper "sm" cudaComputeCapabilities; + + # cudaVirtualArches :: List String + # The virtual architectures are typically used for forward compatibility, when trying to support + # an architecture newer than the CUDA version allows. + # For example, "compute_80". 
+ cudaVirtualArches = archMapper "compute" cudaComputeCapabilities; + + # cudaArches :: List String + # By default, build for all supported architectures and forward compatibility via a virtual + # architecture for the newest supported architecture. + cudaArches = cudaRealArches ++ + lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches); + + # cudaGencode :: List String + # A list of CUDA gencode arguments to pass to NVCC. + cudaGencode = + let + base = gencodeMapper "sm" cudaComputeCapabilities; + forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ]; + in + base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat; in { - inherit cudaArchs cudaArchNames cudaCapabilities cudaCapabilitiesCommaString cudaCapabilitiesSemiColonString - cudaRealCapabilities cudaRealCapabilitiesCommaString cudaGencode cudaRealArchs cudaPTXArchs; + inherit + cudaArchNames + cudaArchNameToVersions cudaComputeCapabilityToName + cudaRealArches cudaVirtualArches cudaArches + cudaGencode; + cudaCapabilities = cudaComputeCapabilitiesAndForward; } diff --git a/pkgs/development/compilers/cudatoolkit/gpus.nix b/pkgs/development/compilers/cudatoolkit/gpus.nix new file mode 100644 index 0000000000000..e938e91297478 --- /dev/null +++ b/pkgs/development/compilers/cudatoolkit/gpus.nix @@ -0,0 +1,110 @@ +[ + { + archName = "Kepler"; + computeCapability = "3.0"; + minCudaVersion = "10.0"; + maxCudaVersion = "10.2"; + } + { + archName = "Kepler"; + computeCapability = "3.2"; + minCudaVersion = "10.0"; + maxCudaVersion = "10.2"; + } + { + archName = "Kepler"; + computeCapability = "3.5"; + minCudaVersion = "10.0"; + maxCudaVersion = "11.8"; + } + { + archName = "Kepler"; + computeCapability = "3.7"; + minCudaVersion = "10.0"; + maxCudaVersion = "11.8"; + } + { + archName = "Maxwell"; + computeCapability = "5.0"; + minCudaVersion = "10.0"; + maxCudaVersion = "12.0"; + } + { + archName = "Maxwell"; + computeCapability = "5.2"; + 
minCudaVersion = "10.0"; + maxCudaVersion = "12.0"; + } + { + archName = "Maxwell"; + computeCapability = "5.3"; + minCudaVersion = "10.0"; + maxCudaVersion = "12.0"; + } + { + archName = "Pascal"; + computeCapability = "6.0"; + minCudaVersion = "10.0"; + maxCudaVersion = "12.0"; + } + { + archName = "Pascal"; + computeCapability = "6.1"; + minCudaVersion = "10.0"; + maxCudaVersion = "12.0"; + } + { + archName = "Pascal"; + computeCapability = "6.2"; + minCudaVersion = "10.0"; + maxCudaVersion = "12.0"; + } + { + archName = "Volta"; + computeCapability = "7.0"; + minCudaVersion = "10.0"; + maxCudaVersion = "12.0"; + } + { + archName = "Volta"; + computeCapability = "7.2"; + minCudaVersion = "10.0"; + maxCudaVersion = "12.0"; + } + { + archName = "Turing"; + computeCapability = "7.5"; + minCudaVersion = "10.0"; + maxCudaVersion = "12.0"; + } + { + archName = "Ampere"; + computeCapability = "8.0"; + minCudaVersion = "11.2"; + maxCudaVersion = "12.0"; + } + { + archName = "Ampere"; + computeCapability = "8.6"; + minCudaVersion = "11.2"; + maxCudaVersion = "12.0"; + } + { + archName = "Ampere"; + computeCapability = "8.7"; + minCudaVersion = "11.5"; + maxCudaVersion = "12.0"; + } + { + archName = "Ada"; + computeCapability = "8.9"; + minCudaVersion = "11.8"; + maxCudaVersion = "12.0"; + } + { + archName = "Hopper"; + computeCapability = "9.0"; + minCudaVersion = "11.8"; + maxCudaVersion = "12.0"; + } +] diff --git a/pkgs/development/libraries/science/math/magma/default.nix b/pkgs/development/libraries/science/math/magma/default.nix index b7223690f4354..f70cbbcff5d82 100644 --- a/pkgs/development/libraries/science/math/magma/default.nix +++ b/pkgs/development/libraries/science/math/magma/default.nix @@ -52,7 +52,7 @@ in stdenv.mkDerivation (finalAttrs: { "-DCMAKE_C_COMPILER=${cudatoolkit.cc}/bin/gcc" "-DCMAKE_CXX_COMPILER=${cudatoolkit.cc}/bin/g++" "-DMAGMA_ENABLE_CUDA=ON" - "-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArchs}" + 
"-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArches}" ] ++ lib.optionals useROCM [ "-DCMAKE_C_COMPILER=${hip}/bin/hipcc" "-DCMAKE_CXX_COMPILER=${hip}/bin/hipcc" diff --git a/pkgs/development/python-modules/jaxlib/default.nix b/pkgs/development/python-modules/jaxlib/default.nix index baa9a0dc1abfc..2c13defe43838 100644 --- a/pkgs/development/python-modules/jaxlib/default.nix +++ b/pkgs/development/python-modules/jaxlib/default.nix @@ -164,7 +164,7 @@ let build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}" build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}" build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}" - build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${cudaFlags.cudaRealCapabilitiesCommaString}" + build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}" '' + '' CFG ''; diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix index d41526552f4b4..f7d920c372217 100644 --- a/pkgs/development/python-modules/tensorflow/default.nix +++ b/pkgs/development/python-modules/tensorflow/default.nix @@ -301,7 +301,7 @@ let TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}"; GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin"; GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc"; - TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArchs; + TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArches; postPatch = '' # bazel 3.3 should work just as well as bazel 3.1 diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index 8fe9e1fed629e..86889be4ee168 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ 
b/pkgs/development/python-modules/torch/default.nix @@ -41,6 +41,7 @@ }: let + inherit (lib) lists strings trivial; inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl; in @@ -54,6 +55,45 @@ assert !cudaSupport || magma.cudatoolkit == cudatoolkit; let setBool = v: if v then "1" else "0"; + + # https://github.com/pytorch/pytorch/blob/v1.13.1/torch/utils/cpp_extension.py#L1751 + supportedTorchCudaCapabilities = + let + real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6"]; + ptx = lists.map (x: "${x}+PTX") real; + in + real ++ ptx; + + # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements + # of the first list *from* the second list. That means: + # lists.subtractLists a b = b - a + + # For CUDA + supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities; + unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities; + + # Use trivial.warnIf to print a warning if any unsupported GPU targets are specified. + gpuArchWarner = supported: unsupported: + trivial.throwIf (supported == [ ]) + ( + "No supported GPU targets specified. Requested GPU targets: " + + strings.concatStringsSep ", " unsupported + ) + supported; + + # Create the gpuTargetString. + gpuTargetString = strings.concatStringsSep ";" ( + if gpuTargets != [ ] then + # If gpuTargets is specified, it always takes priority. 
+ gpuTargets + else if cudaSupport then + gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities + else if rocmSupport then + hip.gpuTargets + else + throw "No GPU targets specified" + ); + cudatoolkit_joined = symlinkJoin { name = "${cudatoolkit.name}-unsplit"; # nccl is here purely for semantic grouping it could be moved to nativeBuildInputs @@ -146,14 +186,14 @@ in buildPythonPackage rec { ''; preConfigure = lib.optionalString cudaSupport '' - export TORCH_CUDA_ARCH_LIST="${cudaFlags.cudaCapabilitiesSemiColonString}" + export TORCH_CUDA_ARCH_LIST="${gpuTargetString}" export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++ '' + lib.optionalString (cudaSupport && cudnn != null) '' export CUDNN_INCLUDE_DIR=${cudnn}/include '' + lib.optionalString rocmSupport '' export ROCM_PATH=${rocmtoolkit_joined} export ROCM_SOURCE_DIR=${rocmtoolkit_joined} - export PYTORCH_ROCM_ARCH="${lib.strings.concatStringsSep ";" (if gpuTargets == [ ] then hip.gpuTargets else gpuTargets)}" + export PYTORCH_ROCM_ARCH="${gpuTargetString}" export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas" python tools/amd_build/build_amd.py ''; @@ -320,7 +360,8 @@ in buildPythonPackage rec { requiredSystemFeatures = [ "big-parallel" ]; passthru = { - inherit cudaSupport cudaPackages; + inherit cudaSupport cudaPackages gpuTargetString; + cudaCapabilities = supportedCudaCapabilities; # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability. 
blasProvider = blas.provider; }; diff --git a/pkgs/development/python-modules/torchvision/default.nix b/pkgs/development/python-modules/torchvision/default.nix index de8852035c909..d36beb6575e09 100644 --- a/pkgs/development/python-modules/torchvision/default.nix +++ b/pkgs/development/python-modules/torchvision/default.nix @@ -15,13 +15,13 @@ }: let - inherit (torch.cudaPackages) cudatoolkit cudaFlags cudnn; + inherit (torch) gpuTargetString; + inherit (torch.cudaPackages) cudatoolkit cudnn; cudatoolkit_joined = symlinkJoin { name = "${cudatoolkit.name}-unsplit"; paths = [ cudatoolkit.out cudatoolkit.lib ]; }; - cudaArchStr = lib.optionalString cudaSupport lib.strings.concatStringsSep ";" torch.cudaArchList; in buildPythonPackage rec { pname = "torchvision"; version = "0.14.1"; @@ -45,7 +45,7 @@ in buildPythonPackage rec { propagatedBuildInputs = [ numpy pillow torch scipy ]; preBuild = lib.optionalString cudaSupport '' - export TORCH_CUDA_ARCH_LIST="${cudaFlags.cudaCapabilitiesSemiColonString}" + export TORCH_CUDA_ARCH_LIST="${gpuTargetString}" export FORCE_CUDA=1 '';