Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cudaFlags: rewrite to capture all architectures and fix NixOS#215436 #217367

Merged
merged 1 commit into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkgs/applications/science/math/mxnet/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
"-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743
"-DCUDA_ARCH_NAME=All"
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
"-DMXNET_CUDA_ARCH=${cudaFlags.cudaCapabilitiesSemiColonString}"
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
] else [ "-DUSE_CUDA=OFF" ])
++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";

Expand Down
179 changes: 120 additions & 59 deletions pkgs/development/compilers/cudatoolkit/flags.nix
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,18 @@
, lib
, cudatoolkit
}:

# Type aliases
# Gpu = {
# archName: String, # e.g., "Hopper"
# computeCapability: String, # e.g., "9.0"
# minCudaVersion: String, # e.g., "11.8"
# maxCudaVersion: String, # e.g., "12.0"
# }

let
inherit (lib) attrsets lists strings trivial versions;
cudaVersion = cudatoolkit.version;

# Flags are determined based on your CUDA toolkit by default. You may benefit
# from improved performance, reduced file size, or greater hardware support by
Expand All @@ -13,66 +24,116 @@ let
#
# Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351

defaultCudaCapabilities = rec {
cuda9 = [
"3.0"
"3.5"
"5.0"
"5.2"
"6.0"
"6.1"
"7.0"
];

cuda10 = cuda9 ++ [
"7.5"
];

cuda11 = [
"3.5"
"5.0"
"5.2"
"6.0"
"6.1"
"7.0"
"7.5"
"8.0"
"8.6"
];

};

cudaMicroarchitectureNames = {
"3" = "Kepler";
"5" = "Maxwell";
"6" = "Pascal";
"7" = "Volta";
"8" = "Ampere";
"9" = "Hopper";
};

defaultCudaArchList = defaultCudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";
cudaRealCapabilities = config.cudaCapabilities or defaultCudaArchList;
capabilitiesForward = "${lib.last cudaRealCapabilities}+PTX";

dropDot = ver: builtins.replaceStrings ["."] [""] ver;

archMapper = feat: map (ver: "${feat}_${dropDot ver}");
gencodeMapper = feat: map (ver: "-gencode=arch=compute_${dropDot ver},code=${feat}_${dropDot ver}");
cudaRealArchs = archMapper "sm" cudaRealCapabilities;
cudaPTXArchs = archMapper "compute" cudaRealCapabilities;
cudaArchs = cudaRealArchs ++ [ (lib.last cudaPTXArchs) ];

cudaArchNames = lib.unique (map (v: cudaMicroarchitectureNames.${lib.versions.major v}) cudaRealCapabilities);
cudaCapabilities = cudaRealCapabilities ++ lib.optional (config.cudaForwardCompat or true) capabilitiesForward;
cudaGencode = gencodeMapper "sm" cudaRealCapabilities ++ lib.optionals (config.cudaForwardCompat or true) (gencodeMapper "compute" [ (lib.last cudaPTXArchs) ]);

cudaCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaCapabilities;
cudaCapabilitiesSemiColonString = lib.strings.concatStringsSep ";" cudaCapabilities;
cudaRealCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaRealCapabilities;
# gpus :: List Gpu
# The table of known GPUs, loaded from the sibling file ./gpus.nix.
gpus = import ./gpus.nix;

# isSupported :: Gpu -> Bool
# Whether the CUDA toolkit in use can target the given GPU, i.e. whether
# minCudaVersion <= cudaVersion <= maxCudaVersion, with both bounds inclusive.
isSupported = gpu:
  let
    inherit (gpu) minCudaVersion maxCudaVersion;
    # The bounds are written as "major.minor", whereas cudaVersion may carry a patch
    # component. Version comparison orders "11.8" before "11.8.0", so comparing the raw
    # version would wrongly reject a GPU whose maxCudaVersion is the toolkit's own
    # major.minor release. Compare on major.minor only.
    cudaMajorMinor = versions.majorMinor cudaVersion;
    lowerBoundSatisfied = strings.versionAtLeast cudaMajorMinor minCudaVersion;
    upperBoundSatisfied = strings.versionAtLeast maxCudaVersion cudaMajorMinor;
  in
  lowerBoundSatisfied && upperBoundSatisfied;

# supportedGpus :: List Gpu
# The entries of `gpus` for which `isSupported` holds, in their original order.
supportedGpus = lists.filter isSupported gpus;

# cudaArchNameToVersions :: AttrSet String (List String)
# For every architecture name among the supported GPUs, the compute capabilities that
# belong to it, in table order.
# For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
cudaArchNameToVersions =
  let
    # Bucket the supported GPUs by architecture name first ...
    gpusByArchName = lists.groupBy (gpu: gpu.archName) supportedGpus;
  in
  # ... then keep only the compute capability of each GPU in a bucket.
  builtins.mapAttrs
    (_archName: lists.map (gpu: gpu.computeCapability))
    gpusByArchName;

# cudaArchNames :: List String
# Names of the architectures being targeted, without duplicates, in the order their
# compute capabilities appear in cudaComputeCapabilities.
# Derived from cudaComputeCapabilities (not directly from supportedGpus) so that a
# user-provided config.cudaCapabilities is honoured, consistent with cudaRealArches,
# cudaArches and cudaGencode. Without user configuration the result is unchanged.
# NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here;
# otherwise, we'll get the names sorted in alphabetical order. The list in ./gpus.nix
# is already sorted, so we preserve that order here.
cudaArchNames = lists.unique (
  lists.map
    (computeCapability:
      cudaComputeCapabilityToName.${computeCapability}
        or (throw "cudaFlags: compute capability ${computeCapability} does not match any GPU supported by CUDA ${cudaVersion}"))
    cudaComputeCapabilities
);

# cudaComputeCapabilityToName :: AttrSet String String
# Lookup table from a compute capability to the name of its architecture, covering the
# supported GPUs only.
# For example, "8.0" maps to "Ampere".
cudaComputeCapabilityToName =
  let
    toEntry = gpu: attrsets.nameValuePair gpu.computeCapability gpu.archName;
  in
  builtins.listToAttrs (lists.map toEntry supportedGpus);

# cudaComputeCapabilities :: List String
# The compute capabilities to build for: config.cudaCapabilities when the user set it,
# otherwise the capability of every supported GPU.
# NOTE: The default deliberately avoids builtins.attrNames cudaComputeCapabilityToName,
# which would return the versions in alphabetical order. The list in ./gpus.nix is
# already sorted, and mapping over supportedGpus keeps that order.
cudaComputeCapabilities =
  let
    defaultCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;
  in
  config.cudaCapabilities or defaultCapabilities;

# cudaForwardComputeCapability :: String
# The last entry of cudaComputeCapabilities with "+PTX" appended, e.g. "8.6+PTX".
cudaForwardComputeCapability = "${lists.last cudaComputeCapabilities}+PTX";

# cudaComputeCapabilitiesAndForward :: List String
# cudaComputeCapabilities followed by the forward compatibility entry.
# Forward compatibility is on unless config.cudaForwardCompat is set to false, in which
# case this is exactly cudaComputeCapabilities.
cudaComputeCapabilitiesAndForward =
  let
    forwardCompatEnabled = config.cudaForwardCompat or true;
  in
  cudaComputeCapabilities
  ++ (if forwardCompatEnabled then [ cudaForwardComputeCapability ] else [ ]);

# dropDot :: String -> String
# Removes every "." from a version string, e.g. "8.6" becomes "86".
dropDot = builtins.replaceStrings [ "." ] [ "" ];

# archMapper :: String -> List String -> List String
# Turns each compute capability into "<feat>_<capability without the dot>".
# For example, "sm" and [ "8.0" "8.6" "8.7" ] produces [ "sm_80" "sm_86" "sm_87" ].
archMapper = feat: computeCapabilities:
  let
    prefix = "${feat}_";
  in
  lists.map (computeCapability: prefix + dropDot computeCapability) computeCapabilities;

# gencodeMapper :: String -> List String -> List String
# Turns each compute capability into one -gencode argument whose `arch` is always the
# compute_XY form and whose `code` is "<feat>_XY".
# For example, "sm" and [ "8.0" "8.6" "8.7" ] produces [ "-gencode=arch=compute_80,code=sm_80"
# "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_87,code=sm_87" ].
gencodeMapper = feat:
  let
    toGencodeFlag = computeCapability:
      let
        arch = dropDot computeCapability;
      in
      "-gencode=arch=compute_${arch},code=${feat}_${arch}";
  in
  lists.map toGencodeFlag;

# cudaRealArches :: List String
# One "sm_XY" entry per element of cudaComputeCapabilities, i.e. the real (physical)
# architectures being targeted.
# For example, "sm_80".
cudaRealArches =
  lists.map (computeCapability: "sm_${dropDot computeCapability}") cudaComputeCapabilities;

# cudaVirtualArches :: List String
# One "compute_XY" entry per element of cudaComputeCapabilities. Virtual architectures
# are typically used for forward compatibility, when trying to support an architecture
# newer than the CUDA version allows.
# For example, "compute_80".
cudaVirtualArches =
  lists.map (computeCapability: "compute_${dropDot computeCapability}") cudaComputeCapabilities;

# cudaArches :: List String
# All real architectures, plus the last virtual architecture for forward compatibility
# unless config.cudaForwardCompat is set to false.
cudaArches =
  let
    newestVirtualArch = lists.last cudaVirtualArches;
  in
  cudaRealArches
  ++ (if config.cudaForwardCompat or true then [ newestVirtualArch ] else [ ]);

# cudaGencode :: List String
# The -gencode arguments to pass to NVCC: one "sm" argument per compute capability and,
# unless config.cudaForwardCompat is set to false, one trailing "compute" argument for
# the last compute capability.
cudaGencode =
  let
    realArchFlags = gencodeMapper "sm" cudaComputeCapabilities;
    newestCapability = lists.last cudaComputeCapabilities;
    forwardCompatFlags = gencodeMapper "compute" [ newestCapability ];
  in
  if config.cudaForwardCompat or true
  then realArchFlags ++ forwardCompatFlags
  else realArchFlags;

in
{
inherit cudaArchs cudaArchNames cudaCapabilities cudaCapabilitiesCommaString cudaCapabilitiesSemiColonString
cudaRealCapabilities cudaRealCapabilitiesCommaString cudaGencode cudaRealArchs cudaPTXArchs;
# Attributes exported unchanged from the let-bindings above: architecture names and
# lookup tables, the real/virtual/combined architecture lists, and the NVCC -gencode
# arguments.
inherit
cudaArchNames
cudaArchNameToVersions cudaComputeCapabilityToName
cudaRealArches cudaVirtualArches cudaArches
cudaGencode;
# Exported under the name cudaCapabilities: the compute capabilities plus, when forward
# compatibility is enabled, the trailing "+PTX" entry.
cudaCapabilities = cudaComputeCapabilitiesAndForward;
}
110 changes: 110 additions & 0 deletions pkgs/development/compilers/cudatoolkit/gpus.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Table of GPU compute capabilities and the CUDA versions that can target them.
#
# Each entry is an attribute set with four string fields:
#   archName          - name of the GPU architecture, e.g. "Ampere"
#   computeCapability - compute capability as "major.minor", e.g. "8.6"
#   minCudaVersion    - lowest CUDA version for which flags.nix treats the GPU as supported
#   maxCudaVersion    - highest CUDA version for which flags.nix treats the GPU as supported
#
# Entries are listed in ascending computeCapability order. flags.nix preserves this
# order in the lists it derives, so keep new entries sorted.
[
{
archName = "Kepler";
computeCapability = "3.0";
minCudaVersion = "10.0";
maxCudaVersion = "10.2";
}
{
archName = "Kepler";
computeCapability = "3.2";
minCudaVersion = "10.0";
maxCudaVersion = "10.2";
}
{
archName = "Kepler";
computeCapability = "3.5";
minCudaVersion = "10.0";
maxCudaVersion = "11.8";
}
{
archName = "Kepler";
computeCapability = "3.7";
minCudaVersion = "10.0";
maxCudaVersion = "11.8";
}
{
archName = "Maxwell";
computeCapability = "5.0";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Maxwell";
computeCapability = "5.2";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Maxwell";
computeCapability = "5.3";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Pascal";
computeCapability = "6.0";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Pascal";
computeCapability = "6.1";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Pascal";
computeCapability = "6.2";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Volta";
computeCapability = "7.0";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Volta";
computeCapability = "7.2";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Turing";
computeCapability = "7.5";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Ampere";
computeCapability = "8.0";
minCudaVersion = "11.2";
maxCudaVersion = "12.0";
}
{
archName = "Ampere";
computeCapability = "8.6";
minCudaVersion = "11.2";
maxCudaVersion = "12.0";
}
{
archName = "Ampere";
computeCapability = "8.7";
minCudaVersion = "11.5";
maxCudaVersion = "12.0";
}
{
archName = "Ada";
computeCapability = "8.9";
minCudaVersion = "11.8";
maxCudaVersion = "12.0";
}
{
archName = "Hopper";
computeCapability = "9.0";
minCudaVersion = "11.8";
maxCudaVersion = "12.0";
}
]
2 changes: 1 addition & 1 deletion pkgs/development/libraries/science/math/magma/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ in stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_C_COMPILER=${cudatoolkit.cc}/bin/gcc"
"-DCMAKE_CXX_COMPILER=${cudatoolkit.cc}/bin/g++"
"-DMAGMA_ENABLE_CUDA=ON"
"-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArchs}"
"-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
] ++ lib.optionals useROCM [
"-DCMAKE_C_COMPILER=${hip}/bin/hipcc"
"-DCMAKE_CXX_COMPILER=${hip}/bin/hipcc"
Expand Down
2 changes: 1 addition & 1 deletion pkgs/development/python-modules/jaxlib/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ let
build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}"
build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}"
build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${cudaFlags.cudaRealCapabilitiesCommaString}"
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
'' + ''
CFG
'';
Expand Down
2 changes: 1 addition & 1 deletion pkgs/development/python-modules/tensorflow/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ let
TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc";
TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArchs;
TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArches;

postPatch = ''
# bazel 3.3 should work just as well as bazel 3.1
Expand Down
Loading