diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 4aa8537f1c..9dedfd20d2 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -61,6 +61,7 @@ steps:
       matrix:
         setup:
           cuda:
+            - "12.5"
            - "12.4"
            - "12.3"
            - "12.2"
diff --git a/Project.toml b/Project.toml
index 758266782c..ab7c59c0e1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -50,9 +50,9 @@ AbstractFFTs = "0.4, 0.5, 1.0"
 Adapt = "4"
 BFloat16s = "0.2, 0.3, 0.4, 0.5"
 CEnum = "0.2, 0.3, 0.4, 0.5"
-CUDA_Driver_jll = "0.8"
+CUDA_Driver_jll = "0.9"
 CUDA_Runtime_Discovery = "0.3.3"
-CUDA_Runtime_jll = "0.12, 0.13"
+CUDA_Runtime_jll = "0.14"
 ChainRulesCore = "1"
 Crayons = "4"
 DataFrames = "1"
diff --git a/README.md b/README.md
index 28399c588c..ed9743e3d4 100644
--- a/README.md
+++ b/README.md
@@ -30,31 +30,6 @@ using Julia. It features a user-friendly array abstraction, a compiler for writi
 kernels in Julia, and wrappers for various CUDA libraries.
 
-## Requirements
-
-The latest development version of CUDA.jl requires **Julia 1.8** or higher. If you are using
-an older version of Julia, you need to use a previous version of CUDA.jl. This will happen
-automatically when you install the package using Julia's package manager.
-
-Note that CUDA.jl may not work with a custom build of Julia; it is recommended that you
-install Julia using the [official binaries](https://julialang.org/downloads/) or
-[juliaup](https://github.com/JuliaLang/juliaup).
-
-CUDA.jl also requires a CUDA-capable GPU with **compute capability 3.5** (Kepler) or
-higher, and an accompanying **NVIDIA driver for CUDA 11.0** or newer. A compatible CUDA
-toolkit will be downloaded automatically, but in case you want to use your own, CUDA.jl only
-supports **CUDA toolkit 11.4+** or newer. These requirements are not enforced by the Julia
-package manager when installing CUDA.jl. Depending on your system and GPU, you may need to
-install an older version of the package:
-
-* CUDA.jl v4.4 is the last version with support for CUDA 11.0-11.3 (deprecated in v5.0)
-* CUDA.jl v4.0 is the last version to work with CUDA 10.2 (removed in v4.1)
-* CUDA.jl v3.13 is the last version to work with CUDA 10.1 (removed in v4.0)
-* CUDA.jl v1.3 is the last version to work with CUDA 9-10.0 (removed in v2.0)
-
-Finally, you should be using a platform **supported by NVIDIA**. Currently, that means using
-64-bit Linux or Windows, with an X86, ARM, or PowerPC host processor.
-
 
 ## Quick start
 
 Before all, make sure you have a recent NVIDIA driver. On Windows, also make sure you have
@@ -91,6 +66,35 @@ For more usage instructions and other information, please refer to
 [the documentation](https://juliagpu.github.io/CUDA.jl/stable/).
 
 
+## Requirements
+
+The latest development version of CUDA.jl requires **Julia 1.8** or higher. If you are using
+an older version of Julia, you need to use a previous version of CUDA.jl. This will happen
+automatically when you install the package using Julia's package manager.
+
+Note that CUDA.jl may not work with a custom build of Julia; it is recommended that you
+install Julia using the [official binaries](https://julialang.org/downloads/) or
+[juliaup](https://github.com/JuliaLang/juliaup).
+
+The latest version of CUDA.jl also has certain requirements that cannot be enforced by the
+package manager:
+
+- Host platform: only 64-bit Linux and Windows are supported;
+- Device hardware: only NVIDIA GPUs with **compute capability 3.5** (Kepler) or higher are
+  supported;
+- NVIDIA driver: a driver for **CUDA 11.0** or newer is required;
+- CUDA toolkit (in case you need to use your own): only **CUDA toolkit 11.4** or newer is
+  supported.
+
+If you cannot meet these requirements, you may need to install an older version of CUDA.jl:
+
+* CUDA.jl v5.3 is the last version with support for PowerPC (removed in v5.4)
+* CUDA.jl v4.4 is the last version with support for CUDA 11.0-11.3 (deprecated in v5.0)
+* CUDA.jl v4.0 is the last version to work with CUDA 10.2 (removed in v4.1)
+* CUDA.jl v3.13 is the last version to work with CUDA 10.1 (removed in v4.0)
+* CUDA.jl v1.3 is the last version to work with CUDA 9-10.0 (removed in v2.0)
+
+
 ## Supporting and Citing
 
 Much of the software in this ecosystem was developed as part of academic research. If you
@@ -105,7 +109,7 @@ root of this repository lists the relevant papers.
 
 The package is tested against, and being developed for, Julia 1.8 and above. Main
 development and testing happens on x86 Linux, but the package is expected to work on
-Windows, and on ARM and PowerPC as well.
+Windows and ARM as well.
 
 
 ## Questions and Contributions
diff --git a/lib/cudadrv/libcuda.jl b/lib/cudadrv/libcuda.jl
index e719fccf7e..294b4b1201 100644
--- a/lib/cudadrv/libcuda.jl
+++ b/lib/cudadrv/libcuda.jl
@@ -868,7 +868,7 @@ function Base.getproperty(x::Ptr{CUstreamBatchMemOpParams_union}, f::Symbol)
     f === :waitValue && return Ptr{CUstreamMemOpWaitValueParams_st}(x + 0)
     f === :writeValue && return Ptr{CUstreamMemOpWriteValueParams_st}(x + 0)
     f === :flushRemoteWrites && return Ptr{CUstreamMemOpFlushRemoteWritesParams_st}(x + 0)
-    f === :memoryBarrier && return Ptr{CUstreampMemoryBarrierParams_st}(x + 0)
+    f === :memoryBarrier && return Ptr{CUstreamMemOpMemoryBarrierParams_st}(x + 0)
     f === :pad && return Ptr{NTuple{6,cuuint64_t}}(x + 0)
     return getfield(x, f)
 end
@@ -6048,7 +6048,7 @@ struct CUstreamMemOpFlushRemoteWritesParams_st
     flags::Cuint
 end
 
-struct CUstreampMemoryBarrierParams_st
+struct CUstreamMemOpMemoryBarrierParams_st
     operation::CUstreamBatchMemOpType
     flags::Cuint
 end
diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl
index 07252210a1..ed823303c7 100644
--- a/lib/cupti/wrappers.jl
+++ b/lib/cupti/wrappers.jl
@@ -225,7 +225,7 @@ function process(f, cfg::ActivityConfig)
     ## kernel activities
     activity_types[CUPTI_ACTIVITY_KIND_KERNEL] =
         if cuda_version >= v"12.0"
-            CUpti_ActivityKernel5
+            CUpti_ActivityKernel9
         elseif cuda_version >= v"11.8"
             CUpti_ActivityKernel8
         elseif cuda_version >= v"11.6"
diff --git a/res/wrap/Manifest.toml b/res/wrap/Manifest.toml
index 45a01383c4..7b350e1bf6 100644
--- a/res/wrap/Manifest.toml
+++ b/res/wrap/Manifest.toml
@@ -17,9 +17,9 @@ version = "0.5.0"
 
 [[CSTParser]]
 deps = ["Tokenize"]
-git-tree-sha1 = "b544d62417a99d091c569b95109bc9d8c223e9e3"
+git-tree-sha1 = "0157e592151e39fa570645e2b2debcdfb8a0f112"
 uuid = "00ebfdb7-1f24-5e51-bd34-a7502290713f"
-version = "3.4.2"
+version = "3.4.3"
 
 [[CUDA_Driver_jll]]
 deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"]
@@ -35,9 +35,9 @@ version = "0.12.1+0"
 
 [[CUDA_SDK_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "752d5810571b8639702ada3b098cf550babb1967"
+git-tree-sha1 = "6be192823cc703b93728c13b7183d1b4446be0d1"
 uuid = "6cbf2f2e-7e60-5632-ac76-dca2274e0be0"
-version = "12.4.1+0"
+version = "12.4.1+2"
 
 [[CUDNN_jll]]
 deps = ["Artifacts", "CUDA_Runtime_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"]
@@ -53,9 +53,9 @@ version = "2.0.1+0"
 
 [[Clang]]
 deps = ["CEnum", "Clang_jll", "Downloads", "Pkg", "TOML"]
-git-tree-sha1 = "be935fd478265159ffdb1a949489a5f91319fb95"
+git-tree-sha1 = "2397d5da17ba4970f772a9888b208a0a1d77eb5d"
 uuid = "40e3b903-d033-50b4-a0cc-940c62c95e31"
-version = "0.18.1"
+version = "0.18.3"
 
 [[Clang_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "TOML", "Zlib_jll", "libLLVM_jll"]
@@ -85,7 +85,7 @@ version = "4.14.0"
 [[CompilerSupportLibraries_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
-version = "1.1.0+0"
+version = "1.1.1+0"
 
 [[Crayons]]
 git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15"
@@ -276,9 +276,9 @@ uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
 version = "1.10.0"
 
 [[Tokenize]]
-git-tree-sha1 = "5b5a892ba7704c0977013bd0f9c30f5d962181e0"
+git-tree-sha1 = "468b4685af4abe0e9fd4d7bf495a6554a6276e75"
 uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624"
-version = "0.5.28"
+version = "0.5.29"
 
 [[URIs]]
 git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b"
diff --git a/src/compatibility.jl b/src/compatibility.jl
index 8d651e375b..9201a15f42 100644
--- a/src/compatibility.jl
+++ b/src/compatibility.jl
@@ -112,6 +112,7 @@ const cuda_ptx_db = Dict(
     v"8.2" => between(v"12.2", highest),
     v"8.3" => between(v"12.3", highest),
     v"8.4" => between(v"12.4", highest),
+    v"8.5" => between(v"12.5", highest),
 )
 
 function cuda_ptx_support(ver::VersionNumber)
@@ -182,15 +183,15 @@ const llvm_cap_db = Dict(
     v"6.0" => between(v"3.9", highest),
     v"6.1" => between(v"3.9", highest),
     v"6.2" => between(v"3.9", highest),
-    v"7.0" => between(v"6.0", highest),
-    v"7.2" => between(v"7.0", highest),
-    v"7.5" => between(v"8.0", highest),
-    v"8.0" => between(v"11.0", highest),
-    v"8.6" => between(v"13.0", highest),
-    v"8.7" => between(v"16.0", highest),
-    v"8.9" => between(v"16.0", highest),
-    v"9.0" => between(v"16.0", highest),
-    #v"9.0a" => between(v"17.0", highest),
+    v"7.0" => between(v"6", highest),
+    v"7.2" => between(v"7", highest),
+    v"7.5" => between(v"8", highest),
+    v"8.0" => between(v"11", highest),
+    v"8.6" => between(v"13", highest),
+    v"8.7" => between(v"16", highest),
+    v"8.9" => between(v"16", highest),
+    v"9.0" => between(v"16", highest),
+    #v"9.0a" => between(v"18", highest),
 )
 
 function llvm_cap_support(ver::VersionNumber)
@@ -216,20 +217,25 @@ const llvm_ptx_db = Dict(
     v"4.2" => between(v"3.7", highest),
     v"4.3" => between(v"3.9", highest),
     v"5.0" => between(v"3.9", highest),
-    v"6.0" => between(v"6.0", highest),
-    v"6.1" => between(v"7.0", highest),
-    v"6.3" => between(v"8.0", highest),
-    v"6.4" => between(v"9.0", highest),
-    v"6.5" => between(v"11.0", highest),
-    v"7.0" => between(v"11.0", highest),
-    v"7.1" => between(v"13.0", highest),
-    v"7.2" => between(v"13.0", highest),
-    v"7.3" => between(v"14.0", highest),
-    v"7.4" => between(v"14.0", highest),
-    v"7.5" => between(v"14.0", highest),
-    v"7.6" => between(v"16.0", highest),
-    v"7.7" => between(v"16.0", highest),
-    v"7.8" => between(v"16.0", highest),
+    v"6.0" => between(v"6", highest),
+    v"6.1" => between(v"7", highest),
+    v"6.3" => between(v"8", highest),
+    v"6.4" => between(v"9", highest),
+    v"6.5" => between(v"11", highest),
+    v"7.0" => between(v"11", highest),
+    v"7.1" => between(v"13", highest),
+    v"7.2" => between(v"13", highest),
+    v"7.3" => between(v"14", highest),
+    v"7.4" => between(v"14", highest),
+    v"7.5" => between(v"14", highest),
+    v"7.6" => between(v"16", highest),
+    v"7.7" => between(v"16", highest),
+    v"7.8" => between(v"16", highest),
+    v"8.0" => between(v"17", highest),
+    v"8.1" => between(v"17", highest),
+    v"8.2" => between(v"18", highest),
+    v"8.3" => between(v"18", highest),
+    v"8.4" => between(v"19", highest),
 )
 
 function llvm_ptx_support(ver::VersionNumber)
diff --git a/src/compiler/reflection.jl b/src/compiler/reflection.jl
index aaa9e12cc4..2bfa6dbc3c 100644
--- a/src/compiler/reflection.jl
+++ b/src/compiler/reflection.jl
@@ -52,8 +52,9 @@ function code_sass(io::IO, job::CompilerJob; raw::Bool=false)
     end
 
     # NVIDIA bug #4604961: CUPTI in CUDA 12.4 Update 1 does not capture profiled events
-    # unless the activity API is first activated
-    if runtime_version() == v"12.4"
+    # unless the activity API is first activated. This is fixed in
+    # 12.5 Update 1, but we can only check the minor runtime version.
+    if v"12.4" <= runtime_version() <= v"12.5"
         cfg = CUPTI.ActivityConfig([CUPTI.CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
                                     CUPTI.CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API])
         CUPTI.enable!(cfg) do
@@ -89,8 +90,9 @@ function code_sass(f::Base.Callable, io::IO=stdout; raw::Bool=false)
     end
 
     # NVIDIA bug #4604961: CUPTI in CUDA 12.4 Update 1 does not capture profiled events
-    # unless the activity API is first activated
-    if runtime_version() == v"12.4"
+    # unless the activity API is first activated. This is fixed in
+    # 12.5 Update 1, but we can only check the minor runtime version.
+    if v"12.4" <= runtime_version() <= v"12.5"
         cfg = CUPTI.ActivityConfig([CUPTI.CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
                                     CUPTI.CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API])
         CUPTI.enable!(cfg) do
diff --git a/src/initialization.jl b/src/initialization.jl
index 2953542d60..fa61a88597 100644
--- a/src/initialization.jl
+++ b/src/initialization.jl
@@ -66,9 +66,9 @@ function __init__()
         return
     end
 
-    if driver < v"11"
-        @error "This version of CUDA.jl only supports NVIDIA drivers for CUDA 11.x or higher (yours is for CUDA $driver)"
-        _initialization_error[] = "CUDA driver too old"
+    if !(v"11" <= driver < v"13-")
+        @error "This version of CUDA.jl only supports NVIDIA drivers for CUDA 11.x or 12.x (yours is for CUDA $driver)"
+        _initialization_error[] = "CUDA driver unsupported"
         return
     end
 
@@ -79,19 +79,45 @@ function __init__()
 
     # check that we have a runtime
     if !CUDA_Runtime.is_available()
+        # try to find out why
+        reason = if CUDA_Runtime != CUDA_Runtime_jll
+            """You requested use of a local CUDA toolkit, but not all
+               required components were discovered.
+
+               Try running with `JULIA_DEBUG=CUDA_Runtime_Discovery` in
+               your environment and re-loading CUDA.jl for more details."""
+        elseif !Sys.iswindows() && !Sys.islinux() && !in(Sys.ARCH, [:x86_64, :aarch64])
+            """You are using an unsupported platform: this version of CUDA.jl
+               only supports Linux (x86_64, aarch64) and Windows (x86_64).
+
+               Consider downgrading CUDA.jl (refer to the README for a list of
+               supported platforms) or manually installing the CUDA toolkit and make
+               CUDA.jl use it by calling `CUDA.set_runtime_version!(local_toolkit=true)`."""
+        elseif CUDA_Runtime_jll.host_platform["cuda"] == "none"
+            """CUDA.jl's JLLs were precompiled without an NVIDIA driver present.
+               This can happen when installing CUDA.jl on an HPC log-in node,
+               or in a container. In that case, you need to specify which CUDA
+               version to use at run time by calling `CUDA.set_runtime_version!`
+               or provisioning the preference it sets at compile time.
+
+               If you are not running in a container or on an HPC log-in node,
+               try re-compiling the CUDA runtime JLL and re-loading CUDA.jl:
+                 pkg = Base.PkgId(Base.UUID("76a88914-d11a-5bdc-97e0-2f5a05c973a2"),
+                                  "CUDA_Runtime_jll")
+                 Base.compilecache(pkg)
+                 # re-start Julia and re-load CUDA.jl"""
+        else
+            """Could not diagnose why the CUDA runtime is not available.
+
+               If the issue persists, please file a support ticket with the following details:
+               - host platform: $(Base.BinaryPlatforms.triplet(CUDA_Runtime_jll.host_platform))
+               - libcuda: $libcuda (loaded through JLL: $(CUDA_Driver_jll.is_available()))
+               - driver version: $driver
+               """
+        end
         @error """CUDA.jl could not find an appropriate CUDA runtime to use.
 
-               This can have several reasons:
-               * you are using an unsupported platform: this version of CUDA.jl
-                 only supports Linux (x86_64, aarch64, ppc64le) and Windows (x86_64),
-                 while your platform was identified as $(Base.BinaryPlatforms.triplet(CUDA_Runtime_jll.host_platform));
-               * you precompiled CUDA.jl in an environment where the CUDA driver
-                 was not available (i.e., a container, or an HPC login node).
-                 in that case, you need to specify which CUDA version to use
-                 by calling `CUDA.set_runtime_version!`;
-               * you requested use of a local CUDA toolkit, but not all
-                 required components were discovered. try running with
-                 JULIA_DEBUG=all in your environment for more details.
+               $reason
 
                For more details, refer to the CUDA.jl documentation at
                https://cuda.juliagpu.org/stable/installation/overview/"""
@@ -148,6 +174,13 @@ function __init__()
         return
     end
 
+    # warn if we're not using an official build of Julia
+    official_release = startswith(Base.TAGGED_RELEASE_BANNER, "Official")
+    if !official_release
+        @warn """You are using a non-official build of Julia. This may cause issues with CUDA.jl.
+                 Please consider using an official build from https://julialang.org/downloads/."""
+    end
+
     @static if !isdefined(Base, :get_extension)
         @require ChainRulesCore="d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" begin
             include("../ext/ChainRulesCoreExt.jl")
diff --git a/test/core/codegen.jl b/test/core/codegen.jl
index 9ca772d0f8..d8f409781e 100644
--- a/test/core/codegen.jl
+++ b/test/core/codegen.jl
@@ -193,7 +193,7 @@ end
     valid_kernel() = return
     invalid_kernel() = 1
 
-    if can_use_cupti()
+    if can_use_cupti() && CUDA.runtime_version() != v"12.5"
         @test CUDA.code_sass(devnull, valid_kernel, Tuple{}) == nothing
         @test_throws CUDA.KernelError CUDA.code_sass(devnull, invalid_kernel, Tuple{})
     end
@@ -204,7 +204,7 @@ end
     @eval kernel_341(ptr) = (@inbounds unsafe_store!(ptr, $(Symbol("dummy_^"))(unsafe_load(ptr)));
                              nothing)
 
-    if can_use_cupti()
+    if can_use_cupti() && CUDA.runtime_version() != v"12.5"
         CUDA.code_sass(devnull, kernel_341, Tuple{Ptr{Int}})
     end
 end
@@ -212,7 +212,7 @@ end
 @testset "device runtime" begin
     kernel() = (CUDA.cudaGetLastError(); return)
 
-    if can_use_cupti()
+    if can_use_cupti() && CUDA.runtime_version() != v"12.5"
         CUDA.code_sass(devnull, kernel, Tuple{})
     end
 end
diff --git a/test/core/execution.jl b/test/core/execution.jl
index 6d78851e27..e8ad6ef960 100644
--- a/test/core/execution.jl
+++ b/test/core/execution.jl
@@ -77,7 +77,7 @@ end
     CUDA.code_warntype(devnull, dummy, Tuple{})
     CUDA.code_llvm(devnull, dummy, Tuple{})
     CUDA.code_ptx(devnull, dummy, Tuple{})
-    if can_use_cupti()
+    if can_use_cupti() && CUDA.runtime_version() != v"12.5" # NVIDIA bug #4667039
         # functions defined in Julia
         sass = sprint(io->CUDA.code_sass(io, dummy, Tuple{}))
         @test occursin(".text._Z5dummy", sass)
@@ -96,7 +96,7 @@ end
     @device_code_warntype io=devnull @cuda dummy()
     @device_code_llvm io=devnull @cuda dummy()
     @device_code_ptx io=devnull @cuda dummy()
-    if can_use_cupti()
+    if can_use_cupti() && CUDA.runtime_version() != v"12.5" # NVIDIA bug #4667039
         # functions defined in Julia
         sass = sprint(io->@device_code_sass io=io @cuda dummy())
         @test occursin(".text._Z5dummy", sass)
@@ -120,7 +120,7 @@ end
     @test occursin("dummy", sprint(io->(@device_code_llvm io=io optimize=false @cuda dummy())))
     @test occursin("dummy", sprint(io->(@device_code_llvm io=io @cuda dummy())))
     @test occursin("dummy", sprint(io->(@device_code_ptx io=io @cuda dummy())))
-    if can_use_cupti()
+    if can_use_cupti() && CUDA.runtime_version() != v"12.5" # NVIDIA bug #4667039
        @test occursin("dummy", sprint(io->(@device_code_sass io=io @cuda dummy())))
    end
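Note (editorial, not part of the patch): the reworked driver check in `src/initialization.jl` relies on Julia's prerelease lower-bound notation, where `v"13-"` sorts below `v"13.0"` and every 13.x release while staying above all 12.x versions. A minimal sketch of that comparison, using a hypothetical `is_supported_driver` helper introduced here only for illustration:

```julia
# `v"13-"` is a VersionNumber with an empty prerelease, so it compares below
# v"13.0" and all 13.x releases; `driver < v"13-"` therefore acts as an
# exclusive "anything before CUDA 13" upper bound.
is_supported_driver(driver::VersionNumber) = v"11" <= driver < v"13-"

is_supported_driver(v"11.8")  # true:  CUDA 11.x drivers are accepted
is_supported_driver(v"12.5")  # true:  CUDA 12.x drivers are accepted
is_supported_driver(v"13.0")  # false: a future CUDA 13 driver is rejected
is_supported_driver(v"10.2")  # false: drivers older than CUDA 11.0 are rejected
```

The patch negates this condition (`!(v"11" <= driver < v"13-")`) to decide when to set the "CUDA driver unsupported" initialization error.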