diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 8c338bbe67..02c7314661 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -48,6 +48,26 @@ steps:
   # NOTE: we make sure to run all tests on CUDA versions that have CUDNN & CUTENSOR
   # by setting the CI_THOROUGH environment variable, and using a recent GPU.
 
+  - label: "CUDA 11.2"
+    plugins:
+      - JuliaCI/julia#v0.6:
+          version: 1.5
+      - JuliaCI/julia-test#v0.3: ~
+      - JuliaCI/julia-coverage#v0.3:
+          codecov: true
+          dirs:
+            - src
+            - lib
+            - examples
+    agents:
+      queue: "juliagpu"
+      cuda: "11.2"
+    env:
+      JULIA_CUDA_VERSION: '11.2'
+      JULIA_CUDA_USE_BINARYBUILDER: 'true'
+    if: build.message !~ /\[skip tests\]/
+    timeout_in_minutes: 60
+
   - label: "CUDA 11.1"
     plugins:
       - JuliaCI/julia#v0.4:
diff --git a/Artifacts.toml b/Artifacts.toml
index ec86f5cd27..0a1c477aff 100644
--- a/Artifacts.toml
+++ b/Artifacts.toml
@@ -138,6 +138,38 @@ os = "windows"
+[[CUDA112]]
+arch = "powerpc64le"
+git-tree-sha1 = "205075090ca78a68358e4613a444e56ddf5333d3"
+lazy = true
+libc = "glibc"
+os = "linux"
+
+    [[CUDA112.download]]
+    sha256 = "b146213b1b3ebf8c32f09f9cd8f843461991ece3089ae56c1d7669e6ccdd7711"
+    url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.0+0/CUDA.v11.2.0.powerpc64le-linux-gnu.tar.gz"
+
+[[CUDA112]]
+arch = "x86_64"
+git-tree-sha1 = "e99dab5d7bdf5b60da265bae5e949189d907a56b"
+lazy = true
+libc = "glibc"
+os = "linux"
+
+    [[CUDA112.download]]
+    sha256 = "e2e2c31544411a4e85db23f603c367a9386c44ab0ba49fd86a1af2668fe3ce82"
+    url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.0+0/CUDA.v11.2.0.x86_64-linux-gnu.tar.gz"
+
+[[CUDA112]]
+arch = "x86_64"
+git-tree-sha1 = "1aecead5cc57a9388d3b46929549edb0ef99912f"
+lazy = true
+os = "windows"
+
+    [[CUDA112.download]]
+    sha256 = "cbfcbc9428b761b99856ab1aab089c5f34b87abee9322ac269052bbd877969e0"
+    url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.0+0/CUDA.v11.2.0.x86_64-w64-mingw32.tar.gz"
+
 # CUDNN
diff --git a/deps/bindeps.jl b/deps/bindeps.jl
index afdf749fb5..e4d72c4a13 100644
--- a/deps/bindeps.jl
+++ b/deps/bindeps.jl
@@ -118,6 +118,7 @@ lazy_artifact(x) = @artifact_str(x)
 # NOTE: we don't use autogenerated JLLs, because we have multiple artifacts and need to
 #       decide at run time (i.e. not via package dependencies) which one to use.
 const cuda_artifacts = Dict(
+    (release=v"11.2", version=v"11.2.0", preferred=false) => ()->lazy_artifact("CUDA112"),
     (release=v"11.1", version=v"11.1.1", preferred=true) => ()->lazy_artifact("CUDA111"),
     (release=v"11.0", version=v"11.0.3", preferred=true) => ()->lazy_artifact("CUDA110"),
     (release=v"10.2", version=v"10.2.89", preferred=true) => ()->lazy_artifact("CUDA102"),
diff --git a/deps/compatibility.jl b/deps/compatibility.jl
index 3ef3044ca1..7680cb10e7 100644
--- a/deps/compatibility.jl
+++ b/deps/compatibility.jl
@@ -94,6 +94,7 @@ const cuda_ptx_db = Dict(
     v"6.5" => v"10.2":highest,
     v"7.0" => v"11.0":highest,
     v"7.1" => v"11.1":highest,
+    v"7.2" => v"11.2":highest,
 )
 
 function cuda_ptx_support(ver::VersionNumber)
diff --git a/lib/cublas/wrappers.jl b/lib/cublas/wrappers.jl
index aa6d33630c..f2a5c6c004 100644
--- a/lib/cublas/wrappers.jl
+++ b/lib/cublas/wrappers.jl
@@ -763,6 +763,7 @@ function gemmExComputeType(TA, TB, TC, m, k, n)
     end
 
     if m%4 == 0 && n%4 == 0 && k%4 == 0 && sig === (Int8, Int32)
+        CUDA.version() >= v"11.2" && return nothing # NVIDIA bug #3221266
         # Int32=Int8*Int8 requires m,n,k to be multiples of 4
         # https://forums.developer.nvidia.com/t/cublasgemmex-cant-use-cuda-r-8i-compute-type-on-gtx1080/58100/2
         return math_mode==CUDA.PEDANTIC_MATH ? CUBLAS_COMPUTE_32I_PEDANTIC : CUBLAS_COMPUTE_32I
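
As a quick local sanity check of the new artifact (a minimal sketch, not part of the diff; it assumes this branch of CUDA.jl is installed and a driver new enough for CUDA 11.2 is available), the toolkit can be pinned through the same JULIA_CUDA_VERSION variable the Buildkite step sets, and the resolved version inspected at run time:

    # Must be set before `using CUDA` in a fresh Julia session, since the
    # toolkit artifact is resolved during initialization.
    ENV["JULIA_CUDA_VERSION"] = "11.2"
    ENV["JULIA_CUDA_USE_BINARYBUILDER"] = "true"

    using CUDA

    CUDA.version()     # expected to report v"11.2.0" when the CUDA112 artifact is picked up
    CUDA.functional()  # true if the toolkit and driver initialized correctly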