This repository has been archived by the owner on Mar 12, 2021. It is now read-only.

Commit 67f665d

Try #275:

bors[bot] committed Feb 7, 2019
2 parents 8c12c02 + b0b4db9
Showing 12 changed files with 228 additions and 88 deletions.
21 changes: 20 additions & 1 deletion src/array.jl
@@ -21,8 +21,13 @@ CuVector{T} = CuArray{T,1}
 CuMatrix{T} = CuArray{T,2}
 CuVecOrMat{T} = Union{CuVector{T},CuMatrix{T}}
 
-function unsafe_free!(xs::CuArray)
+const INVALID = Mem.alloc(0)
+
+function unsafe_free!(xs::CuArray{<:Any,N}) where {N}
+  xs.buf === INVALID && return
   Mem.release(xs.buf) && dealloc(xs.buf, prod(xs.dims)*sizeof(eltype(xs)))
+  xs.dims = Tuple(0 for _ in 1:N)
+  xs.buf = INVALID
   return
 end
 
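This leaves a freed array in a well-defined state: its buffer is swapped for the shared `INVALID` sentinel (a 0-byte allocation) and its dims are zeroed, so `unsafe_free!` becomes idempotent. A minimal illustration of the intended behavior, assuming the definitions above:

    xs = CuArray{Float32}(undef, 16)
    unsafe_free!(xs)   # releases the buffer back to the pool; xs.buf becomes INVALID
    unsafe_free!(xs)   # safe no-op: the sentinel check returns before touching the pool
    size(xs)           # (0,): dims were zeroed, so stale code sees an empty array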
@@ -44,6 +49,20 @@ CuArray{T}(::UndefInitializer, dims::Integer...) where {T} =
 # empty vector constructor
 CuArray{T,1}() where {T} = CuArray{T,1}(undef, 0)
 
+# do-block constructors
+for (ctor, tvars) in (:CuArray => (), :(CuArray{T}) => (:T,), :(CuArray{T,N}) => (:T, :N))
+  @eval begin
+    function $ctor(f::Function, args...) where {$(tvars...)}
+      xs = $ctor(args...)
+      try
+        f(xs)
+      finally
+        unsafe_free!(xs)
+      end
+    end
+  end
+end
+
 
 Base.similar(a::CuArray{T,N}) where {T,N} = CuArray{T,N}(undef, size(a))
 Base.similar(a::CuArray{T}, dims::Base.Dims{N}) where {T,N} = CuArray{T,N}(undef, dims)
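The do-block constructors follow the `open(f, filename) do` convention from Base: the freshly allocated array is passed to `f`, the `finally` clause frees it as soon as the block exits (normally or on error), and the block's value is returned. A sketch of intended usage (the kernel body is illustrative):

    total = CuArray{Float32}(undef, 1024) do tmp
      tmp .= 1f0
      sum(tmp)   # the block's value becomes the constructor's return value
    end          # tmp is freed here, without waiting for the GC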
2 changes: 1 addition & 1 deletion src/blas/CUBLAS.jl
@@ -4,7 +4,7 @@ import CUDAdrv: CUDAdrv, CuContext, CuStream_t, CuPtr, PtrOrCuPtr, CU_NULL
 import CUDAapi
 
 using ..CuArrays
-using ..CuArrays: libcublas, active_context
+using ..CuArrays: libcublas, active_context, unsafe_free!
 
 using LinearAlgebra
 
48 changes: 38 additions & 10 deletions src/blas/wrappers.jl
@@ -221,7 +221,7 @@ for (fname, elty) in ((:cublasIdamax_v2,:Float64),
                        dx::CuArray{$elty},
                        incx::Integer)
             result = Ref{Cint}()
-            $fname(handle(), n, dx, incx, result)
+            $fname(handle(), n, dx, incx, result)
             return result[]
         end
     end
@@ -836,7 +836,7 @@ for (fname, elty) in
                                B::Array{CuMatrix{$elty},1},
                                beta::($elty),
                                C::Array{CuMatrix{$elty},1})
-            if( length(A) != length(B) || length(A) != length(C) )
+            if length(A) != length(B) || length(A) != length(C)
                 throw(DimensionMismatch(""))
             end
             for (As,Bs,Cs) in zip(A,B,C)
@@ -847,6 +847,7 @@ for (fname, elty) in
                     throw(DimensionMismatch(""))
                 end
             end
+
             m = size(A[1], transA == 'N' ? 1 : 2)
             k = size(A[1], transA == 'N' ? 2 : 1)
             n = size(B[1], transB == 'N' ? 2 : 1)
@@ -860,6 +861,10 @@ for (fname, elty) in
             Cptrs = device_batch(C)
             $fname(handle(), cutransA,cutransB, m, n, k, [alpha], Aptrs, lda, Bptrs,
                    ldb, [beta], Cptrs, ldc, length(A))
+            unsafe_free!(Cptrs)
+            unsafe_free!(Bptrs)
+            unsafe_free!(Aptrs)
+
             C
         end
         function gemm_batched(transA::Char,
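This is the pattern repeated throughout this file: `device_batch` uploads a small temporary CuArray of device pointers, and once the CUBLAS call returns those pointer arrays can go straight back to the pool instead of lingering until the next GC pause. Pulled out of the `@eval` loop, the shape is roughly the following (a sketch; `device_batch` and `handle` are this package's internal helpers):

    Aptrs = device_batch(A)   # temporary array of device pointers
    Bptrs = device_batch(B)
    Cptrs = device_batch(C)
    cublasDgemmBatched(handle(), cutransA, cutransB, m, n, k, [alpha], Aptrs, lda,
                       Bptrs, ldb, [beta], Cptrs, ldc, length(A))
    unsafe_free!(Cptrs)       # eagerly return the pointer arrays to the pool;
    unsafe_free!(Bptrs)       # under memory pressure the GC may not run soon
    unsafe_free!(Aptrs)       # enough to reclaim them otherwise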
@@ -1346,7 +1351,7 @@ for (fname, elty) in
             cuuplo = cublasfill(uplo)
             cutransa = cublasop(transa)
             cudiag = cublasdiag(diag)
-            if( length(A) != length(B) )
+            if length(A) != length(B)
                 throw(DimensionMismatch(""))
             end
             for (As,Bs) in zip(A,B)
@@ -1355,12 +1360,16 @@ for (fname, elty) in
                 if mA != nA throw(DimensionMismatch("A must be square")) end
                 if nA != (side == 'L' ? m : n) throw(DimensionMismatch("trsm_batched!")) end
             end
+
             m,n = size(B[1])
             lda = max(1,stride(A[1],2))
             ldb = max(1,stride(B[1],2))
             Aptrs = device_batch(A)
             Bptrs = device_batch(B)
             $fname(handle(), cuside, cuuplo, cutransa, cudiag, m, n, [alpha], Aptrs, lda, Bptrs, ldb, length(A))
+            unsafe_free!(Bptrs)
+            unsafe_free!(Aptrs)
+
             B
         end
         function trsm_batched(side::Char,
@@ -1453,13 +1462,16 @@ for (fname, elty) in
                     throw(DimensionMismatch("All matrices must be square!"))
                 end
             end
+
             m,n = size(A[1])
             lda = max(1,stride(A[1],2))
             Aptrs = device_batch(A)
             info = CuArray{Cint}(undef, length(A))
             pivotArray = Pivot ? CuArray{Int32}(undef, (n, length(A))) : CU_NULL
             $fname(handle(), n, Aptrs, lda, pivotArray, info, length(A))
-            if( !Pivot )
+            unsafe_free!(Aptrs)
+
+            if !Pivot
                 pivotArray = CuArray(zeros(Cint, (n, length(A))))
             end
             pivotArray, info, A
@@ -1493,6 +1505,7 @@ for (fname, elty) in
                     throw(DimensionMismatch("All A matrices must be square!"))
                 end
             end
+
             C = CuMatrix{$elty}[similar(A[1]) for i in 1:length(A)]
             n = size(A[1])[1]
             lda = max(1,stride(A[1],2))
@@ -1501,6 +1514,9 @@ for (fname, elty) in
             Cptrs = device_batch(C)
             info = CuArray(zeros(Cint,length(A)))
             $fname(handle(), n, Aptrs, lda, pivotArray, Cptrs, ldc, info, length(A))
+            unsafe_free!(Cptrs)
+            unsafe_free!(Aptrs)
+
             pivotArray, info, C
         end
     end
@@ -1528,6 +1544,7 @@ for (fname, elty) in
                     throw(ArgumentError("matinv requires all matrices be smaller than 32 x 32"))
                 end
             end
+
             C = CuMatrix{$elty}[similar(A[1]) for i in 1:length(A)]
             n = size(A[1])[1]
             lda = max(1,stride(A[1],2))
@@ -1536,6 +1553,9 @@ for (fname, elty) in
             Cptrs = device_batch(C)
             info = CuArray(zeros(Cint,length(A)))
             $fname(handle(), n, Aptrs, lda, Cptrs, ldc, info, length(A))
+            unsafe_free!(Cptrs)
+            unsafe_free!(Aptrs)
+
             info, C
         end
     end
@@ -1560,14 +1580,17 @@ for (fname, elty) in
             hTauArray = [zeros($elty, min(m,n)) for i in 1:length(A)]
             TauArray = CuArray{$elty,1}[]
             for i in 1:length(A)
-                push!(TauArray,CuArray(hTauArray[i]))
+                push!(TauArray, CuArray(hTauArray[i]))
             end
             Tauptrs = device_batch(TauArray)
             info = zero(Cint)
             $fname(handle(), m, n, Aptrs, lda, Tauptrs, [info], length(A))
-            if( info != 0 )
+            unsafe_free!(Tauptrs)
+
+            if info != 0
                 throw(ArgumentError(string("Invalid value at ", -info)))
             end
+
             TauArray, A
         end
         function geqrf_batched(A::Array{CuMatrix{$elty},1})
@@ -1593,20 +1616,21 @@ for (fname, elty) in
                                A::Array{CuMatrix{$elty},1},
                                C::Array{CuMatrix{$elty},1})
             cutrans = cublasop(trans)
-            if( length(A) != length(C) )
+            if length(A) != length(C)
                 throw(DimensionMismatch(""))
             end
             for (As,Cs) in zip(A,C)
                 m,n = size(As)
                 mC,nC = size(Cs)
-                if( n != mC )
+                if n != mC
                     throw(DimensionMismatch(""))
                 end
             end
             m,n = size(A[1])
-            if( m < n )
+            if m < n
                 throw(ArgumentError("System must be overdetermined"))
             end
+
             nrhs = size(C[1])[2]
             lda = max(1,stride(A[1],2))
             ldc = max(1,stride(A[1],2))
@@ -1615,9 +1639,13 @@ for (fname, elty) in
             info = zero(Cint)
             infoarray = CuArray(zeros(Cint, length(A)))
             $fname(handle(), cutrans, m, n, nrhs, Aptrs, lda, Cptrs, ldc, [info], infoarray, length(A))
-            if( info != 0 )
+            unsafe_free!(Cptrs)
+            unsafe_free!(Aptrs)
+
+            if info != 0
                 throw(ArgumentError(string("Invalid value at ", -info)))
             end
+
             A, C, infoarray
         end
         function gels_batched(trans::Char,
2 changes: 1 addition & 1 deletion src/dnn/CUDNN.jl
@@ -5,7 +5,7 @@ import CUDAapi
 import CUDAdrv: CUDAdrv, CuContext, CuPtr, CU_NULL
 
 using ..CuArrays
-using ..CuArrays: libcudnn, active_context, configured
+using ..CuArrays: libcudnn, active_context, configured, unsafe_free!
 
 include("libcudnn_types.jl")
 include("error.jl")
3 changes: 0 additions & 3 deletions src/dnn/libcudnn.jl
@@ -245,7 +245,6 @@ function cudnnConvolutionBiasActivationForward(y::CuArray{T,N}, x::CuArray{T,N},
 end
 
 function cudnnConvolutionForward(alpha, xDesc, x, wDesc, w, convDesc, algo, workspace, workspace_size, beta, yDesc, y)
-    workspace = something(workspace, CU_NULL)
     @check ccall((:cudnnConvolutionForward, libcudnn),
                  cudnnStatus_t,
                  (cudnnHandle_t, Ptr{Nothing}, cudnnTensorDescriptor_t, CuPtr{Nothing},
@@ -285,7 +284,6 @@ function cudnnGetConvolutionForwardWorkspaceSize(y::CuArray{T,N}, x::CuArray{T,N
 end
 
 function cudnnConvolutionBackwardData(alpha, wDesc, w, dyDesc, dy, convDesc, algo, workspace, workspace_size, beta, dxDesc, dx)
-    workspace = something(workspace, CU_NULL)
     @check ccall((:cudnnConvolutionBackwardData, libcudnn),
                  cudnnStatus_t,
                  (cudnnHandle_t, Ptr{Nothing}, cudnnFilterDescriptor_t, CuPtr{Nothing},
@@ -325,7 +323,6 @@ function cudnnGetConvolutionBackwardDataWorkspaceSize(dx::CuArray{T,N}, w::CuArr
 end
 
 function cudnnConvolutionBackwardFilter(alpha, xDesc, x, dyDesc, dy, convDesc, algo, workspace, workspace_size, beta, dwDesc, dw)
-    workspace = something(workspace, CU_NULL)
     @check ccall((:cudnnConvolutionBackwardFilter, libcudnn),
                  cudnnStatus_t,
                  (cudnnHandle_t, Ptr{Nothing}, cudnnTensorDescriptor_t, CuPtr{Nothing},
76 changes: 30 additions & 46 deletions src/dnn/nnlib.jl
@@ -5,6 +5,7 @@ import NNlib: conv!, ∇conv_filter!, ∇conv_data!,
 import ..CuArrays: CuVecOrMat, CuVector
 using CUDAnative
 
+
 # Softmax
 
 const CUDNNFloat = Union{Float16,Float32,Float64}
@@ -36,73 +37,56 @@
 ∇logsoftmax(Δ::CuVecOrMat{T}, xs::CuVecOrMat{T}) where T<:CUDNNFloat =
   ∇logsoftmax!(similar(xs), Δ, xs)
 
 # Convolution
 
-const _conv_workspace = Ref{CuVector{UInt8}}()
-
-function conv_workspace(bytes)
-  global _conv_workspace
-  if isassigned(_conv_workspace) && bytes <= length(_conv_workspace[])
-    _conv_workspace[]
-  else
-    _conv_workspace[] = CuVector{UInt8}(undef, bytes)
-  end
-end
-
 function conv!(y::CuArray{T}, x::CuArray{T}, w::CuArray{T};
-               pad=0, stride=1, flipkernel=0, alpha=1, dilation=1,
-               workspace::Union{CuVector, Nothing}=nothing, algo=0) where T<:CUDNNFloat
+               pad=0, stride=1, flipkernel=0, alpha=1, dilation=1, algo=0) where T<:CUDNNFloat
   if version() < v"6"
     all(x -> x == 1, dilation) || error("Only dilation = 1 is supported in cuDNN version < 6")
   end
-  if workspace === nothing
-    workspace_size =
-      cudnnGetConvolutionForwardWorkspaceSize(y, x, w, padding=pad, stride=stride, dilation=dilation,
-                                              algo=algo, mode=flipkernel)
-    workspace = workspace_size != 0 ? conv_workspace(workspace_size) : workspace
-  else
-    workspace_size = length(workspace[])
+
+  workspace_size =
+    cudnnGetConvolutionForwardWorkspaceSize(y, x, w, padding=pad, stride=stride, dilation=dilation,
+                                            algo=algo, mode=flipkernel)
+
+  CuVector{UInt8}(undef, workspace_size) do workspace
+    cudnnConvolutionForward(y, x, w, padding=pad, stride=stride, dilation=dilation, mode=flipkernel,
+                            alpha=alpha, algo=algo, workspace=workspace, workspace_size=workspace_size)
   end
-  cudnnConvolutionForward(y, x, w, padding=pad, stride=stride, dilation=dilation, mode=flipkernel,
-                          alpha=alpha, algo=algo, workspace=workspace, workspace_size=workspace_size)
 end
 
 function ∇conv_filter!(dw::CuArray{T}, dy::CuArray{T}, x::CuArray{T};
-                       pad=0, stride=1, flipkernel=0, alpha=1, dilation=1,
-                       workspace::Union{CuVector, Nothing}=nothing, algo=0) where T<:CUDNNFloat
+                       pad=0, stride=1, flipkernel=0, alpha=1, dilation=1, algo=0) where T<:CUDNNFloat
   if version() < v"6"
     all(x -> x == 1, dilation) || error("Only dilation = 1 is supported in cuDNN version < 6")
   end
-  if workspace === nothing
-    workspace_size =
-      cudnnGetConvolutionBackwardFilterWorkspaceSize(dw, x, dy, padding=pad, stride=stride,
-                                                     dilation=dilation, algo=algo, mode=flipkernel)
-    workspace = workspace_size != 0 ? conv_workspace(workspace_size) : workspace
-  else
-    workspace_size = length(workspace[])
+
+  workspace_size =
+    cudnnGetConvolutionBackwardFilterWorkspaceSize(dw, x, dy, padding=pad, stride=stride,
+                                                   dilation=dilation, algo=algo, mode=flipkernel)
+
+  CuVector{UInt8}(undef, workspace_size) do workspace
+    cudnnConvolutionBackwardFilter(dw, x, dy, padding=pad, stride=stride, dilation=dilation,
+                                   mode=flipkernel, alpha=alpha, algo=algo, workspace=workspace,
+                                   workspace_size=workspace_size)
   end
-  cudnnConvolutionBackwardFilter(dw, x, dy, padding=pad, stride=stride, dilation=dilation,
-                                 mode=flipkernel, alpha=alpha, algo=algo, workspace=workspace,
-                                 workspace_size=workspace_size)
 end
 
 function ∇conv_data!(dx::CuArray{T}, dy::CuArray{T}, w::CuArray{T};
-                     pad=0, stride=1, flipkernel=0, alpha=1, dilation=1,
-                     workspace::Union{CuVector, Nothing}=nothing, algo=0) where T<:CUDNNFloat
+                     pad=0, stride=1, flipkernel=0, alpha=1, dilation=1, algo=0) where T<:CUDNNFloat
  if version() < v"6"
    all(x -> x == 1, dilation) || error("Only dilation = 1 is supported in cuDNN version < 6")
  end
-  if workspace === nothing
-    workspace_size =
-      cudnnGetConvolutionBackwardDataWorkspaceSize(dx, w, dy, padding=pad, stride=stride,
-                                                   dilation=dilation, algo=algo, mode=flipkernel)
-    workspace = workspace_size != 0 ? conv_workspace(workspace_size) : workspace
-  else
-    workspace_size = length(workspace[])
+
+  workspace_size =
+    cudnnGetConvolutionBackwardDataWorkspaceSize(dx, w, dy, padding=pad, stride=stride,
+                                                 dilation=dilation, algo=algo, mode=flipkernel)
+
+  CuVector{UInt8}(undef, workspace_size) do workspace
+    cudnnConvolutionBackwardData(dx, w, dy, padding=pad, stride=stride, dilation=dilation,
+                                 mode=flipkernel, alpha=alpha, algo=algo, workspace=workspace,
+                                 workspace_size=workspace_size)
  end
-  cudnnConvolutionBackwardData(dx, w, dy, padding=pad, stride=stride, dilation=dilation,
-                               mode=flipkernel, alpha=alpha, algo=algo, workspace=workspace,
-                               workspace_size=workspace_size)
 end
 
 ∇conv_bias!(db::CuArray{T}, dy::CuArray{T}; alpha=1, beta=0) where T<:CUDNNFloat =
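With the `workspace` keyword gone and the global `conv_workspace` cache removed, callers no longer manage cuDNN scratch space themselves; each call sizes a workspace, allocates it through the do-block constructor from src/array.jl, and frees it on exit. A minimal call now looks like this (shapes are illustrative; requires a CUDA-capable GPU):

    using CuArrays, NNlib
    x = CuArray(rand(Float32, 28, 28, 1, 8))    # W×H×C×N input batch
    w = CuArray(rand(Float32, 5, 5, 1, 16))     # 5×5 filters, 1 -> 16 channels
    y = CuArray{Float32}(undef, 24, 24, 16, 8)  # valid-padding output
    conv!(y, x, w)   # workspace is allocated and freed internally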
2 changes: 1 addition & 1 deletion src/memory.jl
@@ -302,7 +302,7 @@ end
 
 function dealloc(buf, bytes)
     # 0-byte allocations shouldn't hit the pool
-    bytes == 0 && return Mem.alloc(0)
+    bytes == 0 && return
 
     stats.req_nfree += 1
     stats.user_free += bytes
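This is the other half of the `INVALID` sentinel introduced in src/array.jl: a 0-byte buffer never touched the pool on allocation, so freeing it is simply a no-op, and `dealloc` no longer needs to hand a fresh `Mem.alloc(0)` back to its caller. For instance (assuming the definitions above and that the pool treats 0-byte buffers this way):

    xs = CuArray{Float32}(undef, 0)   # 0-byte buffer, never hits the pool
    unsafe_free!(xs)                  # dealloc(buf, 0) returns immediately;
                                      # pool statistics stay untouched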
2 changes: 1 addition & 1 deletion src/solver/CUSOLVER.jl
@@ -4,7 +4,7 @@ import CUDAdrv: CUDAdrv, CuContext, CuStream_t, CuPtr, PtrOrCuPtr, CU_NULL
 import CUDAapi
 
 using ..CuArrays
-using ..CuArrays: libcusolver, active_context, _getindex
+using ..CuArrays: libcusolver, active_context, _getindex, unsafe_free!
 
 using LinearAlgebra
 using SparseArrays