Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MadNLPGPU] Migrate to KernelAbstractions 0.9 #258

Merged
merged 2 commits on Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions lib/MadNLPGPU/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,13 @@ version = "0.5.0"

[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"

[compat]
CUDA = "~4"
CUDAKernels = "0.4"
KernelAbstractions = "0.8"
KernelAbstractions = "0.9"
MadNLP = "0.6"
MadNLPTests = "~0.3"
julia = "1.7"
Expand Down
4 changes: 2 additions & 2 deletions lib/MadNLPGPU/src/MadNLPGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ module MadNLPGPU
import LinearAlgebra
# CUDA
import CUDA: CUDA, CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version
import CUDA: CUDABackend
import .CUSOLVER:
libcusolver, cusolverStatus_t, CuPtr, cudaDataType, cublasFillMode_t, cusolverDnHandle_t, dense_handle
import .CUBLAS: handle, CUBLAS_DIAG_NON_UNIT,
CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER, CUBLAS_SIDE_LEFT, CUBLAS_OP_N, CUBLAS_OP_T

# Kernels
import KernelAbstractions: @kernel, @index, wait, Event
import CUDAKernels: CUDADevice
import KernelAbstractions: @kernel, @index, synchronize

import MadNLP: NLPModels
import MadNLP
Expand Down
28 changes: 14 additions & 14 deletions lib/MadNLPGPU/src/kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ end

# GPU method of `MadNLP.diag!` for a `CuVector` destination and a `CuMatrix`
# source with the same element type `T`. It launches the `_copy_diag!` kernel
# over `length(dest)` work items and then waits for the device.
# Presumably this copies the diagonal of `src` into `dest`; the kernel body is
# not visible in this hunk — confirm against `_copy_diag!`.
#
# NOTE(review): this is a rendered diff hunk without +/- markers, so the old
# and new lines appear side by side. Judging from the PR title ("Migrate to
# KernelAbstractions 0.9"), the `CUDADevice()` launch followed by `wait(ev)`
# looks like the removed pair and the `CUDABackend()` launch followed by
# `synchronize(CUDABackend())` the added pair — confirm against the raw diff.
function MadNLP.diag!(dest::CuVector{T}, src::CuMatrix{T}) where T
# `dest` must have one entry per row of `src`.
@assert length(dest) == size(src, 1)
ev = _copy_diag!(CUDADevice())(dest, src, ndrange=length(dest))
wait(ev)
ev = _copy_diag!(CUDABackend())(dest, src, ndrange=length(dest))
synchronize(CUDABackend())
end

@kernel function _add_diagonal!(dest, src1, src2)
Expand All @@ -19,8 +19,8 @@ end
end

# GPU method of `MadNLP.diag_add!` for a `CuMatrix` destination and two
# `CuVector` sources. It launches the `_add_diagonal!` kernel with `ndrange`
# equal to the number of rows of `dest`, then waits for the device.
# Presumably the kernel adds `src1` and `src2` onto the diagonal of `dest`;
# its body is not fully visible in this hunk — confirm against `_add_diagonal!`.
#
# NOTE(review): this is a rendered diff hunk without +/- markers, so the old
# and new lines appear side by side. Judging from the PR title ("Migrate to
# KernelAbstractions 0.9"), the `CUDADevice()` launch followed by `wait(ev)`
# looks like the removed pair and the `CUDABackend()` launch followed by
# `synchronize(CUDABackend())` the added pair — confirm against the raw diff.
function MadNLP.diag_add!(dest::CuMatrix, src1::CuVector, src2::CuVector)
ev = _add_diagonal!(CUDADevice())(dest, src1, src2, ndrange=size(dest, 1))
wait(ev)
ev = _add_diagonal!(CUDABackend())(dest, src1, src2, ndrange=size(dest, 1))
synchronize(CUDABackend())
end

#=
Expand Down Expand Up @@ -74,8 +74,8 @@ function MadNLP.treat_fixed_variable!(kkt::MadNLP.AbstractKKTSystem{T, VT, MT})
aug = kkt.aug_com
d_ind_fixed = kkt.ind_fixed |> CuVector # TODO: allocate ind_fixed directly on the GPU
ndrange = (length(d_ind_fixed), size(aug, 1))
ev = _treat_fixed_variable_kernell!(CUDADevice())(aug, d_ind_fixed, ndrange=ndrange)
wait(ev)
ev = _treat_fixed_variable_kernell!(CUDABackend())(aug, d_ind_fixed, ndrange=ndrange)
synchronize(CUDABackend())
end


Expand Down Expand Up @@ -180,11 +180,11 @@ function MadNLP._build_dense_kkt_system!(
)
ind_ineq_gpu = ind_ineq |> CuArray
ndrange = (n+m+ns, n)
ev = _build_dense_kkt_system_kernel!(CUDADevice())(
ev = _build_dense_kkt_system_kernel!(CUDABackend())(
dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq_gpu, con_scale, n, m, ns,
ndrange=ndrange
)
wait(ev)
synchronize(CUDABackend())
end


Expand Down Expand Up @@ -214,11 +214,11 @@ function MadNLP._build_ineq_jac!(
(m_ineq == 0) && return # nothing to do if no ineq. constraints
ind_ineq_gpu = ind_ineq |> CuArray
ndrange = (m_ineq, n)
ev = _build_jacobian_condensed_kernel!(CUDADevice())(
ev = _build_jacobian_condensed_kernel!(CUDABackend())(
dest, jac, pr_diag, ind_ineq_gpu, con_scale, n, m_ineq,
ndrange=ndrange, dependencies=Event(CUDADevice()),
ndrange=ndrange,
)
wait(ev)
synchronize(CUDABackend())
# need to zero the fixed components
dest[:, ind_fixed] .= 0.0
return
Expand Down Expand Up @@ -253,11 +253,11 @@ function MadNLP._build_condensed_kkt_system!(
)
ind_eq_gpu = ind_eq |> CuArray
ndrange = (n + m_eq, n)
ev = _build_condensed_kkt_system_kernel!(CUDADevice())(
ev = _build_condensed_kkt_system_kernel!(CUDABackend())(
dest, hess, jac, pr_diag, du_diag, ind_eq_gpu, n, m_eq,
ndrange=ndrange, dependencies=Event(CUDADevice()),
ndrange=ndrange,
)
wait(ev)
synchronize(CUDABackend())
end

function LinearAlgebra.mul!(y::AbstractVector, kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
Expand Down