[MadNLPGPU] Migrate to KernelAbstractions 0.9 (#258)
Co-authored-by: Sungho Shin <[email protected]>
frapac and sshin23 authored Jun 6, 2023
1 parent 4979e39 commit 9411a1e
Showing 3 changed files with 17 additions and 19 deletions.
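The substantive change throughout is the KernelAbstractions 0.8 -> 0.9 API migration: device types (CUDADevice, supplied by the separate CUDAKernels.jl package) become backend types (CUDABackend, supplied by CUDA.jl itself), and the explicit event machinery (Event, wait, the dependencies keyword) is replaced by a blocking synchronize(backend). A minimal sketch of the 0.9 launch pattern follows; the _scale! kernel here is hypothetical and not part of this commit:

using CUDA
import CUDA: CUDABackend
import KernelAbstractions: @kernel, @index, synchronize

# A trivial element-wise kernel, for illustration only.
@kernel function _scale!(dest, src, alpha)
    i = @index(Global)
    @inbounds dest[i] = alpha * src[i]
end

src = CUDA.rand(Float64, 16)
dest = similar(src)

# KA 0.9: instantiate the kernel on a backend, launch it, then synchronize
# the backend. Under KA 0.8 this read: ev = _scale!(CUDADevice())(...); wait(ev).
_scale!(CUDABackend())(dest, src, 2.0, ndrange=length(dest))
synchronize(CUDABackend())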
4 changes: 1 addition & 3 deletions lib/MadNLPGPU/Project.toml
@@ -5,15 +5,13 @@ version = "0.5.0"

 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
 
 [compat]
 CUDA = "~4"
-CUDAKernels = "0.4"
-KernelAbstractions = "0.8"
+KernelAbstractions = "0.9"
 MadNLP = "0.6"
 MadNLPTests = "~0.3"
 julia = "1.7"
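Dropping CUDAKernels is the point of this Project.toml change: with KernelAbstractions 0.9, the CUDA backend lives in CUDA.jl itself, so the CUDAKernels.jl glue package (and its compat entry) is no longer needed.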
4 changes: 2 additions & 2 deletions lib/MadNLPGPU/src/MadNLPGPU.jl
@@ -3,14 +3,14 @@ module MadNLPGPU
 import LinearAlgebra
 # CUDA
 import CUDA: CUDA, CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version
+import CUDA: CUDABackend
 import .CUSOLVER:
     libcusolver, cusolverStatus_t, CuPtr, cudaDataType, cublasFillMode_t, cusolverDnHandle_t, dense_handle
 import .CUBLAS: handle, CUBLAS_DIAG_NON_UNIT,
     CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER, CUBLAS_SIDE_LEFT, CUBLAS_OP_N, CUBLAS_OP_T
 
 # Kernels
-import KernelAbstractions: @kernel, @index, wait, Event
-import CUDAKernels: CUDADevice
+import KernelAbstractions: @kernel, @index, synchronize
 
 import MadNLP: NLPModels
 import MadNLP
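The import changes mirror the API migration: wait and Event no longer exist in KernelAbstractions 0.9 and give way to synchronize, while CUDABackend is now imported from CUDA.jl in place of CUDADevice from CUDAKernels.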
28 changes: 14 additions & 14 deletions lib/MadNLPGPU/src/kernels.jl
@@ -9,8 +9,8 @@ end

 function MadNLP.diag!(dest::CuVector{T}, src::CuMatrix{T}) where T
     @assert length(dest) == size(src, 1)
-    ev = _copy_diag!(CUDADevice())(dest, src, ndrange=length(dest))
-    wait(ev)
+    ev = _copy_diag!(CUDABackend())(dest, src, ndrange=length(dest))
+    synchronize(CUDABackend())
 end
 
 @kernel function _add_diagonal!(dest, src1, src2)
@@ -19,8 +19,8 @@ end
 end
 
 function MadNLP.diag_add!(dest::CuMatrix, src1::CuVector, src2::CuVector)
-    ev = _add_diagonal!(CUDADevice())(dest, src1, src2, ndrange=size(dest, 1))
-    wait(ev)
+    ev = _add_diagonal!(CUDABackend())(dest, src1, src2, ndrange=size(dest, 1))
+    synchronize(CUDABackend())
 end
 
 #=
@@ -74,8 +74,8 @@ function MadNLP.treat_fixed_variable!(kkt::MadNLP.AbstractKKTSystem{T, VT, MT})
     aug = kkt.aug_com
     d_ind_fixed = kkt.ind_fixed |> CuVector # TODO: allocate ind_fixed directly on the GPU
     ndrange = (length(d_ind_fixed), size(aug, 1))
-    ev = _treat_fixed_variable_kernell!(CUDADevice())(aug, d_ind_fixed, ndrange=ndrange)
-    wait(ev)
+    ev = _treat_fixed_variable_kernell!(CUDABackend())(aug, d_ind_fixed, ndrange=ndrange)
+    synchronize(CUDABackend())
 end


@@ -180,11 +180,11 @@ function MadNLP._build_dense_kkt_system!(
 )
     ind_ineq_gpu = ind_ineq |> CuArray
     ndrange = (n+m+ns, n)
-    ev = _build_dense_kkt_system_kernel!(CUDADevice())(
+    ev = _build_dense_kkt_system_kernel!(CUDABackend())(
         dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq_gpu, con_scale, n, m, ns,
         ndrange=ndrange
     )
-    wait(ev)
+    synchronize(CUDABackend())
 end


@@ -214,11 +214,11 @@ function MadNLP._build_ineq_jac!(
     (m_ineq == 0) && return # nothing to do if no ineq. constraints
     ind_ineq_gpu = ind_ineq |> CuArray
     ndrange = (m_ineq, n)
-    ev = _build_jacobian_condensed_kernel!(CUDADevice())(
+    ev = _build_jacobian_condensed_kernel!(CUDABackend())(
         dest, jac, pr_diag, ind_ineq_gpu, con_scale, n, m_ineq,
-        ndrange=ndrange, dependencies=Event(CUDADevice()),
+        ndrange=ndrange,
     )
-    wait(ev)
+    synchronize(CUDABackend())
     # need to zero the fixed components
     dest[:, ind_fixed] .= 0.0
     return
@@ -253,11 +253,11 @@ function MadNLP._build_condensed_kkt_system!(
 )
     ind_eq_gpu = ind_eq |> CuArray
     ndrange = (n + m_eq, n)
-    ev = _build_condensed_kkt_system_kernel!(CUDADevice())(
+    ev = _build_condensed_kkt_system_kernel!(CUDABackend())(
         dest, hess, jac, pr_diag, du_diag, ind_eq_gpu, n, m_eq,
-        ndrange=ndrange, dependencies=Event(CUDADevice()),
+        ndrange=ndrange,
     )
-    wait(ev)
+    synchronize(CUDABackend())
 end

 function LinearAlgebra.mul!(y::AbstractVector, kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
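Every call site in kernels.jl follows the same rewrite: the launch target changes from CUDADevice() to CUDABackend(), wait(ev) becomes synchronize(CUDABackend()), and dependencies=Event(CUDADevice()) is dropped because KA 0.9 orders launches submitted to the same backend automatically. A sketch of chaining two dependent launches under the new API, assuming hypothetical kernels _fill_kernel! and _double_kernel! that are not part of this commit:

using CUDA
import CUDA: CUDABackend
import KernelAbstractions: @kernel, @index, synchronize

@kernel function _fill_kernel!(x, v)
    i = @index(Global)
    @inbounds x[i] = v
end

@kernel function _double_kernel!(x)
    i = @index(Global)
    @inbounds x[i] *= 2
end

backend = CUDABackend()
x = CuVector{Float64}(undef, 32)
_fill_kernel!(backend)(x, 1.0, ndrange=length(x))  # enqueued on the backend's stream
_double_kernel!(backend)(x, ndrange=length(x))     # ordered after _fill_kernel!; no Event needed
synchronize(backend)                               # one blocking wait covers both launches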
