diff --git a/lib/MadNLPGPU/src/MadNLPGPU.jl b/lib/MadNLPGPU/src/MadNLPGPU.jl
index ed1149fc..7c5f5fb6 100644
--- a/lib/MadNLPGPU/src/MadNLPGPU.jl
+++ b/lib/MadNLPGPU/src/MadNLPGPU.jl
@@ -18,8 +18,9 @@ import MadNLP: AbstractOptions, AbstractLinearSolver, AbstractNLPModel,
     set_options!, SymbolicException,FactorizationException,SolveException,InertiaException,
     introduce, factorize!, solve!, improve!, is_inertia, inertia, tril_to_full!,
-    LapackOptions, input_type, is_supported, default_options
+    LapackOptions, input_type, is_supported, default_options, symul!
 
+symul!(y, A, x::CuVector{T}, α = 1., β = 0.) where T = CUBLAS.symv!('L', T(α), A, x, T(β), y)
 
 include("kernels.jl")
diff --git a/lib/MadNLPGPU/src/kernels.jl b/lib/MadNLPGPU/src/kernels.jl
index 2f6407ef..2ed110fa 100644
--- a/lib/MadNLPGPU/src/kernels.jl
+++ b/lib/MadNLPGPU/src/kernels.jl
@@ -133,7 +133,7 @@ function LinearAlgebra.mul!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT,
     # x and y can be host arrays. Copy them on the device to avoid side effect.
     copyto!(d_x, x)
-    LinearAlgebra.mul!(d_y, kkt.aug_com, d_x)
+    symul!(d_y, kkt.aug_com, d_x)
     copyto!(y, d_y)
 end
 function LinearAlgebra.mul!(y::MadNLP.ReducedKKTVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x::MadNLP.ReducedKKTVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
@@ -266,7 +266,7 @@ function LinearAlgebra.mul!(y::AbstractVector, kkt::MadNLP.DenseCondensedKKTSyst
         # Call parent() as CUDA does not dispatch on proper copyto! when passed a view
         copyto!(d_x, 1, parent(x), 1, length(x))
-        LinearAlgebra.mul!(d_y, kkt.aug_com, d_x)
+        symul!(d_y, kkt.aug_com, d_x)
         copyto!(y, d_y)
     else
         # Load buffers
diff --git a/src/KKT/dense.jl b/src/KKT/dense.jl
index b6d1b768..e85ab707 100644
--- a/src/KKT/dense.jl
+++ b/src/KKT/dense.jl
@@ -137,7 +137,7 @@ is_reduced(::DenseKKTSystem) = true
 num_variables(kkt::DenseKKTSystem) = length(kkt.pr_diag)
 
 function mul!(y::AbstractVector, kkt::DenseKKTSystem, x::AbstractVector)
-    mul!(y, Symmetric(kkt.aug_com, :L), x)
+    symul!(y, kkt.aug_com, x)
 end
 function mul!(y::ReducedKKTVector, kkt::DenseKKTSystem, x::ReducedKKTVector)
     mul!(full(y), kkt.aug_com, full(x))
@@ -354,7 +354,7 @@ function _mul_expanded!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::Abst
     # / x (variable)
     yx .= Σx .* xx
-    mul!(yx, kkt.hess, xx, 1.0, 1.0)
+    symul!(yx, kkt.hess, xx, 1.0, 1.0)
     mul!(yx, kkt.jac', xy, 1.0, 1.0)
 
     # / s (slack)
@@ -371,7 +371,7 @@ end
 function mul!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::AbstractVector)
     # TODO: implement properly with AbstractKKTRHS
     if length(y) == length(x) == size(kkt.aug_com, 1)
-        mul!(y, Symmetric(kkt.aug_com, :L), x)
+        symul!(y, kkt.aug_com, x)
     else
         _mul_expanded!(y, kkt, x)
     end
diff --git a/src/MadNLP.jl b/src/MadNLP.jl
index 2e55acd4..4c73039d 100644
--- a/src/MadNLP.jl
+++ b/src/MadNLP.jl
@@ -8,7 +8,7 @@ import MathOptInterface
 import Libdl: dlopen, dlext, RTLD_DEEPBIND, RTLD_GLOBAL
 import Printf: @sprintf
 import LinearAlgebra: BLAS, Adjoint, Symmetric, mul!, ldiv!, norm, dot
-import LinearAlgebra.BLAS: axpy!, libblas, liblapack, BlasInt, @blasfunc
+import LinearAlgebra.BLAS: axpy!, symv!, libblas, liblapack, BlasInt, @blasfunc
 import SparseArrays: AbstractSparseMatrix, SparseMatrixCSC, sparse, getcolptr, rowvals, nnz
 import Base: string, show, print, size, getindex, copyto!, @kwdef
 import SuiteSparse: UMFPACK
diff --git a/src/utils.jl b/src/utils.jl
index 44c0f154..789fa542 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -42,6 +42,11 @@ for (name,level,color) in [(:trace,TRACE,7),(:debug,DEBUG,6),(:info,INFO,256),(:
 end
 
 # BLAS
+# CUBLAS currently does not extend symv!, so a generic call to symv!
+# is not dispatched to CUBLAS.symv!. symul! wraps symv! and dispatches
+# on the type of the input vector instead.
+symul!(y, A, x::AbstractVector{T}, α = 1, β = 0) where T = BLAS.symv!('L', T(α), A, x, T(β), y)
+
 const blas_num_threads = Ref{Int}(1)
 function set_blas_num_threads(n::Integer;permanent::Bool=false)
     permanent && (blas_num_threads[]=n)
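
For reference, a minimal standalone sketch of the CPU-side dispatch pattern added in `src/utils.jl` (the example matrix, vector, and the `@assert` check are illustrative, not part of the patch):

```julia
using LinearAlgebra

# Host fallback: forward to BLAS symv!, which reads only the lower
# triangle of A ('L') and computes y = α*Sym(A)*x + β*y in place.
symul!(y, A, x::AbstractVector{T}, α = 1, β = 0) where T =
    BLAS.symv!('L', T(α), A, x, T(β), y)

A = [2.0 0.0;
     1.0 3.0]      # only the lower triangle is referenced
x = [1.0, 1.0]
y = zeros(2)

symul!(y, A, x)                    # y = Sym(A) * x
@assert y == Symmetric(A, :L) * x  # [3.0, 4.0]
```

Because the GPU method in `MadNLPGPU.jl` constrains `x::CuVector{T}`, the same `symul!` call site picks CUBLAS on device arrays and OpenBLAS/LAPACK on host arrays, without the `Symmetric` wrapper that CUDA cannot dispatch on.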