From 897acf177f13a7943b28b896739f7b595129bcd9 Mon Sep 17 00:00:00 2001
From: Sungho Shin
Date: Tue, 5 Jul 2022 10:05:30 -0500
Subject: [PATCH] Add support for Float32 (#187)

* float32 on cpu works
* tests passing
* general precision
* gpu test passing
* everything in HSL works
* first draft done
* export linear solvers
* krylov subtyping
* hsl build improvement, hopefully final
* HSL build improvement - no fakemetis
* addressed francois's comments
* remove unnecessary
* typo fix
---
 lib/MadNLPGPU/src/MadNLPGPU.jl            |  10 +-
 lib/MadNLPGPU/src/interface.jl            |  20 +-
 lib/MadNLPGPU/src/lapackgpu.jl            | 332 ++++++++++------------
 lib/MadNLPGPU/test/densekkt_gpu.jl        |  52 ++--
 lib/MadNLPGPU/test/runtests.jl            |   9 +-
 lib/MadNLPHSL/README.md                   |  15 +-
 lib/MadNLPHSL/deps/build.jl               | 136 ++++++---
 lib/MadNLPHSL/src/MadNLPHSL.jl            |  41 ++-
 lib/MadNLPHSL/src/ma27.jl                 | 110 ++++---
 lib/MadNLPHSL/src/ma57.jl                 | 112 ++++----
 lib/MadNLPHSL/src/ma77.jl                 | 233 ++++++++-------
 lib/MadNLPHSL/src/ma86.jl                 | 152 ++++++----
 lib/MadNLPHSL/src/ma97.jl                 | 151 ++++++----
 lib/MadNLPHSL/test/runtests.jl            |   3 +-
 lib/MadNLPKrylov/src/MadNLPKrylov.jl      |  16 +-
 lib/MadNLPMumps/src/MadNLPMumps.jl        | 189 ++++--------
 lib/MadNLPMumps/test/runtests.jl          |   3 +-
 lib/MadNLPPardiso/src/MadNLPPardiso.jl    |   2 +-
 lib/MadNLPPardiso/src/pardiso.jl          |  35 +--
 lib/MadNLPPardiso/src/pardisomkl.jl       |  41 +--
 lib/MadNLPPardiso/test/runtests.jl        |   6 +
 lib/MadNLPTests/src/Instances/dummy_qp.jl |  50 ++--
 lib/MadNLPTests/src/MadNLPTests.jl        |  14 +-
 src/IPM/IPM.jl                            | 193 +++++++------
 src/IPM/callbacks.jl                      |  14 +-
 src/IPM/factorization.jl                  |   4 +-
 src/IPM/kernels.jl                        |  12 +-
 src/IPM/restoration.jl                    |  72 ++---
 src/IPM/solver.jl                         |  12 +-
 src/KKT/rhs.jl                            |  14 +-
 src/LinearSolvers/backsolve.jl            |  16 +-
 src/LinearSolvers/lapack.jl               | 135 +++++----
 src/LinearSolvers/linearsolvers.jl        |  24 +-
 src/LinearSolvers/umfpack.jl              |  77 ++---
 src/MadNLP.jl                             |   2 +-
 src/matrixtools.jl                        |   2 +-
 test/madnlp_dense.jl                      |  53 ++--
 test/madnlp_test.jl                       |   1 +
 test/matrix_test.jl                       |  14 +-
 test/nlpmodels_test.jl                    |  11 -
 test/runtests.jl                          |   6 -
 41 files changed, 1271 insertions(+), 1123 deletions(-)
 delete mode 100644 test/nlpmodels_test.jl

diff --git a/lib/MadNLPGPU/src/MadNLPGPU.jl b/lib/MadNLPGPU/src/MadNLPGPU.jl
index a3a33308..75d23d85 100644
--- a/lib/MadNLPGPU/src/MadNLPGPU.jl
+++ b/lib/MadNLPGPU/src/MadNLPGPU.jl
@@ -2,19 +2,23 @@ module MadNLPGPU
 import LinearAlgebra
 # CUDA
-import CUDA: CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version
+import CUDA: CUDA, CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version
+import .CUSOLVER:
+    libcusolver, cusolverStatus_t, CuPtr, cudaDataType, cublasFillMode_t, cusolverDnHandle_t, dense_handle
+import .CUBLAS: handle, CUBLAS_DIAG_NON_UNIT,
+    CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER, CUBLAS_SIDE_LEFT, CUBLAS_OP_N, CUBLAS_OP_T
+
 # Kernels
 import KernelAbstractions: @kernel, @index, wait, Event
 import CUDAKernels: CUDADevice
 import MadNLP
-
 import MadNLP:
     @kwdef, Logger, @debug, @warn, @error,
     AbstractOptions, AbstractLinearSolver, AbstractNLPModel, set_options!,
     SymbolicException,FactorizationException,SolveException,InertiaException,
     introduce, factorize!, solve!, improve!, is_inertia, inertia, tril_to_full!,
-    LapackOptions, input_type
+    LapackOptions, input_type, is_supported
diff --git a/lib/MadNLPGPU/src/interface.jl b/lib/MadNLPGPU/src/interface.jl
index 004a8110..1aaed015 100644
--- a/lib/MadNLPGPU/src/interface.jl
+++ b/lib/MadNLPGPU/src/interface.jl
@@ -1,22 +1,22 @@
-function CuInteriorPointSolver(nlp::AbstractNLPModel;
+function CuInteriorPointSolver(nlp::AbstractNLPModel{T};
     option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
     kwargs...
-)
+) where T
     opt = MadNLP.Options(linear_solver=LapackGPUSolver)
     MadNLP.set_options!(opt,option_dict,kwargs)
     MadNLP.check_option_sanity(opt)
     KKTSystem = if (opt.kkt_system == MadNLP.SPARSE_KKT_SYSTEM) || (opt.kkt_system == MadNLP.SPARSE_UNREDUCED_KKT_SYSTEM)
         error("Sparse KKT systems are currently not supported on CUDA GPU.\n" *
-              "Please use `DENSE_KKT_SYSTEM` or `DENSE_CONDENSED_KKT_SYSTEM` instead.")
+            "Please use `DENSE_KKT_SYSTEM` or `DENSE_CONDENSED_KKT_SYSTEM` instead.")
     elseif opt.kkt_system == MadNLP.DENSE_KKT_SYSTEM
-        MT = CuMatrix{Float64}
-        VT = CuVector{Float64}
-        MadNLP.DenseKKTSystem{Float64, VT, MT}
+        MT = CuMatrix{T}
+        VT = CuVector{T}
+        MadNLP.DenseKKTSystem{T, VT, MT}
     elseif opt.kkt_system == MadNLP.DENSE_CONDENSED_KKT_SYSTEM
-        MT = CuMatrix{Float64}
-        VT = CuVector{Float64}
-        MadNLP.DenseCondensedKKTSystem{Float64, VT, MT}
+        MT = CuMatrix{T}
+        VT = CuVector{T}
+        MadNLP.DenseCondensedKKTSystem{T, VT, MT}
     end
-    return MadNLP.InteriorPointSolver{KKTSystem}(nlp, opt; option_linear_solver=option_dict)
+    return MadNLP.InteriorPointSolver{T,KKTSystem}(nlp, opt; option_linear_solver=option_dict)
 end
diff --git a/lib/MadNLPGPU/src/lapackgpu.jl b/lib/MadNLPGPU/src/lapackgpu.jl
index 615004e9..5d75b913 100644
--- a/lib/MadNLPGPU/src/lapackgpu.jl
+++ b/lib/MadNLPGPU/src/lapackgpu.jl
@@ -1,22 +1,10 @@
-
-import .CUSOLVER:
-    cusolverDnDsytrf_bufferSize, cusolverDnDsytrf,
-    cusolverDnDpotrf_bufferSize, cusolverDnDpotrf, cusolverDnDpotrs,
-    cusolverDnDgetrf_bufferSize, cusolverDnDgetrf, cusolverDnDgetrs,
-    cusolverDnDgeqrf_bufferSize, cusolverDnDgeqrf, cusolverDnDgeqrf_bufferSize,
-    cusolverDnDormqr_bufferSize, cusolverDnDormqr,
-    libcusolver, cusolverStatus_t, CuPtr, cudaDataType, cublasFillMode_t, cusolverDnHandle_t, dense_handle
-import .CUBLAS: cublasDtrsm_v2, handle, CUBLAS_DIAG_NON_UNIT,
-    CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER, CUBLAS_SIDE_LEFT, CUBLAS_OP_N, CUBLAS_OP_T
-
-
-mutable struct LapackGPUSolver{MT} <: AbstractLinearSolver
-    dense::MT
-    fact::CuMatrix{Float64}
-    rhs::CuVector{Float64}
-    work::CuVector{Float64}
+mutable struct LapackGPUSolver{T} <: AbstractLinearSolver{T}
+    dense::AbstractMatrix{T}
+    fact::CuMatrix{T}
+    rhs::CuVector{T}
+    work::CuVector{T}
     lwork
-    work_host::Vector{Float64}
+    work_host::Vector{T}
     lwork_host
     info::CuVector{Int32}
     etc::Dict{Symbol,Any} # throw some algorithm-specific things here
@@ -24,22 +12,25 @@ mutable struct LapackGPUSolver{MT} <: AbstractLinearSolver
     logger::Logger
 end

-function LapackGPUSolver(dense::MT;
-    option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
-    opt=LapackOptions(),logger=Logger(),
-    kwargs...) where {MT <: AbstractMatrix}
+
+function LapackGPUSolver(
+    dense::MT;
+    option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
+    opt=LapackOptions(),logger=Logger(),
+    kwargs...) where {T,MT <: AbstractMatrix{T}}
     set_options!(opt,option_dict,kwargs...)
- fact = CuMatrix{Float64}(undef,size(dense)) - rhs = CuVector{Float64}(undef,size(dense,1)) - work = CuVector{Float64}(undef, 1) + fact = CuMatrix{T}(undef,size(dense)) + rhs = CuVector{T}(undef,size(dense,1)) + work = CuVector{T}(undef, 1) lwork = Int32[1] - work_host = Vector{Float64}(undef, 1) + work_host = Vector{T}(undef, 1) lwork_host = Int32[1] info = CuVector{Int32}(undef,1) etc = Dict{Symbol,Any}() - return LapackGPUSolver(dense,fact,rhs,work,lwork,work_host,lwork_host,info,etc,opt,logger) + + return LapackGPUSolver{T}(dense,fact,rhs,work,lwork,work_host,lwork_host,info,etc,opt,logger) end function factorize!(M::LapackGPUSolver) @@ -72,159 +63,144 @@ end improve!(M::LapackGPUSolver) = false introduce(M::LapackGPUSolver) = "Lapack-GPU ($(M.opt.lapack_algorithm))" -if runtime_version() >= v"11.3.1" - - is_inertia(M::LapackGPUSolver) = M.opt.lapack_algorithm == MadNLP.CHOLESKY # TODO: implement inertia(M::Solver) for BUNCHKAUFMAN - - function factorize_bunchkaufman!(M::LapackGPUSolver) - haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1))) - haskey(M.etc,:ipiv64) || (M.etc[:ipiv64] = CuVector{Int64}(undef,length(M.etc[:ipiv]))) - - copyto!(M.fact,M.dense) - cusolverDnDsytrf_bufferSize( - dense_handle(),Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork) - length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) - cusolverDnDsytrf( - dense_handle(),CUBLAS_FILL_MODE_LOWER, - Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)), - M.etc[:ipiv],M.work,M.lwork[],M.info) - return M - end - - function solve_bunchkaufman!(M::LapackGPUSolver,x) - - copyto!(M.etc[:ipiv64],M.etc[:ipiv]) - copyto!(M.rhs,x) - ccall((:cusolverDnXsytrs_bufferSize, libcusolver()), cusolverStatus_t, - (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType, - CuPtr{Cdouble}, Int64, CuPtr{Int64}, cudaDataType, - CuPtr{Cdouble}, Int64, Ptr{Int64}, Ptr{Int64}), - dense_handle(), CUBLAS_FILL_MODE_LOWER, - size(M.fact,1),1,R_64F,M.fact,size(M.fact,2), - M.etc[:ipiv64],R_64F,M.rhs,length(M.rhs),M.lwork,M.lwork_host) - length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) - length(M.work_host) < M.lwork_host[] && resize!(work_host,Int(M.lwork_host[])) - ccall((:cusolverDnXsytrs, libcusolver()), cusolverStatus_t, - (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType, - CuPtr{Cdouble}, Int64, CuPtr{Int64}, cudaDataType, - CuPtr{Cdouble}, Int64, CuPtr{Cdouble}, Int64, Ptr{Cdouble}, Int64, - CuPtr{Int64}), - dense_handle(),CUBLAS_FILL_MODE_LOWER, - size(M.fact,1),1,R_64F,M.fact,size(M.fact,2), - M.etc[:ipiv64],R_64F,M.rhs,length(M.rhs),M.work,M.lwork[],M.work_host,M.lwork_host[],M.info) - copyto!(x,M.rhs) - - return x - end -else - is_inertia(M::LapackGPUSolver) = - M.opt.lapack_algorithm == MadNLP.CHOLESKY - - function factorize_bunchkaufman!(M::LapackGPUSolver) - haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1))) - - copyto!(M.fact,M.dense) - CUSOLVER.cusolverDnDsytrf_bufferSize( - CUSOLVER.dense_handle(),Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork) - length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) - CUSOLVER.cusolverDnDsytrf( - CUSOLVER.dense_handle(),CUBLAS.CUBLAS_FILL_MODE_LOWER, - Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)), - M.etc[:ipiv],M.work,M.lwork[],M.info) - - # need to send the factorization back to cpu to call mkl sytrs -------------- - haskey(M.etc,:fact_cpu) || (M.etc[:fact_cpu] = Matrix{Float64}(undef,size(M.dense))) - haskey(M.etc,:ipiv_cpu) || (M.etc[:ipiv_cpu] = 
Vector{Int}(undef,length(M.etc[:ipiv])))
-        haskey(M.etc,:info_cpu) || (M.etc[:info_cpu] = Vector{Int}(undef,length(M.info)))
-        copyto!(M.etc[:fact_cpu],M.fact)
-        copyto!(M.etc[:ipiv_cpu],M.etc[:ipiv])
-        copyto!(M.etc[:info_cpu],M.info)
-        # ---------------------------------------------------------------------------
-        return M
-    end
-
-    function solve_bunchkaufman!(M::LapackGPUSolver,x)
-        ccall(
-            (:dsytrs_64_,"libopenblas64_"), # MKL doesn't work for some reason...
-            Cvoid,
-            (Ref{Cchar},Ref{Int},Ref{Int},Ptr{Cdouble},Ref{Int},Ptr{Int},Ptr{Cdouble},Ref{Int},Ptr{Int}),
-            'L',size(M.fact,1),1,M.etc[:fact_cpu],size(M.fact,2),M.etc[:ipiv_cpu],x,length(x),[1])
-
-        return x
+for (sytrf,sytrf_buffer,getrf,getrf_buffer,getrs,geqrf,geqrf_buffer,ormqr,ormqr_buffer,trsm,potrf,potrf_buffer,potrs,typ,cutyp) in (
+    (
+        :cusolverDnDsytrf, :cusolverDnDsytrf_bufferSize,
+        :cusolverDnDgetrf, :cusolverDnDgetrf_bufferSize, :cusolverDnDgetrs,
+        :cusolverDnDgeqrf, :cusolverDnDgeqrf_bufferSize,
+        :cusolverDnDormqr, :cusolverDnDormqr_bufferSize,
+        :cublasDtrsm_v2,
+        :cusolverDnDpotrf, :cusolverDnDpotrf_bufferSize,
+        :cusolverDnDpotrs,
+        Float64, CUDA.R_64F
+    ),
+    (
+        :cusolverDnSsytrf, :cusolverDnSsytrf_bufferSize,
+        :cusolverDnSgetrf, :cusolverDnSgetrf_bufferSize, :cusolverDnSgetrs,
+        :cusolverDnSgeqrf, :cusolverDnSgeqrf_bufferSize,
+        :cusolverDnSormqr, :cusolverDnSormqr_bufferSize,
+        :cublasStrsm_v2,
+        :cusolverDnSpotrf, :cusolverDnSpotrf_bufferSize,
+        :cusolverDnSpotrs,
+        Float32, CUDA.R_32F
+    ),
+    )
+    @eval begin
+        function factorize_bunchkaufman!(M::LapackGPUSolver{$typ})
+            haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
+            haskey(M.etc,:ipiv64) || (M.etc[:ipiv64] = CuVector{Int64}(undef,length(M.etc[:ipiv])))
+
+            copyto!(M.fact,M.dense)
+            CUSOLVER.$sytrf_buffer(
+                dense_handle(),Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork)
+            length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
+            CUSOLVER.$sytrf(
+                dense_handle(),CUBLAS_FILL_MODE_LOWER,
+                Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),
+                M.etc[:ipiv],M.work,M.lwork[],M.info)
+            return M
+        end
+
+        function solve_bunchkaufman!(M::LapackGPUSolver{$typ},x)
+
+            copyto!(M.etc[:ipiv64],M.etc[:ipiv])
+            copyto!(M.rhs,x)
+            ccall((:cusolverDnXsytrs_bufferSize, libcusolver()), cusolverStatus_t,
+                  (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType,
+                   CuPtr{Cdouble}, Int64, CuPtr{Int64}, cudaDataType,
+                   CuPtr{Cdouble}, Int64, Ptr{Int64}, Ptr{Int64}),
+                  dense_handle(), CUBLAS_FILL_MODE_LOWER,
+                  size(M.fact,1),1,$cutyp,M.fact,size(M.fact,2),
+                  M.etc[:ipiv64],$cutyp,M.rhs,length(M.rhs),M.lwork,M.lwork_host)
+            length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
+            length(M.work_host) < M.lwork_host[] && resize!(M.work_host,Int(M.lwork_host[]))
+            ccall((:cusolverDnXsytrs, libcusolver()), cusolverStatus_t,
+                  (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType,
+                   CuPtr{Cdouble}, Int64, CuPtr{Int64}, cudaDataType,
+                   CuPtr{Cdouble}, Int64, CuPtr{Cdouble}, Int64, Ptr{Cdouble}, Int64,
+                   CuPtr{Int64}),
+                  dense_handle(),CUBLAS_FILL_MODE_LOWER,
+                  size(M.fact,1),1,$cutyp,M.fact,size(M.fact,2),
+                  M.etc[:ipiv64],$cutyp,M.rhs,length(M.rhs),M.work,M.lwork[],M.work_host,M.lwork_host[],M.info)
+            copyto!(x,M.rhs)
+
+            return x
+        end
+
+        function factorize_lu!(M::LapackGPUSolver{$typ})
+            haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
+            tril_to_full!(M.dense)
+            copyto!(M.fact,M.dense)
+            CUSOLVER.$getrf_buffer(
+                dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),
M.fact,Int32(size(M.fact,2)),M.lwork) + length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) + CUSOLVER.$getrf( + dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)), + M.fact,Int32(size(M.fact,2)),M.work,M.etc[:ipiv],M.info) + return M + end + + function solve_lu!(M::LapackGPUSolver{$typ},x) + copyto!(M.rhs,x) + CUSOLVER.$getrs( + dense_handle(),CUBLAS_OP_N, + Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)), + M.etc[:ipiv],M.rhs,Int32(length(M.rhs)),M.info) + copyto!(x,M.rhs) + return x + end + + function factorize_qr!(M::LapackGPUSolver{$typ}) + haskey(M.etc,:tau) || (M.etc[:tau] = CuVector{$typ}(undef,size(M.dense,1))) + haskey(M.etc,:one) || (M.etc[:one] = ones($typ,1)) + tril_to_full!(M.dense) + copyto!(M.fact,M.dense) + CUSOLVER.$geqrf_buffer(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.lwork) + length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) + CUSOLVER.$geqrf(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.work,M.lwork[],M.info) + return M + end + + function solve_qr!(M::LapackGPUSolver{$typ},x) + copyto!(M.rhs,x) + CUSOLVER.$ormqr_buffer(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T, + Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.lwork) + length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) + CUSOLVER.$ormqr(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T, + Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.work,M.lwork[],M.info) + CUBLAS.$trsm(handle(),CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_UPPER,CUBLAS_OP_N,CUBLAS_DIAG_NON_UNIT, + Int32(size(M.fact,1)),Int32(1),M.etc[:one],M.fact,Int32(size(M.fact,2)),M.rhs,Int32(length(M.rhs))) + copyto!(x,M.rhs) + return x + end + + function factorize_cholesky!(M::LapackGPUSolver{$typ}) + copyto!(M.fact,M.dense) + CUSOLVER.$potrf_buffer( + dense_handle(),CUBLAS_FILL_MODE_LOWER, + Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork) + length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) + CUSOLVER.$potrf( + dense_handle(),CUBLAS_FILL_MODE_LOWER, + Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)), + M.work,M.lwork[],M.info) + return M + end + + function solve_cholesky!(M::LapackGPUSolver{$typ},x) + copyto!(M.rhs,x) + CUSOLVER.$potrs( + dense_handle(),CUBLAS_FILL_MODE_LOWER, + Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)), + M.rhs,Int32(length(M.rhs)),M.info) + copyto!(x,M.rhs) + return x + end end end -function factorize_lu!(M::LapackGPUSolver) - haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1))) - tril_to_full!(M.dense) - copyto!(M.fact,M.dense) - cusolverDnDgetrf_bufferSize( - dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)), - M.fact,Int32(size(M.fact,2)),M.lwork) - length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) - cusolverDnDgetrf( - dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)), - M.fact,Int32(size(M.fact,2)),M.work,M.etc[:ipiv],M.info) - return M -end - -function solve_lu!(M::LapackGPUSolver,x) - copyto!(M.rhs,x) - cusolverDnDgetrs( - dense_handle(),CUBLAS_OP_N, - Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)), - M.etc[:ipiv],M.rhs,Int32(length(M.rhs)),M.info) - copyto!(x,M.rhs) - return x -end - -function factorize_qr!(M::LapackGPUSolver) - haskey(M.etc,:tau) || (M.etc[:tau] = CuVector{Float64}(undef,size(M.dense,1))) - haskey(M.etc,:one) || 
(M.etc[:one] = ones(1)) - tril_to_full!(M.dense) - copyto!(M.fact,M.dense) - cusolverDnDgeqrf_bufferSize(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.lwork) - length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) - cusolverDnDgeqrf(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.work,M.lwork[],M.info) - return M -end - -function solve_qr!(M::LapackGPUSolver,x) - copyto!(M.rhs,x) - cusolverDnDormqr_bufferSize(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T, - Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.lwork) - length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) - cusolverDnDormqr(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T, - Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.work,M.lwork[],M.info) - cublasDtrsm_v2(handle(),CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_UPPER,CUBLAS_OP_N,CUBLAS_DIAG_NON_UNIT, - Int32(size(M.fact,1)),Int32(1),M.etc[:one],M.fact,Int32(size(M.fact,2)),M.rhs,Int32(length(M.rhs))) - copyto!(x,M.rhs) - return x -end - -function factorize_cholesky!(M::LapackGPUSolver) - copyto!(M.fact,M.dense) - cusolverDnDpotrf_bufferSize( - dense_handle(),CUBLAS_FILL_MODE_LOWER, - Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork) - length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) - cusolverDnDpotrf( - dense_handle(),CUBLAS_FILL_MODE_LOWER, - Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)), - M.work,M.lwork[],M.info) - return M -end - -function solve_cholesky!(M::LapackGPUSolver,x) - copyto!(M.rhs,x) - cusolverDnDpotrs( - dense_handle(),CUBLAS_FILL_MODE_LOWER, - Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)), - M.rhs,Int32(length(M.rhs)),M.info) - copyto!(x,M.rhs) - return x -end - +is_inertia(M::LapackGPUSolver) = M.opt.lapack_algorithm == MadNLP.CHOLESKY # TODO: implement inertia(M::LapackGPUSolver) for BUNCHKAUFMAN function inertia(M::LapackGPUSolver) if M.opt.lapack_algorithm == MadNLP.BUNCHKAUFMAN inertia(M.etc[:fact_cpu],M.etc[:ipiv_cpu],M.etc[:info_cpu][]) @@ -236,4 +212,6 @@ function inertia(M::LapackGPUSolver) end input_type(::Type{LapackGPUSolver}) = :dense +is_supported(::Type{LapackGPUSolver},::Type{Float32}) = true +is_supported(::Type{LapackGPUSolver},::Type{Float64}) = true diff --git a/lib/MadNLPGPU/test/densekkt_gpu.jl b/lib/MadNLPGPU/test/densekkt_gpu.jl index b52ea6c5..805cbc07 100644 --- a/lib/MadNLPGPU/test/densekkt_gpu.jl +++ b/lib/MadNLPGPU/test/densekkt_gpu.jl @@ -9,32 +9,32 @@ function _compare_gpu_with_cpu(KKTSystem, n, m, ind_fixed) elseif (KKTSystem == MadNLP.DenseCondensedKKTSystem) MadNLP.DENSE_CONDENSED_KKT_SYSTEM end - # Define options - madnlp_options = Dict{Symbol, Any}( - :kkt_system=>opt_kkt, - :linear_solver=>LapackGPUSolver, - :print_level=>MadNLP.ERROR, - ) - - nlp = MadNLPTests.DenseDummyQP(; n=n, m=m, fixed_variables=ind_fixed) - - # Solve on CPU - h_ips = MadNLP.InteriorPointSolver(nlp; option_dict=copy(madnlp_options)) - MadNLP.optimize!(h_ips) - - # Solve on GPU - d_ips = MadNLPGPU.CuInteriorPointSolver(nlp; option_dict=copy(madnlp_options)) - MadNLP.optimize!(d_ips) - - T = Float64 - VT = CuVector{T} - MT = CuMatrix{T} - @test isa(d_ips.kkt, KKTSystem{T, VT, MT}) - # # Check that both results match exactly - @test h_ips.cnt.k == d_ips.cnt.k - @test h_ips.obj_val ≈ d_ips.obj_val atol=1e-10 - @test h_ips.x ≈ d_ips.x atol=1e-10 - @test h_ips.l ≈ 
d_ips.l atol=1e-10
+
+    for (T,tol,atol) in [(Float32,1e-3,1e-1), (Float64,1e-8,1e-6)]
+        madnlp_options = Dict{Symbol, Any}(
+            :kkt_system=>opt_kkt,
+            :linear_solver=>LapackGPUSolver,
+            :print_level=>MadNLP.ERROR,
+            :tol=>tol
+        )
+
+        nlp = MadNLPTests.DenseDummyQP{T}(; n=n, m=m, fixed_variables=ind_fixed)
+
+        # Solve on CPU
+        h_ips = MadNLP.InteriorPointSolver(nlp; option_dict=copy(madnlp_options))
+        MadNLP.optimize!(h_ips)
+
+        # Solve on GPU
+        d_ips = MadNLPGPU.CuInteriorPointSolver(nlp; option_dict=copy(madnlp_options))
+        MadNLP.optimize!(d_ips)
+
+        @test isa(d_ips.kkt, KKTSystem{T, CuVector{T}, CuMatrix{T}})
+        # Check that both results match
+        @test h_ips.cnt.k == d_ips.cnt.k
+        @test h_ips.obj_val ≈ d_ips.obj_val atol=atol
+        @test h_ips.x ≈ d_ips.x atol=atol
+        @test h_ips.l ≈ d_ips.l atol=atol
+    end
 end

 @testset "MadNLPGPU ($(kkt_system))" for kkt_system in [
diff --git a/lib/MadNLPGPU/test/runtests.jl b/lib/MadNLPGPU/test/runtests.jl
index b59224fe..07956b05 100644
--- a/lib/MadNLPGPU/test/runtests.jl
+++ b/lib/MadNLPGPU/test/runtests.jl
@@ -35,8 +35,12 @@ testset = [
     ],
 ]

-# Test LapackGPU wrapper
-@testset "LapackGPU test" begin
+@testset "MadNLPGPU test" begin
+
+    MadNLPTests.test_linear_solver(LapackGPUSolver,Float32)
+    MadNLPTests.test_linear_solver(LapackGPUSolver,Float64)
+
+    # Test LapackGPU wrapper
     for (name,optimizer_constructor,exclude) in testset
         test_madnlp(name,optimizer_constructor,exclude)
     end
@@ -44,4 +48,3 @@ end

 # Test DenseKKTSystem on GPU
 include("densekkt_gpu.jl")
-
diff --git a/lib/MadNLPHSL/README.md b/lib/MadNLPHSL/README.md
index b811b0b6..2fb8cec1 100644
--- a/lib/MadNLPHSL/README.md
+++ b/lib/MadNLPHSL/README.md
@@ -7,11 +7,20 @@ pkg> add MadNLPHSL

To build MadNLP with HSL linear solvers (Ma27, Ma57, Ma77, Ma86, Ma97), the source code needs to be obtained by the user from the HSL website, under Coin-HSL Full (Stable). The source code is distributed as a tarball file `coinhsl-*.tar.gz`. The absolute path to the extracted source code or the compiled library should be provided by the user. If the user has an already compiled HSL solver library, one can simply provide a path to that shared library. In this case, the source code is not compiled and the provided shared library is used directly.
```julia
-# either one of the following should be given
+# at least one of the following should be given
 julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/coinhsl"
-julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/coinhsl-archive-2021.05.05"
-julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/ma57-3.11.0/"
+julia> ENV["MADNLP_MA27_SOURCE_PATH"] = "/opt/coinhsl-archive-2021.05.05"
+julia> ENV["MADNLP_MA57_SOURCE_PATH"] = "/opt/ma57-3.11.0/"
+julia> ENV["MADNLP_MA77_SOURCE_PATH"] = "/opt/hsl_ma77-6.3.0"
+julia> ENV["MADNLP_MA86_SOURCE_PATH"] = "/opt/hsl_ma86-1.7.2"
+julia> ENV["MADNLP_MA97_SOURCE_PATH"] = "/opt/hsl_ma97-2.7.1"
+
 julia> ENV["MADNLP_HSL_LIBRARY_PATH"] = "/usr/lib/libcoinhsl.so"
+julia> ENV["MADNLP_MA27_LIBRARY_PATH"] = "/usr/lib/libma27.so"
+julia> ENV["MADNLP_MA57_LIBRARY_PATH"] = "/usr/lib/libma57.so"
+julia> ENV["MADNLP_MA77_LIBRARY_PATH"] = "/usr/lib/libma77.so"
+julia> ENV["MADNLP_MA86_LIBRARY_PATH"] = "/usr/lib/libma86.so"
+julia> ENV["MADNLP_MA97_LIBRARY_PATH"] = "/usr/lib/libma97.so"
 # optionally, one can specify
 julia> ENV["MADNLP_HSL_BLAS"] = "mkl" # default is "openblas"
```
diff --git a/lib/MadNLPHSL/deps/build.jl b/lib/MadNLPHSL/deps/build.jl
index d158ed4f..2cde9269 100644
--- a/lib/MadNLPHSL/deps/build.jl
+++ b/lib/MadNLPHSL/deps/build.jl
@@ -13,8 +13,6 @@ const rpath = `-Wl,-rpath,`
 const whole_archive= Sys.isapple() ? `-Wl,-all_load` : `-Wl,--whole-archive`
 const no_whole_archive = Sys.isapple() ? `-Wl,-noall_load` : `-Wl,--no-whole-archive`
 const libdir = mkpath(joinpath(@__DIR__, "lib"))
-const hsl_library_path = haskey(ENV,"MADNLP_HSL_LIBRARY_PATH") ? ENV["MADNLP_HSL_LIBRARY_PATH"] : ""
-const hsl_source_path = haskey(ENV,"MADNLP_HSL_SOURCE_PATH") ? ENV["MADNLP_HSL_SOURCE_PATH"] : ""
 const FC = haskey(ENV,"MADNLP_FC") ? ENV["MADNLP_FC"] : `gfortran`
 const libmetis_dir = joinpath(artifact"METIS", "lib")
 const with_metis = `-L$libmetis_dir $rpath$libmetis_dir -lmetis`
@@ -26,25 +24,59 @@ else
     const libblas_dir = joinpath(artifact"OpenBLAS32","lib")
     const with_blas = `-L$libblas_dir $rpath$libblas_dir -lopenblas`
 end
-
-const targets =[
-    [
-        "deps.f", "deps90.f90",
+
+const supported_library = [
+    (:libhsl,  "MADNLP_HSL_LIBRARY_PATH",  "MADNLP_HSL_SOURCE_PATH")
+    (:libma27, "MADNLP_MA27_LIBRARY_PATH", "MADNLP_MA27_SOURCE_PATH")
+    (:libma57, "MADNLP_MA57_LIBRARY_PATH", "MADNLP_MA57_SOURCE_PATH")
+    (:libma77, "MADNLP_MA77_LIBRARY_PATH", "MADNLP_MA77_SOURCE_PATH")
+    (:libma86, "MADNLP_MA86_LIBRARY_PATH", "MADNLP_MA86_SOURCE_PATH")
+    (:libma97, "MADNLP_MA97_LIBRARY_PATH", "MADNLP_MA97_SOURCE_PATH")
+]
+
+const targets_dict = Dict(
+    :libhsl=> [
+        "deps.f",
+        "deps90.f90",
+        "ma27d.f",
+        "ma57d.f",
+        "hsl_ma77d.f90",
+        "hsl_ma86d.f90",
+        "hsl_ma97d.f90",
+        "hsl_mc68i_ciface.f90",
+        "hsl_ma77d_ciface.f90",
+        "hsl_ma86d_ciface.f90",
+        "hsl_ma97d_ciface.f90",
     ],
-    [
-        "ma27d.f", "ma27s.f",
-        "ma57d.f", "ma57s.f",
-        "hsl_ma77d.f90", "hsl_ma77s.f90",
-        "hsl_ma86d.f90", "hsl_ma86s.f90",
-        "hsl_ma97d.f90", "hsl_ma97s.f90",
+    :libma27 => [
+        "deps.f",
+        "ma27d.f",
+        "ma27s.f",
     ],
-    [
-        "hsl_mc68i_ciface.f90",
+    :libma57 => [
+        "sdeps.f", "ddeps.f",
+        "ma57d.f", "ma57s.f",
+    ],
+    :libma77 => [
+        "common.f", "common90.f90",
+        "ddeps90.f90", "sdeps90.f90",
+        "hsl_ma77d.f90", "hsl_ma77s.f90",
         "hsl_ma77d_ciface.f90", "hsl_ma77s_ciface.f90",
+    ],
+    :libma86 => [
+        "common.f", "common90.f90",
+        "sdeps90.f90",
+        "hsl_ma86d.f90", "hsl_ma86s.f90",
         "hsl_ma86d_ciface.f90", "hsl_ma86s_ciface.f90",
-        "hsl_ma97d_ciface.f90", "hsl_ma97s_ciface.f90",
+        "hsl_mc68i_ciface.f90",
+    ],
+    :libma97 => [
+        "common.f", "common90.f90",
+        "sdeps90.f90", "ddeps90.f90",
+        "hsl_ma97d.f90", "hsl_ma97s.f90",
+        "hsl_ma97d_ciface.f90", "hsl_ma97s_ciface.f90",
     ]
-]
+)

 rm(libdir;recursive=true,force=true)
 mkpath(libdir)
@@ -52,48 +84,60 @@ isvalid(cmd::Cmd)=(try run(cmd) catch e return false end; return true)

 # HSL
-if hsl_source_path != ""
-    if isvalid(`$FC --version`)
-        OC = OutputCollector[]
-        cd(hsl_source_path)
+attempted = Tuple{Symbol,Product}[]

-        names_succeeded = []
-        for i=1:3
-            names = []
-            for (root, dirs, files) in walkdir(hsl_source_path)
-                for file in files;
-                    if file in targets[i];
-                        filter!(x->x != file,files)
-                        name = splitext(relpath(joinpath(root,file),hsl_source_path))
-                        push!(names, name)
-                        @info "$(name[1])$(name[2]) source code detected."
-                    end
+for (lib, envlib, envsrc) in supported_library
+    if haskey(ENV,envlib)
+        push!(attempted, (lib,FileProduct(ENV[envlib], lib)))
+    elseif haskey(ENV,envsrc) && isvalid(`$FC --version`)
+        @info "Compiling $lib"
+        source_path = ENV[envsrc]
+        targets = targets_dict[lib]
+
+        cd(source_path)
+
+        list = []
+        for (root, dir, files) in walkdir(source_path)
+            for file in files
+                if file in targets
+                    @info "$file source code detected."
+                    push!(list, (root, dir, file))
                 end
             end
-            succeeded = wait.(
-                [OutputCollector(`$FC -fopenmp -fPIC -c -O3 -o $name.o $name$ext`,verbose=verbose)
-                 for (name,ext) in names])
-            append!(names_succeeded, names[succeeded])
         end

-        cmd = `$FC -o$(libdir)/libhsl.$so -shared -fPIC -O3 -fopenmp`
-        append!(cmd.exec, ["$name.o" for (name,ext) in names_succeeded])
+        succeeded = []
+        for target in targets
+            for (root, dir, file) in list
+                if file == target
+                    name, ext = splitext(relpath(joinpath(root,file),source_path))
+                    isvalid(`$FC -fopenmp -fPIC -c -O3 -o $name.o $name$ext`) &&
+                        push!(succeeded, (name, ext))
+                end
+            end
+        end
+
+        cmd = `$FC -o$(libdir)/$lib.$so -shared -fPIC -O3 -fopenmp`
+        append!(cmd.exec, ["$name.o" for (name,ext) in succeeded])
         append!(cmd.exec, with_metis.exec)
         append!(cmd.exec, with_blas.exec)
         run(cmd)
         cd("$(@__DIR__)")

-        product = FileProduct(prefix,joinpath(libdir,"libhsl.$so"), :libhsl)
+        push!(attempted, (lib,FileProduct(prefix,joinpath(libdir,"$lib.$so"), lib)))
     end
-else
-    product = FileProduct(hsl_library_path, :libhsl)
 end

 # write deps.jl
-if satisfied(product)
-    @info "Building HSL succeeded."
-    write_deps_file(joinpath(@__DIR__, "deps.jl"),Product[product], verbose=verbose)
-else
-    @error "Building HSL failed."
-    write_deps_file(joinpath(@__DIR__, "deps.jl"),Product[], verbose=verbose)
+succeeded = Product[]
+for (lib, product) in attempted
+    if satisfied(product)
+        @info "Building $lib succeeded."
+        push!(succeeded, product)
+    else
+        @error "Building $lib failed."
+    end
 end
+
+write_deps_file(joinpath(@__DIR__, "deps.jl"), succeeded, verbose=verbose)
diff --git a/lib/MadNLPHSL/src/MadNLPHSL.jl b/lib/MadNLPHSL/src/MadNLPHSL.jl
index 890fa855..c3a7e9de 100644
--- a/lib/MadNLPHSL/src/MadNLPHSL.jl
+++ b/lib/MadNLPHSL/src/MadNLPHSL.jl
@@ -5,25 +5,56 @@ import MadNLP:
     @kwdef, Logger, @debug, @warn, @error,
     AbstractOptions, AbstractLinearSolver, set_options!, SparseMatrixCSC, SubVector,
     SymbolicException,FactorizationException,SolveException,InertiaException,
     introduce, factorize!, solve!, improve!, is_inertia, inertia, findIJ, nnz,
-    get_tril_to_full, transfer!, input_type, _madnlp_unsafe_wrap
+    get_tril_to_full, transfer!, input_type, _madnlp_unsafe_wrap,
+    is_supported

 include(joinpath("..","deps","deps.jl"))
+include("common.jl")
+include("mc68.jl")
+
 if @isdefined(libhsl)
-    include("common.jl")
-    include("mc68.jl")
+    @isdefined(libma27) || const libma27 = libhsl
+    @isdefined(libma57) || const libma57 = libhsl
+    @isdefined(libma77) || const libma77 = libhsl
+    @isdefined(libma86) || const libma86 = libhsl
+    @isdefined(libma97) || const libma97 = libhsl
+end
+
+if @isdefined(libma27)
     include("ma27.jl")
+    export Ma27Solver
+end
+
+if @isdefined(libma57)
     include("ma57.jl")
+    export Ma57Solver
+end
+
+if @isdefined(libma77)
     include("ma77.jl")
+    export Ma77Solver
+end
+
+if @isdefined(libma86)
     include("ma86.jl")
+    export Ma86Solver
+end
+
+if @isdefined(libma97)
     include("ma97.jl")
-    export Ma27Solver, Ma57Solver, Ma77Solver, Ma86Solver, Ma97Solver
+    export Ma97Solver
 end

 function __init__()
     check_deps()
     try
-        @isdefined(libhsl) && dlopen(libhsl,RTLD_DEEPBIND)
+        @isdefined(libhsl)  && dlopen(libhsl,RTLD_DEEPBIND)
+        @isdefined(libma27) && dlopen(libma27,RTLD_DEEPBIND)
+        @isdefined(libma57) && dlopen(libma57,RTLD_DEEPBIND)
+        @isdefined(libma77) && dlopen(libma77,RTLD_DEEPBIND)
+        @isdefined(libma86) && dlopen(libma86,RTLD_DEEPBIND)
+        @isdefined(libma97) && dlopen(libma97,RTLD_DEEPBIND)
     catch e
         println("HSL shared library cannot be loaded")
     end
diff --git a/lib/MadNLPHSL/src/ma27.jl b/lib/MadNLPHSL/src/ma27.jl
index 418b98a2..133a345c 100644
--- a/lib/MadNLPHSL/src/ma27.jl
+++ b/lib/MadNLPHSL/src/ma27.jl
@@ -1,6 +1,6 @@
-const ma27_default_icntl = Int32[
+ma27_default_icntl() = Int32[
     6,6,0,2139062143,1,32639,32639,32639,32639,14,9,8,8,9,10,32639,32639,32639,32689,24,11,9,8,9,10,0,0,0,0,0]
-const ma27_default_cntl = [.1,1.0,0.,0.,0.]
+ma27_default_cntl(T) = T[.1,1.0,0.,0.,0.]

 @kwdef mutable struct Ma27Options <: AbstractOptions
     ma27_pivtol::Float64 = 1e-8
@@ -10,18 +10,18 @@ const ma27_default_cntl = [.1,1.0,0.,0.,0.]
     ma27_meminc_factor::Float64 =2.
end -mutable struct Ma27Solver <: AbstractLinearSolver - csc::SparseMatrixCSC{Float64,Int32} +mutable struct Ma27Solver{T} <: AbstractLinearSolver{T} + csc::SparseMatrixCSC{T,Int32} I::Vector{Int32} J::Vector{Int32} icntl::Vector{Int32} - cntl::Vector{Float64} + cntl::Vector{T} info::Vector{Int32} - a::Vector{Float64} - a_view::Vector{Float64} + a::Vector{T} + a_view::Vector{T} la::Int32 ikeep::Vector{Int32} @@ -29,51 +29,68 @@ mutable struct Ma27Solver <: AbstractLinearSolver liw::Int32 iw1::Vector{Int32} nsteps::Vector{Int32} - w::Vector{Float64} + w::Vector{T} maxfrt::Vector{Int32} opt::Ma27Options logger::Logger end -ma27ad!(n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint}, - iw::Vector{Cint},liw::Cint,ikeep::Vector{Cint},iw1::Vector{Cint}, - nsteps::Vector{Cint},iflag::Cint,icntl::Vector{Cint},cntl::Vector{Cdouble}, - info::Vector{Cint},ops::Cdouble) = ccall( - (:ma27ad_,libhsl), + +for (fa, fb, fc, typ) in [ + (:ma27ad_,:ma27bd_,:ma27cd_,Float64), + (:ma27a_,:ma27b_,:ma27c_,Float32) + ] + @eval begin + ma27a!( + n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint}, + iw::Vector{Cint},liw::Cint,ikeep::Vector{Cint},iw1::Vector{Cint}, + nsteps::Vector{Cint},iflag::Cint,icntl::Vector{Cint},cntl::Vector{$typ}, + info::Vector{Cint},ops::$typ + ) = ccall( + ($(string(fa)),libma27), Nothing, (Ref{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint}, Ptr{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint}, - Ptr{Cint},Ref{Cint},Ptr{Cint},Ptr{Cdouble}, - Ptr{Cint},Ref{Cdouble}), - n,nz,I,J,iw,liw,ikeep,iw1,nsteps,iflag,icntl,cntl,info,ops) - -ma27bd!(n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint}, - a::Vector{Cdouble},la::Cint,iw::Vector{Cint},liw::Cint, - ikeep::Vector{Cint},nsteps::Vector{Cint},maxfrt::Vector{Cint},iw1::Vector{Cint}, - icntl::Vector{Cint},cntl::Vector{Cdouble},info::Vector{Cint}) = ccall( - (:ma27bd_,libhsl), + Ptr{Cint},Ref{Cint},Ptr{Cint},Ptr{$typ}, + Ptr{Cint},Ref{$typ}), + n,nz,I,J,iw,liw,ikeep,iw1,nsteps,iflag,icntl,cntl,info,ops + ) + + ma27b!( + n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint}, + a::Vector{$typ},la::Cint,iw::Vector{Cint},liw::Cint, + ikeep::Vector{Cint},nsteps::Vector{Cint},maxfrt::Vector{Cint},iw1::Vector{Cint}, + icntl::Vector{Cint},cntl::Vector{$typ},info::Vector{Cint} + ) = ccall( + ($(string(fb)),libma27), Nothing, (Ref{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint}, - Ptr{Cdouble},Ref{Cint},Ptr{Cint},Ref{Cint}, + Ptr{$typ},Ref{Cint},Ptr{Cint},Ref{Cint}, Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint}, - Ptr{Cint},Ptr{Cdouble},Ptr{Cint}), - n,nz,I,J,a,la,iw,liw,ikeep,nsteps,maxfrt,iw1,icntl,cntl,info) - -ma27cd!(n::Cint,a::Vector{Cdouble},la::Cint,iw::Vector{Cint}, - liw::Cint,w::Vector{Cdouble},maxfrt::Vector{Cint},rhs::Vector{Cdouble}, - iw1::Vector{Cint},nsteps::Vector{Cint},icntl::Vector{Cint}, - info::Vector{Cint}) = ccall( - (:ma27cd_,libhsl), + Ptr{Cint},Ptr{$typ},Ptr{Cint}), + n,nz,I,J,a,la,iw,liw,ikeep,nsteps,maxfrt,iw1,icntl,cntl,info + ) + + ma27c!( + n::Cint,a::Vector{$typ},la::Cint,iw::Vector{Cint}, + liw::Cint,w::Vector{$typ},maxfrt::Vector{Cint},rhs::Vector{$typ}, + iw1::Vector{Cint},nsteps::Vector{Cint},icntl::Vector{Cint}, + info::Vector{Cint} + ) = ccall( + ($(string(fc)),libma27), Nothing, - (Ref{Cint},Ptr{Cdouble},Ref{Cint},Ptr{Cint}, - Ref{Cint},Ptr{Cdouble},Ptr{Cint},Ptr{Cdouble}, + (Ref{Cint},Ptr{$typ},Ref{Cint},Ptr{Cint}, + Ref{Cint},Ptr{$typ},Ptr{Cint},Ptr{$typ}, Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint}), - n,a,la,iw,liw,w,maxfrt,rhs,iw1,nsteps,icntl,info) + n,a,la,iw,liw,w,maxfrt,rhs,iw1,nsteps,icntl,info + ) + end +end -function Ma27Solver(csc::SparseMatrixCSC; +function 
Ma27Solver(csc::SparseMatrixCSC;
+function Ma27Solver(csc::SparseMatrixCSC{T};
     option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
-    opt=Ma27Options(),logger=Logger(),kwargs...)
+    opt=Ma27Options(),logger=Logger(),kwargs...) where T

     set_options!(opt,option_dict,kwargs)

@@ -87,31 +104,31 @@ function Ma27Solver(csc::SparseMatrixCSC;
     nsteps=Int32[1]
     iflag =Int32(0)

-    icntl= copy(ma27_default_icntl)
+    icntl= ma27_default_icntl()
+    cntl = ma27_default_cntl(T)
     icntl[1:2] .= 0
-    cntl = copy(ma27_default_cntl)
     cntl[1] = opt.ma27_pivtol

     info = Vector{Int32}(undef,20)
-    ma27ad!(Int32(csc.n),nz,I,J,iw,liw,ikeep,iw1,nsteps,Int32(0),icntl,cntl,info,0.)
+    ma27a!(Int32(csc.n),nz,I,J,iw,liw,ikeep,iw1,nsteps,Int32(0),icntl,cntl,info,zero(T))
     info[1]<0 && throw(SymbolicException())

     la = ceil(Int32,max(nz,opt.ma27_la_init_factor*info[5]))
-    a = Vector{Float64}(undef,la)
+    a = Vector{T}(undef,la)
     a_view = _madnlp_unsafe_wrap(a,nnz(csc))
     liw= ceil(Int32,opt.ma27_liw_init_factor*info[6])
     resize!(iw,liw)
     maxfrt=Int32[1]

-    return Ma27Solver(csc,I,J,icntl,cntl,info,a,a_view,la,ikeep,iw,liw,
-                      iw1,nsteps,Vector{Float64}(),maxfrt,opt,logger)
+    return Ma27Solver{T}(csc,I,J,icntl,cntl,info,a,a_view,la,ikeep,iw,liw,
+                      iw1,nsteps,Vector{T}(),maxfrt,opt,logger)
 end

 function factorize!(M::Ma27Solver)
     M.a_view.=M.csc.nzval
     while true
-        ma27bd!(Int32(M.csc.n),Int32(nnz(M.csc)),M.I,M.J,M.a,M.la,
+        ma27b!(Int32(M.csc.n),Int32(nnz(M.csc)),M.I,M.J,M.a,M.la,
                 M.iw,M.liw,M.ikeep,M.nsteps,M.maxfrt,
                 M.iw1,M.icntl,M.cntl,M.info)
         if M.info[1] == -3
@@ -131,10 +148,10 @@ function factorize!(M::Ma27Solver)
     return M
 end

-function solve!(M::Ma27Solver,rhs::Vector{Float64})
+function solve!(M::Ma27Solver{T},rhs::Vector{T}) where T
     length(M.w)
     if m >= n
         error("The number of constraints `m` should be less than the number of variables `n`.")
     end
@@ -66,29 +61,29 @@ function DenseDummyQP(; n=100, m=10, fixed_variables=Int[], equality_cons=[])
     Random.seed!(1)

     # Build QP problem 0.5 * x' * P * x + q' * x
-    P = randn(n , n)
+    P = randn(T,n , n)
     P += P' # P is symmetric
-    P += 100.0 * I
+    P += T(100.0) * I

-    q = randn(n)
+    q = randn(T,n)

     # Build constraints gl <= Ax <= gu
-    A = zeros(m, n)
+    A = zeros(T,m, n)
     for j in 1:m
-        A[j, j] = 1.0
-        A[j, j+1] = -1.0
+        A[j, j] = one(T)
+        A[j, j+1] = -one(T)
     end

-    x0 = zeros(n)
-    y0 = zeros(m)
+    x0 = zeros(T,n)
+    y0 = zeros(T,m)

     # Bound constraints
-    xu = fill(1.0, n)
-    xl = fill(0.0, n)
-    gl = fill(0.0, m)
-    gu = fill(1.0, m)
+    xu = fill(one(T), n)
+    xl = fill(zero(T), n)
+    gl = fill(zero(T), m)
+    gu = fill(one(T), m)

     # Update gu to load equality constraints
-    gu[equality_cons] .= 0.0
+    gu[equality_cons] .= zero(T)

     xl[fixed_variables] .= xu[fixed_variables]

@@ -100,7 +95,7 @@ function DenseDummyQP(; n=100, m=10, fixed_variables=Int[], equality_cons=[])
     jcols = [i for i in 1:n for j in 1:m]
     nnzj = n * m

-    return DenseDummyQP(
+    return DenseDummyQP{T}(
         NLPModels.NLPModelMeta(
             n,
             ncon = m,
@@ -119,3 +114,4 @@ function DenseDummyQP(; n=100, m=10, fixed_variables=Int[], equality_cons=[])
     )
 end

+DenseDummyQP(; kwargs...) = DenseDummyQP{Float64}(; kwargs...)
diff --git a/lib/MadNLPTests/src/MadNLPTests.jl b/lib/MadNLPTests/src/MadNLPTests.jl
index 33c338e0..7eb34473 100644
--- a/lib/MadNLPTests/src/MadNLPTests.jl
+++ b/lib/MadNLPTests/src/MadNLPTests.jl
@@ -20,23 +20,25 @@ function solcmp(x,sol;atol=1e-4,rtol=1e-4)
     return (aerr < atol || rerr < rtol)
 end

-function test_linear_solver(solver)
+function test_linear_solver(solver,T; kwargs...)
+
     m = 2
     n = 2
     row = Int32[1,2,2]
     col = Int32[1,1,2]
-    val = Float64[1.,.1,2.]
-    b = [1.0,3.0]
+    val = T[1.,.1,2.]
+ b = T[1.0,3.0] x = similar(b) @testset "Linear solver $solver" begin + csc = sparse(row,col,val,m,n) - dense = Array(csc) sol= [0.8542713567839195, 1.4572864321608041] if MadNLP.input_type(solver) == :csc - M = solver(csc) + M = solver(csc;kwargs...) elseif MadNLP.input_type(solver) == :dense - M = solver(dense) + dense = Array(csc) + M = solver(dense;kwargs...) end MadNLP.introduce(M) MadNLP.improve!(M) diff --git a/src/IPM/IPM.jl b/src/IPM/IPM.jl index e046f14a..16dd949b 100644 --- a/src/IPM/IPM.jl +++ b/src/IPM/IPM.jl @@ -1,11 +1,11 @@ # MadNLP.jl # Created by Sungho Shin (sungho.shin@wisc.edu) -abstract type AbstractInteriorPointSolver end +abstract type AbstractInteriorPointSolver{T} end include("restoration.jl") -mutable struct InteriorPointSolver{KKTSystem} <: AbstractInteriorPointSolver +mutable struct InteriorPointSolver{T,KKTSystem <: AbstractKKTSystem{T}} <: AbstractInteriorPointSolver{T} nlp::AbstractNLPModel kkt::KKTSystem @@ -18,111 +18,113 @@ mutable struct InteriorPointSolver{KKTSystem} <: AbstractInteriorPointSolver nlb::Int nub::Int - x::Vector{Float64} # primal (after reformulation) - l::Vector{Float64} # dual - zl::Vector{Float64} # dual (after reformulation) - zu::Vector{Float64} # dual (after reformulation) - xl::Vector{Float64} # primal lower bound (after reformulation) - xu::Vector{Float64} # primal upper bound (after reformulation) + x::Vector{T} # primal (after reformulation) + l::Vector{T} # dual + zl::Vector{T} # dual (after reformulation) + zu::Vector{T} # dual (after reformulation) + xl::Vector{T} # primal lower bound (after reformulation) + xu::Vector{T} # primal upper bound (after reformulation) - obj_val::Float64 - f::Vector{Float64} - c::Vector{Float64} + obj_val::T + f::Vector{T} + c::Vector{T} - jacl::Vector{Float64} + jacl::Vector{T} - d::UnreducedKKTVector{Float64, Vector{Float64}} - p::UnreducedKKTVector{Float64, Vector{Float64}} + d::UnreducedKKTVector{T, Vector{T}} + p::UnreducedKKTVector{T, Vector{T}} - _w1::AbstractKKTVector{Float64, Vector{Float64}} - _w2::AbstractKKTVector{Float64, Vector{Float64}} + _w1::AbstractKKTVector{T, Vector{T}} + _w2::AbstractKKTVector{T, Vector{T}} - _w3::AbstractKKTVector{Float64, Vector{Float64}} - _w4::AbstractKKTVector{Float64, Vector{Float64}} + _w3::AbstractKKTVector{T, Vector{T}} + _w4::AbstractKKTVector{T, Vector{T}} - x_trial::Vector{Float64} - c_trial::Vector{Float64} - obj_val_trial::Float64 + x_trial::Vector{T} + c_trial::Vector{T} + obj_val_trial::T - x_slk::Vector{Float64} - c_slk::SubVector{Float64} - rhs::Vector{Float64} + x_slk::Vector{T} + c_slk::SubVector{T} + rhs::Vector{T} ind_ineq::Vector{Int} ind_fixed::Vector{Int} ind_llb::Vector{Int} ind_uub::Vector{Int} - x_lr::SubVector{Float64} - x_ur::SubVector{Float64} - xl_r::SubVector{Float64} - xu_r::SubVector{Float64} - zl_r::SubVector{Float64} - zu_r::SubVector{Float64} - - dx_lr::SubVector{Float64} - dx_ur::SubVector{Float64} - x_trial_lr::SubVector{Float64} - x_trial_ur::SubVector{Float64} - - linear_solver::AbstractLinearSolver - iterator::AbstractIterator - - obj_scale::Vector{Float64} - con_scale::Vector{Float64} - con_jac_scale::Vector{Float64} - inf_pr::Float64 - inf_du::Float64 - inf_compl::Float64 - - theta_min::Float64 - theta_max::Float64 - mu::Float64 - tau::Float64 - - alpha::Float64 - alpha_z::Float64 + x_lr::SubVector{T} + x_ur::SubVector{T} + xl_r::SubVector{T} + xu_r::SubVector{T} + zl_r::SubVector{T} + zu_r::SubVector{T} + + dx_lr::SubVector{T} + dx_ur::SubVector{T} + x_trial_lr::SubVector{T} + x_trial_ur::SubVector{T} + + 
linear_solver::AbstractLinearSolver{T} + iterator::AbstractIterator{T} + + obj_scale::Vector{T} + con_scale::Vector{T} + con_jac_scale::Vector{T} + inf_pr::T + inf_du::T + inf_compl::T + + theta_min::T + theta_max::T + mu::T + tau::T + + alpha::T + alpha_z::T ftype::String - del_w::Float64 - del_c::Float64 - del_w_last::Float64 + del_w::T + del_c::T + del_w_last::T - filter::Vector{Tuple{Float64,Float64}} + filter::Vector{Tuple{T,T}} - RR::Union{Nothing,RobustRestorer} + RR::Union{Nothing,RobustRestorer{T}} status::Status output::Dict end -function InteriorPointSolver(nlp::AbstractNLPModel; +function InteriorPointSolver(nlp::AbstractNLPModel{T}; option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(), kwargs... -) +) where T opt = Options(linear_solver=default_linear_solver()) set_options!(opt,option_dict,kwargs) check_option_sanity(opt) + + @assert is_supported(opt.linear_solver,T) - VT = Vector{Float64} + VT = Vector{T} KKTSystem = if opt.kkt_system == SPARSE_KKT_SYSTEM - MT = (input_type(opt.linear_solver) == :csc) ? SparseMatrixCSC{Float64, Int32} : Matrix{Float64} - SparseKKTSystem{Float64, VT, MT} + MT = (input_type(opt.linear_solver) == :csc) ? SparseMatrixCSC{T, Int32} : Matrix{T} + SparseKKTSystem{T, VT, MT} elseif opt.kkt_system == SPARSE_UNREDUCED_KKT_SYSTEM - MT = (input_type(opt.linear_solver) == :csc) ? SparseMatrixCSC{Float64, Int32} : Matrix{Float64} - SparseUnreducedKKTSystem{Float64, VT, MT} + MT = (input_type(opt.linear_solver) == :csc) ? SparseMatrixCSC{T, Int32} : Matrix{T} + SparseUnreducedKKTSystem{T, VT, MT} elseif opt.kkt_system == DENSE_KKT_SYSTEM - MT = Matrix{Float64} - DenseKKTSystem{Float64, VT, MT} + MT = Matrix{T} + DenseKKTSystem{T, VT, MT} elseif opt.kkt_system == DENSE_CONDENSED_KKT_SYSTEM - MT = Matrix{Float64} - DenseCondensedKKTSystem{Float64, VT, MT} + MT = Matrix{T} + DenseCondensedKKTSystem{T, VT, MT} end - return InteriorPointSolver{KKTSystem}(nlp, opt; option_linear_solver=option_dict) + return InteriorPointSolver{T,KKTSystem}(nlp, opt; option_linear_solver=option_dict) end # Inner constructor -function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options; +function InteriorPointSolver{T,KKTSystem}(nlp::AbstractNLPModel, opt::Options; option_linear_solver::Dict{Symbol,Any}=Dict{Symbol,Any}(), -) where {KKTSystem<:AbstractKKTSystem} +) where {T, KKTSystem<:AbstractKKTSystem{T}} cnt = Counters(start_time=time()) logger = Logger(print_level=opt.print_level,file_print_level=opt.file_print_level, @@ -145,21 +147,21 @@ function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options; xl = [get_lvar(nlp);view(get_lcon(nlp),ind_cons.ind_ineq)] xu = [get_uvar(nlp);view(get_ucon(nlp),ind_cons.ind_ineq)] - x = [get_x0(nlp);zeros(ns)] + x = [get_x0(nlp);zeros(T,ns)] l = copy(get_y0(nlp)) - zl= zeros(get_nvar(nlp)+ns) - zu= zeros(get_nvar(nlp)+ns) + zl= zeros(T,get_nvar(nlp)+ns) + zu= zeros(T,get_nvar(nlp)+ns) - f = zeros(n) # not sure why, but seems necessary to initialize to 0 when used with Plasmo interface - c = zeros(m) + f = zeros(T,n) # not sure why, but seems necessary to initialize to 0 when used with Plasmo interface + c = zeros(T,m) n_jac = nnz_jacobian(kkt) nlb = length(ind_cons.ind_lb) nub = length(ind_cons.ind_ub) - x_trial=Vector{Float64}(undef,n) - c_trial=Vector{Float64}(undef,m) + x_trial=Vector{T}(undef,n) + c_trial=Vector{T}(undef,m) x_slk= _madnlp_unsafe_wrap(x,ns, get_nvar(nlp)+1) c_slk= view(c,ind_cons.ind_ineq) @@ -174,24 +176,29 @@ function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options; x_trial_lr 
= view(x_trial, ind_cons.ind_lb) x_trial_ur = view(x_trial, ind_cons.ind_ub) - aug_vec_length = is_reduced(kkt) ? n+m : n+m+nlb+nub - - _w1 = is_reduced(kkt) ? ReducedKKTVector(n, m) : UnreducedKKTVector(n, m, nlb, nub) - _w2 = is_reduced(kkt) ? ReducedKKTVector(n, m) : UnreducedKKTVector(n, m, nlb, nub) - _w3 = is_reduced(kkt) ? ReducedKKTVector(n, m) : UnreducedKKTVector(n, m, nlb, nub) - _w4 = is_reduced(kkt) ? ReducedKKTVector(n, m) : UnreducedKKTVector(n, m, nlb, nub) + if is_reduced(kkt) + _w1 = ReducedKKTVector{T,typeof(x)}(n, m) + _w2 = ReducedKKTVector{T,typeof(x)}(n, m) + _w3 = ReducedKKTVector{T,typeof(x)}(n, m) + _w4 = ReducedKKTVector{T,typeof(x)}(n, m) + else + _w1 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub) + _w2 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub) + _w3 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub) + _w4 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub) + end - jacl = zeros(n) # spblas may throw an error if not initialized to zero + jacl = zeros(T,n) # spblas may throw an error if not initialized to zero - d = UnreducedKKTVector(n, m, nlb, nub) + d = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub) dx_lr = view(d.xp, ind_cons.ind_lb) # TODO dx_ur = view(d.xp, ind_cons.ind_ub) # TODO - p = UnreducedKKTVector(n, m, nlb, nub) + p = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub) - obj_scale = [1.0] - con_scale = ones(m) - con_jac_scale = ones(n_jac) + obj_scale = T[1.0] + con_scale = ones(T,m) + con_jac_scale = ones(T,n_jac) @trace(logger,"Initializing linear solver.") cnt.linear_solver_time = @@ -210,7 +217,7 @@ function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options; !isempty(option_linear_solver) && print_ignored_options(logger, option_linear_solver) - return InteriorPointSolver{KKTSystem}(nlp,kkt,opt,cnt,logger, + return InteriorPointSolver{T,KKTSystem}(nlp,kkt,opt,cnt,logger, n,m,nlb,nub,x,l,zl,zu,xl,xu,0.,f,c, jacl, d, p, @@ -221,7 +228,7 @@ function InteriorPointSolver{KKTSystem}(nlp::AbstractNLPModel, opt::Options; linear_solver,iterator, obj_scale,con_scale,con_jac_scale, 0.,0.,0.,0.,0.,0.,0.,0.,0.," ",0.,0.,0., - Vector{Float64}[],nothing,INITIAL,Dict(), + Vector{T}[],nothing,INITIAL,Dict(), ) end diff --git a/src/IPM/callbacks.jl b/src/IPM/callbacks.jl index f742388d..fd98d7fb 100644 --- a/src/IPM/callbacks.jl +++ b/src/IPM/callbacks.jl @@ -1,4 +1,4 @@ -function eval_f_wrapper(ips::InteriorPointSolver, x::Vector{Float64}) +function eval_f_wrapper(ips::InteriorPointSolver, x::Vector{T}) where T nlp = ips.nlp cnt = ips.cnt @trace(ips.logger,"Evaluating objective.") @@ -9,7 +9,7 @@ function eval_f_wrapper(ips::InteriorPointSolver, x::Vector{Float64}) return obj_val*ips.obj_scale[] end -function eval_grad_f_wrapper!(ips::InteriorPointSolver, f::Vector{Float64},x::Vector{Float64}) +function eval_grad_f_wrapper!(ips::InteriorPointSolver, f::Vector{T},x::Vector{T}) where T nlp = ips.nlp cnt = ips.cnt @trace(ips.logger,"Evaluating objective gradient.") @@ -26,7 +26,7 @@ function eval_grad_f_wrapper!(ips::InteriorPointSolver, f::Vector{Float64},x::Ve return f end -function eval_cons_wrapper!(ips::InteriorPointSolver, c::Vector{Float64},x::Vector{Float64}) +function eval_cons_wrapper!(ips::InteriorPointSolver, c::Vector{T},x::Vector{T}) where T nlp = ips.nlp cnt = ips.cnt @trace(ips.logger, "Evaluating constraints.") @@ -45,7 +45,7 @@ function eval_cons_wrapper!(ips::InteriorPointSolver, c::Vector{Float64},x::Vect return c end -function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x::Vector{Float64}) +function 
eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x::Vector{T}) where T nlp = ipp.nlp cnt = ipp.cnt ns = length(ipp.ind_ineq) @@ -65,7 +65,7 @@ function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x:: return jac end -function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x::Vector{Float64},l::Vector{Float64};is_resto=false) +function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem, x::Vector{T},l::Vector{T};is_resto=false) where T nlp = ipp.nlp cnt = ipp.cnt @trace(ipp.logger,"Evaluating Lagrangian Hessian.") @@ -86,7 +86,7 @@ function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractKKTSystem return hess end -function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem, x::Vector{Float64}) +function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem, x::Vector{T}) where T nlp = ipp.nlp cnt = ipp.cnt ns = length(ipp.ind_ineq) @@ -105,7 +105,7 @@ function eval_jac_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem return jac end -function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem, x::Vector{Float64},l::Vector{Float64};is_resto=false) +function eval_lag_hess_wrapper!(ipp::InteriorPointSolver, kkt::AbstractDenseKKTSystem, x::Vector{T},l::Vector{T};is_resto=false) where T nlp = ipp.nlp cnt = ipp.cnt @trace(ipp.logger,"Evaluating Lagrangian Hessian.") diff --git a/src/IPM/factorization.jl b/src/IPM/factorization.jl index ae6742eb..66edab72 100644 --- a/src/IPM/factorization.jl +++ b/src/IPM/factorization.jl @@ -36,10 +36,10 @@ function solve_refine_wrapper!( end function solve_refine_wrapper!( - ips::InteriorPointSolver{<:DenseCondensedKKTSystem}, + ips::InteriorPointSolver{T,<:DenseCondensedKKTSystem}, x::AbstractKKTVector, b::AbstractKKTVector, -) +) where T cnt = ips.cnt @trace(ips.logger,"Iterative solution started.") fixed_variable_treatment_vec!(full(b), ips.ind_fixed) diff --git a/src/IPM/kernels.jl b/src/IPM/kernels.jl index 945168cd..9082d30c 100644 --- a/src/IPM/kernels.jl +++ b/src/IPM/kernels.jl @@ -311,10 +311,10 @@ function initialize_variables!(x,xl,xu,bound_push,bound_fac) end end -function adjust_boundary!(x_lr,xl_r,x_ur,xu_r,mu) +function adjust_boundary!(x_lr::VT,xl_r,x_ur,xu_r,mu) where {T, VT <: AbstractVector{T}} adjusted = 0 - c1 = eps(Float64)*mu - c2= eps(Float64)^(3/4) + c1 = eps(T)*mu + c2= eps(T)^(3/4) @simd for i=1:length(xl_r) @inbounds x_lr[i]-xl_r[i] < c1 && (xl_r[i] -= c2*max(1,abs(x_lr[i]));adjusted+=1) end @@ -355,9 +355,9 @@ function get_alpha_min(theta,varphi_d,theta_min,gamma_theta,gamma_phi,alpha_min_ end is_switching(varphi_d,alpha,s_phi,del,theta,s_theta) = varphi_d < 0 && alpha*(-varphi_d)^s_phi > del*theta^s_theta is_armijo(varphi_trial,varphi,eta_phi,alpha,varphi_d) = varphi_trial <= varphi + eta_phi*alpha*varphi_d -is_sufficient_progress(theta_trial,theta,gamma_theta,varphi_trial,varphi,gamma_phi,has_constraints) = - (has_constraints && ((theta_trial<=(1-gamma_theta)*theta+10*eps(Float64)*abs(theta))) || - ((varphi_trial<=varphi-gamma_phi*theta +10*eps(Float64)*abs(varphi)))) +is_sufficient_progress(theta_trial::T,theta,gamma_theta,varphi_trial,varphi,gamma_phi,has_constraints) where T = + (has_constraints && ((theta_trial<=(1-gamma_theta)*theta+10*eps(T)*abs(theta))) || + ((varphi_trial<=varphi-gamma_phi*theta +10*eps(T)*abs(varphi)))) augment_filter!(filter,theta,varphi,gamma_theta) = push!(filter,((1-gamma_theta)*theta,varphi-gamma_theta*theta)) function 
is_filter_acceptable(filter,theta,varphi) !isnan(theta) || return false diff --git a/src/IPM/restoration.jl b/src/IPM/restoration.jl index ca349450..fca35020 100644 --- a/src/IPM/restoration.jl +++ b/src/IPM/restoration.jl @@ -1,49 +1,49 @@ -mutable struct RobustRestorer - obj_val_R::Float64 - f_R::Vector{Float64} - x_ref::Vector{Float64} +mutable struct RobustRestorer{T} + obj_val_R::T + f_R::Vector{T} + x_ref::Vector{T} - theta_ref::Float64 - D_R::Vector{Float64} - obj_val_R_trial::Float64 + theta_ref::T + D_R::Vector{T} + obj_val_R_trial::T - pp::Vector{Float64} - nn::Vector{Float64} - zp::Vector{Float64} - zn::Vector{Float64} + pp::Vector{T} + nn::Vector{T} + zp::Vector{T} + zn::Vector{T} - dpp::Vector{Float64} - dnn::Vector{Float64} - dzp::Vector{Float64} - dzn::Vector{Float64} + dpp::Vector{T} + dnn::Vector{T} + dzp::Vector{T} + dzn::Vector{T} - pp_trial::Vector{Float64} - nn_trial::Vector{Float64} + pp_trial::Vector{T} + nn_trial::Vector{T} - inf_pr_R::Float64 - inf_du_R::Float64 - inf_compl_R::Float64 + inf_pr_R::T + inf_du_R::T + inf_compl_R::T - mu_R::Float64 - tau_R::Float64 - zeta::Float64 + mu_R::T + tau_R::T + zeta::T - filter::Vector{Tuple{Float64,Float64}} + filter::Vector{Tuple{T,T}} end -function RobustRestorer(ips::AbstractInteriorPointSolver) +function RobustRestorer(ips::AbstractInteriorPointSolver{T}) where T - nn = Vector{Float64}(undef,ips.m) - zp = Vector{Float64}(undef,ips.m) - zn = Vector{Float64}(undef,ips.m) - dpp= Vector{Float64}(undef,ips.m) - dnn= Vector{Float64}(undef,ips.m) - dzp= Vector{Float64}(undef,ips.m) - dzn= Vector{Float64}(undef,ips.m) - pp_trial = Vector{Float64}(undef,ips.m) - nn_trial = Vector{Float64}(undef,ips.m) + nn = Vector{T}(undef,ips.m) + zp = Vector{T}(undef,ips.m) + zn = Vector{T}(undef,ips.m) + dpp= Vector{T}(undef,ips.m) + dnn= Vector{T}(undef,ips.m) + dzp= Vector{T}(undef,ips.m) + dzn= Vector{T}(undef,ips.m) + pp_trial = Vector{T}(undef,ips.m) + nn_trial = Vector{T}(undef,ips.m) - return RobustRestorer( + return RobustRestorer{T}( 0., primal(ips._w2), primal(ips._w1), @@ -57,7 +57,7 @@ function RobustRestorer(ips::AbstractInteriorPointSolver) dual(ips._w2), dual(ips._w1), 0.,0.,0.,0.,0.,0., - Tuple{Float64,Float64}[], + Tuple{T,T}[], ) end diff --git a/src/IPM/solver.jl b/src/IPM/solver.jl index fc205659..0eb6f666 100644 --- a/src/IPM/solver.jl +++ b/src/IPM/solver.jl @@ -213,7 +213,7 @@ function regular!(ips::AbstractInteriorPointSolver) ips.alpha = alpha_max varphi_trial= 0. theta_trial = 0. - small_search_norm = get_rel_search_norm(ips.x, primal(ips.d)) < 10*eps(Float64) + small_search_norm = get_rel_search_norm(ips.x, primal(ips.d)) < 10*eps(eltype(ips.x)) switching_condition = is_switching(varphi_d,ips.alpha,ips.opt.s_phi,ips.opt.delta,2.,ips.opt.s_theta) armijo_condition = false while true @@ -247,7 +247,7 @@ function regular!(ips::AbstractInteriorPointSolver) return RESTORE else @trace(ips.logger,"Step rejected; proceed with the next trial step.") - ips.alpha * norm(primal(ips.d)) < eps(Float64)*10 && + ips.alpha * norm(primal(ips.d)) < eps(eltype(ips.x))*10 && return ips.cnt.acceptable_cnt >0 ? SOLVED_TO_ACCEPTABLE_LEVEL : SEARCH_DIRECTION_BECOMES_TOO_SMALL end @@ -434,7 +434,7 @@ function robust!(ips::InteriorPointSolver) ips.cnt.l = 1 theta_R_trial = 0. varphi_R_trial = 0. 
- small_search_norm = get_rel_search_norm(ips.x, primal(ips.d)) < 10*eps(Float64) + small_search_norm = get_rel_search_norm(ips.x, primal(ips.d)) < 10*eps(eltype(ips.x)) switching_condition = is_switching(varphi_d_R,ips.alpha,ips.opt.s_phi,ips.opt.delta,theta_R,ips.opt.s_theta) armijo_condition = false @@ -470,7 +470,7 @@ function robust!(ips::InteriorPointSolver) return RESTORATION_FAILED else @trace(ips.logger,"Step rejected; proceed with the next trial step.") - ips.alpha < eps(Float64)*10 && return ips.cnt.acceptable_cnt >0 ? + ips.alpha < eps(eltype(ips.x))*10 && return ips.cnt.acceptable_cnt >0 ? SOLVED_TO_ACCEPTABLE_LEVEL : SEARCH_DIRECTION_BECOMES_TOO_SMALL end end @@ -632,8 +632,8 @@ end curv_test(t,n,g,wx,inertia_free_tol) = dot(wx,t) + max(dot(wx,n)-dot(g,n),0) - inertia_free_tol*dot(t,t) >=0 -function second_order_correction(ips::AbstractInteriorPointSolver,alpha_max::Float64,theta::Float64,varphi::Float64, - theta_trial::Float64,varphi_d::Float64,switching_condition::Bool) +function second_order_correction(ips::AbstractInteriorPointSolver,alpha_max,theta,varphi, + theta_trial,varphi_d,switching_condition::Bool) @trace(ips.logger,"Second-order correction started.") dual(ips._w1) .= alpha_max .* ips.c .+ ips.c_trial diff --git a/src/KKT/rhs.jl b/src/KKT/rhs.jl index 95a12c2e..f5080d46 100644 --- a/src/KKT/rhs.jl +++ b/src/KKT/rhs.jl @@ -95,14 +95,14 @@ struct ReducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT} xl::VT # unsafe view end -ReducedKKTVector(n::Int, m::Int, nlb::Int, nub::Int) = ReducedKKTVector(n, m) -function ReducedKKTVector(n::Int, m::Int) - x = Vector{Float64}(undef, n + m) +ReducedKKTVector{T,VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}} = ReducedKKTVector{T,VT}(n, m) +function ReducedKKTVector{T,VT}(n::Int, m::Int) where {T, VT <: AbstractVector{T}} + x = VT(undef, n + m) fill!(x, 0.0) # Wrap directly array x to avoid dealing with views xp = _madnlp_unsafe_wrap(x, n) xl = _madnlp_unsafe_wrap(x, m, n+1) - return ReducedKKTVector{Float64, Vector{Float64}}(x, xp, xl) + return ReducedKKTVector{T, VT}(x, xp, xl) end function ReducedKKTVector(rhs::AbstractKKTVector) return ReducedKKTVector(number_primal(rhs), number_dual(rhs)) @@ -133,8 +133,8 @@ struct UnreducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT} xzu::VT # unsafe view end -function UnreducedKKTVector(n::Int, m::Int, nlb::Int, nub::Int) - values = Vector{Float64}(undef, n + m + nlb + nub) +function UnreducedKKTVector{T, VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}} + values = VT(undef,n+m+nlb+nub) fill!(values, 0.0) # Wrap directly array x to avoid dealing with views x = _madnlp_unsafe_wrap(values, n + m) # Primal-Dual @@ -142,7 +142,7 @@ function UnreducedKKTVector(n::Int, m::Int, nlb::Int, nub::Int) xl = _madnlp_unsafe_wrap(values, m, n+1) # Dual xzl = _madnlp_unsafe_wrap(values, nlb, n + m + 1) # Lower bound xzu = _madnlp_unsafe_wrap(values, nub, n + m + nlb + 1) # Upper bound - return UnreducedKKTVector{Float64, Vector{Float64}}(values, x, xp, xl, xzl, xzu) + return UnreducedKKTVector{T, VT}(values, x, xp, xl, xzl, xzu) end full(rhs::UnreducedKKTVector) = rhs.values diff --git a/src/LinearSolvers/backsolve.jl b/src/LinearSolvers/backsolve.jl index 0a32078c..22cb3b07 100644 --- a/src/LinearSolvers/backsolve.jl +++ b/src/LinearSolvers/backsolve.jl @@ -1,7 +1,7 @@ # MadNLP.jl # Created by Sungho Shin (sungho.shin@wisc.edu) -struct RichardsonIterator{T, VT, KKT, LinSolver} <: AbstractIterator +struct 
diff --git a/src/KKT/rhs.jl b/src/KKT/rhs.jl
index 95a12c2e..f5080d46 100644
--- a/src/KKT/rhs.jl
+++ b/src/KKT/rhs.jl
@@ -95,14 +95,14 @@ struct ReducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT}
     xl::VT # unsafe view
 end
 
-ReducedKKTVector(n::Int, m::Int, nlb::Int, nub::Int) = ReducedKKTVector(n, m)
-function ReducedKKTVector(n::Int, m::Int)
-    x = Vector{Float64}(undef, n + m)
+ReducedKKTVector{T,VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}} = ReducedKKTVector{T,VT}(n, m)
+function ReducedKKTVector{T,VT}(n::Int, m::Int) where {T, VT <: AbstractVector{T}}
+    x = VT(undef, n + m)
     fill!(x, 0.0)
     # Wrap directly array x to avoid dealing with views
     xp = _madnlp_unsafe_wrap(x, n)
     xl = _madnlp_unsafe_wrap(x, m, n+1)
-    return ReducedKKTVector{Float64, Vector{Float64}}(x, xp, xl)
+    return ReducedKKTVector{T, VT}(x, xp, xl)
 end
 function ReducedKKTVector(rhs::AbstractKKTVector)
     return ReducedKKTVector(number_primal(rhs), number_dual(rhs))
@@ -133,8 +133,8 @@ struct UnreducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT}
     xzu::VT # unsafe view
 end
 
-function UnreducedKKTVector(n::Int, m::Int, nlb::Int, nub::Int)
-    values = Vector{Float64}(undef, n + m + nlb + nub)
+function UnreducedKKTVector{T, VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}}
+    values = VT(undef,n+m+nlb+nub)
     fill!(values, 0.0)
     # Wrap directly array x to avoid dealing with views
     x = _madnlp_unsafe_wrap(values, n + m) # Primal-Dual
@@ -142,7 +142,7 @@ function UnreducedKKTVector(n::Int, m::Int, nlb::Int, nub::Int)
     xl = _madnlp_unsafe_wrap(values, m, n+1) # Dual
     xzl = _madnlp_unsafe_wrap(values, nlb, n + m + 1) # Lower bound
     xzu = _madnlp_unsafe_wrap(values, nub, n + m + nlb + 1) # Upper bound
-    return UnreducedKKTVector{Float64, Vector{Float64}}(values, x, xp, xl, xzl, xzu)
+    return UnreducedKKTVector{T, VT}(values, x, xp, xl, xzl, xzu)
 end
 
 full(rhs::UnreducedKKTVector) = rhs.values
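Callers now pick both the element type `T` and the storage `VT` (e.g. `Vector{Float32}` on the CPU, or a `CuVector` on the GPU). A small sketch of the typed constructors above (the sizes are arbitrary):

    w = MadNLP.UnreducedKKTVector{Float32, Vector{Float32}}(3, 2, 1, 1)
    eltype(MadNLP.full(w))   # Float32
    length(MadNLP.full(w))   # 7, i.e. n + m + nlb + nub
    r = MadNLP.ReducedKKTVector{Float32, Vector{Float32}}(3, 2)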
diff --git a/src/LinearSolvers/backsolve.jl b/src/LinearSolvers/backsolve.jl
index 0a32078c..22cb3b07 100644
--- a/src/LinearSolvers/backsolve.jl
+++ b/src/LinearSolvers/backsolve.jl
@@ -1,7 +1,7 @@
 # MadNLP.jl
 # Created by Sungho Shin (sungho.shin@wisc.edu)
 
-struct RichardsonIterator{T, VT, KKT, LinSolver} <: AbstractIterator
+struct RichardsonIterator{T, VT, KKT, LinSolver <: AbstractLinearSolver{T}} <: AbstractIterator{T}
     linear_solver::LinSolver
     kkt::KKT
     residual::VT
@@ -11,11 +11,11 @@ struct RichardsonIterator{T, VT, KKT, LinSolver} <: AbstractIterator
     logger::Logger
 end
 function RichardsonIterator(
-    linear_solver::AbstractLinearSolver,
+    linear_solver::AbstractLinearSolver{T},
     kkt::AbstractKKTSystem,
     res::AbstractVector;
-    max_iter=10, tol=1e-10, acceptable_tol=1e-5, logger=Logger(),
-)
+    max_iter=10, tol=T(1e-10), acceptable_tol=T(1e-5), logger=Logger(),
+) where T
     return RichardsonIterator(
         linear_solver, kkt, res, max_iter, tol, acceptable_tol, logger,
     )
@@ -24,18 +24,18 @@ end
 # Solve reduced KKT system. Require only the primal/dual values.
 function solve_refine!(
     x::AbstractKKTVector{T, VT},
-    solver::RichardsonIterator{T, VT, KKT},
+    solver::RichardsonIterator{T, VT, KKT, LinSolver},
     b::AbstractKKTVector{T, VT},
-) where {T, VT, KKT<:AbstractReducedKKTSystem}
+) where {T, VT, KKT<:AbstractReducedKKTSystem, LinSolver}
     solve_refine!(primal_dual(x), solver, primal_dual(b))
 end
 
 # Solve unreduced KKT system. Require UnreducedKKTVector as inputs.
 function solve_refine!(
     x::UnreducedKKTVector{T, VT},
-    solver::RichardsonIterator{T, VT, KKT},
+    solver::RichardsonIterator{T, VT, KKT, LinSolver},
     b::UnreducedKKTVector{T, VT},
-) where {T, VT, KKT<:AbstractUnreducedKKTSystem, LinSolver}
+) where {T, VT, KKT<:AbstractUnreducedKKTSystem, LinSolver}
     solve_refine!(full(x), solver, full(b))
 end
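Because the iterator is now typed, the default tolerances are materialized in the working precision (`T(1e-10)` is `1.0f-10` when `T == Float32`) instead of being hard-coded `Float64` literals. A hypothetical call with an explicit single-precision tolerance, assuming `linear_solver`, `kkt`, and `res` have already been built for a Float32 problem:

    it = MadNLP.RichardsonIterator(linear_solver, kkt, res; tol=Float32(1e-8))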
diff --git a/src/LinearSolvers/lapack.jl b/src/LinearSolvers/lapack.jl
index ab526337..60f1c628 100644
--- a/src/LinearSolvers/lapack.jl
+++ b/src/LinearSolvers/lapack.jl
@@ -2,10 +2,10 @@
     lapack_algorithm::LinearFactorization = BUNCHKAUFMAN
 end
 
-mutable struct LapackCPUSolver <: AbstractLinearSolver
-    dense::Matrix{Float64}
-    fact::Matrix{Float64}
-    work::Vector{Float64}
+mutable struct LapackCPUSolver{T} <: AbstractLinearSolver{T}
+    dense::Matrix{T}
+    fact::Matrix{T}
+    work::Vector{T}
     lwork::BlasInt
     info::Ref{BlasInt}
     etc::Dict{Symbol,Any}
@@ -13,65 +13,79 @@ mutable struct LapackCPUSolver <: AbstractLinearSolver
     logger::Logger
 end
 
-sytrf(uplo,n,a,lda,ipiv,work,lwork,info)=ccall(
-    (@blasfunc(dsytrf_),libblas),
-    Cvoid,
-    (Ref{Cchar},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
-    uplo,n,a,lda,ipiv,work,lwork,info)
-sytrs(uplo,n,nrhs,a,lda,ipiv,b,ldb,info)=ccall(
-    (@blasfunc(dsytrs_),libblas),
-    Cvoid,
-    (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
-    uplo,n,nrhs,a,lda,ipiv,b,ldb,info)
-getrf(m,n,a,lda,ipiv,info)=ccall(
-    (@blasfunc(dgetrf_),libblas),
-    Cvoid,
-    (Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt},Ptr{BlasInt}),
-    m,n,a,lda,ipiv,info)
-getrs(trans,n,nrhs,a,lda,ipiv,b,ldb,info)=ccall(
-    (@blasfunc(dgetrs_),libblas),
-    Cvoid,
-    (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
-    trans,n,nrhs,a,lda,ipiv,b,ldb,info)
-geqrf(m,n,a,lda,tau,work,lwork,info)=ccall(
-    (@blasfunc(dgeqrf_),libblas),
-    Cvoid,
-    (Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{Cdouble},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
-    m,n,a,lda,tau,work,lwork,info)
-ormqr(side,trans,m,n,k,a,lda,tau,c,ldc,work,lwork,info)=ccall(
-    (@blasfunc(dormqr_),libblas),
-    Cvoid,
-    (Ref{Cchar}, Ref{Cchar}, Ref{BlasInt}, Ref{BlasInt},Ref{BlasInt}, Ptr{Cdouble}, Ref{BlasInt}, Ptr{Cdouble},Ptr{Cdouble}, Ref{BlasInt}, Ptr{Cdouble}, Ref{BlasInt},Ptr{BlasInt}),
-    side,trans,m,n,k,a,lda,tau,c,ldc,work,lwork,info)
-trsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb)=ccall(
-    (@blasfunc(dtrsm_),libblas),
-    Cvoid,
-    (Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ref{Cdouble},Ptr{Cdouble},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt}),
-    side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb)
-potrf(uplo,n,a,lda,info)=ccall(
-    (@blasfunc(dpotrf_),libblas),
-    Cvoid,
-    (Ref{Cchar},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
-    uplo,n,a,lda,info)
-potrs(uplo,n,nrhs,a,lda,b,ldb,info)=ccall(
-    (@blasfunc(dpotrs_),libblas),
-    Cvoid,
-    (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt},Ptr{BlasInt}),
-    uplo,n,nrhs,a,lda,b,ldb,info)
-
-function LapackCPUSolver(dense::Matrix{Float64};
-                 option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
-                 opt=LapackOptions(),logger=Logger(),
-                 kwargs...)
+
+for (sytrf,sytrs,getrf,getrs,geqrf,ormqr,trsm,potrf,potrs,typ) in (
+    (@blasfunc(dsytrf_), @blasfunc(dsytrs_), @blasfunc(dgetrf_), @blasfunc(dgetrs_),
+     @blasfunc(dgeqrf_), @blasfunc(dormqr_), @blasfunc(dtrsm_), @blasfunc(dpotrf_), @blasfunc(dpotrs_),
+     Float64),
+    (@blasfunc(ssytrf_), @blasfunc(ssytrs_), @blasfunc(sgetrf_), @blasfunc(sgetrs_),
+     @blasfunc(sgeqrf_), @blasfunc(sormqr_), @blasfunc(strsm_), @blasfunc(spotrf_), @blasfunc(spotrs_),
+     Float32)
+    )
+    @eval begin
+        sytrf(uplo,n,a::Matrix{$typ},lda,ipiv,work,lwork,info)=ccall(
+            ($(string(sytrf)),libblas),
+            Cvoid,
+            (Ref{Cchar},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+            uplo,n,a,lda,ipiv,work,lwork,info)
+        sytrs(uplo,n,nrhs,a::Matrix{$typ},lda,ipiv,b,ldb,info)=ccall(
+            ($(string(sytrs)),libblas),
+            Cvoid,
+            (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+            uplo,n,nrhs,a,lda,ipiv,b,ldb,info)
+        getrf(m,n,a::Matrix{$typ},lda,ipiv,info)=ccall(
+            ($(string(getrf)),libblas),
+            Cvoid,
+            (Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt},Ptr{BlasInt}),
+            m,n,a,lda,ipiv,info)
+        getrs(trans,n,nrhs,a::Matrix{$typ},lda,ipiv,b,ldb,info)=ccall(
+            ($(string(getrs)),libblas),
+            Cvoid,
+            (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+            trans,n,nrhs,a,lda,ipiv,b,ldb,info)
+        geqrf(m,n,a::Matrix{$typ},lda,tau,work,lwork,info)=ccall(
+            ($(string(geqrf)),libblas),
+            Cvoid,
+            (Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{$typ},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+            m,n,a,lda,tau,work,lwork,info)
+        ormqr(side,trans,m,n,k,a::Matrix{$typ},lda,tau,c,ldc,work,lwork,info)=ccall(
+            ($(string(ormqr)),libblas),
+            Cvoid,
+            (Ref{Cchar}, Ref{Cchar}, Ref{BlasInt}, Ref{BlasInt},Ref{BlasInt}, Ptr{$typ}, Ref{BlasInt}, Ptr{$typ},Ptr{$typ}, Ref{BlasInt}, Ptr{$typ}, Ref{BlasInt},Ptr{BlasInt}),
+            side,trans,m,n,k,a,lda,tau,c,ldc,work,lwork,info)
+        trsm(side,uplo,transa,diag,m,n,alpha,a::Matrix{$typ},lda,b,ldb)=ccall(
+            ($(string(trsm)),libblas),
+            Cvoid,
+            (Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ref{$typ},Ptr{$typ},Ref{BlasInt},Ptr{$typ},Ref{BlasInt}),
+            side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb)
+        potrf(uplo,n,a::Matrix{$typ},lda,info)=ccall(
+            ($(string(potrf)),libblas),
+            Cvoid,
+            (Ref{Cchar},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+            uplo,n,a,lda,info)
+        potrs(uplo,n,nrhs,a::Matrix{$typ},lda,b,ldb,info)=ccall(
+            ($(string(potrs)),libblas),
+            Cvoid,
+            (Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{$typ},Ref{BlasInt},Ptr{BlasInt}),
+            uplo,n,nrhs,a,lda,b,ldb,info)
+    end
+end
+
+function LapackCPUSolver(
+    dense::Matrix{T};
+    option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
+    opt=LapackOptions(),
+    logger=Logger(),
+    kwargs...) where T
     set_options!(opt,option_dict,kwargs...)
     fact = copy(dense)
 
     etc = Dict{Symbol,Any}()
-    work = Vector{Float64}(undef, 1)
+    work = Vector{T}(undef, 1)
     info=0
 
-    return LapackCPUSolver(dense,fact,work,-1,info,etc,opt,logger)
+    return LapackCPUSolver{T}(dense,fact,work,-1,info,etc,opt,logger)
 end
 
 function factorize!(M::LapackCPUSolver)
@@ -87,7 +101,7 @@ function factorize!(M::LapackCPUSolver)
         error(LOGGER,"Invalid lapack_algorithm")
     end
 end
-function solve!(M::LapackCPUSolver, x::Vector{Float64})
+function solve!(M::LapackCPUSolver{T}, x::Vector{T}) where T
     if M.opt.lapack_algorithm == BUNCHKAUFMAN
         solve_bunchkaufman!(M,x)
     elseif M.opt.lapack_algorithm == LU
@@ -133,9 +147,10 @@ function solve_lu!(M::LapackCPUSolver,x)
     return x
 end
 
-function factorize_qr!(M::LapackCPUSolver)
+
+function factorize_qr!(M::LapackCPUSolver{T}) where T
     size(M.fact,1) == 0 && return M
-    haskey(M.etc,:tau) || (M.etc[:tau] = Vector{Float64}(undef,size(M.dense,1)))
+    haskey(M.etc,:tau) || (M.etc[:tau] = Vector{T}(undef,size(M.dense,1)))
     tril_to_full!(M.dense)
     M.lwork = -1
     M.fact .= M.dense
@@ -218,3 +233,5 @@ function num_neg_ev(n,D,ipiv)
     return numneg
 end
 
+is_supported(::Type{LapackCPUSolver},::Type{Float32}) = true
+is_supported(::Type{LapackCPUSolver},::Type{Float64}) = true
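For reference, a minimal sketch of the single-precision CPU path that the generated wrappers enable (the 2x2 system is illustrative):

    using MadNLP
    A = Float32[4.0 1.0; 1.0 3.0]
    M = LapackCPUSolver(A)   # Bunch-Kaufman by default, via the ssytrf_/ssytrs_ wrappers
    MadNLP.factorize!(M)
    b = Float32[1.0, 2.0]
    MadNLP.solve!(M, b)      # b is overwritten with the solution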
diff --git a/src/LinearSolvers/linearsolvers.jl b/src/LinearSolvers/linearsolvers.jl
index 6b2212ea..23aaedd4 100644
--- a/src/LinearSolvers/linearsolvers.jl
+++ b/src/LinearSolvers/linearsolvers.jl
@@ -10,7 +10,7 @@
 Abstract type for linear solver targeting
 the resolution of the linear system ``Ax=b``.
 """
-abstract type AbstractLinearSolver end
+abstract type AbstractLinearSolver{T} end
 
 """
     introduce(::AbstractLinearSolver)
@@ -37,6 +37,25 @@ factorized previously with [`factorize!`](@ref).
 """
 function solve! end
 
+"""
+    is_supported(solver,T)
+
+Return `true` if `solver` supports the floating-point
+number type `T`.
+
+# Examples
+```julia-repl
+julia> is_supported(UmfpackSolver,Float64)
+true
+
+julia> is_supported(UmfpackSolver,Float32)
+false
+```
+"""
+function is_supported(::Type{LS},::Type{T}) where {LS <: AbstractLinearSolver, T <: AbstractFloat}
+    return false
+end
+
 """
     is_inertia(::AbstractLinearSolver)
 
@@ -60,6 +79,7 @@ with
 """
 function inertia end
 
+
 function improve! end
 
 # Default function for AbstractKKTVector
@@ -70,7 +90,7 @@ end
 
 #= Iterator's interface =#
 
-abstract type AbstractIterator end
+abstract type AbstractIterator{T} end
 
 """
     solve_refine!(x, ::AbstractIterator, b)
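The fallback above returns `false` for any solver/type pair, so each solver opts in explicitly, as `LapackCPUSolver` and `UmfpackSolver` do elsewhere in this patch. A hypothetical third-party solver would register its supported precisions the same way (`MySolver` is an illustrative name):

    struct MySolver{T} <: MadNLP.AbstractLinearSolver{T} end
    MadNLP.is_supported(::Type{MySolver}, ::Type{Float64}) = true

    MadNLP.is_supported(MySolver, Float64)  # true
    MadNLP.is_supported(MySolver, Float32)  # false, from the generic fallback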
diff --git a/src/LinearSolvers/umfpack.jl b/src/LinearSolvers/umfpack.jl
index f1d533de..8bc46255 100644
--- a/src/LinearSolvers/umfpack.jl
+++ b/src/LinearSolvers/umfpack.jl
@@ -9,48 +9,61 @@ const umfpack_default_info = copy(UMFPACK.umf_info)
     umfpack_strategy::Float64 = 2.
 end
 
-mutable struct UmfpackSolver <: AbstractLinearSolver
+mutable struct UmfpackSolver{T} <: AbstractLinearSolver{T}
     inner::UMFPACK.UmfpackLU
-    tril::SparseMatrixCSC{Float64}
-    full::SparseMatrixCSC{Float64}
-    tril_to_full_view::SubVector{Float64}
+    tril::SparseMatrixCSC{T}
+    full::SparseMatrixCSC{T}
+    tril_to_full_view::SubVector{T}
 
-    p::Vector{Float64}
+    p::Vector{T}
     tmp::Vector{Ptr{Cvoid}}
-    ctrl::Vector{Float64}
-    info::Vector{Float64}
+    ctrl::Vector{T}
+    info::Vector{T}
 
     opt::UmfpackOptions
     logger::Logger
 end
 
-umfpack_di_numeric(colptr::Vector{Int32},rowval::Vector{Int32},
-                   nzval::Vector{Float64},symbolic::Ptr{Nothing},
-                   tmp::Vector{Ptr{Nothing}},ctrl::Vector{Float64},
-                   info::Vector{Float64}) = ccall(
-                       (:umfpack_di_numeric,:libumfpack),
-                       Int32,
-                       (Ptr{Int32},Ptr{Int32},Ptr{Float64},Ptr{Cvoid},Ptr{Cvoid},
-                        Ptr{Float64},Ptr{Float64}),
-                       colptr,rowval,nzval,symbolic,tmp,ctrl,info)
-umfpack_di_solve(typ,colptr,rowval,nzval,x,b,numeric,ctrl,info) = ccall(
-    (:umfpack_di_solve,:libumfpack),
-    Int32,
-    (Int32, Ptr{Int32}, Ptr{Int32}, Ptr{Float64},Ptr{Float64},
-     Ptr{Float64}, Ptr{Cvoid}, Ptr{Float64},Ptr{Float64}),
-    typ,colptr,rowval,nzval,x,b,numeric,ctrl,info)
-
-
-
-function UmfpackSolver(csc::SparseMatrixCSC;
+
+for (numeric,solve,T) in (
+    (:umfpack_di_numeric, :umfpack_di_solve, Float64),
+    (:umfpack_si_numeric, :umfpack_si_solve, Float32),
+    )
+    @eval begin
+        umfpack_numeric(
+            colptr::Vector{Int32},rowval::Vector{Int32},
+            nzval::Vector{$T},symbolic::Ptr{Nothing},
+            tmp::Vector{Ptr{Nothing}},ctrl::Vector{$T},
+            info::Vector{$T}) = ccall(
+                ($(string(numeric)),:libumfpack),
+                Int32,
+                (Ptr{Int32},Ptr{Int32},Ptr{$T},Ptr{Cvoid},Ptr{Cvoid},
+                 Ptr{$T},Ptr{$T}),
+                colptr,rowval,nzval,symbolic,tmp,ctrl,info)
+        umfpack_solve(
+            typ,colptr::Vector{Int32},rowval::Vector{Int32},
+            nzval::Vector{$T},x::Vector{$T},b::Vector{$T},
+            numeric,ctrl::Vector{$T},info::Vector{$T}) = ccall(
+                ($(string(solve)),:libumfpack),
+                Int32,
+                (Int32, Ptr{Int32}, Ptr{Int32}, Ptr{$T},Ptr{$T},
+                 Ptr{$T}, Ptr{Cvoid}, Ptr{$T},Ptr{$T}),
+                typ,colptr,rowval,nzval,x,b,numeric,ctrl,info)
+    end
+end
+
+
+
+function UmfpackSolver(
+    csc::SparseMatrixCSC{T};
     option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
     opt=UmfpackOptions(),logger=Logger(),
-    kwargs...)
+    kwargs...) where T
     set_options!(opt,option_dict,kwargs)
-    p = Vector{Float64}(undef,csc.n)
+    p = Vector{T}(undef,csc.n)
     full,tril_to_full_view = get_tril_to_full(csc)
 
     full.colptr.-=1; full.rowval.-=1
@@ -73,14 +86,14 @@ end
 function factorize!(M::UmfpackSolver)
     UMFPACK.umfpack_free_numeric(M.inner)
     M.full.nzval.=M.tril_to_full_view
-    status = umfpack_di_numeric(M.inner.colptr,M.inner.rowval,M.inner.nzval,M.inner.symbolic,M.tmp,M.ctrl,M.info)
+    status = umfpack_numeric(M.inner.colptr,M.inner.rowval,M.inner.nzval,M.inner.symbolic,M.tmp,M.ctrl,M.info)
     M.inner.numeric = M.tmp[]
     M.inner.status = status
     return M
 end
-function solve!(M::UmfpackSolver,rhs::Vector{Float64})
-    status = umfpack_di_solve(1,M.inner.colptr,M.inner.rowval,M.inner.nzval,M.p,rhs,M.inner.numeric,M.ctrl,M.info)
+function solve!(M::UmfpackSolver{T},rhs::Vector{T}) where T
+    status = umfpack_solve(1,M.inner.colptr,M.inner.rowval,M.inner.nzval,M.p,rhs,M.inner.numeric,M.ctrl,M.info)
    rhs .= M.p
     return rhs
 end
@@ -100,4 +113,4 @@ function improve!(M::UmfpackSolver)
     return false
 end
 introduce(::UmfpackSolver)="umfpack"
-
+is_supported(::Type{UmfpackSolver},::Type{Float64}) = true
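Note that only `Float64` is registered for `UmfpackSolver` in the final `is_supported` method, so single-precision problems should be routed to a solver that opts in for `Float32`. A sketch of that guard (the fallback choice here is illustrative, not MadNLP's own dispatch logic):

    using MadNLP
    T = Float32
    Solver = MadNLP.is_supported(UmfpackSolver, T) ? UmfpackSolver : LapackCPUSolver
    # Solver === LapackCPUSolver, since UmfpackSolver only registers Float64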
diff --git a/src/MadNLP.jl b/src/MadNLP.jl
index 2051168a..af31ddc8 100644
--- a/src/MadNLP.jl
+++ b/src/MadNLP.jl
@@ -19,7 +19,7 @@ const MOI = MathOptInterface
 const MOIU = MathOptInterface.Utilities
 const NLPModelsCounters = _Counters
 
-export madnlp
+export madnlp, UmfpackSolver, LapackCPUSolver
 
 # Version info
 version() = parsefile(joinpath(@__DIR__,"..","Project.toml"))["version"]
diff --git a/src/matrixtools.jl b/src/matrixtools.jl
index df12d459..da7d9838 100644
--- a/src/matrixtools.jl
+++ b/src/matrixtools.jl
@@ -11,7 +11,7 @@ mutable struct SparseMatrixCOO{Tv,Ti<:Integer, VTv<:AbstractVector{Tv}} <: Abstr
     V::VTv
 end
 size(A::SparseMatrixCOO) = (A.m,A.n)
-getindex(A::SparseMatrixCOO{Float64,Ti},i::Int,j::Int) where Ti <: Integer = sum(A.V[(A.I.==i) .* (A.J.==j)])
+getindex(A::SparseMatrixCOO{Tv,Ti},i::Int,j::Int) where {Tv, Ti <: Integer} = sum(A.V[(A.I.==i) .* (A.J.==j)])
 nnz(A::SparseMatrixCOO) = length(A.I)
 
 function findIJ(S::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
diff --git a/test/madnlp_dense.jl b/test/madnlp_dense.jl
index 43c0a49a..cfae62a3 100644
--- a/test/madnlp_dense.jl
+++ b/test/madnlp_dense.jl
@@ -7,33 +7,39 @@ using SparseArrays
 using Random
 
 function _compare_dense_with_sparse(kkt_system, n, m, ind_fixed, ind_eq)
-    sparse_options = Dict{Symbol, Any}(
-        :kkt_system=>MadNLP.SPARSE_KKT_SYSTEM,
-        :linear_solver=>MadNLP.LapackCPUSolver,
-        :print_level=>MadNLP.ERROR,
-    )
-    dense_options = Dict{Symbol, Any}(
-        :kkt_system=>kkt_system,
-        :linear_solver=>MadNLP.LapackCPUSolver,
-        :print_level=>MadNLP.ERROR,
-    )
-    nlp = MadNLPTests.DenseDummyQP(; n=n, m=m, fixed_variables=ind_fixed, equality_cons=ind_eq)
+    for (T,tol,atol) in [(Float32,1e-3,1e-1), (Float64,1e-8,1e-6)]
+
+        sparse_options = Dict{Symbol, Any}(
+            :kkt_system=>MadNLP.SPARSE_KKT_SYSTEM,
+            :linear_solver=>MadNLP.LapackCPUSolver,
+            :print_level=>MadNLP.ERROR,
+            :tol=>tol
+        )
+        dense_options = Dict{Symbol, Any}(
+            :kkt_system=>kkt_system,
+            :linear_solver=>MadNLP.LapackCPUSolver,
+            :print_level=>MadNLP.ERROR,
+            :tol=>tol
+        )
+
+        nlp = MadNLPTests.DenseDummyQP{T}(; n=n, m=m, fixed_variables=ind_fixed, equality_cons=ind_eq)
 
-    ips = MadNLP.InteriorPointSolver(nlp, option_dict=sparse_options)
-    ipd = MadNLP.InteriorPointSolver(nlp, option_dict=dense_options)
+        ips = MadNLP.InteriorPointSolver(nlp, option_dict=sparse_options)
+        ipd = MadNLP.InteriorPointSolver(nlp, option_dict=dense_options)
 
-    MadNLP.optimize!(ips)
-    MadNLP.optimize!(ipd)
+        MadNLP.optimize!(ips)
+        MadNLP.optimize!(ipd)
 
-    # Check that dense formulation matches exactly sparse formulation
-    @test ips.cnt.k == ipd.cnt.k
-    @test ips.obj_val ≈ ipd.obj_val atol=1e-10
-    @test ips.x ≈ ipd.x atol=1e-10
-    @test ips.l ≈ ipd.l atol=1e-10
-    @test ips.kkt.jac_com[:, 1:n] == ipd.kkt.jac
-    if isa(ipd.kkt, MadNLP.AbstractReducedKKTSystem)
-        @test Symmetric(ips.kkt.aug_com, :L) ≈ ipd.kkt.aug_com atol=1e-10
+        # Check that dense formulation matches exactly sparse formulation
+        @test ips.cnt.k == ipd.cnt.k
+        @test ips.obj_val ≈ ipd.obj_val atol=atol
+        @test ips.x ≈ ipd.x atol=atol
+        @test ips.l ≈ ipd.l atol=atol
+        @test ips.kkt.jac_com[:, 1:n] == ipd.kkt.jac
+        if isa(ipd.kkt, MadNLP.AbstractReducedKKTSystem)
+            @test Symmetric(ips.kkt.aug_com, :L) ≈ ipd.kkt.aug_com atol=atol
+        end
     end
 end
 
@@ -120,4 +126,3 @@ end
     res = MadNLP.optimize!(ips)
     @test ips.status == MadNLP.SOLVE_SUCCEEDED
 end
-
diff --git a/test/madnlp_test.jl b/test/madnlp_test.jl
index 4571b3f5..eca19e41 100644
--- a/test/madnlp_test.jl
+++ b/test/madnlp_test.jl
@@ -70,6 +70,7 @@ testset = [
     ],
 ]
 
+
 for (name,optimizer_constructor,exclude) in testset
     test_madnlp(name,optimizer_constructor,exclude)
 end
diff --git a/test/matrix_test.jl b/test/matrix_test.jl
index 07fa60ac..75f1568b 100644
--- a/test/matrix_test.jl
+++ b/test/matrix_test.jl
@@ -30,14 +30,6 @@ end
 end
 
-MadNLPTests.test_linear_solver(MadNLP.LapackCPUSolver)
-MadNLPTests.test_linear_solver(MadNLP.UmfpackSolver)
-# @test_linear_solver ma27
-# @test_linear_solver ma57
-# @test_linear_solver ma77
-# @test_linear_solver ma86
-# @test_linear_solver ma97
-# @test_linear_solver pardiso
-# @test_linear_solver pardisomkl
-# @test_linear_solver umfpack
-# @test_linear_solver mumps
+MadNLPTests.test_linear_solver(UmfpackSolver,Float64)
+MadNLPTests.test_linear_solver(LapackCPUSolver,Float32)
+MadNLPTests.test_linear_solver(LapackCPUSolver,Float64)
diff --git a/test/nlpmodels_test.jl b/test/nlpmodels_test.jl
deleted file mode 100644
index a5d55fce..00000000
--- a/test/nlpmodels_test.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-hs33 = AmplModel(joinpath(@__DIR__, "hs033.nl"))
-result = madnlp(hs33;print_level=MadNLP.ERROR)
-
-@test result.status == :first_order
-@test solcmp(result.solution,[0.0,1.4142135570650927,1.4142135585382265])
-@test solcmp(result.multipliers,[0.17677669589725922,-0.17677669527079812])
-@test solcmp(result.multipliers_L,[11.000000117266442,1.7719330023793877e-9,1.7753439380861844e-9])
-@test solcmp(result.multipliers_U,[0.,0.,0.])
-@test solcmp([result.objective],[-4.585786548956206])
-
-finalize(hs33)
diff --git a/test/runtests.jl b/test/runtests.jl
index d8e9a5c0..52fac3ed 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,5 @@
 using Test, MadNLP, MadNLPTests, MINLPTests
 import MathOptInterface
-# import AmplNLReader: AmplModel
 import SparseArrays: sparse
 
 @testset "MadNLP test" begin
@@ -12,11 +11,6 @@ import SparseArrays: sparse
         include("MOI_interface_test.jl")
     end
 
-    # this is temporarily commented out due to the incompatibility between NLPModels v0.17.2 and AmplNLReader v0.11.0
-    # @testset "NLPModels interface" begin
-    #     include("nlpmodels_test.jl")
-    # end
-
     @testset "MadNLP test" begin
         include("madnlp_test.jl")
         include("madnlp_dense.jl")
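Taken together, these changes let a problem be modeled, factorized, and solved end to end in single precision. A closing sketch with the updated test helper (sizes and tolerance are illustrative):

    using MadNLP, MadNLPTests
    nlp = MadNLPTests.DenseDummyQP{Float32}(; n=10, m=5)
    result = madnlp(nlp; linear_solver=LapackCPUSolver, tol=1e-3, print_level=MadNLP.ERROR)
    # on success, result.status should report first-order optimality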